From edf2c4eda8ef83c1c86aacd7395d51fc59b89ea1 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Mon, 11 Jan 2016 11:04:40 +0000 Subject: [PATCH] JAL-1989 map hidden column ranges --- src/jalview/util/Comparison.java | 9 +- src/jalview/util/MappingUtils.java | 181 +++++++++++++++++++++++-------- test/jalview/util/MappingUtilsTest.java | 71 ++++++++++++ 3 files changed, 210 insertions(+), 51 deletions(-) diff --git a/src/jalview/util/Comparison.java b/src/jalview/util/Comparison.java index dfc243d..8902e2c 100644 --- a/src/jalview/util/Comparison.java +++ b/src/jalview/util/Comparison.java @@ -265,10 +265,13 @@ public class Comparison char[][] letters = new char[seqs.length][]; for (int i = 0; i < seqs.length; i++) { - char[] sequence = seqs[i].getSequence(); - if (sequence != null) + if (seqs[i] != null) { - letters[i] = sequence; + char[] sequence = seqs[i].getSequence(); + if (sequence != null) + { + letters[i] = sequence; + } } } diff --git a/src/jalview/util/MappingUtils.java b/src/jalview/util/MappingUtils.java index 47f7017..45d166d 100644 --- a/src/jalview/util/MappingUtils.java +++ b/src/jalview/util/MappingUtils.java @@ -529,68 +529,153 @@ public final class MappingUtils * For each mapped column, find the range of columns that residues in that * column map to. */ - for (Object obj : colsel.getSelected()) + List fromSequences = mapFrom.getAlignment().getSequences(); + List toSequences = mapTo.getAlignment().getSequences(); + + for (Integer sel : colsel.getSelected()) { - int col = ((Integer) obj).intValue(); - int mappedToMin = Integer.MAX_VALUE; - int mappedToMax = Integer.MIN_VALUE; + mapColumn(sel.intValue(), codonFrames, mappedColumns, fromSequences, + toSequences, fromGapChar); + } + + for (int[] hidden : colsel.getHiddenColumns()) + { + mapHiddenColumns(hidden, codonFrames, mappedColumns, fromSequences, + toSequences, fromGapChar); + } + return mappedColumns; + } + + /** + * Helper method that maps a [start, end] hidden column range to its mapped + * equivalent + * + * @param hidden + * @param mappings + * @param mappedColumns + * @param fromSequences + * @param toSequences + * @param fromGapChar + */ + protected static void mapHiddenColumns(int[] hidden, + Set mappings, + ColumnSelection mappedColumns, List fromSequences, + List toSequences, char fromGapChar) + { + for (int col = hidden[0]; col <= hidden[1]; col++) + { + int[] mappedTo = findMappedColumns(col, mappings, fromSequences, + toSequences, fromGapChar); /* - * For each sequence in the 'from' alignment + * Add the range of hidden columns to the mapped selection (converting + * base 1 to base 0). */ - for (SequenceI fromSeq : mapFrom.getAlignment().getSequences()) + if (mappedTo != null) { - /* - * Ignore gaps (unmapped anyway) - */ - if (fromSeq.getCharAt(col) == fromGapChar) - { - continue; - } + mappedColumns.hideColumns(mappedTo[0] - 1, mappedTo[1] - 1); + } + } + } + + /** + * Helper method to map one column selection + * + * @param col + * the column number (base 0) + * @param mappings + * the sequence mappings + * @param mappedColumns + * the mapped column selections to add to + * @param fromSequences + * @param toSequences + * @param fromGapChar + */ + protected static void mapColumn(int col, Set mappings, + ColumnSelection mappedColumns, List fromSequences, + List toSequences, char fromGapChar) + { + int[] mappedTo = findMappedColumns(col, mappings, fromSequences, + toSequences, fromGapChar); + + /* + * Add the range of mapped columns to the mapped selection (converting + * base 1 to base 0). Note that this may include intron-only regions which + * lie between the start and end ranges of the selection. + */ + if (mappedTo != null) + { + for (int i = mappedTo[0]; i <= mappedTo[1]; i++) + { + mappedColumns.addElement(i - 1); + } + } + } + + /** + * Helper method to find the range of columns mapped to from one column. + * Returns the maximal range of columns mapped to from all sequences in the + * source column, or null if no mappings were found. + * + * @param col + * @param mappings + * @param fromSequences + * @param toSequences + * @param fromGapChar + * @return + */ + protected static int[] findMappedColumns(int col, + Set mappings, List fromSequences, + List toSequences, char fromGapChar) + { + int[] mappedTo = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE }; + boolean found = false; + + /* + * For each sequence in the 'from' alignment + */ + for (SequenceI fromSeq : fromSequences) + { + /* + * Ignore gaps (unmapped anyway) + */ + if (fromSeq.getCharAt(col) == fromGapChar) + { + continue; + } + + /* + * Get the residue position and find the mapped position. + */ + int residuePos = fromSeq.findPosition(col); + SearchResults sr = buildSearchResults(fromSeq, residuePos, + mappings); + for (Match m : sr.getResults()) + { + int mappedStartResidue = m.getStart(); + int mappedEndResidue = m.getEnd(); + SequenceI mappedSeq = m.getSequence(); /* - * Get the residue position and find the mapped position. + * Locate the aligned sequence whose dataset is mappedSeq. TODO a + * datamodel that can do this efficiently. */ - int residuePos = fromSeq.findPosition(col); - SearchResults sr = buildSearchResults(fromSeq, residuePos, - codonFrames); - for (Match m : sr.getResults()) + for (SequenceI toSeq : toSequences) { - int mappedStartResidue = m.getStart(); - int mappedEndResidue = m.getEnd(); - SequenceI mappedSeq = m.getSequence(); - - /* - * Locate the aligned sequence whose dataset is mappedSeq. TODO a - * datamodel that can do this efficiently. - */ - for (SequenceI toSeq : mapTo.getAlignment().getSequences()) + if (toSeq.getDatasetSequence() == mappedSeq) { - if (toSeq.getDatasetSequence() == mappedSeq) - { - int mappedStartCol = toSeq.findIndex(mappedStartResidue); - int mappedEndCol = toSeq.findIndex(mappedEndResidue); - mappedToMin = Math.min(mappedToMin, mappedStartCol); - mappedToMax = Math.max(mappedToMax, mappedEndCol); - // System.out.println(fromSeq.getName() + " mapped to cols " - // + mappedStartCol + ":" + mappedEndCol); - break; - // note: remove break if we ever want to map one to many sequences - } + int mappedStartCol = toSeq.findIndex(mappedStartResidue); + int mappedEndCol = toSeq.findIndex(mappedEndResidue); + mappedTo[0] = Math.min(mappedTo[0], mappedStartCol); + mappedTo[1] = Math.max(mappedTo[1], mappedEndCol); + found = true; + break; + // note: remove break if we ever want to map one to many sequences } } } - /* - * Add the range of mapped columns to the mapped selection (converting - * base 1 to base 0). Note that this may include intron-only regions which - * lie between the start and end ranges of the selection. - */ - for (int i = mappedToMin; i <= mappedToMax; i++) - { - mappedColumns.addElement(i - 1); - } } - return mappedColumns; + return found ? mappedTo : null; } /** diff --git a/test/jalview/util/MappingUtilsTest.java b/test/jalview/util/MappingUtilsTest.java index 032af30..51c99af 100644 --- a/test/jalview/util/MappingUtilsTest.java +++ b/test/jalview/util/MappingUtilsTest.java @@ -736,4 +736,75 @@ public class MappingUtilsTest assertEquals(12, e.getPosition()); assertEquals(6, e.getNumber()); } + + /** + * Test mapping a column selection including hidden columns + * + * @throws IOException + */ + @Test(groups = { "Functional" }) + public void testMapColumnSelection_hiddenColumns() throws IOException + { + setupMappedAlignments(); + + ColumnSelection colsel = new ColumnSelection(); + + /* + * Column 0 in protein picks up Seq2/L, Seq3/G which map to cols 0-4 and 0-3 + * in dna respectively, overall 0-4 + */ + colsel.hideColumns(0); + ColumnSelection cs = MappingUtils.mapColumnSelection(colsel, + proteinView, dnaView); + assertEquals("[]", cs.getSelected().toString()); + List hidden = cs.getHiddenColumns(); + assertEquals(1, hidden.size()); + assertEquals("[0, 4]", Arrays.toString(hidden.get(0))); + + /* + * Column 1 in protein picks up Seq1/K which maps to cols 0-3 in dna + */ + colsel.revealAllHiddenColumns(); + colsel.hideColumns(1); + cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView); + hidden = cs.getHiddenColumns(); + assertEquals(1, hidden.size()); + assertEquals("[0, 3]", Arrays.toString(hidden.get(0))); + + /* + * Column 2 in protein picks up gaps only - no mapping + */ + colsel.revealAllHiddenColumns(); + colsel.clear(); + colsel.hideColumns(2); + cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView); + assertTrue(cs.getHiddenColumns().isEmpty()); + + /* + * Column 3 in protein picks up Seq1/P, Seq2/Q, Seq3/S which map to columns + * 6-9, 6-10, 5-8 respectively, overall to 5-10 + */ + colsel.revealAllHiddenColumns(); + colsel.clear(); + colsel.hideColumns(3); // 5-10 hidden in dna + colsel.addElement(1); // 0-3 selected in dna + cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView); + assertEquals("[0, 1, 2, 3]", cs.getSelected().toString()); + hidden = cs.getHiddenColumns(); + assertEquals(1, hidden.size()); + assertEquals("[5, 10]", Arrays.toString(hidden.get(0))); + + /* + * Combine hiding columns 1 and 3 to get discontiguous hidden columns + */ + colsel.revealAllHiddenColumns(); + colsel.clear(); + colsel.hideColumns(1); + colsel.hideColumns(3); + cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView); + hidden = cs.getHiddenColumns(); + assertEquals(2, hidden.size()); + assertEquals("[0, 3]", Arrays.toString(hidden.get(0))); + assertEquals("[5, 10]", Arrays.toString(hidden.get(1))); + } } -- 1.7.10.2