X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Futil%2FMappingUtils.java;h=09d4b1306136770e74ca31d089c1c8b9c485bdb5;hb=ad09c54f294b945fd4d037a93f5fa8eced6c1797;hp=22714b850d36ce5d21f37069fd484d7c4e7a9d0b;hpb=be47cdd2406f63ccb1fcf424194e82302dfc9360;p=jalview.git diff --git a/src/jalview/util/MappingUtils.java b/src/jalview/util/MappingUtils.java index 22714b8..09d4b13 100644 --- a/src/jalview/util/MappingUtils.java +++ b/src/jalview/util/MappingUtils.java @@ -31,8 +31,10 @@ import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentOrder; import jalview.datamodel.ColumnSelection; +import jalview.datamodel.HiddenColumns; +import jalview.datamodel.SearchResultMatchI; import jalview.datamodel.SearchResults; -import jalview.datamodel.SearchResults.Match; +import jalview.datamodel.SearchResultsI; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; @@ -106,7 +108,7 @@ public final class MappingUtils * Cache a copy of the target sequences so we can mimic successive edits on * them. This lets us compute mappings for all edits in the set. */ - Map targetCopies = new HashMap(); + Map targetCopies = new HashMap<>(); for (SequenceI seq : mapTo.getSequences()) { SequenceI ds = seq.getDatasetSequence(); @@ -194,7 +196,7 @@ public final class MappingUtils /* * Determine all mappings from this position to mapped sequences. */ - SearchResults sr = buildSearchResults(seq, seqpos, mappings); + SearchResultsI sr = buildSearchResults(seq, seqpos, mappings); if (!sr.isEmpty()) { @@ -217,8 +219,9 @@ public final class MappingUtils * Shift Delete start position left, as it acts on positions to its * right. */ - int mappedEditPos = action == Action.DELETE_GAP ? match[0] - - mappedCount : match[0]; + int mappedEditPos = action == Action.DELETE_GAP + ? match[0] - mappedCount + : match[0]; Edit e = result.new Edit(action, new SequenceI[] { targetSeq }, mappedEditPos, mappedCount, gapChar); result.addEdit(e); @@ -228,15 +231,15 @@ public final class MappingUtils */ if (action == Action.INSERT_GAP) { - copyTarget.setSequence(new String(StringUtils.insertCharAt( - copyTarget.getSequence(), mappedEditPos, mappedCount, - gapChar))); + copyTarget.setSequence(new String( + StringUtils.insertCharAt(copyTarget.getSequence(), + mappedEditPos, mappedCount, gapChar))); } else if (action == Action.DELETE_GAP) { - copyTarget.setSequence(new String(StringUtils.deleteChars( - copyTarget.getSequence(), mappedEditPos, - mappedEditPos + mappedCount))); + copyTarget.setSequence(new String( + StringUtils.deleteChars(copyTarget.getSequence(), + mappedEditPos, mappedEditPos + mappedCount))); } } } @@ -266,10 +269,10 @@ public final class MappingUtils * @param seqmappings * @return */ - public static SearchResults buildSearchResults(SequenceI seq, int index, + public static SearchResultsI buildSearchResults(SequenceI seq, int index, List seqmappings) { - SearchResults results = new SearchResults(); + SearchResultsI results = new SearchResults(); addSearchResults(results, seq, index, seqmappings); return results; } @@ -283,7 +286,7 @@ public final class MappingUtils * @param index * @param seqmappings */ - public static void addSearchResults(SearchResults results, SequenceI seq, + public static void addSearchResults(SearchResultsI results, SequenceI seq, int index, List seqmappings) { if (index >= seq.getStart() && index <= seq.getEnd()) @@ -319,7 +322,7 @@ public final class MappingUtils * Copy group name, colours etc, but not sequences or sequence colour scheme */ SequenceGroup mappedGroup = new SequenceGroup(sg); - mappedGroup.cs = mapTo.getGlobalColourScheme(); + mappedGroup.setColourScheme(mapTo.getGlobalColourScheme()); mappedGroup.clear(); int minStartCol = -1; @@ -361,8 +364,9 @@ public final class MappingUtils for (AlignedCodonFrame acf : codonFrames) { - SequenceI mappedSequence = targetIsNucleotide ? acf - .getDnaForAaSeq(selected) : acf.getAaForDnaSeq(selected); + SequenceI mappedSequence = targetIsNucleotide + ? acf.getDnaForAaSeq(selected) + : acf.getAaForDnaSeq(selected); if (mappedSequence != null) { for (SequenceI seq : mapTo.getAlignment().getSequences()) @@ -374,16 +378,18 @@ public final class MappingUtils /* * Found a sequence mapping. Locate the start/end mapped residues. */ - List mapping = Arrays.asList(new AlignedCodonFrame[] { acf }); - SearchResults sr = buildSearchResults(selected, + List mapping = Arrays + .asList(new AlignedCodonFrame[] + { acf }); + SearchResultsI sr = buildSearchResults(selected, startResiduePos, mapping); - for (Match m : sr.getResults()) + for (SearchResultMatchI m : sr.getResults()) { mappedStartResidue = m.getStart(); mappedEndResidue = m.getEnd(); } sr = buildSearchResults(selected, endResiduePos, mapping); - for (Match m : sr.getResults()) + for (SearchResultMatchI m : sr.getResults()) { mappedStartResidue = Math.min(mappedStartResidue, m.getStart()); @@ -395,11 +401,11 @@ public final class MappingUtils * returns a base 1 position, SequenceGroup uses base 0 */ int mappedStartCol = seq.findIndex(mappedStartResidue) - 1; - minStartCol = minStartCol == -1 ? mappedStartCol : Math.min( - minStartCol, mappedStartCol); + minStartCol = minStartCol == -1 ? mappedStartCol + : Math.min(minStartCol, mappedStartCol); int mappedEndCol = seq.findIndex(mappedEndResidue) - 1; - maxEndCol = maxEndCol == -1 ? mappedEndCol : Math.max( - maxEndCol, mappedEndCol); + maxEndCol = maxEndCol == -1 ? mappedEndCol + : Math.max(maxEndCol, mappedEndCol); mappedGroup.addSequence(seq, false); break; } @@ -426,11 +432,11 @@ public final class MappingUtils * the mappings available * @return */ - public static CommandI mapOrderCommand(OrderCommand command, - boolean undo, AlignmentI mapTo, List mappings) + public static CommandI mapOrderCommand(OrderCommand command, boolean undo, + AlignmentI mapTo, List mappings) { SequenceI[] sortOrder = command.getSequenceOrder(undo); - List mappedOrder = new ArrayList(); + List mappedOrder = new ArrayList<>(); int j = 0; /* @@ -506,90 +512,175 @@ public final class MappingUtils * @param mapTo * @return */ - public static ColumnSelection mapColumnSelection(ColumnSelection colsel, - AlignViewportI mapFrom, AlignViewportI mapTo) + public static void mapColumnSelection(ColumnSelection colsel, + HiddenColumns hiddencols, AlignViewportI mapFrom, + AlignViewportI mapTo, ColumnSelection newColSel, + HiddenColumns newHidden) { boolean targetIsNucleotide = mapTo.isNucleotide(); AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo; List codonFrames = protein.getAlignment() .getCodonFrames(); - ColumnSelection mappedColumns = new ColumnSelection(); if (colsel == null) { - return mappedColumns; + return; // mappedColumns; } char fromGapChar = mapFrom.getAlignment().getGapCharacter(); - // FIXME allow for hidden columns - /* * For each mapped column, find the range of columns that residues in that * column map to. */ - for (Object obj : colsel.getSelected()) + List fromSequences = mapFrom.getAlignment().getSequences(); + List toSequences = mapTo.getAlignment().getSequences(); + + for (Integer sel : colsel.getSelected()) + { + mapColumn(sel.intValue(), codonFrames, newColSel, fromSequences, + toSequences, fromGapChar); + } + + Iterator regions = hiddencols.iterator(); + while (regions.hasNext()) { - int col = ((Integer) obj).intValue(); - int mappedToMin = Integer.MAX_VALUE; - int mappedToMax = Integer.MIN_VALUE; + mapHiddenColumns(regions.next(), codonFrames, newHidden, + fromSequences, + toSequences, fromGapChar); + } + return; // mappedColumns; + } + + /** + * Helper method that maps a [start, end] hidden column range to its mapped + * equivalent + * + * @param hidden + * @param mappings + * @param mappedColumns + * @param fromSequences + * @param toSequences + * @param fromGapChar + */ + protected static void mapHiddenColumns(int[] hidden, + List mappings, HiddenColumns mappedColumns, + List fromSequences, List toSequences, + char fromGapChar) + { + for (int col = hidden[0]; col <= hidden[1]; col++) + { + int[] mappedTo = findMappedColumns(col, mappings, fromSequences, + toSequences, fromGapChar); /* - * For each sequence in the 'from' alignment + * Add the range of hidden columns to the mapped selection (converting + * base 1 to base 0). */ - for (SequenceI fromSeq : mapFrom.getAlignment().getSequences()) + if (mappedTo != null) { - /* - * Ignore gaps (unmapped anyway) - */ - if (fromSeq.getCharAt(col) == fromGapChar) - { - continue; - } + mappedColumns.hideColumns(mappedTo[0] - 1, mappedTo[1] - 1); + } + } + } + + /** + * Helper method to map one column selection + * + * @param col + * the column number (base 0) + * @param mappings + * the sequence mappings + * @param mappedColumns + * the mapped column selections to add to + * @param fromSequences + * @param toSequences + * @param fromGapChar + */ + protected static void mapColumn(int col, List mappings, + ColumnSelection mappedColumns, List fromSequences, + List toSequences, char fromGapChar) + { + int[] mappedTo = findMappedColumns(col, mappings, fromSequences, + toSequences, fromGapChar); + + /* + * Add the range of mapped columns to the mapped selection (converting + * base 1 to base 0). Note that this may include intron-only regions which + * lie between the start and end ranges of the selection. + */ + if (mappedTo != null) + { + for (int i = mappedTo[0]; i <= mappedTo[1]; i++) + { + mappedColumns.addElement(i - 1); + } + } + } + + /** + * Helper method to find the range of columns mapped to from one column. + * Returns the maximal range of columns mapped to from all sequences in the + * source column, or null if no mappings were found. + * + * @param col + * @param mappings + * @param fromSequences + * @param toSequences + * @param fromGapChar + * @return + */ + protected static int[] findMappedColumns(int col, + List mappings, List fromSequences, + List toSequences, char fromGapChar) + { + int[] mappedTo = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE }; + boolean found = false; + + /* + * For each sequence in the 'from' alignment + */ + for (SequenceI fromSeq : fromSequences) + { + /* + * Ignore gaps (unmapped anyway) + */ + if (fromSeq.getCharAt(col) == fromGapChar) + { + continue; + } + + /* + * Get the residue position and find the mapped position. + */ + int residuePos = fromSeq.findPosition(col); + SearchResultsI sr = buildSearchResults(fromSeq, residuePos, mappings); + for (SearchResultMatchI m : sr.getResults()) + { + int mappedStartResidue = m.getStart(); + int mappedEndResidue = m.getEnd(); + SequenceI mappedSeq = m.getSequence(); /* - * Get the residue position and find the mapped position. + * Locate the aligned sequence whose dataset is mappedSeq. TODO a + * datamodel that can do this efficiently. */ - int residuePos = fromSeq.findPosition(col); - SearchResults sr = buildSearchResults(fromSeq, residuePos, - codonFrames); - for (Match m : sr.getResults()) + for (SequenceI toSeq : toSequences) { - int mappedStartResidue = m.getStart(); - int mappedEndResidue = m.getEnd(); - SequenceI mappedSeq = m.getSequence(); - - /* - * Locate the aligned sequence whose dataset is mappedSeq. TODO a - * datamodel that can do this efficiently. - */ - for (SequenceI toSeq : mapTo.getAlignment().getSequences()) + if (toSeq.getDatasetSequence() == mappedSeq) { - if (toSeq.getDatasetSequence() == mappedSeq) - { - int mappedStartCol = toSeq.findIndex(mappedStartResidue); - int mappedEndCol = toSeq.findIndex(mappedEndResidue); - mappedToMin = Math.min(mappedToMin, mappedStartCol); - mappedToMax = Math.max(mappedToMax, mappedEndCol); - // System.out.println(fromSeq.getName() + " mapped to cols " - // + mappedStartCol + ":" + mappedEndCol); - break; - // note: remove break if we ever want to map one to many sequences - } + int mappedStartCol = toSeq.findIndex(mappedStartResidue); + int mappedEndCol = toSeq.findIndex(mappedEndResidue); + mappedTo[0] = Math.min(mappedTo[0], mappedStartCol); + mappedTo[1] = Math.max(mappedTo[1], mappedEndCol); + found = true; + break; + // note: remove break if we ever want to map one to many sequences } } } - /* - * Add the range of mapped columns to the mapped selection (converting - * base 1 to base 0). Note that this may include intron-only regions which - * lie between the start and end ranges of the selection. - */ - for (int i = mappedToMin; i <= mappedToMax; i++) - { - mappedColumns.addElement(i - 1); - } } - return mappedColumns; + return found ? mappedTo : null; } /** @@ -608,14 +699,14 @@ public final class MappingUtils public static List findCodonsFor(SequenceI seq, int col, List mappings) { - List result = new ArrayList(); + List result = new ArrayList<>(); int dsPos = seq.findPosition(col); for (AlignedCodonFrame mapping : mappings) { if (mapping.involvesSequence(seq)) { - List codons = mapping.getMappedCodons( - seq.getDatasetSequence(), dsPos); + List codons = mapping + .getMappedCodons(seq.getDatasetSequence(), dsPos); if (codons != null) { result.addAll(codons); @@ -670,7 +761,24 @@ public final class MappingUtils public static List findMappingsForSequence( SequenceI sequence, List mappings) { - List result = new ArrayList(); + return findMappingsForSequenceAndOthers(sequence, mappings, null); + } + + /** + * Returns a list of any mappings that are from or to the given (aligned or + * dataset) sequence, optionally limited to mappings involving one of a given + * list of sequences. + * + * @param sequence + * @param mappings + * @param filterList + * @return + */ + public static List findMappingsForSequenceAndOthers( + SequenceI sequence, List mappings, + List filterList) + { + List result = new ArrayList<>(); if (sequence == null || mappings == null) { return result; @@ -679,9 +787,158 @@ public final class MappingUtils { if (mapping.involvesSequence(sequence)) { - result.add(mapping); + if (filterList != null) + { + for (SequenceI otherseq : filterList) + { + SequenceI otherDataset = otherseq.getDatasetSequence(); + if (otherseq == sequence + || otherseq == sequence.getDatasetSequence() + || (otherDataset != null && (otherDataset == sequence + || otherDataset == sequence + .getDatasetSequence()))) + { + // skip sequences in subset which directly relate to sequence + continue; + } + if (mapping.involvesSequence(otherseq)) + { + // selected a mapping contained in subselect alignment + result.add(mapping); + break; + } + } + } + else + { + result.add(mapping); + } } } return result; } + + /** + * Returns the total length of the supplied ranges, which may be as single + * [start, end] or multiple [start, end, start, end ...] + * + * @param ranges + * @return + */ + public static int getLength(List ranges) + { + if (ranges == null) + { + return 0; + } + int length = 0; + for (int[] range : ranges) + { + if (range.length % 2 != 0) + { + System.err.println( + "Error unbalance start/end ranges: " + ranges.toString()); + return 0; + } + for (int i = 0; i < range.length - 1; i += 2) + { + length += Math.abs(range[i + 1] - range[i]) + 1; + } + } + return length; + } + + /** + * Answers true if any range includes the given value + * + * @param ranges + * @param value + * @return + */ + public static boolean contains(List ranges, int value) + { + if (ranges == null) + { + return false; + } + for (int[] range : ranges) + { + if (range[1] >= range[0] && value >= range[0] && value <= range[1]) + { + /* + * value within ascending range + */ + return true; + } + if (range[1] < range[0] && value <= range[0] && value >= range[1]) + { + /* + * value within descending range + */ + return true; + } + } + return false; + } + + /** + * Removes a specified number of positions from the start of a ranges list. + * For example, could be used to adjust cds ranges to allow for an incomplete + * start codon. Subranges are removed completely, or their start positions + * adjusted, until the required number of positions has been removed from the + * range. Reverse strand ranges are supported. The input array is not + * modified. + * + * @param removeCount + * @param ranges + * an array of [start, end, start, end...] positions + * @return a new array with the first removeCount positions removed + */ + public static int[] removeStartPositions(int removeCount, + final int[] ranges) + { + if (removeCount <= 0) + { + return ranges; + } + + int[] copy = Arrays.copyOf(ranges, ranges.length); + int sxpos = -1; + int cdspos = 0; + for (int x = 0; x < copy.length && sxpos == -1; x += 2) + { + cdspos += Math.abs(copy[x + 1] - copy[x]) + 1; + if (removeCount < cdspos) + { + /* + * we have removed enough, time to finish + */ + sxpos = x; + + /* + * increment start of first exon, or decrement if reverse strand + */ + if (copy[x] <= copy[x + 1]) + { + copy[x] = copy[x + 1] - cdspos + removeCount + 1; + } + else + { + copy[x] = copy[x + 1] + cdspos - removeCount - 1; + } + break; + } + } + + if (sxpos > 0) + { + /* + * we dropped at least one entire sub-range - compact the array + */ + int[] nxon = new int[copy.length - sxpos]; + System.arraycopy(copy, sxpos, nxon, 0, copy.length - sxpos); + return nxon; + } + return copy; + } }