X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Futil%2FMappingUtils.java;h=dbbf8a06f379946455f9c69a22fcbda3fd669ed7;hb=4a3def9f59cefe629c9a33d87483283aee085928;hp=16db13a09fbc1eea4afcf625f9b5904ce655a348;hpb=1e8c7a9ab9f5da589d0aa2482fd2e3361c320d57;p=jalview.git diff --git a/src/jalview/util/MappingUtils.java b/src/jalview/util/MappingUtils.java index 16db13a..dbbf8a0 100644 --- a/src/jalview/util/MappingUtils.java +++ b/src/jalview/util/MappingUtils.java @@ -20,30 +20,35 @@ */ package jalview.util; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + import jalview.analysis.AlignmentSorter; import jalview.api.AlignViewportI; +import jalview.bin.Console; import jalview.commands.CommandI; import jalview.commands.EditCommand; import jalview.commands.EditCommand.Action; import jalview.commands.EditCommand.Edit; import jalview.commands.OrderCommand; import jalview.datamodel.AlignedCodonFrame; +import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentOrder; import jalview.datamodel.ColumnSelection; +import jalview.datamodel.HiddenColumns; +import jalview.datamodel.Mapping; +import jalview.datamodel.SearchResultMatchI; import jalview.datamodel.SearchResults; -import jalview.datamodel.SearchResults.Match; +import jalview.datamodel.SearchResultsI; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; - /** * Helper methods for manipulations involving sequence mappings. * @@ -76,7 +81,7 @@ public final class MappingUtils action = action.getUndoAction(); } // TODO write this - System.err.println("MappingUtils.mapCutOrPaste not yet implemented"); + Console.error("MappingUtils.mapCutOrPaste not yet implemented"); } /** @@ -106,7 +111,7 @@ public final class MappingUtils * Cache a copy of the target sequences so we can mimic successive edits on * them. This lets us compute mappings for all edits in the set. */ - Map targetCopies = new HashMap(); + Map targetCopies = new HashMap<>(); for (SequenceI seq : mapTo.getSequences()) { SequenceI ds = seq.getDatasetSequence(); @@ -194,7 +199,7 @@ public final class MappingUtils /* * Determine all mappings from this position to mapped sequences. */ - SearchResults sr = buildSearchResults(seq, seqpos, mappings); + SearchResultsI sr = buildSearchResults(seq, seqpos, mappings); if (!sr.isEmpty()) { @@ -217,8 +222,9 @@ public final class MappingUtils * Shift Delete start position left, as it acts on positions to its * right. */ - int mappedEditPos = action == Action.DELETE_GAP ? match[0] - - mappedCount : match[0]; + int mappedEditPos = action == Action.DELETE_GAP + ? match[0] - mappedCount + : match[0]; Edit e = result.new Edit(action, new SequenceI[] { targetSeq }, mappedEditPos, mappedCount, gapChar); result.addEdit(e); @@ -228,15 +234,15 @@ public final class MappingUtils */ if (action == Action.INSERT_GAP) { - copyTarget.setSequence(new String(StringUtils.insertCharAt( - copyTarget.getSequence(), mappedEditPos, mappedCount, - gapChar))); + copyTarget.setSequence(new String( + StringUtils.insertCharAt(copyTarget.getSequence(), + mappedEditPos, mappedCount, gapChar))); } else if (action == Action.DELETE_GAP) { - copyTarget.setSequence(new String(StringUtils.deleteChars( - copyTarget.getSequence(), mappedEditPos, - mappedEditPos + mappedCount))); + copyTarget.setSequence(new String( + StringUtils.deleteChars(copyTarget.getSequence(), + mappedEditPos, mappedEditPos + mappedCount))); } } } @@ -266,10 +272,10 @@ public final class MappingUtils * @param seqmappings * @return */ - public static SearchResults buildSearchResults(SequenceI seq, int index, + public static SearchResultsI buildSearchResults(SequenceI seq, int index, List seqmappings) { - SearchResults results = new SearchResults(); + SearchResultsI results = new SearchResults(); addSearchResults(results, seq, index, seqmappings); return results; } @@ -283,7 +289,7 @@ public final class MappingUtils * @param index * @param seqmappings */ - public static void addSearchResults(SearchResults results, SequenceI seq, + public static void addSearchResults(SearchResultsI results, SequenceI seq, int index, List seqmappings) { if (index >= seq.getStart() && index <= seq.getEnd()) @@ -319,7 +325,7 @@ public final class MappingUtils * Copy group name, colours etc, but not sequences or sequence colour scheme */ SequenceGroup mappedGroup = new SequenceGroup(sg); - mappedGroup.cs = mapTo.getGlobalColourScheme(); + mappedGroup.setColourScheme(mapTo.getGlobalColourScheme()); mappedGroup.clear(); int minStartCol = -1; @@ -358,32 +364,35 @@ public final class MappingUtils */ int startResiduePos = selected.findPosition(firstUngappedPos); int endResiduePos = selected.findPosition(lastUngappedPos); - - for (AlignedCodonFrame acf : codonFrames) + for (SequenceI seq : mapTo.getAlignment().getSequences()) { - SequenceI mappedSequence = targetIsNucleotide ? acf - .getDnaForAaSeq(selected) : acf.getAaForDnaSeq(selected); - if (mappedSequence != null) + int mappedStartResidue = 0; + int mappedEndResidue = 0; + for (AlignedCodonFrame acf : codonFrames) { - for (SequenceI seq : mapTo.getAlignment().getSequences()) + // rather than use acf.getCoveringMapping() we iterate through all + // mappings to make sure all CDS are selected for a protein + for (SequenceToSequenceMapping map : acf.getMappings()) { - int mappedStartResidue = 0; - int mappedEndResidue = 0; - if (seq.getDatasetSequence() == mappedSequence) + if (map.covers(selected) && map.covers(seq)) { /* * Found a sequence mapping. Locate the start/end mapped residues. */ - List mapping = Arrays.asList(new AlignedCodonFrame[] { acf }); - SearchResults sr = buildSearchResults(selected, + List mapping = Arrays + .asList(new AlignedCodonFrame[] + { acf }); + // locate start + SearchResultsI sr = buildSearchResults(selected, startResiduePos, mapping); - for (Match m : sr.getResults()) + for (SearchResultMatchI m : sr.getResults()) { mappedStartResidue = m.getStart(); mappedEndResidue = m.getEnd(); } + // locate end - allowing for adjustment of start range sr = buildSearchResults(selected, endResiduePos, mapping); - for (Match m : sr.getResults()) + for (SearchResultMatchI m : sr.getResults()) { mappedStartResidue = Math.min(mappedStartResidue, m.getStart()); @@ -395,11 +404,11 @@ public final class MappingUtils * returns a base 1 position, SequenceGroup uses base 0 */ int mappedStartCol = seq.findIndex(mappedStartResidue) - 1; - minStartCol = minStartCol == -1 ? mappedStartCol : Math.min( - minStartCol, mappedStartCol); + minStartCol = minStartCol == -1 ? mappedStartCol + : Math.min(minStartCol, mappedStartCol); int mappedEndCol = seq.findIndex(mappedEndResidue) - 1; - maxEndCol = maxEndCol == -1 ? mappedEndCol : Math.max( - maxEndCol, mappedEndCol); + maxEndCol = maxEndCol == -1 ? mappedEndCol + : Math.max(maxEndCol, mappedEndCol); mappedGroup.addSequence(seq, false); break; } @@ -426,11 +435,11 @@ public final class MappingUtils * the mappings available * @return */ - public static CommandI mapOrderCommand(OrderCommand command, - boolean undo, AlignmentI mapTo, List mappings) + public static CommandI mapOrderCommand(OrderCommand command, boolean undo, + AlignmentI mapTo, List mappings) { SequenceI[] sortOrder = command.getSequenceOrder(undo); - List mappedOrder = new ArrayList(); + List mappedOrder = new ArrayList<>(); int j = 0; /* @@ -443,18 +452,21 @@ public final class MappingUtils { for (AlignedCodonFrame acf : mappings) { - SequenceI mappedSeq = mappingToNucleotide ? acf.getDnaForAaSeq(seq) - : acf.getAaForDnaSeq(seq); - if (mappedSeq != null) + for (SequenceI seq2 : mapTo.getSequences()) { - for (SequenceI seq2 : mapTo.getSequences()) + /* + * the corresponding peptide / CDS is the one for which there is + * a complete ('covering') mapping to 'seq' + */ + SequenceI peptide = mappingToNucleotide ? seq2 : seq; + SequenceI cds = mappingToNucleotide ? seq : seq2; + SequenceToSequenceMapping s2s = acf.getCoveringMapping(cds, + peptide); + if (s2s != null) { - if (seq2.getDatasetSequence() == mappedSeq) - { - mappedOrder.add(seq2); - j++; - break; - } + mappedOrder.add(seq2); + j++; + break; } } } @@ -506,18 +518,19 @@ public final class MappingUtils * @param mapTo * @return */ - public static ColumnSelection mapColumnSelection(ColumnSelection colsel, - AlignViewportI mapFrom, AlignViewportI mapTo) + public static void mapColumnSelection(ColumnSelection colsel, + HiddenColumns hiddencols, AlignViewportI mapFrom, + AlignViewportI mapTo, ColumnSelection newColSel, + HiddenColumns newHidden) { boolean targetIsNucleotide = mapTo.isNucleotide(); AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo; List codonFrames = protein.getAlignment() .getCodonFrames(); - ColumnSelection mappedColumns = new ColumnSelection(); if (colsel == null) { - return mappedColumns; + return; } char fromGapChar = mapFrom.getAlignment().getGapCharacter(); @@ -531,16 +544,17 @@ public final class MappingUtils for (Integer sel : colsel.getSelected()) { - mapColumn(sel.intValue(), codonFrames, mappedColumns, fromSequences, + mapColumn(sel.intValue(), codonFrames, newColSel, fromSequences, toSequences, fromGapChar); } - for (int[] hidden : colsel.getHiddenColumns()) + Iterator regions = hiddencols.iterator(); + while (regions.hasNext()) { - mapHiddenColumns(hidden, codonFrames, mappedColumns, fromSequences, - toSequences, fromGapChar); + mapHiddenColumns(regions.next(), codonFrames, newHidden, + fromSequences, toSequences, fromGapChar); } - return mappedColumns; + return; } /** @@ -555,9 +569,9 @@ public final class MappingUtils * @param fromGapChar */ protected static void mapHiddenColumns(int[] hidden, - List mappings, - ColumnSelection mappedColumns, List fromSequences, - List toSequences, char fromGapChar) + List mappings, HiddenColumns mappedColumns, + List fromSequences, List toSequences, + char fromGapChar) { for (int col = hidden[0]; col <= hidden[1]; col++) { @@ -588,8 +602,7 @@ public final class MappingUtils * @param toSequences * @param fromGapChar */ - protected static void mapColumn(int col, - List mappings, + protected static void mapColumn(int col, List mappings, ColumnSelection mappedColumns, List fromSequences, List toSequences, char fromGapChar) { @@ -646,9 +659,8 @@ public final class MappingUtils * Get the residue position and find the mapped position. */ int residuePos = fromSeq.findPosition(col); - SearchResults sr = buildSearchResults(fromSeq, residuePos, - mappings); - for (Match m : sr.getResults()) + SearchResultsI sr = buildSearchResults(fromSeq, residuePos, mappings); + for (SearchResultMatchI m : sr.getResults()) { int mappedStartResidue = m.getStart(); int mappedEndResidue = m.getEnd(); @@ -660,7 +672,9 @@ public final class MappingUtils */ for (SequenceI toSeq : toSequences) { - if (toSeq.getDatasetSequence() == mappedSeq) + if (toSeq.getDatasetSequence() == mappedSeq + && mappedStartResidue >= toSeq.getStart() + && mappedEndResidue <= toSeq.getEnd()) { int mappedStartCol = toSeq.findIndex(mappedStartResidue); int mappedEndCol = toSeq.findIndex(mappedEndResidue); @@ -692,14 +706,14 @@ public final class MappingUtils public static List findCodonsFor(SequenceI seq, int col, List mappings) { - List result = new ArrayList(); + List result = new ArrayList<>(); int dsPos = seq.findPosition(col); for (AlignedCodonFrame mapping : mappings) { if (mapping.involvesSequence(seq)) { - List codons = mapping.getMappedCodons( - seq.getDatasetSequence(), dsPos); + List codons = mapping + .getMappedCodons(seq.getDatasetSequence(), dsPos); if (codons != null) { result.addAll(codons); @@ -754,74 +768,66 @@ public final class MappingUtils public static List findMappingsForSequence( SequenceI sequence, List mappings) { - List result = new ArrayList(); - if (sequence == null || mappings == null) - { - return result; - } - for (AlignedCodonFrame mapping : mappings) - { - if (mapping.involvesSequence(sequence)) - { - result.add(mapping); - } - } - return result; + return findMappingsForSequenceAndOthers(sequence, mappings, null); } /** - * Remove the last 3 mapped positions from the given ranges + * Returns a list of any mappings that are from or to the given (aligned or + * dataset) sequence, optionally limited to mappings involving one of a given + * list of sequences. * - * @param ranges - * @param mappedLength + * @param sequence + * @param mappings + * @param filterList + * @return */ - public static void unmapStopCodon(List ranges, - int mappedLength) + public static List findMappingsForSequenceAndOthers( + SequenceI sequence, List mappings, + List filterList) { - if (mappedLength < 3) + List result = new ArrayList<>(); + if (sequence == null || mappings == null) { - return; + return result; } - boolean done = false; - int targetLength = mappedLength - 3; - int mapped = 0; - Iterator it = ranges.iterator(); - while (!done && it.hasNext()) + for (AlignedCodonFrame mapping : mappings) { - int[] range = it.next(); - int length = Math.abs(range[1] - range[0]) + 1; - if (mapped + length == targetLength) - { - done = true; - } - else if (mapped + length < targetLength) - { - mapped += length; - continue; - } - else + if (mapping.involvesSequence(sequence)) { - /* - * need just a bit of this range - */ - int needed = targetLength - mapped; - int sense = range[1] >= range[0] ? 1 : -1; - range[1] = range[0] + (sense * (needed - 1)); - done = true; + if (filterList != null) + { + for (SequenceI otherseq : filterList) + { + SequenceI otherDataset = otherseq.getDatasetSequence(); + if (otherseq == sequence + || otherseq == sequence.getDatasetSequence() + || (otherDataset != null && (otherDataset == sequence + || otherDataset == sequence + .getDatasetSequence()))) + { + // skip sequences in subset which directly relate to sequence + continue; + } + if (mapping.involvesSequence(otherseq)) + { + // selected a mapping contained in subselect alignment + result.add(mapping); + break; + } + } + } + else + { + result.add(mapping); + } } } - /* - * remove any trailing ranges - */ - while (it.hasNext()) - { - it.next(); - it.remove(); - } + return result; } /** - * Returns the total length of the supplied ranges + * Returns the total length of the supplied ranges, which may be as single + * [start, end] or multiple [start, end, start, end ...] * * @param ranges * @return @@ -835,7 +841,16 @@ public final class MappingUtils int length = 0; for (int[] range : ranges) { - length += Math.abs(range[1] - range[0]) + 1; + if (range.length % 2 != 0) + { + Console.error( + "Error unbalance start/end ranges: " + ranges.toString()); + return 0; + } + for (int i = 0; i < range.length - 1; i += 2) + { + length += Math.abs(range[i + 1] - range[i]) + 1; + } } return length; } @@ -872,4 +887,243 @@ public final class MappingUtils } return false; } + + /** + * Removes a specified number of positions from the start of a ranges list. + * For example, could be used to adjust cds ranges to allow for an incomplete + * start codon. Subranges are removed completely, or their start positions + * adjusted, until the required number of positions has been removed from the + * range. Reverse strand ranges are supported. The input array is not + * modified. + * + * @param removeCount + * @param ranges + * an array of [start, end, start, end...] positions + * @return a new array with the first removeCount positions removed + */ + public static int[] removeStartPositions(int removeCount, + final int[] ranges) + { + if (removeCount <= 0) + { + return ranges; + } + + int[] copy = Arrays.copyOf(ranges, ranges.length); + int sxpos = -1; + int cdspos = 0; + for (int x = 0; x < copy.length && sxpos == -1; x += 2) + { + cdspos += Math.abs(copy[x + 1] - copy[x]) + 1; + if (removeCount < cdspos) + { + /* + * we have removed enough, time to finish + */ + sxpos = x; + + /* + * increment start of first exon, or decrement if reverse strand + */ + if (copy[x] <= copy[x + 1]) + { + copy[x] = copy[x + 1] - cdspos + removeCount + 1; + } + else + { + copy[x] = copy[x + 1] + cdspos - removeCount - 1; + } + break; + } + } + + if (sxpos > 0) + { + /* + * we dropped at least one entire sub-range - compact the array + */ + int[] nxon = new int[copy.length - sxpos]; + System.arraycopy(copy, sxpos, nxon, 0, copy.length - sxpos); + return nxon; + } + return copy; + } + + /** + * Answers true if range's start-end positions include those of queryRange, + * where either range might be in reverse direction, else false + * + * @param range + * a start-end range + * @param queryRange + * a candidate subrange of range (start2-end2) + * @return + */ + public static boolean rangeContains(int[] range, int[] queryRange) + { + if (range == null || queryRange == null || range.length != 2 + || queryRange.length != 2) + { + /* + * invalid arguments + */ + return false; + } + + int min = Math.min(range[0], range[1]); + int max = Math.max(range[0], range[1]); + + return (min <= queryRange[0] && max >= queryRange[0] + && min <= queryRange[1] && max >= queryRange[1]); + } + + /** + * Removes the specified number of positions from the given ranges. Provided + * to allow a stop codon to be stripped from a CDS sequence so that it matches + * the peptide translation length. + * + * @param positions + * @param ranges + * a list of (single) [start, end] ranges + * @return + */ + public static void removeEndPositions(int positions, List ranges) + { + int toRemove = positions; + Iterator it = new ReverseListIterator<>(ranges); + while (toRemove > 0) + { + int[] endRange = it.next(); + if (endRange.length != 2) + { + /* + * not coded for [start1, end1, start2, end2, ...] + */ + Console.error( + "MappingUtils.removeEndPositions doesn't handle multiple ranges"); + return; + } + + int length = endRange[1] - endRange[0] + 1; + if (length <= 0) + { + /* + * not coded for a reverse strand range (end < start) + */ + Console.error( + "MappingUtils.removeEndPositions doesn't handle reverse strand"); + return; + } + if (length > toRemove) + { + endRange[1] -= toRemove; + toRemove = 0; + } + else + { + toRemove -= length; + it.remove(); + } + } + } + + /** + * Converts a list of {@code start-end} ranges to a single array of + * {@code start1, end1, start2, ... } ranges + * + * @param ranges + * @return + */ + public static int[] rangeListToArray(List ranges) + { + int rangeCount = ranges.size(); + int[] result = new int[rangeCount * 2]; + int j = 0; + for (int i = 0; i < rangeCount; i++) + { + int[] range = ranges.get(i); + result[j++] = range[0]; + result[j++] = range[1]; + } + return result; + } + + /* + * Returns the maximal start-end positions in the given (ordered) list of + * ranges which is overlapped by the given begin-end range, or null if there + * is no overlap. + * + *
+   * Examples:
+   *   if ranges is {[4, 8], [10, 12], [16, 19]}
+   * then
+   *   findOverlap(ranges, 1, 20) == [4, 19]
+   *   findOverlap(ranges, 6, 11) == [6, 11]
+   *   findOverlap(ranges, 9, 15) == [10, 12]
+   *   findOverlap(ranges, 13, 15) == null
+   * 
+ * + * @param ranges + * @param begin + * @param end + * @return + */ + protected static int[] findOverlap(List ranges, final int begin, + final int end) + { + boolean foundStart = false; + int from = 0; + int to = 0; + + /* + * traverse the ranges to find the first position (if any) >= begin, + * and the last position (if any) <= end + */ + for (int[] range : ranges) + { + if (!foundStart) + { + if (range[0] >= begin) + { + /* + * first range that starts with, or follows, begin + */ + foundStart = true; + from = Math.max(range[0], begin); + } + else if (range[1] >= begin) + { + /* + * first range that contains begin + */ + foundStart = true; + from = begin; + } + } + + if (range[0] <= end) + { + to = Math.min(end, range[1]); + } + } + + return foundStart && to >= from ? new int[] { from, to } : null; + } + + public static Map putWithDuplicationCheck(Map map, K key, + V value) + { + if (!map.containsKey(key)) + { + map.put(key, value); + } + else + { + Console.warn( + "Attempt to add duplicate entry detected for map with key: " + + key.toString() + " and value: " + value.toString()); + } + + return map; + } }