X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Futil%2FMappingUtils.java;h=2e30132c0105fd0fde8576434688b2d86db877eb;hb=d2bf6a7b7e1bbb4774f13ba2733487ae6b7aae6d;hp=45d166d0ad2976ea5cc94a0f68de0f1bb736c81d;hpb=edf2c4eda8ef83c1c86aacd7395d51fc59b89ea1;p=jalview.git diff --git a/src/jalview/util/MappingUtils.java b/src/jalview/util/MappingUtils.java index 45d166d..2e30132 100644 --- a/src/jalview/util/MappingUtils.java +++ b/src/jalview/util/MappingUtils.java @@ -31,19 +31,19 @@ import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentOrder; import jalview.datamodel.ColumnSelection; +import jalview.datamodel.SearchResultMatchI; import jalview.datamodel.SearchResults; -import jalview.datamodel.SearchResults.Match; +import jalview.datamodel.SearchResultsI; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; import java.util.ArrayList; -import java.util.Collections; +import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.Set; /** * Helper methods for manipulations involving sequence mappings. @@ -69,7 +69,7 @@ public final class MappingUtils */ protected static void mapCutOrPaste(Edit edit, boolean undo, List targetSeqs, EditCommand result, - Set mappings) + List mappings) { Action action = edit.getAction(); if (undo) @@ -93,7 +93,7 @@ public final class MappingUtils */ public static EditCommand mapEditCommand(EditCommand command, boolean undo, final AlignmentI mapTo, char gapChar, - Set mappings) + List mappings) { /* * For now, only support mapping from protein edits to cDna @@ -165,7 +165,7 @@ public final class MappingUtils Map originalSequences, final List targetSeqs, Map targetCopies, char gapChar, - EditCommand result, Set mappings) + EditCommand result, List mappings) { Action action = edit.getAction(); @@ -195,7 +195,7 @@ public final class MappingUtils /* * Determine all mappings from this position to mapped sequences. */ - SearchResults sr = buildSearchResults(seq, seqpos, mappings); + SearchResultsI sr = buildSearchResults(seq, seqpos, mappings); if (!sr.isEmpty()) { @@ -267,10 +267,10 @@ public final class MappingUtils * @param seqmappings * @return */ - public static SearchResults buildSearchResults(SequenceI seq, int index, - Set seqmappings) + public static SearchResultsI buildSearchResults(SequenceI seq, int index, + List seqmappings) { - SearchResults results = new SearchResults(); + SearchResultsI results = new SearchResults(); addSearchResults(results, seq, index, seqmappings); return results; } @@ -284,8 +284,8 @@ public final class MappingUtils * @param index * @param seqmappings */ - public static void addSearchResults(SearchResults results, SequenceI seq, - int index, Set seqmappings) + public static void addSearchResults(SearchResultsI results, SequenceI seq, + int index, List seqmappings) { if (index >= seq.getStart() && index <= seq.getEnd()) { @@ -314,13 +314,13 @@ public final class MappingUtils */ boolean targetIsNucleotide = mapTo.isNucleotide(); AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo; - Set codonFrames = protein.getAlignment() + List codonFrames = protein.getAlignment() .getCodonFrames(); /* * Copy group name, colours etc, but not sequences or sequence colour scheme */ SequenceGroup mappedGroup = new SequenceGroup(sg); - mappedGroup.cs = mapTo.getGlobalColourScheme(); + mappedGroup.setColourScheme(mapTo.getGlobalColourScheme()); mappedGroup.clear(); int minStartCol = -1; @@ -375,16 +375,17 @@ public final class MappingUtils /* * Found a sequence mapping. Locate the start/end mapped residues. */ - SearchResults sr = buildSearchResults(selected, - startResiduePos, Collections.singleton(acf)); - for (Match m : sr.getResults()) + List mapping = Arrays + .asList(new AlignedCodonFrame[] { acf }); + SearchResultsI sr = buildSearchResults(selected, + startResiduePos, mapping); + for (SearchResultMatchI m : sr.getResults()) { mappedStartResidue = m.getStart(); mappedEndResidue = m.getEnd(); } - sr = buildSearchResults(selected, endResiduePos, - Collections.singleton(acf)); - for (Match m : sr.getResults()) + sr = buildSearchResults(selected, endResiduePos, mapping); + for (SearchResultMatchI m : sr.getResults()) { mappedStartResidue = Math.min(mappedStartResidue, m.getStart()); @@ -428,7 +429,7 @@ public final class MappingUtils * @return */ public static CommandI mapOrderCommand(OrderCommand command, - boolean undo, AlignmentI mapTo, Set mappings) + boolean undo, AlignmentI mapTo, List mappings) { SequenceI[] sortOrder = command.getSequenceOrder(undo); List mappedOrder = new ArrayList(); @@ -512,7 +513,7 @@ public final class MappingUtils { boolean targetIsNucleotide = mapTo.isNucleotide(); AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo; - Set codonFrames = protein.getAlignment() + List codonFrames = protein.getAlignment() .getCodonFrames(); ColumnSelection mappedColumns = new ColumnSelection(); @@ -523,8 +524,6 @@ public final class MappingUtils char fromGapChar = mapFrom.getAlignment().getGapCharacter(); - // FIXME allow for hidden columns - /* * For each mapped column, find the range of columns that residues in that * column map to. @@ -558,9 +557,9 @@ public final class MappingUtils * @param fromGapChar */ protected static void mapHiddenColumns(int[] hidden, - Set mappings, - ColumnSelection mappedColumns, List fromSequences, - List toSequences, char fromGapChar) + List mappings, ColumnSelection mappedColumns, + List fromSequences, List toSequences, + char fromGapChar) { for (int col = hidden[0]; col <= hidden[1]; col++) { @@ -591,9 +590,10 @@ public final class MappingUtils * @param toSequences * @param fromGapChar */ - protected static void mapColumn(int col, Set mappings, - ColumnSelection mappedColumns, List fromSequences, - List toSequences, char fromGapChar) + protected static void mapColumn(int col, + List mappings, ColumnSelection mappedColumns, + List fromSequences, List toSequences, + char fromGapChar) { int[] mappedTo = findMappedColumns(col, mappings, fromSequences, toSequences, fromGapChar); @@ -625,7 +625,7 @@ public final class MappingUtils * @return */ protected static int[] findMappedColumns(int col, - Set mappings, List fromSequences, + List mappings, List fromSequences, List toSequences, char fromGapChar) { int[] mappedTo = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE }; @@ -648,9 +648,8 @@ public final class MappingUtils * Get the residue position and find the mapped position. */ int residuePos = fromSeq.findPosition(col); - SearchResults sr = buildSearchResults(fromSeq, residuePos, - mappings); - for (Match m : sr.getResults()) + SearchResultsI sr = buildSearchResults(fromSeq, residuePos, mappings); + for (SearchResultMatchI m : sr.getResults()) { int mappedStartResidue = m.getStart(); int mappedEndResidue = m.getEnd(); @@ -679,8 +678,8 @@ public final class MappingUtils } /** - * Returns the mapped codon for a given aligned sequence column position (base - * 0). + * Returns the mapped codon or codons for a given aligned sequence column + * position (base 0). * * @param seq * an aligned peptide sequence @@ -688,26 +687,32 @@ public final class MappingUtils * an aligned column position (base 0) * @param mappings * a set of codon mappings - * @return the bases of the mapped codon in the cDNA dataset sequence, or null - * if not found + * @return the bases of the mapped codon(s) in the cDNA dataset sequence(s), + * or an empty list if none found */ - public static char[] findCodonFor(SequenceI seq, int col, - Set mappings) + public static List findCodonsFor(SequenceI seq, int col, + List mappings) { + List result = new ArrayList(); int dsPos = seq.findPosition(col); for (AlignedCodonFrame mapping : mappings) { if (mapping.involvesSequence(seq)) { - return mapping.getMappedCodon(seq.getDatasetSequence(), dsPos); + List codons = mapping.getMappedCodons( + seq.getDatasetSequence(), dsPos); + if (codons != null) + { + result.addAll(codons); + } } } - return null; + return result; } /** - * Converts a series of [start, end] ranges into an array of individual - * positions. + * Converts a series of [start, end] range pairs into an array of individual + * positions. This also caters for 'reverse strand' (start > end) cases. * * @param ranges * @return @@ -720,17 +725,21 @@ public final class MappingUtils int count = 0; for (int i = 0; i < ranges.length - 1; i += 2) { - count += ranges[i + 1] - ranges[i] + 1; + count += Math.abs(ranges[i + 1] - ranges[i]) + 1; } int[] result = new int[count]; int k = 0; for (int i = 0; i < ranges.length - 1; i += 2) { - for (int j = ranges[i]; j <= ranges[i + 1]; j++) + int from = ranges[i]; + final int to = ranges[i + 1]; + int step = from <= to ? 1 : -1; + do { - result[k++] = j; - } + result[k++] = from; + from += step; + } while (from != to + step); } return result; } @@ -744,7 +753,24 @@ public final class MappingUtils * @return */ public static List findMappingsForSequence( - SequenceI sequence, Set mappings) + SequenceI sequence, List mappings) + { + return findMappingsForSequenceAndOthers(sequence, mappings, null); + } + + /** + * Returns a list of any mappings that are from or to the given (aligned or + * dataset) sequence, optionally limited to mappings involving one of a given + * list of sequences. + * + * @param sequence + * @param mappings + * @param filterList + * @return + */ + public static List findMappingsForSequenceAndOthers( + SequenceI sequence, List mappings, + List filterList) { List result = new ArrayList(); if (sequence == null || mappings == null) @@ -755,9 +781,157 @@ public final class MappingUtils { if (mapping.involvesSequence(sequence)) { - result.add(mapping); + if (filterList != null) + { + for (SequenceI otherseq : filterList) + { + SequenceI otherDataset = otherseq.getDatasetSequence(); + if (otherseq == sequence + || otherseq == sequence.getDatasetSequence() + || (otherDataset != null && (otherDataset == sequence || otherDataset == sequence + .getDatasetSequence()))) + { + // skip sequences in subset which directly relate to sequence + continue; + } + if (mapping.involvesSequence(otherseq)) + { + // selected a mapping contained in subselect alignment + result.add(mapping); + break; + } + } + } + else + { + result.add(mapping); + } } } return result; } + + /** + * Returns the total length of the supplied ranges, which may be as single + * [start, end] or multiple [start, end, start, end ...] + * + * @param ranges + * @return + */ + public static int getLength(List ranges) + { + if (ranges == null) + { + return 0; + } + int length = 0; + for (int[] range : ranges) + { + if (range.length % 2 != 0) + { + System.err.println("Error unbalance start/end ranges: " + + ranges.toString()); + return 0; + } + for (int i = 0; i < range.length - 1; i += 2) + { + length += Math.abs(range[i + 1] - range[i]) + 1; + } + } + return length; + } + + /** + * Answers true if any range includes the given value + * + * @param ranges + * @param value + * @return + */ + public static boolean contains(List ranges, int value) + { + if (ranges == null) + { + return false; + } + for (int[] range : ranges) + { + if (range[1] >= range[0] && value >= range[0] && value <= range[1]) + { + /* + * value within ascending range + */ + return true; + } + if (range[1] < range[0] && value <= range[0] && value >= range[1]) + { + /* + * value within descending range + */ + return true; + } + } + return false; + } + + /** + * Removes a specified number of positions from the start of a ranges list. + * For example, could be used to adjust cds ranges to allow for an incomplete + * start codon. Subranges are removed completely, or their start positions + * adjusted, until the required number of positions has been removed from the + * range. Reverse strand ranges are supported. The input array is not + * modified. + * + * @param removeCount + * @param ranges + * an array of [start, end, start, end...] positions + * @return a new array with the first removeCount positions removed + */ + public static int[] removeStartPositions(int removeCount, + final int[] ranges) + { + if (removeCount <= 0) + { + return ranges; + } + + int[] copy = Arrays.copyOf(ranges, ranges.length); + int sxpos = -1; + int cdspos = 0; + for (int x = 0; x < copy.length && sxpos == -1; x += 2) + { + cdspos += Math.abs(copy[x + 1] - copy[x]) + 1; + if (removeCount < cdspos) + { + /* + * we have removed enough, time to finish + */ + sxpos = x; + + /* + * increment start of first exon, or decrement if reverse strand + */ + if (copy[x] <= copy[x + 1]) + { + copy[x] = copy[x + 1] - cdspos + removeCount + 1; + } + else + { + copy[x] = copy[x + 1] + cdspos - removeCount - 1; + } + break; + } + } + + if (sxpos > 0) + { + /* + * we dropped at least one entire sub-range - compact the array + */ + int[] nxon = new int[copy.length - sxpos]; + System.arraycopy(copy, sxpos, nxon, 0, copy.length - sxpos); + return nxon; + } + return copy; + } }