X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Futil%2FMappingUtils.java;h=515ff51f7795babb6408bc5f044c2af936dc93b7;hb=14e9fd31857401af4f5547430330d3d557cbd277;hp=45d166d0ad2976ea5cc94a0f68de0f1bb736c81d;hpb=edf2c4eda8ef83c1c86aacd7395d51fc59b89ea1;p=jalview.git diff --git a/src/jalview/util/MappingUtils.java b/src/jalview/util/MappingUtils.java index 45d166d..515ff51 100644 --- a/src/jalview/util/MappingUtils.java +++ b/src/jalview/util/MappingUtils.java @@ -38,12 +38,11 @@ import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; import java.util.ArrayList; -import java.util.Collections; +import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.Set; /** * Helper methods for manipulations involving sequence mappings. @@ -69,7 +68,7 @@ public final class MappingUtils */ protected static void mapCutOrPaste(Edit edit, boolean undo, List targetSeqs, EditCommand result, - Set mappings) + List mappings) { Action action = edit.getAction(); if (undo) @@ -93,7 +92,7 @@ public final class MappingUtils */ public static EditCommand mapEditCommand(EditCommand command, boolean undo, final AlignmentI mapTo, char gapChar, - Set mappings) + List mappings) { /* * For now, only support mapping from protein edits to cDna @@ -165,7 +164,7 @@ public final class MappingUtils Map originalSequences, final List targetSeqs, Map targetCopies, char gapChar, - EditCommand result, Set mappings) + EditCommand result, List mappings) { Action action = edit.getAction(); @@ -268,7 +267,7 @@ public final class MappingUtils * @return */ public static SearchResults buildSearchResults(SequenceI seq, int index, - Set seqmappings) + List seqmappings) { SearchResults results = new SearchResults(); addSearchResults(results, seq, index, seqmappings); @@ -285,7 +284,7 @@ public final class MappingUtils * @param seqmappings */ public static void addSearchResults(SearchResults results, SequenceI seq, - int index, Set seqmappings) + int index, List seqmappings) { if (index >= seq.getStart() && index <= seq.getEnd()) { @@ -314,7 +313,7 @@ public final class MappingUtils */ boolean targetIsNucleotide = mapTo.isNucleotide(); AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo; - Set codonFrames = protein.getAlignment() + List codonFrames = protein.getAlignment() .getCodonFrames(); /* * Copy group name, colours etc, but not sequences or sequence colour scheme @@ -375,15 +374,15 @@ public final class MappingUtils /* * Found a sequence mapping. Locate the start/end mapped residues. */ + List mapping = Arrays.asList(new AlignedCodonFrame[] { acf }); SearchResults sr = buildSearchResults(selected, - startResiduePos, Collections.singleton(acf)); + startResiduePos, mapping); for (Match m : sr.getResults()) { mappedStartResidue = m.getStart(); mappedEndResidue = m.getEnd(); } - sr = buildSearchResults(selected, endResiduePos, - Collections.singleton(acf)); + sr = buildSearchResults(selected, endResiduePos, mapping); for (Match m : sr.getResults()) { mappedStartResidue = Math.min(mappedStartResidue, @@ -428,7 +427,7 @@ public final class MappingUtils * @return */ public static CommandI mapOrderCommand(OrderCommand command, - boolean undo, AlignmentI mapTo, Set mappings) + boolean undo, AlignmentI mapTo, List mappings) { SequenceI[] sortOrder = command.getSequenceOrder(undo); List mappedOrder = new ArrayList(); @@ -512,7 +511,7 @@ public final class MappingUtils { boolean targetIsNucleotide = mapTo.isNucleotide(); AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo; - Set codonFrames = protein.getAlignment() + List codonFrames = protein.getAlignment() .getCodonFrames(); ColumnSelection mappedColumns = new ColumnSelection(); @@ -523,8 +522,6 @@ public final class MappingUtils char fromGapChar = mapFrom.getAlignment().getGapCharacter(); - // FIXME allow for hidden columns - /* * For each mapped column, find the range of columns that residues in that * column map to. @@ -558,7 +555,7 @@ public final class MappingUtils * @param fromGapChar */ protected static void mapHiddenColumns(int[] hidden, - Set mappings, + List mappings, ColumnSelection mappedColumns, List fromSequences, List toSequences, char fromGapChar) { @@ -591,7 +588,8 @@ public final class MappingUtils * @param toSequences * @param fromGapChar */ - protected static void mapColumn(int col, Set mappings, + protected static void mapColumn(int col, + List mappings, ColumnSelection mappedColumns, List fromSequences, List toSequences, char fromGapChar) { @@ -625,7 +623,7 @@ public final class MappingUtils * @return */ protected static int[] findMappedColumns(int col, - Set mappings, List fromSequences, + List mappings, List fromSequences, List toSequences, char fromGapChar) { int[] mappedTo = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE }; @@ -679,8 +677,8 @@ public final class MappingUtils } /** - * Returns the mapped codon for a given aligned sequence column position (base - * 0). + * Returns the mapped codon or codons for a given aligned sequence column + * position (base 0). * * @param seq * an aligned peptide sequence @@ -688,26 +686,32 @@ public final class MappingUtils * an aligned column position (base 0) * @param mappings * a set of codon mappings - * @return the bases of the mapped codon in the cDNA dataset sequence, or null - * if not found + * @return the bases of the mapped codon(s) in the cDNA dataset sequence(s), + * or an empty list if none found */ - public static char[] findCodonFor(SequenceI seq, int col, - Set mappings) + public static List findCodonsFor(SequenceI seq, int col, + List mappings) { + List result = new ArrayList(); int dsPos = seq.findPosition(col); for (AlignedCodonFrame mapping : mappings) { if (mapping.involvesSequence(seq)) { - return mapping.getMappedCodon(seq.getDatasetSequence(), dsPos); + List codons = mapping.getMappedCodons( + seq.getDatasetSequence(), dsPos); + if (codons != null) + { + result.addAll(codons); + } } } - return null; + return result; } /** - * Converts a series of [start, end] ranges into an array of individual - * positions. + * Converts a series of [start, end] range pairs into an array of individual + * positions. This also caters for 'reverse strand' (start > end) cases. * * @param ranges * @return @@ -720,17 +724,21 @@ public final class MappingUtils int count = 0; for (int i = 0; i < ranges.length - 1; i += 2) { - count += ranges[i + 1] - ranges[i] + 1; + count += Math.abs(ranges[i + 1] - ranges[i]) + 1; } int[] result = new int[count]; int k = 0; for (int i = 0; i < ranges.length - 1; i += 2) { - for (int j = ranges[i]; j <= ranges[i + 1]; j++) + int from = ranges[i]; + final int to = ranges[i + 1]; + int step = from <= to ? 1 : -1; + do { - result[k++] = j; - } + result[k++] = from; + from += step; + } while (from != to + step); } return result; } @@ -744,7 +752,14 @@ public final class MappingUtils * @return */ public static List findMappingsForSequence( - SequenceI sequence, Set mappings) + SequenceI sequence, List mappings) + { + return findMappingsForSequenceAndOthers(sequence, mappings, null); + } + + public static List findMappingsForSequenceAndOthers( + SequenceI sequence, List mappings, + AlignmentI alignment) { List result = new ArrayList(); if (sequence == null || mappings == null) @@ -755,9 +770,157 @@ public final class MappingUtils { if (mapping.involvesSequence(sequence)) { - result.add(mapping); + if (alignment != null) + { + for (SequenceI otherseq : alignment.getSequences()) + { + if (otherseq == sequence + || (otherseq.getDatasetSequence() != null && (otherseq + .getDatasetSequence() == sequence || otherseq + .getDatasetSequence() == sequence + .getDatasetSequence()))) + { + // skip sequences in subset which directly relate to sequence + continue; + } + if (mapping.involvesSequence(otherseq)) + { + // selected a mapping contained in subselect alignment + result.add(mapping); + break; + } + } + } + else + { + result.add(mapping); + } } } return result; } + + /** + * Returns the total length of the supplied ranges, which may be as single + * [start, end] or multiple [start, end, start, end ...] + * + * @param ranges + * @return + */ + public static int getLength(List ranges) + { + if (ranges == null) + { + return 0; + } + int length = 0; + for (int[] range : ranges) + { + if (range.length % 2 != 0) + { + System.err.println("Error unbalance start/end ranges: " + + ranges.toString()); + return 0; + } + for (int i = 0; i < range.length - 1; i += 2) + { + length += Math.abs(range[i + 1] - range[i]) + 1; + } + } + return length; + } + + /** + * Answers true if any range includes the given value + * + * @param ranges + * @param value + * @return + */ + public static boolean contains(List ranges, int value) + { + if (ranges == null) + { + return false; + } + for (int[] range : ranges) + { + if (range[1] >= range[0] && value >= range[0] && value <= range[1]) + { + /* + * value within ascending range + */ + return true; + } + if (range[1] < range[0] && value <= range[0] && value >= range[1]) + { + /* + * value within descending range + */ + return true; + } + } + return false; + } + + /** + * Removes a specified number of positions from the start of a ranges list. + * For example, could be used to adjust cds ranges to allow for an incomplete + * start codon. Subranges are removed completely, or their start positions + * adjusted, until the required number of positions has been removed from the + * range. Reverse strand ranges are supported. The input array is not + * modified. + * + * @param removeCount + * @param ranges + * an array of [start, end, start, end...] positions + * @return a new array with the first removeCount positions removed + */ + public static int[] removeStartPositions(int removeCount, + final int[] ranges) + { + if (removeCount <= 0) + { + return ranges; + } + + int[] copy = Arrays.copyOf(ranges, ranges.length); + int sxpos = -1; + int cdspos = 0; + for (int x = 0; x < copy.length && sxpos == -1; x += 2) + { + cdspos += Math.abs(copy[x + 1] - copy[x]) + 1; + if (removeCount < cdspos) + { + /* + * we have removed enough, time to finish + */ + sxpos = x; + + /* + * increment start of first exon, or decrement if reverse strand + */ + if (copy[x] <= copy[x + 1]) + { + copy[x] = copy[x + 1] - cdspos + removeCount + 1; + } + else + { + copy[x] = copy[x + 1] + cdspos - removeCount - 1; + } + break; + } + } + + if (sxpos > 0) + { + /* + * we dropped at least one entire sub-range - compact the array + */ + int[] nxon = new int[copy.length - sxpos]; + System.arraycopy(copy, sxpos, nxon, 0, copy.length - sxpos); + return nxon; + } + return copy; + } }