2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.analysis.AlignmentSorter;
24 import jalview.api.AlignViewportI;
25 import jalview.commands.CommandI;
26 import jalview.commands.EditCommand;
27 import jalview.commands.EditCommand.Action;
28 import jalview.commands.EditCommand.Edit;
29 import jalview.commands.OrderCommand;
30 import jalview.datamodel.AlignedCodonFrame;
31 import jalview.datamodel.AlignmentI;
32 import jalview.datamodel.AlignmentOrder;
33 import jalview.datamodel.ColumnSelection;
34 import jalview.datamodel.HiddenColumns;
35 import jalview.datamodel.SearchResultMatchI;
36 import jalview.datamodel.SearchResults;
37 import jalview.datamodel.SearchResultsI;
38 import jalview.datamodel.Sequence;
39 import jalview.datamodel.SequenceGroup;
40 import jalview.datamodel.SequenceI;
42 import java.util.ArrayList;
43 import java.util.Arrays;
44 import java.util.HashMap;
45 import java.util.Iterator;
46 import java.util.List;
50 * Helper methods for manipulations involving sequence mappings.
55 public final class MappingUtils
59 * Helper method to map a CUT or PASTE command.
62 * the original command
64 * if true, the command is to be undone
66 * the mapped sequences to apply the mapped command to
68 * the mapped EditCommand to add to
71 protected static void mapCutOrPaste(Edit edit, boolean undo,
72 List<SequenceI> targetSeqs, EditCommand result,
73 List<AlignedCodonFrame> mappings)
75 Action action = edit.getAction();
78 action = action.getUndoAction();
81 System.err.println("MappingUtils.mapCutOrPaste not yet implemented");
85 * Returns a new EditCommand representing the given command as mapped to the
86 * given sequences. If there is no mapping, returns null.
95 public static EditCommand mapEditCommand(EditCommand command,
96 boolean undo, final AlignmentI mapTo, char gapChar,
97 List<AlignedCodonFrame> mappings)
100 * For now, only support mapping from protein edits to cDna
102 if (!mapTo.isNucleotide())
108 * Cache a copy of the target sequences so we can mimic successive edits on
109 * them. This lets us compute mappings for all edits in the set.
111 Map<SequenceI, SequenceI> targetCopies = new HashMap<>();
112 for (SequenceI seq : mapTo.getSequences())
114 SequenceI ds = seq.getDatasetSequence();
117 final SequenceI copy = new Sequence(seq);
118 copy.setDatasetSequence(ds);
119 targetCopies.put(ds, copy);
124 * Compute 'source' sequences as they were before applying edits:
126 Map<SequenceI, SequenceI> originalSequences = command.priorState(undo);
128 EditCommand result = new EditCommand();
129 Iterator<Edit> edits = command.getEditIterator(!undo);
130 while (edits.hasNext())
132 Edit edit = edits.next();
133 if (edit.getAction() == Action.CUT
134 || edit.getAction() == Action.PASTE)
136 mapCutOrPaste(edit, undo, mapTo.getSequences(), result, mappings);
138 else if (edit.getAction() == Action.INSERT_GAP
139 || edit.getAction() == Action.DELETE_GAP)
141 mapInsertOrDelete(edit, undo, originalSequences,
142 mapTo.getSequences(), targetCopies, gapChar, result,
146 return result.getSize() > 0 ? result : null;
150 * Helper method to map an edit command to insert or delete gaps.
153 * the original command
155 * if true, the action is to undo the command
156 * @param originalSequences
157 * the sequences the command acted on
159 * @param targetCopies
162 * the new EditCommand to add mapped commands to
165 protected static void mapInsertOrDelete(Edit edit, boolean undo,
166 Map<SequenceI, SequenceI> originalSequences,
167 final List<SequenceI> targetSeqs,
168 Map<SequenceI, SequenceI> targetCopies, char gapChar,
169 EditCommand result, List<AlignedCodonFrame> mappings)
171 Action action = edit.getAction();
174 * Invert sense of action if an Undo.
178 action = action.getUndoAction();
180 final int count = edit.getNumber();
181 final int editPos = edit.getPosition();
182 for (SequenceI seq : edit.getSequences())
185 * Get residue position at (or to right of) edit location. Note we use our
186 * 'copy' of the sequence before editing for this.
188 SequenceI ds = seq.getDatasetSequence();
193 final SequenceI actedOn = originalSequences.get(ds);
194 final int seqpos = actedOn.findPosition(editPos);
197 * Determine all mappings from this position to mapped sequences.
199 SearchResultsI sr = buildSearchResults(seq, seqpos, mappings);
203 for (SequenceI targetSeq : targetSeqs)
205 ds = targetSeq.getDatasetSequence();
210 SequenceI copyTarget = targetCopies.get(ds);
211 final int[] match = sr.getResults(copyTarget, 0,
212 copyTarget.getLength());
215 final int ratio = 3; // TODO: compute this - how?
216 final int mappedCount = count * ratio;
219 * Shift Delete start position left, as it acts on positions to its
222 int mappedEditPos = action == Action.DELETE_GAP
223 ? match[0] - mappedCount
225 Edit e = result.new Edit(action, new SequenceI[] { targetSeq },
226 mappedEditPos, mappedCount, gapChar);
230 * and 'apply' the edit to our copy of its target sequence
232 if (action == Action.INSERT_GAP)
234 copyTarget.setSequence(new String(
235 StringUtils.insertCharAt(copyTarget.getSequence(),
236 mappedEditPos, mappedCount, gapChar)));
238 else if (action == Action.DELETE_GAP)
240 copyTarget.setSequence(new String(
241 StringUtils.deleteChars(copyTarget.getSequence(),
242 mappedEditPos, mappedEditPos + mappedCount)));
248 * and 'apply' the edit to our copy of its source sequence
250 if (action == Action.INSERT_GAP)
252 actedOn.setSequence(new String(StringUtils.insertCharAt(
253 actedOn.getSequence(), editPos, count, gapChar)));
255 else if (action == Action.DELETE_GAP)
257 actedOn.setSequence(new String(StringUtils.deleteChars(
258 actedOn.getSequence(), editPos, editPos + count)));
264 * Returns a SearchResults object describing the mapped region corresponding
265 * to the specified sequence position.
272 public static SearchResultsI buildSearchResults(SequenceI seq, int index,
273 List<AlignedCodonFrame> seqmappings)
275 SearchResultsI results = new SearchResults();
276 addSearchResults(results, seq, index, seqmappings);
281 * Adds entries to a SearchResults object describing the mapped region
282 * corresponding to the specified sequence position.
289 public static void addSearchResults(SearchResultsI results, SequenceI seq,
290 int index, List<AlignedCodonFrame> seqmappings)
292 if (index >= seq.getStart() && index <= seq.getEnd())
294 for (AlignedCodonFrame acf : seqmappings)
296 acf.markMappedRegion(seq, index, results);
302 * Returns a (possibly empty) SequenceGroup containing any sequences in the
303 * mapped viewport corresponding to the given group in the source viewport.
310 public static SequenceGroup mapSequenceGroup(final SequenceGroup sg,
311 final AlignViewportI mapFrom, final AlignViewportI mapTo)
314 * Note the SequenceGroup holds aligned sequences, the mappings hold dataset
317 boolean targetIsNucleotide = mapTo.isNucleotide();
318 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
319 List<AlignedCodonFrame> codonFrames = protein.getAlignment()
322 * Copy group name, colours etc, but not sequences or sequence colour scheme
324 SequenceGroup mappedGroup = new SequenceGroup(sg);
325 mappedGroup.setColourScheme(mapTo.getGlobalColourScheme());
328 int minStartCol = -1;
330 final int selectionStartRes = sg.getStartRes();
331 final int selectionEndRes = sg.getEndRes();
332 for (SequenceI selected : sg.getSequences())
335 * Find the widest range of non-gapped positions in the selection range
337 int firstUngappedPos = selectionStartRes;
338 while (firstUngappedPos <= selectionEndRes
339 && Comparison.isGap(selected.getCharAt(firstUngappedPos)))
345 * If this sequence is only gaps in the selected range, skip it
347 if (firstUngappedPos > selectionEndRes)
352 int lastUngappedPos = selectionEndRes;
353 while (lastUngappedPos >= selectionStartRes
354 && Comparison.isGap(selected.getCharAt(lastUngappedPos)))
360 * Find the selected start/end residue positions in sequence
362 int startResiduePos = selected.findPosition(firstUngappedPos);
363 int endResiduePos = selected.findPosition(lastUngappedPos);
365 for (AlignedCodonFrame acf : codonFrames)
367 SequenceI mappedSequence = targetIsNucleotide
368 ? acf.getDnaForAaSeq(selected)
369 : acf.getAaForDnaSeq(selected);
370 if (mappedSequence != null)
372 for (SequenceI seq : mapTo.getAlignment().getSequences())
374 int mappedStartResidue = 0;
375 int mappedEndResidue = 0;
376 if (seq.getDatasetSequence() == mappedSequence)
379 * Found a sequence mapping. Locate the start/end mapped residues.
381 List<AlignedCodonFrame> mapping = Arrays
382 .asList(new AlignedCodonFrame[]
384 SearchResultsI sr = buildSearchResults(selected,
385 startResiduePos, mapping);
386 for (SearchResultMatchI m : sr.getResults())
388 mappedStartResidue = m.getStart();
389 mappedEndResidue = m.getEnd();
391 sr = buildSearchResults(selected, endResiduePos, mapping);
392 for (SearchResultMatchI m : sr.getResults())
394 mappedStartResidue = Math.min(mappedStartResidue,
396 mappedEndResidue = Math.max(mappedEndResidue, m.getEnd());
400 * Find the mapped aligned columns, save the range. Note findIndex
401 * returns a base 1 position, SequenceGroup uses base 0
403 int mappedStartCol = seq.findIndex(mappedStartResidue) - 1;
404 minStartCol = minStartCol == -1 ? mappedStartCol
405 : Math.min(minStartCol, mappedStartCol);
406 int mappedEndCol = seq.findIndex(mappedEndResidue) - 1;
407 maxEndCol = maxEndCol == -1 ? mappedEndCol
408 : Math.max(maxEndCol, mappedEndCol);
409 mappedGroup.addSequence(seq, false);
416 mappedGroup.setStartRes(minStartCol < 0 ? 0 : minStartCol);
417 mappedGroup.setEndRes(maxEndCol < 0 ? 0 : maxEndCol);
422 * Returns an OrderCommand equivalent to the given one, but acting on mapped
423 * sequences as described by the mappings, or null if no mapping can be made.
426 * the original order command
428 * if true, the action is to undo the sort
430 * the alignment we are mapping to
432 * the mappings available
435 public static CommandI mapOrderCommand(OrderCommand command, boolean undo,
436 AlignmentI mapTo, List<AlignedCodonFrame> mappings)
438 SequenceI[] sortOrder = command.getSequenceOrder(undo);
439 List<SequenceI> mappedOrder = new ArrayList<>();
443 * Assumption: we are only interested in a cDNA/protein mapping; refactor in
444 * future if we want to support sorting (c)dna as (c)dna or protein as
447 boolean mappingToNucleotide = mapTo.isNucleotide();
448 for (SequenceI seq : sortOrder)
450 for (AlignedCodonFrame acf : mappings)
452 SequenceI mappedSeq = mappingToNucleotide ? acf.getDnaForAaSeq(seq)
453 : acf.getAaForDnaSeq(seq);
454 if (mappedSeq != null)
456 for (SequenceI seq2 : mapTo.getSequences())
458 if (seq2.getDatasetSequence() == mappedSeq)
460 mappedOrder.add(seq2);
470 * Return null if no mappings made.
478 * Add any unmapped sequences on the end of the sort in their original
481 if (j < mapTo.getHeight())
483 for (SequenceI seq : mapTo.getSequences())
485 if (!mappedOrder.contains(seq))
487 mappedOrder.add(seq);
493 * Have to sort the sequences before constructing the OrderCommand - which
494 * then resorts them?!?
496 final SequenceI[] mappedOrderArray = mappedOrder
497 .toArray(new SequenceI[mappedOrder.size()]);
498 SequenceI[] oldOrder = mapTo.getSequencesArray();
499 AlignmentSorter.sortBy(mapTo, new AlignmentOrder(mappedOrderArray));
500 final OrderCommand result = new OrderCommand(command.getDescription(),
506 * Returns a ColumnSelection in the 'mapTo' view which corresponds to the
507 * given selection in the 'mapFrom' view. We assume one is nucleotide, the
508 * other is protein (and holds the mappings from codons to protein residues).
515 public static void mapColumnSelection(ColumnSelection colsel,
516 HiddenColumns hiddencols, AlignViewportI mapFrom,
517 AlignViewportI mapTo, ColumnSelection newColSel,
518 HiddenColumns newHidden)
520 boolean targetIsNucleotide = mapTo.isNucleotide();
521 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
522 List<AlignedCodonFrame> codonFrames = protein.getAlignment()
527 return; // mappedColumns;
530 char fromGapChar = mapFrom.getAlignment().getGapCharacter();
533 * For each mapped column, find the range of columns that residues in that
536 List<SequenceI> fromSequences = mapFrom.getAlignment().getSequences();
537 List<SequenceI> toSequences = mapTo.getAlignment().getSequences();
539 for (Integer sel : colsel.getSelected())
541 mapColumn(sel.intValue(), codonFrames, newColSel, fromSequences,
542 toSequences, fromGapChar);
545 Iterator<int[]> regions = hiddencols.iterator();
546 while (regions.hasNext())
548 mapHiddenColumns(regions.next(), codonFrames, newHidden,
550 toSequences, fromGapChar);
552 return; // mappedColumns;
556 * Helper method that maps a [start, end] hidden column range to its mapped
561 * @param mappedColumns
562 * @param fromSequences
566 protected static void mapHiddenColumns(int[] hidden,
567 List<AlignedCodonFrame> mappings, HiddenColumns mappedColumns,
568 List<SequenceI> fromSequences, List<SequenceI> toSequences,
571 for (int col = hidden[0]; col <= hidden[1]; col++)
573 int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
574 toSequences, fromGapChar);
577 * Add the range of hidden columns to the mapped selection (converting
580 if (mappedTo != null)
582 mappedColumns.hideColumns(mappedTo[0] - 1, mappedTo[1] - 1);
588 * Helper method to map one column selection
591 * the column number (base 0)
593 * the sequence mappings
594 * @param mappedColumns
595 * the mapped column selections to add to
596 * @param fromSequences
600 protected static void mapColumn(int col, List<AlignedCodonFrame> mappings,
601 ColumnSelection mappedColumns, List<SequenceI> fromSequences,
602 List<SequenceI> toSequences, char fromGapChar)
604 int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
605 toSequences, fromGapChar);
608 * Add the range of mapped columns to the mapped selection (converting
609 * base 1 to base 0). Note that this may include intron-only regions which
610 * lie between the start and end ranges of the selection.
612 if (mappedTo != null)
614 for (int i = mappedTo[0]; i <= mappedTo[1]; i++)
616 mappedColumns.addElement(i - 1);
622 * Helper method to find the range of columns mapped to from one column.
623 * Returns the maximal range of columns mapped to from all sequences in the
624 * source column, or null if no mappings were found.
628 * @param fromSequences
633 protected static int[] findMappedColumns(int col,
634 List<AlignedCodonFrame> mappings, List<SequenceI> fromSequences,
635 List<SequenceI> toSequences, char fromGapChar)
637 int[] mappedTo = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE };
638 boolean found = false;
641 * For each sequence in the 'from' alignment
643 for (SequenceI fromSeq : fromSequences)
646 * Ignore gaps (unmapped anyway)
648 if (fromSeq.getCharAt(col) == fromGapChar)
654 * Get the residue position and find the mapped position.
656 int residuePos = fromSeq.findPosition(col);
657 SearchResultsI sr = buildSearchResults(fromSeq, residuePos, mappings);
658 for (SearchResultMatchI m : sr.getResults())
660 int mappedStartResidue = m.getStart();
661 int mappedEndResidue = m.getEnd();
662 SequenceI mappedSeq = m.getSequence();
665 * Locate the aligned sequence whose dataset is mappedSeq. TODO a
666 * datamodel that can do this efficiently.
668 for (SequenceI toSeq : toSequences)
670 if (toSeq.getDatasetSequence() == mappedSeq)
672 int mappedStartCol = toSeq.findIndex(mappedStartResidue);
673 int mappedEndCol = toSeq.findIndex(mappedEndResidue);
674 mappedTo[0] = Math.min(mappedTo[0], mappedStartCol);
675 mappedTo[1] = Math.max(mappedTo[1], mappedEndCol);
678 // note: remove break if we ever want to map one to many sequences
683 return found ? mappedTo : null;
687 * Returns the mapped codon or codons for a given aligned sequence column
691 * an aligned peptide sequence
693 * an aligned column position (base 0)
695 * a set of codon mappings
696 * @return the bases of the mapped codon(s) in the cDNA dataset sequence(s),
697 * or an empty list if none found
699 public static List<char[]> findCodonsFor(SequenceI seq, int col,
700 List<AlignedCodonFrame> mappings)
702 List<char[]> result = new ArrayList<>();
703 int dsPos = seq.findPosition(col);
704 for (AlignedCodonFrame mapping : mappings)
706 if (mapping.involvesSequence(seq))
708 List<char[]> codons = mapping
709 .getMappedCodons(seq.getDatasetSequence(), dsPos);
712 result.addAll(codons);
720 * Converts a series of [start, end] range pairs into an array of individual
721 * positions. This also caters for 'reverse strand' (start > end) cases.
726 public static int[] flattenRanges(int[] ranges)
729 * Count how many positions altogether
732 for (int i = 0; i < ranges.length - 1; i += 2)
734 count += Math.abs(ranges[i + 1] - ranges[i]) + 1;
737 int[] result = new int[count];
739 for (int i = 0; i < ranges.length - 1; i += 2)
741 int from = ranges[i];
742 final int to = ranges[i + 1];
743 int step = from <= to ? 1 : -1;
748 } while (from != to + step);
754 * Returns a list of any mappings that are from or to the given (aligned or
761 public static List<AlignedCodonFrame> findMappingsForSequence(
762 SequenceI sequence, List<AlignedCodonFrame> mappings)
764 return findMappingsForSequenceAndOthers(sequence, mappings, null);
768 * Returns a list of any mappings that are from or to the given (aligned or
769 * dataset) sequence, optionally limited to mappings involving one of a given
777 public static List<AlignedCodonFrame> findMappingsForSequenceAndOthers(
778 SequenceI sequence, List<AlignedCodonFrame> mappings,
779 List<SequenceI> filterList)
781 List<AlignedCodonFrame> result = new ArrayList<>();
782 if (sequence == null || mappings == null)
786 for (AlignedCodonFrame mapping : mappings)
788 if (mapping.involvesSequence(sequence))
790 if (filterList != null)
792 for (SequenceI otherseq : filterList)
794 SequenceI otherDataset = otherseq.getDatasetSequence();
795 if (otherseq == sequence
796 || otherseq == sequence.getDatasetSequence()
797 || (otherDataset != null && (otherDataset == sequence
798 || otherDataset == sequence
799 .getDatasetSequence())))
801 // skip sequences in subset which directly relate to sequence
804 if (mapping.involvesSequence(otherseq))
806 // selected a mapping contained in subselect alignment
822 * Returns the total length of the supplied ranges, which may be as single
823 * [start, end] or multiple [start, end, start, end ...]
828 public static int getLength(List<int[]> ranges)
835 for (int[] range : ranges)
837 if (range.length % 2 != 0)
840 "Error unbalance start/end ranges: " + ranges.toString());
843 for (int i = 0; i < range.length - 1; i += 2)
845 length += Math.abs(range[i + 1] - range[i]) + 1;
852 * Answers true if any range includes the given value
858 public static boolean contains(List<int[]> ranges, int value)
864 for (int[] range : ranges)
866 if (range[1] >= range[0] && value >= range[0] && value <= range[1])
869 * value within ascending range
873 if (range[1] < range[0] && value <= range[0] && value >= range[1])
876 * value within descending range
885 * Removes a specified number of positions from the start of a ranges list.
886 * For example, could be used to adjust cds ranges to allow for an incomplete
887 * start codon. Subranges are removed completely, or their start positions
888 * adjusted, until the required number of positions has been removed from the
889 * range. Reverse strand ranges are supported. The input array is not
894 * an array of [start, end, start, end...] positions
895 * @return a new array with the first removeCount positions removed
897 public static int[] removeStartPositions(int removeCount,
900 if (removeCount <= 0)
905 int[] copy = Arrays.copyOf(ranges, ranges.length);
908 for (int x = 0; x < copy.length && sxpos == -1; x += 2)
910 cdspos += Math.abs(copy[x + 1] - copy[x]) + 1;
911 if (removeCount < cdspos)
914 * we have removed enough, time to finish
919 * increment start of first exon, or decrement if reverse strand
921 if (copy[x] <= copy[x + 1])
923 copy[x] = copy[x + 1] - cdspos + removeCount + 1;
927 copy[x] = copy[x + 1] + cdspos - removeCount - 1;
936 * we dropped at least one entire sub-range - compact the array
938 int[] nxon = new int[copy.length - sxpos];
939 System.arraycopy(copy, sxpos, nxon, 0, copy.length - sxpos);
946 * Answers true if range's start-end positions include those of queryRange,
947 * where either range might be in reverse direction, else false
952 * a candidate subrange of range (start2-end2)
955 public static boolean rangeContains(int[] range, int[] queryRange)
957 if (range == null || queryRange == null || range.length != 2
958 || queryRange.length != 2)
966 int min = Math.min(range[0], range[1]);
967 int max = Math.max(range[0], range[1]);
969 return (min <= queryRange[0] && max >= queryRange[0]
970 && min <= queryRange[1] && max >= queryRange[1]);
974 * Removes the specified number of positions from the given ranges. Provided
975 * to allow a stop codon to be stripped from a CDS sequence so that it matches
976 * the peptide translation length.
980 * a list of (single) [start, end] ranges
983 public static void removeEndPositions(int positions,
986 int toRemove = positions;
987 Iterator<int[]> it = new ReverseListIterator<>(ranges);
990 int[] endRange = it.next();
991 if (endRange.length != 2)
994 * not coded for [start1, end1, start2, end2, ...]
997 .println("MappingUtils.removeEndPositions doesn't handle multiple ranges");
1001 int length = endRange[1] - endRange[0] + 1;
1005 * not coded for a reverse strand range (end < start)
1008 .println("MappingUtils.removeEndPositions doesn't handle reverse strand");
1011 if (length > toRemove)
1013 endRange[1] -= toRemove;