2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
import jalview.analysis.AlignmentSorter;
import jalview.api.AlignViewportI;
import jalview.bin.Cache;
import jalview.commands.CommandI;
import jalview.commands.EditCommand;
import jalview.commands.EditCommand.Action;
import jalview.commands.EditCommand.Edit;
import jalview.commands.OrderCommand;
import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentOrder;
import jalview.datamodel.ColumnSelection;
import jalview.datamodel.HiddenColumns;
import jalview.datamodel.SearchResultMatchI;
import jalview.datamodel.SearchResults;
import jalview.datamodel.SearchResultsI;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceGroup;
import jalview.datamodel.SequenceI;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
52 * Helper methods for manipulations involving sequence mappings.
57 public final class MappingUtils
61 * Helper method to map a CUT or PASTE command.
64 * the original command
66 * if true, the command is to be undone
68 * the mapped sequences to apply the mapped command to
70 * the mapped EditCommand to add to
73 protected static void mapCutOrPaste(Edit edit, boolean undo,
74 List<SequenceI> targetSeqs, EditCommand result,
75 List<AlignedCodonFrame> mappings)
77 Action action = edit.getAction();
80 action = action.getUndoAction();
83 Cache.log.error("MappingUtils.mapCutOrPaste not yet implemented");
87 * Returns a new EditCommand representing the given command as mapped to the
88 * given sequences. If there is no mapping, returns null.
97 public static EditCommand mapEditCommand(EditCommand command,
98 boolean undo, final AlignmentI mapTo, char gapChar,
99 List<AlignedCodonFrame> mappings)
102 * For now, only support mapping from protein edits to cDna
104 if (!mapTo.isNucleotide())
110 * Cache a copy of the target sequences so we can mimic successive edits on
111 * them. This lets us compute mappings for all edits in the set.
113 Map<SequenceI, SequenceI> targetCopies = new HashMap<>();
114 for (SequenceI seq : mapTo.getSequences())
116 SequenceI ds = seq.getDatasetSequence();
119 final SequenceI copy = new Sequence(seq);
120 copy.setDatasetSequence(ds);
121 targetCopies.put(ds, copy);
126 * Compute 'source' sequences as they were before applying edits:
128 Map<SequenceI, SequenceI> originalSequences = command.priorState(undo);
130 EditCommand result = new EditCommand();
131 Iterator<Edit> edits = command.getEditIterator(!undo);
132 while (edits.hasNext())
134 Edit edit = edits.next();
135 if (edit.getAction() == Action.CUT
136 || edit.getAction() == Action.PASTE)
138 mapCutOrPaste(edit, undo, mapTo.getSequences(), result, mappings);
140 else if (edit.getAction() == Action.INSERT_GAP
141 || edit.getAction() == Action.DELETE_GAP)
143 mapInsertOrDelete(edit, undo, originalSequences,
144 mapTo.getSequences(), targetCopies, gapChar, result,
148 return result.getSize() > 0 ? result : null;
152 * Helper method to map an edit command to insert or delete gaps.
155 * the original command
157 * if true, the action is to undo the command
158 * @param originalSequences
159 * the sequences the command acted on
161 * @param targetCopies
164 * the new EditCommand to add mapped commands to
167 protected static void mapInsertOrDelete(Edit edit, boolean undo,
168 Map<SequenceI, SequenceI> originalSequences,
169 final List<SequenceI> targetSeqs,
170 Map<SequenceI, SequenceI> targetCopies, char gapChar,
171 EditCommand result, List<AlignedCodonFrame> mappings)
173 Action action = edit.getAction();
176 * Invert sense of action if an Undo.
180 action = action.getUndoAction();
182 final int count = edit.getNumber();
183 final int editPos = edit.getPosition();
184 for (SequenceI seq : edit.getSequences())
187 * Get residue position at (or to right of) edit location. Note we use our
188 * 'copy' of the sequence before editing for this.
190 SequenceI ds = seq.getDatasetSequence();
195 final SequenceI actedOn = originalSequences.get(ds);
196 final int seqpos = actedOn.findPosition(editPos);
199 * Determine all mappings from this position to mapped sequences.
201 SearchResultsI sr = buildSearchResults(seq, seqpos, mappings);
205 for (SequenceI targetSeq : targetSeqs)
207 ds = targetSeq.getDatasetSequence();
212 SequenceI copyTarget = targetCopies.get(ds);
213 final int[] match = sr.getResults(copyTarget, 0,
214 copyTarget.getLength());
217 final int ratio = 3; // TODO: compute this - how?
218 final int mappedCount = count * ratio;
221 * Shift Delete start position left, as it acts on positions to its
224 int mappedEditPos = action == Action.DELETE_GAP
225 ? match[0] - mappedCount
227 Edit e = result.new Edit(action, new SequenceI[] { targetSeq },
228 mappedEditPos, mappedCount, gapChar);
232 * and 'apply' the edit to our copy of its target sequence
234 if (action == Action.INSERT_GAP)
236 copyTarget.setSequence(new String(
237 StringUtils.insertCharAt(copyTarget.getSequence(),
238 mappedEditPos, mappedCount, gapChar)));
240 else if (action == Action.DELETE_GAP)
242 copyTarget.setSequence(new String(
243 StringUtils.deleteChars(copyTarget.getSequence(),
244 mappedEditPos, mappedEditPos + mappedCount)));
250 * and 'apply' the edit to our copy of its source sequence
252 if (action == Action.INSERT_GAP)
254 actedOn.setSequence(new String(StringUtils.insertCharAt(
255 actedOn.getSequence(), editPos, count, gapChar)));
257 else if (action == Action.DELETE_GAP)
259 actedOn.setSequence(new String(StringUtils.deleteChars(
260 actedOn.getSequence(), editPos, editPos + count)));
266 * Returns a SearchResults object describing the mapped region corresponding
267 * to the specified sequence position.
274 public static SearchResultsI buildSearchResults(SequenceI seq, int index,
275 List<AlignedCodonFrame> seqmappings)
277 SearchResultsI results = new SearchResults();
278 addSearchResults(results, seq, index, seqmappings);
283 * Adds entries to a SearchResults object describing the mapped region
284 * corresponding to the specified sequence position.
291 public static void addSearchResults(SearchResultsI results, SequenceI seq,
292 int index, List<AlignedCodonFrame> seqmappings)
294 if (index >= seq.getStart() && index <= seq.getEnd())
296 for (AlignedCodonFrame acf : seqmappings)
298 acf.markMappedRegion(seq, index, results);
304 * Returns a (possibly empty) SequenceGroup containing any sequences in the
305 * mapped viewport corresponding to the given group in the source viewport.
312 public static SequenceGroup mapSequenceGroup(final SequenceGroup sg,
313 final AlignViewportI mapFrom, final AlignViewportI mapTo)
316 * Note the SequenceGroup holds aligned sequences, the mappings hold dataset
319 boolean targetIsNucleotide = mapTo.isNucleotide();
320 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
321 List<AlignedCodonFrame> codonFrames = protein.getAlignment()
324 * Copy group name, colours etc, but not sequences or sequence colour scheme
326 SequenceGroup mappedGroup = new SequenceGroup(sg);
327 mappedGroup.setColourScheme(mapTo.getGlobalColourScheme());
330 int minStartCol = -1;
332 final int selectionStartRes = sg.getStartRes();
333 final int selectionEndRes = sg.getEndRes();
334 for (SequenceI selected : sg.getSequences())
337 * Find the widest range of non-gapped positions in the selection range
339 int firstUngappedPos = selectionStartRes;
340 while (firstUngappedPos <= selectionEndRes
341 && Comparison.isGap(selected.getCharAt(firstUngappedPos)))
347 * If this sequence is only gaps in the selected range, skip it
349 if (firstUngappedPos > selectionEndRes)
354 int lastUngappedPos = selectionEndRes;
355 while (lastUngappedPos >= selectionStartRes
356 && Comparison.isGap(selected.getCharAt(lastUngappedPos)))
362 * Find the selected start/end residue positions in sequence
364 int startResiduePos = selected.findPosition(firstUngappedPos);
365 int endResiduePos = selected.findPosition(lastUngappedPos);
367 for (AlignedCodonFrame acf : codonFrames)
369 for (SequenceI seq : mapTo.getAlignment().getSequences())
371 SequenceI peptide = targetIsNucleotide ? selected : seq;
372 SequenceI cds = targetIsNucleotide ? seq : selected;
373 SequenceToSequenceMapping s2s = acf.getCoveringMapping(cds,
379 int mappedStartResidue = 0;
380 int mappedEndResidue = 0;
381 List<AlignedCodonFrame> mapping = Arrays.asList(acf);
382 SearchResultsI sr = buildSearchResults(selected, startResiduePos,
384 for (SearchResultMatchI m : sr.getResults())
386 mappedStartResidue = m.getStart();
387 mappedEndResidue = m.getEnd();
389 sr = buildSearchResults(selected, endResiduePos, mapping);
390 for (SearchResultMatchI m : sr.getResults())
392 mappedStartResidue = Math.min(mappedStartResidue, m.getStart());
393 mappedEndResidue = Math.max(mappedEndResidue, m.getEnd());
397 * Find the mapped aligned columns, save the range. Note findIndex
398 * returns a base 1 position, SequenceGroup uses base 0
400 int mappedStartCol = seq.findIndex(mappedStartResidue) - 1;
401 minStartCol = minStartCol == -1 ? mappedStartCol
402 : Math.min(minStartCol, mappedStartCol);
403 int mappedEndCol = seq.findIndex(mappedEndResidue) - 1;
404 maxEndCol = maxEndCol == -1 ? mappedEndCol
405 : Math.max(maxEndCol, mappedEndCol);
406 mappedGroup.addSequence(seq, false);
411 mappedGroup.setStartRes(minStartCol < 0 ? 0 : minStartCol);
412 mappedGroup.setEndRes(maxEndCol < 0 ? 0 : maxEndCol);
417 * Returns an OrderCommand equivalent to the given one, but acting on mapped
418 * sequences as described by the mappings, or null if no mapping can be made.
421 * the original order command
423 * if true, the action is to undo the sort
425 * the alignment we are mapping to
427 * the mappings available
430 public static CommandI mapOrderCommand(OrderCommand command, boolean undo,
431 AlignmentI mapTo, List<AlignedCodonFrame> mappings)
433 SequenceI[] sortOrder = command.getSequenceOrder(undo);
434 List<SequenceI> mappedOrder = new ArrayList<>();
438 * Assumption: we are only interested in a cDNA/protein mapping; refactor in
439 * future if we want to support sorting (c)dna as (c)dna or protein as
442 boolean mappingToNucleotide = mapTo.isNucleotide();
443 for (SequenceI seq : sortOrder)
445 for (AlignedCodonFrame acf : mappings)
447 for (SequenceI seq2 : mapTo.getSequences())
450 * the corresponding peptide / CDS is the one for which there is
451 * a complete ('covering') mapping to 'seq'
453 SequenceI peptide = mappingToNucleotide ? seq2 : seq;
454 SequenceI cds = mappingToNucleotide ? seq : seq2;
455 SequenceToSequenceMapping s2s = acf.getCoveringMapping(cds,
459 mappedOrder.add(seq2);
468 * Return null if no mappings made.
476 * Add any unmapped sequences on the end of the sort in their original
479 if (j < mapTo.getHeight())
481 for (SequenceI seq : mapTo.getSequences())
483 if (!mappedOrder.contains(seq))
485 mappedOrder.add(seq);
491 * Have to sort the sequences before constructing the OrderCommand - which
492 * then resorts them?!?
494 final SequenceI[] mappedOrderArray = mappedOrder
495 .toArray(new SequenceI[mappedOrder.size()]);
496 SequenceI[] oldOrder = mapTo.getSequencesArray();
497 AlignmentSorter.sortBy(mapTo, new AlignmentOrder(mappedOrderArray));
498 final OrderCommand result = new OrderCommand(command.getDescription(),
504 * Returns a ColumnSelection in the 'mapTo' view which corresponds to the
505 * given selection in the 'mapFrom' view. We assume one is nucleotide, the
506 * other is protein (and holds the mappings from codons to protein residues).
513 public static void mapColumnSelection(ColumnSelection colsel,
514 HiddenColumns hiddencols, AlignViewportI mapFrom,
515 AlignViewportI mapTo, ColumnSelection newColSel,
516 HiddenColumns newHidden)
518 boolean targetIsNucleotide = mapTo.isNucleotide();
519 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
520 List<AlignedCodonFrame> codonFrames = protein.getAlignment()
528 char fromGapChar = mapFrom.getAlignment().getGapCharacter();
531 * For each mapped column, find the range of columns that residues in that
534 List<SequenceI> fromSequences = mapFrom.getAlignment().getSequences();
535 List<SequenceI> toSequences = mapTo.getAlignment().getSequences();
537 for (Integer sel : colsel.getSelected())
539 mapColumn(sel.intValue(), codonFrames, newColSel, fromSequences,
540 toSequences, fromGapChar);
543 Iterator<int[]> regions = hiddencols.iterator();
544 while (regions.hasNext())
546 mapHiddenColumns(regions.next(), codonFrames, newHidden,
547 fromSequences, toSequences, fromGapChar);
553 * Helper method that maps a [start, end] hidden column range to its mapped
558 * @param mappedColumns
559 * @param fromSequences
563 protected static void mapHiddenColumns(int[] hidden,
564 List<AlignedCodonFrame> mappings, HiddenColumns mappedColumns,
565 List<SequenceI> fromSequences, List<SequenceI> toSequences,
568 for (int col = hidden[0]; col <= hidden[1]; col++)
570 int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
571 toSequences, fromGapChar);
574 * Add the range of hidden columns to the mapped selection (converting
577 if (mappedTo != null)
579 mappedColumns.hideColumns(mappedTo[0] - 1, mappedTo[1] - 1);
585 * Helper method to map one column selection
588 * the column number (base 0)
590 * the sequence mappings
591 * @param mappedColumns
592 * the mapped column selections to add to
593 * @param fromSequences
597 protected static void mapColumn(int col, List<AlignedCodonFrame> mappings,
598 ColumnSelection mappedColumns, List<SequenceI> fromSequences,
599 List<SequenceI> toSequences, char fromGapChar)
601 int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
602 toSequences, fromGapChar);
605 * Add the range of mapped columns to the mapped selection (converting
606 * base 1 to base 0). Note that this may include intron-only regions which
607 * lie between the start and end ranges of the selection.
609 if (mappedTo != null)
611 for (int i = mappedTo[0]; i <= mappedTo[1]; i++)
613 mappedColumns.addElement(i - 1);
619 * Helper method to find the range of columns mapped to from one column.
620 * Returns the maximal range of columns mapped to from all sequences in the
621 * source column, or null if no mappings were found.
625 * @param fromSequences
630 protected static int[] findMappedColumns(int col,
631 List<AlignedCodonFrame> mappings, List<SequenceI> fromSequences,
632 List<SequenceI> toSequences, char fromGapChar)
634 int[] mappedTo = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE };
635 boolean found = false;
638 * For each sequence in the 'from' alignment
640 for (SequenceI fromSeq : fromSequences)
643 * Ignore gaps (unmapped anyway)
645 if (fromSeq.getCharAt(col) == fromGapChar)
651 * Get the residue position and find the mapped position.
653 int residuePos = fromSeq.findPosition(col);
654 SearchResultsI sr = buildSearchResults(fromSeq, residuePos, mappings);
655 for (SearchResultMatchI m : sr.getResults())
657 int mappedStartResidue = m.getStart();
658 int mappedEndResidue = m.getEnd();
659 SequenceI mappedSeq = m.getSequence();
662 * Locate the aligned sequence whose dataset is mappedSeq. TODO a
663 * datamodel that can do this efficiently.
665 for (SequenceI toSeq : toSequences)
667 if (toSeq.getDatasetSequence() == mappedSeq
668 && mappedStartResidue >= toSeq.getStart()
669 && mappedEndResidue <= toSeq.getEnd())
671 int mappedStartCol = toSeq.findIndex(mappedStartResidue);
672 int mappedEndCol = toSeq.findIndex(mappedEndResidue);
673 mappedTo[0] = Math.min(mappedTo[0], mappedStartCol);
674 mappedTo[1] = Math.max(mappedTo[1], mappedEndCol);
677 // note: remove break if we ever want to map one to many sequences
682 return found ? mappedTo : null;
686 * Returns the mapped codon or codons for a given aligned sequence column
690 * an aligned peptide sequence
692 * an aligned column position (base 0)
694 * a set of codon mappings
695 * @return the bases of the mapped codon(s) in the cDNA dataset sequence(s),
696 * or an empty list if none found
698 public static List<char[]> findCodonsFor(SequenceI seq, int col,
699 List<AlignedCodonFrame> mappings)
701 List<char[]> result = new ArrayList<>();
702 int dsPos = seq.findPosition(col);
703 for (AlignedCodonFrame mapping : mappings)
705 if (mapping.involvesSequence(seq))
707 List<char[]> codons = mapping
708 .getMappedCodons(seq.getDatasetSequence(), dsPos);
711 result.addAll(codons);
719 * Converts a series of [start, end] range pairs into an array of individual
720 * positions. This also caters for 'reverse strand' (start > end) cases.
725 public static int[] flattenRanges(int[] ranges)
728 * Count how many positions altogether
731 for (int i = 0; i < ranges.length - 1; i += 2)
733 count += Math.abs(ranges[i + 1] - ranges[i]) + 1;
736 int[] result = new int[count];
738 for (int i = 0; i < ranges.length - 1; i += 2)
740 int from = ranges[i];
741 final int to = ranges[i + 1];
742 int step = from <= to ? 1 : -1;
747 } while (from != to + step);
753 * Returns a list of any mappings that are from or to the given (aligned or
760 public static List<AlignedCodonFrame> findMappingsForSequence(
761 SequenceI sequence, List<AlignedCodonFrame> mappings)
763 return findMappingsForSequenceAndOthers(sequence, mappings, null);
767 * Returns a list of any mappings that are from or to the given (aligned or
768 * dataset) sequence, optionally limited to mappings involving one of a given
776 public static List<AlignedCodonFrame> findMappingsForSequenceAndOthers(
777 SequenceI sequence, List<AlignedCodonFrame> mappings,
778 List<SequenceI> filterList)
780 List<AlignedCodonFrame> result = new ArrayList<>();
781 if (sequence == null || mappings == null)
785 for (AlignedCodonFrame mapping : mappings)
787 if (mapping.involvesSequence(sequence))
789 if (filterList != null)
791 for (SequenceI otherseq : filterList)
793 SequenceI otherDataset = otherseq.getDatasetSequence();
794 if (otherseq == sequence
795 || otherseq == sequence.getDatasetSequence()
796 || (otherDataset != null && (otherDataset == sequence
797 || otherDataset == sequence
798 .getDatasetSequence())))
800 // skip sequences in subset which directly relate to sequence
803 if (mapping.involvesSequence(otherseq))
805 // selected a mapping contained in subselect alignment
821 * Returns the total length of the supplied ranges, which may be as single
822 * [start, end] or multiple [start, end, start, end ...]
827 public static int getLength(List<int[]> ranges)
834 for (int[] range : ranges)
836 if (range.length % 2 != 0)
839 "Error unbalance start/end ranges: " + ranges.toString());
842 for (int i = 0; i < range.length - 1; i += 2)
844 length += Math.abs(range[i + 1] - range[i]) + 1;
851 * Answers true if any range includes the given value
857 public static boolean contains(List<int[]> ranges, int value)
863 for (int[] range : ranges)
865 if (range[1] >= range[0] && value >= range[0] && value <= range[1])
868 * value within ascending range
872 if (range[1] < range[0] && value <= range[0] && value >= range[1])
875 * value within descending range
884 * Removes a specified number of positions from the start of a ranges list.
885 * For example, could be used to adjust cds ranges to allow for an incomplete
886 * start codon. Subranges are removed completely, or their start positions
887 * adjusted, until the required number of positions has been removed from the
888 * range. Reverse strand ranges are supported. The input array is not
893 * an array of [start, end, start, end...] positions
894 * @return a new array with the first removeCount positions removed
896 public static int[] removeStartPositions(int removeCount,
899 if (removeCount <= 0)
904 int[] copy = Arrays.copyOf(ranges, ranges.length);
907 for (int x = 0; x < copy.length && sxpos == -1; x += 2)
909 cdspos += Math.abs(copy[x + 1] - copy[x]) + 1;
910 if (removeCount < cdspos)
913 * we have removed enough, time to finish
918 * increment start of first exon, or decrement if reverse strand
920 if (copy[x] <= copy[x + 1])
922 copy[x] = copy[x + 1] - cdspos + removeCount + 1;
926 copy[x] = copy[x + 1] + cdspos - removeCount - 1;
935 * we dropped at least one entire sub-range - compact the array
937 int[] nxon = new int[copy.length - sxpos];
938 System.arraycopy(copy, sxpos, nxon, 0, copy.length - sxpos);
945 * Answers true if range's start-end positions include those of queryRange,
946 * where either range might be in reverse direction, else false
951 * a candidate subrange of range (start2-end2)
954 public static boolean rangeContains(int[] range, int[] queryRange)
956 if (range == null || queryRange == null || range.length != 2
957 || queryRange.length != 2)
965 int min = Math.min(range[0], range[1]);
966 int max = Math.max(range[0], range[1]);
968 return (min <= queryRange[0] && max >= queryRange[0]
969 && min <= queryRange[1] && max >= queryRange[1]);
973 * Removes the specified number of positions from the given ranges. Provided
974 * to allow a stop codon to be stripped from a CDS sequence so that it matches
975 * the peptide translation length.
979 * a list of (single) [start, end] ranges
982 public static void removeEndPositions(int positions, List<int[]> ranges)
984 int toRemove = positions;
985 Iterator<int[]> it = new ReverseListIterator<>(ranges);
988 int[] endRange = it.next();
989 if (endRange.length != 2)
992 * not coded for [start1, end1, start2, end2, ...]
995 "MappingUtils.removeEndPositions doesn't handle multiple ranges");
999 int length = endRange[1] - endRange[0] + 1;
1003 * not coded for a reverse strand range (end < start)
1006 "MappingUtils.removeEndPositions doesn't handle reverse strand");
1009 if (length > toRemove)
1011 endRange[1] -= toRemove;
1023 * Converts a list of [start, end] ranges to a single array of [start, end,
1029 public static int[] listToArray(List<int[]> ranges)
1031 int[] result = new int[ranges.size() * 2];
1033 for (int[] range : ranges)
1035 result[i++] = range[0];
1036 result[i++] = range[1];