2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.analysis.AlignmentSorter;
24 import jalview.api.AlignViewportI;
25 import jalview.commands.CommandI;
26 import jalview.commands.EditCommand;
27 import jalview.commands.EditCommand.Action;
28 import jalview.commands.EditCommand.Edit;
29 import jalview.commands.OrderCommand;
30 import jalview.datamodel.AlignedCodonFrame;
31 import jalview.datamodel.AlignmentI;
32 import jalview.datamodel.AlignmentOrder;
33 import jalview.datamodel.ColumnSelection;
34 import jalview.datamodel.HiddenColumns;
35 import jalview.datamodel.SearchResultMatchI;
36 import jalview.datamodel.SearchResults;
37 import jalview.datamodel.SearchResultsI;
38 import jalview.datamodel.Sequence;
39 import jalview.datamodel.SequenceGroup;
40 import jalview.datamodel.SequenceI;
42 import java.util.ArrayList;
43 import java.util.Arrays;
44 import java.util.HashMap;
45 import java.util.Iterator;
46 import java.util.List;
50 * Helper methods for manipulations involving sequence mappings.
55 public final class MappingUtils
59 * Helper method to map a CUT or PASTE command.
62 * the original command
64 * if true, the command is to be undone
66 * the mapped sequences to apply the mapped command to
68 * the mapped EditCommand to add to
71 protected static void mapCutOrPaste(Edit edit, boolean undo,
72 List<SequenceI> targetSeqs, EditCommand result,
73 List<AlignedCodonFrame> mappings)
75 Action action = edit.getAction();
78 action = action.getUndoAction();
81 System.err.println("MappingUtils.mapCutOrPaste not yet implemented");
85 * Returns a new EditCommand representing the given command as mapped to the
86 * given sequences. If there is no mapping, returns null.
95 public static EditCommand mapEditCommand(EditCommand command,
96 boolean undo, final AlignmentI mapTo, char gapChar,
97 List<AlignedCodonFrame> mappings)
100 * For now, only support mapping from protein edits to cDna
102 if (!mapTo.isNucleotide())
108 * Cache a copy of the target sequences so we can mimic successive edits on
109 * them. This lets us compute mappings for all edits in the set.
111 Map<SequenceI, SequenceI> targetCopies = new HashMap<>();
112 for (SequenceI seq : mapTo.getSequences())
114 SequenceI ds = seq.getDatasetSequence();
117 final SequenceI copy = new Sequence(seq);
118 copy.setDatasetSequence(ds);
119 targetCopies.put(ds, copy);
124 * Compute 'source' sequences as they were before applying edits:
126 Map<SequenceI, SequenceI> originalSequences = command.priorState(undo);
128 EditCommand result = new EditCommand();
129 Iterator<Edit> edits = command.getEditIterator(!undo);
130 while (edits.hasNext())
132 Edit edit = edits.next();
133 if (edit.getAction() == Action.CUT
134 || edit.getAction() == Action.PASTE)
136 mapCutOrPaste(edit, undo, mapTo.getSequences(), result, mappings);
138 else if (edit.getAction() == Action.INSERT_GAP
139 || edit.getAction() == Action.DELETE_GAP)
141 mapInsertOrDelete(edit, undo, originalSequences,
142 mapTo.getSequences(), targetCopies, gapChar, result,
146 return result.getSize() > 0 ? result : null;
150 * Helper method to map an edit command to insert or delete gaps.
153 * the original command
155 * if true, the action is to undo the command
156 * @param originalSequences
157 * the sequences the command acted on
159 * @param targetCopies
162 * the new EditCommand to add mapped commands to
165 protected static void mapInsertOrDelete(Edit edit, boolean undo,
166 Map<SequenceI, SequenceI> originalSequences,
167 final List<SequenceI> targetSeqs,
168 Map<SequenceI, SequenceI> targetCopies, char gapChar,
169 EditCommand result, List<AlignedCodonFrame> mappings)
171 Action action = edit.getAction();
174 * Invert sense of action if an Undo.
178 action = action.getUndoAction();
180 final int count = edit.getNumber();
181 final int editPos = edit.getPosition();
182 for (SequenceI seq : edit.getSequences())
185 * Get residue position at (or to right of) edit location. Note we use our
186 * 'copy' of the sequence before editing for this.
188 SequenceI ds = seq.getDatasetSequence();
193 final SequenceI actedOn = originalSequences.get(ds);
194 final int seqpos = actedOn.findPosition(editPos);
197 * Determine all mappings from this position to mapped sequences.
199 SearchResultsI sr = buildSearchResults(seq, seqpos, mappings);
203 for (SequenceI targetSeq : targetSeqs)
205 ds = targetSeq.getDatasetSequence();
210 SequenceI copyTarget = targetCopies.get(ds);
211 final int[] match = sr.getResults(copyTarget, 0,
212 copyTarget.getLength());
215 final int ratio = 3; // TODO: compute this - how?
216 final int mappedCount = count * ratio;
219 * Shift Delete start position left, as it acts on positions to its
222 int mappedEditPos = action == Action.DELETE_GAP ? match[0]
223 - mappedCount : match[0];
224 Edit e = result.new Edit(action, new SequenceI[] { targetSeq },
225 mappedEditPos, mappedCount, gapChar);
229 * and 'apply' the edit to our copy of its target sequence
231 if (action == Action.INSERT_GAP)
233 copyTarget.setSequence(new String(StringUtils.insertCharAt(
234 copyTarget.getSequence(), mappedEditPos, mappedCount,
237 else if (action == Action.DELETE_GAP)
239 copyTarget.setSequence(new String(StringUtils.deleteChars(
240 copyTarget.getSequence(), mappedEditPos,
241 mappedEditPos + mappedCount)));
247 * and 'apply' the edit to our copy of its source sequence
249 if (action == Action.INSERT_GAP)
251 actedOn.setSequence(new String(StringUtils.insertCharAt(
252 actedOn.getSequence(), editPos, count, gapChar)));
254 else if (action == Action.DELETE_GAP)
256 actedOn.setSequence(new String(StringUtils.deleteChars(
257 actedOn.getSequence(), editPos, editPos + count)));
263 * Returns a SearchResults object describing the mapped region corresponding
264 * to the specified sequence position.
271 public static SearchResultsI buildSearchResults(SequenceI seq, int index,
272 List<AlignedCodonFrame> seqmappings)
274 SearchResultsI results = new SearchResults();
275 addSearchResults(results, seq, index, seqmappings);
280 * Adds entries to a SearchResults object describing the mapped region
281 * corresponding to the specified sequence position.
288 public static void addSearchResults(SearchResultsI results, SequenceI seq,
289 int index, List<AlignedCodonFrame> seqmappings)
291 if (index >= seq.getStart() && index <= seq.getEnd())
293 for (AlignedCodonFrame acf : seqmappings)
295 acf.markMappedRegion(seq, index, results);
301 * Returns a (possibly empty) SequenceGroup containing any sequences in the
302 * mapped viewport corresponding to the given group in the source viewport.
309 public static SequenceGroup mapSequenceGroup(final SequenceGroup sg,
310 final AlignViewportI mapFrom, final AlignViewportI mapTo)
313 * Note the SequenceGroup holds aligned sequences, the mappings hold dataset
316 boolean targetIsNucleotide = mapTo.isNucleotide();
317 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
318 List<AlignedCodonFrame> codonFrames = protein.getAlignment()
321 * Copy group name, colours etc, but not sequences or sequence colour scheme
323 SequenceGroup mappedGroup = new SequenceGroup(sg);
324 mappedGroup.setColourScheme(mapTo.getGlobalColourScheme());
327 int minStartCol = -1;
329 final int selectionStartRes = sg.getStartRes();
330 final int selectionEndRes = sg.getEndRes();
331 for (SequenceI selected : sg.getSequences())
334 * Find the widest range of non-gapped positions in the selection range
336 int firstUngappedPos = selectionStartRes;
337 while (firstUngappedPos <= selectionEndRes
338 && Comparison.isGap(selected.getCharAt(firstUngappedPos)))
344 * If this sequence is only gaps in the selected range, skip it
346 if (firstUngappedPos > selectionEndRes)
351 int lastUngappedPos = selectionEndRes;
352 while (lastUngappedPos >= selectionStartRes
353 && Comparison.isGap(selected.getCharAt(lastUngappedPos)))
359 * Find the selected start/end residue positions in sequence
361 int startResiduePos = selected.findPosition(firstUngappedPos);
362 int endResiduePos = selected.findPosition(lastUngappedPos);
364 for (AlignedCodonFrame acf : codonFrames)
366 SequenceI mappedSequence = targetIsNucleotide ? acf
367 .getDnaForAaSeq(selected) : acf.getAaForDnaSeq(selected);
368 if (mappedSequence != null)
370 for (SequenceI seq : mapTo.getAlignment().getSequences())
372 int mappedStartResidue = 0;
373 int mappedEndResidue = 0;
374 if (seq.getDatasetSequence() == mappedSequence)
377 * Found a sequence mapping. Locate the start/end mapped residues.
379 List<AlignedCodonFrame> mapping = Arrays
380 .asList(new AlignedCodonFrame[] { acf });
381 SearchResultsI sr = buildSearchResults(selected,
382 startResiduePos, mapping);
383 for (SearchResultMatchI m : sr.getResults())
385 mappedStartResidue = m.getStart();
386 mappedEndResidue = m.getEnd();
388 sr = buildSearchResults(selected, endResiduePos, mapping);
389 for (SearchResultMatchI m : sr.getResults())
391 mappedStartResidue = Math.min(mappedStartResidue,
393 mappedEndResidue = Math.max(mappedEndResidue, m.getEnd());
397 * Find the mapped aligned columns, save the range. Note findIndex
398 * returns a base 1 position, SequenceGroup uses base 0
400 int mappedStartCol = seq.findIndex(mappedStartResidue) - 1;
401 minStartCol = minStartCol == -1 ? mappedStartCol : Math.min(
402 minStartCol, mappedStartCol);
403 int mappedEndCol = seq.findIndex(mappedEndResidue) - 1;
404 maxEndCol = maxEndCol == -1 ? mappedEndCol : Math.max(
405 maxEndCol, mappedEndCol);
406 mappedGroup.addSequence(seq, false);
413 mappedGroup.setStartRes(minStartCol < 0 ? 0 : minStartCol);
414 mappedGroup.setEndRes(maxEndCol < 0 ? 0 : maxEndCol);
419 * Returns an OrderCommand equivalent to the given one, but acting on mapped
420 * sequences as described by the mappings, or null if no mapping can be made.
423 * the original order command
425 * if true, the action is to undo the sort
427 * the alignment we are mapping to
429 * the mappings available
432 public static CommandI mapOrderCommand(OrderCommand command,
433 boolean undo, AlignmentI mapTo, List<AlignedCodonFrame> mappings)
435 SequenceI[] sortOrder = command.getSequenceOrder(undo);
436 List<SequenceI> mappedOrder = new ArrayList<>();
440 * Assumption: we are only interested in a cDNA/protein mapping; refactor in
441 * future if we want to support sorting (c)dna as (c)dna or protein as
444 boolean mappingToNucleotide = mapTo.isNucleotide();
445 for (SequenceI seq : sortOrder)
447 for (AlignedCodonFrame acf : mappings)
449 SequenceI mappedSeq = mappingToNucleotide ? acf.getDnaForAaSeq(seq)
450 : acf.getAaForDnaSeq(seq);
451 if (mappedSeq != null)
453 for (SequenceI seq2 : mapTo.getSequences())
455 if (seq2.getDatasetSequence() == mappedSeq)
457 mappedOrder.add(seq2);
467 * Return null if no mappings made.
475 * Add any unmapped sequences on the end of the sort in their original
478 if (j < mapTo.getHeight())
480 for (SequenceI seq : mapTo.getSequences())
482 if (!mappedOrder.contains(seq))
484 mappedOrder.add(seq);
490 * Have to sort the sequences before constructing the OrderCommand - which
491 * then resorts them?!?
493 final SequenceI[] mappedOrderArray = mappedOrder
494 .toArray(new SequenceI[mappedOrder.size()]);
495 SequenceI[] oldOrder = mapTo.getSequencesArray();
496 AlignmentSorter.sortBy(mapTo, new AlignmentOrder(mappedOrderArray));
497 final OrderCommand result = new OrderCommand(command.getDescription(),
503 * Returns a ColumnSelection in the 'mapTo' view which corresponds to the
504 * given selection in the 'mapFrom' view. We assume one is nucleotide, the
505 * other is protein (and holds the mappings from codons to protein residues).
512 public static void mapColumnSelection(ColumnSelection colsel,
513 HiddenColumns hiddencols, AlignViewportI mapFrom,
514 AlignViewportI mapTo, ColumnSelection newColSel,
515 HiddenColumns newHidden)
517 boolean targetIsNucleotide = mapTo.isNucleotide();
518 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
519 List<AlignedCodonFrame> codonFrames = protein.getAlignment()
524 return; // mappedColumns;
527 char fromGapChar = mapFrom.getAlignment().getGapCharacter();
530 * For each mapped column, find the range of columns that residues in that
533 List<SequenceI> fromSequences = mapFrom.getAlignment().getSequences();
534 List<SequenceI> toSequences = mapTo.getAlignment().getSequences();
536 for (Integer sel : colsel.getSelected())
538 mapColumn(sel.intValue(), codonFrames, newColSel, fromSequences,
539 toSequences, fromGapChar);
542 for (int[] hidden : hiddencols.getHiddenColumnsCopy())
544 mapHiddenColumns(hidden, codonFrames, newHidden, fromSequences,
545 toSequences, fromGapChar);
547 return; // mappedColumns;
551 * Helper method that maps a [start, end] hidden column range to its mapped
556 * @param mappedColumns
557 * @param fromSequences
561 protected static void mapHiddenColumns(int[] hidden,
562 List<AlignedCodonFrame> mappings, HiddenColumns mappedColumns,
563 List<SequenceI> fromSequences, List<SequenceI> toSequences,
566 for (int col = hidden[0]; col <= hidden[1]; col++)
568 int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
569 toSequences, fromGapChar);
572 * Add the range of hidden columns to the mapped selection (converting
575 if (mappedTo != null)
577 mappedColumns.hideColumns(mappedTo[0] - 1, mappedTo[1] - 1);
583 * Helper method to map one column selection
586 * the column number (base 0)
588 * the sequence mappings
589 * @param mappedColumns
590 * the mapped column selections to add to
591 * @param fromSequences
595 protected static void mapColumn(int col,
596 List<AlignedCodonFrame> mappings, ColumnSelection mappedColumns,
597 List<SequenceI> fromSequences, List<SequenceI> toSequences,
600 int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
601 toSequences, fromGapChar);
604 * Add the range of mapped columns to the mapped selection (converting
605 * base 1 to base 0). Note that this may include intron-only regions which
606 * lie between the start and end ranges of the selection.
608 if (mappedTo != null)
610 for (int i = mappedTo[0]; i <= mappedTo[1]; i++)
612 mappedColumns.addElement(i - 1);
618 * Helper method to find the range of columns mapped to from one column.
619 * Returns the maximal range of columns mapped to from all sequences in the
620 * source column, or null if no mappings were found.
624 * @param fromSequences
629 protected static int[] findMappedColumns(int col,
630 List<AlignedCodonFrame> mappings, List<SequenceI> fromSequences,
631 List<SequenceI> toSequences, char fromGapChar)
633 int[] mappedTo = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE };
634 boolean found = false;
637 * For each sequence in the 'from' alignment
639 for (SequenceI fromSeq : fromSequences)
642 * Ignore gaps (unmapped anyway)
644 if (fromSeq.getCharAt(col) == fromGapChar)
650 * Get the residue position and find the mapped position.
652 int residuePos = fromSeq.findPosition(col);
653 SearchResultsI sr = buildSearchResults(fromSeq, residuePos, mappings);
654 for (SearchResultMatchI m : sr.getResults())
656 int mappedStartResidue = m.getStart();
657 int mappedEndResidue = m.getEnd();
658 SequenceI mappedSeq = m.getSequence();
661 * Locate the aligned sequence whose dataset is mappedSeq. TODO a
662 * datamodel that can do this efficiently.
664 for (SequenceI toSeq : toSequences)
666 if (toSeq.getDatasetSequence() == mappedSeq)
668 int mappedStartCol = toSeq.findIndex(mappedStartResidue);
669 int mappedEndCol = toSeq.findIndex(mappedEndResidue);
670 mappedTo[0] = Math.min(mappedTo[0], mappedStartCol);
671 mappedTo[1] = Math.max(mappedTo[1], mappedEndCol);
674 // note: remove break if we ever want to map one to many sequences
679 return found ? mappedTo : null;
683 * Returns the mapped codon or codons for a given aligned sequence column
687 * an aligned peptide sequence
689 * an aligned column position (base 0)
691 * a set of codon mappings
692 * @return the bases of the mapped codon(s) in the cDNA dataset sequence(s),
693 * or an empty list if none found
695 public static List<char[]> findCodonsFor(SequenceI seq, int col,
696 List<AlignedCodonFrame> mappings)
698 List<char[]> result = new ArrayList<>();
699 int dsPos = seq.findPosition(col);
700 for (AlignedCodonFrame mapping : mappings)
702 if (mapping.involvesSequence(seq))
704 List<char[]> codons = mapping.getMappedCodons(
705 seq.getDatasetSequence(), dsPos);
708 result.addAll(codons);
716 * Converts a series of [start, end] range pairs into an array of individual
717 * positions. This also caters for 'reverse strand' (start > end) cases.
722 public static int[] flattenRanges(int[] ranges)
725 * Count how many positions altogether
728 for (int i = 0; i < ranges.length - 1; i += 2)
730 count += Math.abs(ranges[i + 1] - ranges[i]) + 1;
733 int[] result = new int[count];
735 for (int i = 0; i < ranges.length - 1; i += 2)
737 int from = ranges[i];
738 final int to = ranges[i + 1];
739 int step = from <= to ? 1 : -1;
744 } while (from != to + step);
750 * Returns a list of any mappings that are from or to the given (aligned or
757 public static List<AlignedCodonFrame> findMappingsForSequence(
758 SequenceI sequence, List<AlignedCodonFrame> mappings)
760 return findMappingsForSequenceAndOthers(sequence, mappings, null);
764 * Returns a list of any mappings that are from or to the given (aligned or
765 * dataset) sequence, optionally limited to mappings involving one of a given
773 public static List<AlignedCodonFrame> findMappingsForSequenceAndOthers(
774 SequenceI sequence, List<AlignedCodonFrame> mappings,
775 List<SequenceI> filterList)
777 List<AlignedCodonFrame> result = new ArrayList<>();
778 if (sequence == null || mappings == null)
782 for (AlignedCodonFrame mapping : mappings)
784 if (mapping.involvesSequence(sequence))
786 if (filterList != null)
788 for (SequenceI otherseq : filterList)
790 SequenceI otherDataset = otherseq.getDatasetSequence();
791 if (otherseq == sequence
792 || otherseq == sequence.getDatasetSequence()
793 || (otherDataset != null && (otherDataset == sequence || otherDataset == sequence
794 .getDatasetSequence())))
796 // skip sequences in subset which directly relate to sequence
799 if (mapping.involvesSequence(otherseq))
801 // selected a mapping contained in subselect alignment
817 * Returns the total length of the supplied ranges, which may be as single
818 * [start, end] or multiple [start, end, start, end ...]
823 public static int getLength(List<int[]> ranges)
830 for (int[] range : ranges)
832 if (range.length % 2 != 0)
834 System.err.println("Error unbalance start/end ranges: "
835 + ranges.toString());
838 for (int i = 0; i < range.length - 1; i += 2)
840 length += Math.abs(range[i + 1] - range[i]) + 1;
847 * Answers true if any range includes the given value
853 public static boolean contains(List<int[]> ranges, int value)
859 for (int[] range : ranges)
861 if (range[1] >= range[0] && value >= range[0] && value <= range[1])
864 * value within ascending range
868 if (range[1] < range[0] && value <= range[0] && value >= range[1])
871 * value within descending range
880 * Removes a specified number of positions from the start of a ranges list.
881 * For example, could be used to adjust cds ranges to allow for an incomplete
882 * start codon. Subranges are removed completely, or their start positions
883 * adjusted, until the required number of positions has been removed from the
884 * range. Reverse strand ranges are supported. The input array is not
889 * an array of [start, end, start, end...] positions
890 * @return a new array with the first removeCount positions removed
892 public static int[] removeStartPositions(int removeCount,
895 if (removeCount <= 0)
900 int[] copy = Arrays.copyOf(ranges, ranges.length);
903 for (int x = 0; x < copy.length && sxpos == -1; x += 2)
905 cdspos += Math.abs(copy[x + 1] - copy[x]) + 1;
906 if (removeCount < cdspos)
909 * we have removed enough, time to finish
914 * increment start of first exon, or decrement if reverse strand
916 if (copy[x] <= copy[x + 1])
918 copy[x] = copy[x + 1] - cdspos + removeCount + 1;
922 copy[x] = copy[x + 1] + cdspos - removeCount - 1;
931 * we dropped at least one entire sub-range - compact the array
933 int[] nxon = new int[copy.length - sxpos];
934 System.arraycopy(copy, sxpos, nxon, 0, copy.length - sxpos);