2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.analysis.AlignmentSorter;
24 import jalview.api.AlignViewportI;
25 import jalview.commands.CommandI;
26 import jalview.commands.EditCommand;
27 import jalview.commands.EditCommand.Action;
28 import jalview.commands.EditCommand.Edit;
29 import jalview.commands.OrderCommand;
30 import jalview.datamodel.AlignedCodonFrame;
31 import jalview.datamodel.AlignmentI;
32 import jalview.datamodel.AlignmentOrder;
33 import jalview.datamodel.ColumnSelection;
34 import jalview.datamodel.SearchResultMatchI;
35 import jalview.datamodel.SearchResults;
36 import jalview.datamodel.SearchResultsI;
37 import jalview.datamodel.Sequence;
38 import jalview.datamodel.SequenceGroup;
39 import jalview.datamodel.SequenceI;
41 import java.util.ArrayList;
42 import java.util.Arrays;
43 import java.util.HashMap;
44 import java.util.Iterator;
45 import java.util.List;
49 * Helper methods for manipulations involving sequence mappings.
54 public final class MappingUtils
58 * Helper method to map a CUT or PASTE command.
61 * the original command
63 * if true, the command is to be undone
65 * the mapped sequences to apply the mapped command to
67 * the mapped EditCommand to add to
70 protected static void mapCutOrPaste(Edit edit, boolean undo,
71 List<SequenceI> targetSeqs, EditCommand result,
72 List<AlignedCodonFrame> mappings)
74 Action action = edit.getAction();
77 action = action.getUndoAction();
80 System.err.println("MappingUtils.mapCutOrPaste not yet implemented");
84 * Returns a new EditCommand representing the given command as mapped to the
85 * given sequences. If there is no mapping, returns null.
94 public static EditCommand mapEditCommand(EditCommand command,
95 boolean undo, final AlignmentI mapTo, char gapChar,
96 List<AlignedCodonFrame> mappings)
99 * For now, only support mapping from protein edits to cDna
101 if (!mapTo.isNucleotide())
107 * Cache a copy of the target sequences so we can mimic successive edits on
108 * them. This lets us compute mappings for all edits in the set.
110 Map<SequenceI, SequenceI> targetCopies = new HashMap<SequenceI, SequenceI>();
111 for (SequenceI seq : mapTo.getSequences())
113 SequenceI ds = seq.getDatasetSequence();
116 final SequenceI copy = new Sequence(seq);
117 copy.setDatasetSequence(ds);
118 targetCopies.put(ds, copy);
123 * Compute 'source' sequences as they were before applying edits:
125 Map<SequenceI, SequenceI> originalSequences = command.priorState(undo);
127 EditCommand result = new EditCommand();
128 Iterator<Edit> edits = command.getEditIterator(!undo);
129 while (edits.hasNext())
131 Edit edit = edits.next();
132 if (edit.getAction() == Action.CUT
133 || edit.getAction() == Action.PASTE)
135 mapCutOrPaste(edit, undo, mapTo.getSequences(), result, mappings);
137 else if (edit.getAction() == Action.INSERT_GAP
138 || edit.getAction() == Action.DELETE_GAP)
140 mapInsertOrDelete(edit, undo, originalSequences,
141 mapTo.getSequences(), targetCopies, gapChar, result,
145 return result.getSize() > 0 ? result : null;
149 * Helper method to map an edit command to insert or delete gaps.
152 * the original command
154 * if true, the action is to undo the command
155 * @param originalSequences
156 * the sequences the command acted on
158 * @param targetCopies
161 * the new EditCommand to add mapped commands to
164 protected static void mapInsertOrDelete(Edit edit, boolean undo,
165 Map<SequenceI, SequenceI> originalSequences,
166 final List<SequenceI> targetSeqs,
167 Map<SequenceI, SequenceI> targetCopies, char gapChar,
168 EditCommand result, List<AlignedCodonFrame> mappings)
170 Action action = edit.getAction();
173 * Invert sense of action if an Undo.
177 action = action.getUndoAction();
179 final int count = edit.getNumber();
180 final int editPos = edit.getPosition();
181 for (SequenceI seq : edit.getSequences())
184 * Get residue position at (or to right of) edit location. Note we use our
185 * 'copy' of the sequence before editing for this.
187 SequenceI ds = seq.getDatasetSequence();
192 final SequenceI actedOn = originalSequences.get(ds);
193 final int seqpos = actedOn.findPosition(editPos);
196 * Determine all mappings from this position to mapped sequences.
198 SearchResultsI sr = buildSearchResults(seq, seqpos, mappings);
202 for (SequenceI targetSeq : targetSeqs)
204 ds = targetSeq.getDatasetSequence();
209 SequenceI copyTarget = targetCopies.get(ds);
210 final int[] match = sr.getResults(copyTarget, 0,
211 copyTarget.getLength());
214 final int ratio = 3; // TODO: compute this - how?
215 final int mappedCount = count * ratio;
218 * Shift Delete start position left, as it acts on positions to its
221 int mappedEditPos = action == Action.DELETE_GAP ? match[0]
222 - mappedCount : match[0];
223 Edit e = result.new Edit(action, new SequenceI[] { targetSeq },
224 mappedEditPos, mappedCount, gapChar);
228 * and 'apply' the edit to our copy of its target sequence
230 if (action == Action.INSERT_GAP)
232 copyTarget.setSequence(new String(StringUtils.insertCharAt(
233 copyTarget.getSequence(), mappedEditPos, mappedCount,
236 else if (action == Action.DELETE_GAP)
238 copyTarget.setSequence(new String(StringUtils.deleteChars(
239 copyTarget.getSequence(), mappedEditPos,
240 mappedEditPos + mappedCount)));
246 * and 'apply' the edit to our copy of its source sequence
248 if (action == Action.INSERT_GAP)
250 actedOn.setSequence(new String(StringUtils.insertCharAt(
251 actedOn.getSequence(), editPos, count, gapChar)));
253 else if (action == Action.DELETE_GAP)
255 actedOn.setSequence(new String(StringUtils.deleteChars(
256 actedOn.getSequence(), editPos, editPos + count)));
262 * Returns a SearchResults object describing the mapped region corresponding
263 * to the specified sequence position.
270 public static SearchResultsI buildSearchResults(SequenceI seq, int index,
271 List<AlignedCodonFrame> seqmappings)
273 SearchResultsI results = new SearchResults();
274 addSearchResults(results, seq, index, seqmappings);
279 * Adds entries to a SearchResults object describing the mapped region
280 * corresponding to the specified sequence position.
287 public static void addSearchResults(SearchResultsI results, SequenceI seq,
288 int index, List<AlignedCodonFrame> seqmappings)
290 if (index >= seq.getStart() && index <= seq.getEnd())
292 for (AlignedCodonFrame acf : seqmappings)
294 acf.markMappedRegion(seq, index, results);
300 * Returns a (possibly empty) SequenceGroup containing any sequences in the
301 * mapped viewport corresponding to the given group in the source viewport.
308 public static SequenceGroup mapSequenceGroup(final SequenceGroup sg,
309 final AlignViewportI mapFrom, final AlignViewportI mapTo)
312 * Note the SequenceGroup holds aligned sequences, the mappings hold dataset
315 boolean targetIsNucleotide = mapTo.isNucleotide();
316 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
317 List<AlignedCodonFrame> codonFrames = protein.getAlignment()
320 * Copy group name, colours etc, but not sequences or sequence colour scheme
322 SequenceGroup mappedGroup = new SequenceGroup(sg);
323 mappedGroup.cs = mapTo.getGlobalColourScheme();
326 int minStartCol = -1;
328 final int selectionStartRes = sg.getStartRes();
329 final int selectionEndRes = sg.getEndRes();
330 for (SequenceI selected : sg.getSequences())
333 * Find the widest range of non-gapped positions in the selection range
335 int firstUngappedPos = selectionStartRes;
336 while (firstUngappedPos <= selectionEndRes
337 && Comparison.isGap(selected.getCharAt(firstUngappedPos)))
343 * If this sequence is only gaps in the selected range, skip it
345 if (firstUngappedPos > selectionEndRes)
350 int lastUngappedPos = selectionEndRes;
351 while (lastUngappedPos >= selectionStartRes
352 && Comparison.isGap(selected.getCharAt(lastUngappedPos)))
358 * Find the selected start/end residue positions in sequence
360 int startResiduePos = selected.findPosition(firstUngappedPos);
361 int endResiduePos = selected.findPosition(lastUngappedPos);
363 for (AlignedCodonFrame acf : codonFrames)
365 SequenceI mappedSequence = targetIsNucleotide ? acf
366 .getDnaForAaSeq(selected) : acf.getAaForDnaSeq(selected);
367 if (mappedSequence != null)
369 for (SequenceI seq : mapTo.getAlignment().getSequences())
371 int mappedStartResidue = 0;
372 int mappedEndResidue = 0;
373 if (seq.getDatasetSequence() == mappedSequence)
376 * Found a sequence mapping. Locate the start/end mapped residues.
378 List<AlignedCodonFrame> mapping = Arrays
379 .asList(new AlignedCodonFrame[] { acf });
380 SearchResultsI sr = buildSearchResults(selected,
381 startResiduePos, mapping);
382 for (SearchResultMatchI m : sr.getResults())
384 mappedStartResidue = m.getStart();
385 mappedEndResidue = m.getEnd();
387 sr = buildSearchResults(selected, endResiduePos, mapping);
388 for (SearchResultMatchI m : sr.getResults())
390 mappedStartResidue = Math.min(mappedStartResidue,
392 mappedEndResidue = Math.max(mappedEndResidue, m.getEnd());
396 * Find the mapped aligned columns, save the range. Note findIndex
397 * returns a base 1 position, SequenceGroup uses base 0
399 int mappedStartCol = seq.findIndex(mappedStartResidue) - 1;
400 minStartCol = minStartCol == -1 ? mappedStartCol : Math.min(
401 minStartCol, mappedStartCol);
402 int mappedEndCol = seq.findIndex(mappedEndResidue) - 1;
403 maxEndCol = maxEndCol == -1 ? mappedEndCol : Math.max(
404 maxEndCol, mappedEndCol);
405 mappedGroup.addSequence(seq, false);
412 mappedGroup.setStartRes(minStartCol < 0 ? 0 : minStartCol);
413 mappedGroup.setEndRes(maxEndCol < 0 ? 0 : maxEndCol);
418 * Returns an OrderCommand equivalent to the given one, but acting on mapped
419 * sequences as described by the mappings, or null if no mapping can be made.
422 * the original order command
424 * if true, the action is to undo the sort
426 * the alignment we are mapping to
428 * the mappings available
431 public static CommandI mapOrderCommand(OrderCommand command,
432 boolean undo, AlignmentI mapTo, List<AlignedCodonFrame> mappings)
434 SequenceI[] sortOrder = command.getSequenceOrder(undo);
435 List<SequenceI> mappedOrder = new ArrayList<SequenceI>();
439 * Assumption: we are only interested in a cDNA/protein mapping; refactor in
440 * future if we want to support sorting (c)dna as (c)dna or protein as
443 boolean mappingToNucleotide = mapTo.isNucleotide();
444 for (SequenceI seq : sortOrder)
446 for (AlignedCodonFrame acf : mappings)
448 SequenceI mappedSeq = mappingToNucleotide ? acf.getDnaForAaSeq(seq)
449 : acf.getAaForDnaSeq(seq);
450 if (mappedSeq != null)
452 for (SequenceI seq2 : mapTo.getSequences())
454 if (seq2.getDatasetSequence() == mappedSeq)
456 mappedOrder.add(seq2);
466 * Return null if no mappings made.
474 * Add any unmapped sequences on the end of the sort in their original
477 if (j < mapTo.getHeight())
479 for (SequenceI seq : mapTo.getSequences())
481 if (!mappedOrder.contains(seq))
483 mappedOrder.add(seq);
489 * Have to sort the sequences before constructing the OrderCommand - which
490 * then resorts them?!?
492 final SequenceI[] mappedOrderArray = mappedOrder
493 .toArray(new SequenceI[mappedOrder.size()]);
494 SequenceI[] oldOrder = mapTo.getSequencesArray();
495 AlignmentSorter.sortBy(mapTo, new AlignmentOrder(mappedOrderArray));
496 final OrderCommand result = new OrderCommand(command.getDescription(),
502 * Returns a ColumnSelection in the 'mapTo' view which corresponds to the
503 * given selection in the 'mapFrom' view. We assume one is nucleotide, the
504 * other is protein (and holds the mappings from codons to protein residues).
511 public static ColumnSelection mapColumnSelection(ColumnSelection colsel,
512 AlignViewportI mapFrom, AlignViewportI mapTo)
514 boolean targetIsNucleotide = mapTo.isNucleotide();
515 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
516 List<AlignedCodonFrame> codonFrames = protein.getAlignment()
518 ColumnSelection mappedColumns = new ColumnSelection();
522 return mappedColumns;
525 char fromGapChar = mapFrom.getAlignment().getGapCharacter();
528 * For each mapped column, find the range of columns that residues in that
531 List<SequenceI> fromSequences = mapFrom.getAlignment().getSequences();
532 List<SequenceI> toSequences = mapTo.getAlignment().getSequences();
534 for (Integer sel : colsel.getSelected())
536 mapColumn(sel.intValue(), codonFrames, mappedColumns, fromSequences,
537 toSequences, fromGapChar);
540 for (int[] hidden : colsel.getHiddenColumns())
542 mapHiddenColumns(hidden, codonFrames, mappedColumns, fromSequences,
543 toSequences, fromGapChar);
545 return mappedColumns;
549 * Helper method that maps a [start, end] hidden column range to its mapped
554 * @param mappedColumns
555 * @param fromSequences
559 protected static void mapHiddenColumns(int[] hidden,
560 List<AlignedCodonFrame> mappings, ColumnSelection mappedColumns,
561 List<SequenceI> fromSequences, List<SequenceI> toSequences,
564 for (int col = hidden[0]; col <= hidden[1]; col++)
566 int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
567 toSequences, fromGapChar);
570 * Add the range of hidden columns to the mapped selection (converting
573 if (mappedTo != null)
575 mappedColumns.hideColumns(mappedTo[0] - 1, mappedTo[1] - 1);
581 * Helper method to map one column selection
584 * the column number (base 0)
586 * the sequence mappings
587 * @param mappedColumns
588 * the mapped column selections to add to
589 * @param fromSequences
593 protected static void mapColumn(int col,
594 List<AlignedCodonFrame> mappings, ColumnSelection mappedColumns,
595 List<SequenceI> fromSequences, List<SequenceI> toSequences,
598 int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
599 toSequences, fromGapChar);
602 * Add the range of mapped columns to the mapped selection (converting
603 * base 1 to base 0). Note that this may include intron-only regions which
604 * lie between the start and end ranges of the selection.
606 if (mappedTo != null)
608 for (int i = mappedTo[0]; i <= mappedTo[1]; i++)
610 mappedColumns.addElement(i - 1);
616 * Helper method to find the range of columns mapped to from one column.
617 * Returns the maximal range of columns mapped to from all sequences in the
618 * source column, or null if no mappings were found.
622 * @param fromSequences
627 protected static int[] findMappedColumns(int col,
628 List<AlignedCodonFrame> mappings, List<SequenceI> fromSequences,
629 List<SequenceI> toSequences, char fromGapChar)
631 int[] mappedTo = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE };
632 boolean found = false;
635 * For each sequence in the 'from' alignment
637 for (SequenceI fromSeq : fromSequences)
640 * Ignore gaps (unmapped anyway)
642 if (fromSeq.getCharAt(col) == fromGapChar)
648 * Get the residue position and find the mapped position.
650 int residuePos = fromSeq.findPosition(col);
651 SearchResultsI sr = buildSearchResults(fromSeq, residuePos, mappings);
652 for (SearchResultMatchI m : sr.getResults())
654 int mappedStartResidue = m.getStart();
655 int mappedEndResidue = m.getEnd();
656 SequenceI mappedSeq = m.getSequence();
659 * Locate the aligned sequence whose dataset is mappedSeq. TODO a
660 * datamodel that can do this efficiently.
662 for (SequenceI toSeq : toSequences)
664 if (toSeq.getDatasetSequence() == mappedSeq)
666 int mappedStartCol = toSeq.findIndex(mappedStartResidue);
667 int mappedEndCol = toSeq.findIndex(mappedEndResidue);
668 mappedTo[0] = Math.min(mappedTo[0], mappedStartCol);
669 mappedTo[1] = Math.max(mappedTo[1], mappedEndCol);
672 // note: remove break if we ever want to map one to many sequences
677 return found ? mappedTo : null;
681 * Returns the mapped codon or codons for a given aligned sequence column
685 * an aligned peptide sequence
687 * an aligned column position (base 0)
689 * a set of codon mappings
690 * @return the bases of the mapped codon(s) in the cDNA dataset sequence(s),
691 * or an empty list if none found
693 public static List<char[]> findCodonsFor(SequenceI seq, int col,
694 List<AlignedCodonFrame> mappings)
696 List<char[]> result = new ArrayList<char[]>();
697 int dsPos = seq.findPosition(col);
698 for (AlignedCodonFrame mapping : mappings)
700 if (mapping.involvesSequence(seq))
702 List<char[]> codons = mapping.getMappedCodons(
703 seq.getDatasetSequence(), dsPos);
706 result.addAll(codons);
714 * Converts a series of [start, end] range pairs into an array of individual
715 * positions. This also caters for 'reverse strand' (start > end) cases.
720 public static int[] flattenRanges(int[] ranges)
723 * Count how many positions altogether
726 for (int i = 0; i < ranges.length - 1; i += 2)
728 count += Math.abs(ranges[i + 1] - ranges[i]) + 1;
731 int[] result = new int[count];
733 for (int i = 0; i < ranges.length - 1; i += 2)
735 int from = ranges[i];
736 final int to = ranges[i + 1];
737 int step = from <= to ? 1 : -1;
742 } while (from != to + step);
748 * Returns a list of any mappings that are from or to the given (aligned or
755 public static List<AlignedCodonFrame> findMappingsForSequence(
756 SequenceI sequence, List<AlignedCodonFrame> mappings)
758 return findMappingsForSequenceAndOthers(sequence, mappings, null);
762 * Returns a list of any mappings that are from or to the given (aligned or
763 * dataset) sequence, optionally limited to mappings involving one of a given
771 public static List<AlignedCodonFrame> findMappingsForSequenceAndOthers(
772 SequenceI sequence, List<AlignedCodonFrame> mappings,
773 List<SequenceI> filterList)
775 List<AlignedCodonFrame> result = new ArrayList<AlignedCodonFrame>();
776 if (sequence == null || mappings == null)
780 for (AlignedCodonFrame mapping : mappings)
782 if (mapping.involvesSequence(sequence))
784 if (filterList != null)
786 for (SequenceI otherseq : filterList)
788 SequenceI otherDataset = otherseq.getDatasetSequence();
789 if (otherseq == sequence
790 || otherseq == sequence.getDatasetSequence()
791 || (otherDataset != null && (otherDataset == sequence || otherDataset == sequence
792 .getDatasetSequence())))
794 // skip sequences in subset which directly relate to sequence
797 if (mapping.involvesSequence(otherseq))
799 // selected a mapping contained in subselect alignment
815 * Returns the total length of the supplied ranges, which may be as single
816 * [start, end] or multiple [start, end, start, end ...]
821 public static int getLength(List<int[]> ranges)
828 for (int[] range : ranges)
830 if (range.length % 2 != 0)
832 System.err.println("Error unbalance start/end ranges: "
833 + ranges.toString());
836 for (int i = 0; i < range.length - 1; i += 2)
838 length += Math.abs(range[i + 1] - range[i]) + 1;
845 * Answers true if any range includes the given value
851 public static boolean contains(List<int[]> ranges, int value)
857 for (int[] range : ranges)
859 if (range[1] >= range[0] && value >= range[0] && value <= range[1])
862 * value within ascending range
866 if (range[1] < range[0] && value <= range[0] && value >= range[1])
869 * value within descending range
878 * Removes a specified number of positions from the start of a ranges list.
879 * For example, could be used to adjust cds ranges to allow for an incomplete
880 * start codon. Subranges are removed completely, or their start positions
881 * adjusted, until the required number of positions has been removed from the
882 * range. Reverse strand ranges are supported. The input array is not
887 * an array of [start, end, start, end...] positions
888 * @return a new array with the first removeCount positions removed
890 public static int[] removeStartPositions(int removeCount,
893 if (removeCount <= 0)
898 int[] copy = Arrays.copyOf(ranges, ranges.length);
901 for (int x = 0; x < copy.length && sxpos == -1; x += 2)
903 cdspos += Math.abs(copy[x + 1] - copy[x]) + 1;
904 if (removeCount < cdspos)
907 * we have removed enough, time to finish
912 * increment start of first exon, or decrement if reverse strand
914 if (copy[x] <= copy[x + 1])
916 copy[x] = copy[x + 1] - cdspos + removeCount + 1;
920 copy[x] = copy[x + 1] + cdspos - removeCount - 1;
929 * we dropped at least one entire sub-range - compact the array
931 int[] nxon = new int[copy.length - sxpos];
932 System.arraycopy(copy, sxpos, nxon, 0, copy.length - sxpos);