2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.analysis.AlignmentSorter;
24 import jalview.api.AlignViewportI;
25 import jalview.commands.CommandI;
26 import jalview.commands.EditCommand;
27 import jalview.commands.EditCommand.Action;
28 import jalview.commands.EditCommand.Edit;
29 import jalview.commands.OrderCommand;
30 import jalview.datamodel.AlignedCodonFrame;
31 import jalview.datamodel.AlignmentI;
32 import jalview.datamodel.AlignmentOrder;
33 import jalview.datamodel.ColumnSelection;
34 import jalview.datamodel.SearchResults;
35 import jalview.datamodel.SearchResults.Match;
36 import jalview.datamodel.Sequence;
37 import jalview.datamodel.SequenceGroup;
38 import jalview.datamodel.SequenceI;
40 import java.util.ArrayList;
41 import java.util.Arrays;
42 import java.util.HashMap;
43 import java.util.Iterator;
44 import java.util.List;
48 * Helper methods for manipulations involving sequence mappings.
53 public final class MappingUtils
57 * Helper method to map a CUT or PASTE command.
60 * the original command
62 * if true, the command is to be undone
64 * the mapped sequences to apply the mapped command to
66 * the mapped EditCommand to add to
69 protected static void mapCutOrPaste(Edit edit, boolean undo,
70 List<SequenceI> targetSeqs, EditCommand result,
71 List<AlignedCodonFrame> mappings)
73 Action action = edit.getAction();
76 action = action.getUndoAction();
79 System.err.println("MappingUtils.mapCutOrPaste not yet implemented");
83 * Returns a new EditCommand representing the given command as mapped to the
84 * given sequences. If there is no mapping, returns null.
93 public static EditCommand mapEditCommand(EditCommand command,
94 boolean undo, final AlignmentI mapTo, char gapChar,
95 List<AlignedCodonFrame> mappings)
98 * For now, only support mapping from protein edits to cDna
100 if (!mapTo.isNucleotide())
106 * Cache a copy of the target sequences so we can mimic successive edits on
107 * them. This lets us compute mappings for all edits in the set.
109 Map<SequenceI, SequenceI> targetCopies = new HashMap<SequenceI, SequenceI>();
110 for (SequenceI seq : mapTo.getSequences())
112 SequenceI ds = seq.getDatasetSequence();
115 final SequenceI copy = new Sequence(seq);
116 copy.setDatasetSequence(ds);
117 targetCopies.put(ds, copy);
122 * Compute 'source' sequences as they were before applying edits:
124 Map<SequenceI, SequenceI> originalSequences = command.priorState(undo);
126 EditCommand result = new EditCommand();
127 Iterator<Edit> edits = command.getEditIterator(!undo);
128 while (edits.hasNext())
130 Edit edit = edits.next();
131 if (edit.getAction() == Action.CUT
132 || edit.getAction() == Action.PASTE)
134 mapCutOrPaste(edit, undo, mapTo.getSequences(), result, mappings);
136 else if (edit.getAction() == Action.INSERT_GAP
137 || edit.getAction() == Action.DELETE_GAP)
139 mapInsertOrDelete(edit, undo, originalSequences,
140 mapTo.getSequences(), targetCopies, gapChar, result,
144 return result.getSize() > 0 ? result : null;
148 * Helper method to map an edit command to insert or delete gaps.
151 * the original command
153 * if true, the action is to undo the command
154 * @param originalSequences
155 * the sequences the command acted on
157 * @param targetCopies
160 * the new EditCommand to add mapped commands to
163 protected static void mapInsertOrDelete(Edit edit, boolean undo,
164 Map<SequenceI, SequenceI> originalSequences,
165 final List<SequenceI> targetSeqs,
166 Map<SequenceI, SequenceI> targetCopies, char gapChar,
167 EditCommand result, List<AlignedCodonFrame> mappings)
169 Action action = edit.getAction();
172 * Invert sense of action if an Undo.
176 action = action.getUndoAction();
178 final int count = edit.getNumber();
179 final int editPos = edit.getPosition();
180 for (SequenceI seq : edit.getSequences())
183 * Get residue position at (or to right of) edit location. Note we use our
184 * 'copy' of the sequence before editing for this.
186 SequenceI ds = seq.getDatasetSequence();
191 final SequenceI actedOn = originalSequences.get(ds);
192 final int seqpos = actedOn.findPosition(editPos);
195 * Determine all mappings from this position to mapped sequences.
197 SearchResults sr = buildSearchResults(seq, seqpos, mappings);
201 for (SequenceI targetSeq : targetSeqs)
203 ds = targetSeq.getDatasetSequence();
208 SequenceI copyTarget = targetCopies.get(ds);
209 final int[] match = sr.getResults(copyTarget, 0,
210 copyTarget.getLength());
213 final int ratio = 3; // TODO: compute this - how?
214 final int mappedCount = count * ratio;
217 * Shift Delete start position left, as it acts on positions to its
220 int mappedEditPos = action == Action.DELETE_GAP ? match[0]
221 - mappedCount : match[0];
222 Edit e = result.new Edit(action, new SequenceI[] { targetSeq },
223 mappedEditPos, mappedCount, gapChar);
227 * and 'apply' the edit to our copy of its target sequence
229 if (action == Action.INSERT_GAP)
231 copyTarget.setSequence(new String(StringUtils.insertCharAt(
232 copyTarget.getSequence(), mappedEditPos, mappedCount,
235 else if (action == Action.DELETE_GAP)
237 copyTarget.setSequence(new String(StringUtils.deleteChars(
238 copyTarget.getSequence(), mappedEditPos,
239 mappedEditPos + mappedCount)));
245 * and 'apply' the edit to our copy of its source sequence
247 if (action == Action.INSERT_GAP)
249 actedOn.setSequence(new String(StringUtils.insertCharAt(
250 actedOn.getSequence(), editPos, count, gapChar)));
252 else if (action == Action.DELETE_GAP)
254 actedOn.setSequence(new String(StringUtils.deleteChars(
255 actedOn.getSequence(), editPos, editPos + count)));
261 * Returns a SearchResults object describing the mapped region corresponding
262 * to the specified sequence position.
269 public static SearchResults buildSearchResults(SequenceI seq, int index,
270 List<AlignedCodonFrame> seqmappings)
272 SearchResults results = new SearchResults();
273 addSearchResults(results, seq, index, seqmappings);
278 * Adds entries to a SearchResults object describing the mapped region
279 * corresponding to the specified sequence position.
286 public static void addSearchResults(SearchResults results, SequenceI seq,
287 int index, List<AlignedCodonFrame> seqmappings)
289 if (index >= seq.getStart() && index <= seq.getEnd())
291 for (AlignedCodonFrame acf : seqmappings)
293 acf.markMappedRegion(seq, index, results);
299 * Returns a (possibly empty) SequenceGroup containing any sequences in the
300 * mapped viewport corresponding to the given group in the source viewport.
307 public static SequenceGroup mapSequenceGroup(final SequenceGroup sg,
308 final AlignViewportI mapFrom, final AlignViewportI mapTo)
311 * Note the SequenceGroup holds aligned sequences, the mappings hold dataset
314 boolean targetIsNucleotide = mapTo.isNucleotide();
315 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
316 List<AlignedCodonFrame> codonFrames = protein.getAlignment()
319 * Copy group name, colours etc, but not sequences or sequence colour scheme
321 SequenceGroup mappedGroup = new SequenceGroup(sg);
322 mappedGroup.cs = mapTo.getGlobalColourScheme();
325 int minStartCol = -1;
327 final int selectionStartRes = sg.getStartRes();
328 final int selectionEndRes = sg.getEndRes();
329 for (SequenceI selected : sg.getSequences())
332 * Find the widest range of non-gapped positions in the selection range
334 int firstUngappedPos = selectionStartRes;
335 while (firstUngappedPos <= selectionEndRes
336 && Comparison.isGap(selected.getCharAt(firstUngappedPos)))
342 * If this sequence is only gaps in the selected range, skip it
344 if (firstUngappedPos > selectionEndRes)
349 int lastUngappedPos = selectionEndRes;
350 while (lastUngappedPos >= selectionStartRes
351 && Comparison.isGap(selected.getCharAt(lastUngappedPos)))
357 * Find the selected start/end residue positions in sequence
359 int startResiduePos = selected.findPosition(firstUngappedPos);
360 int endResiduePos = selected.findPosition(lastUngappedPos);
362 for (AlignedCodonFrame acf : codonFrames)
364 SequenceI mappedSequence = targetIsNucleotide ? acf
365 .getDnaForAaSeq(selected) : acf.getAaForDnaSeq(selected);
366 if (mappedSequence != null)
368 for (SequenceI seq : mapTo.getAlignment().getSequences())
370 int mappedStartResidue = 0;
371 int mappedEndResidue = 0;
372 if (seq.getDatasetSequence() == mappedSequence)
375 * Found a sequence mapping. Locate the start/end mapped residues.
377 List<AlignedCodonFrame> mapping = Arrays.asList(new AlignedCodonFrame[] { acf });
378 SearchResults sr = buildSearchResults(selected,
379 startResiduePos, mapping);
380 for (Match m : sr.getResults())
382 mappedStartResidue = m.getStart();
383 mappedEndResidue = m.getEnd();
385 sr = buildSearchResults(selected, endResiduePos, mapping);
386 for (Match m : sr.getResults())
388 mappedStartResidue = Math.min(mappedStartResidue,
390 mappedEndResidue = Math.max(mappedEndResidue, m.getEnd());
394 * Find the mapped aligned columns, save the range. Note findIndex
395 * returns a base 1 position, SequenceGroup uses base 0
397 int mappedStartCol = seq.findIndex(mappedStartResidue) - 1;
398 minStartCol = minStartCol == -1 ? mappedStartCol : Math.min(
399 minStartCol, mappedStartCol);
400 int mappedEndCol = seq.findIndex(mappedEndResidue) - 1;
401 maxEndCol = maxEndCol == -1 ? mappedEndCol : Math.max(
402 maxEndCol, mappedEndCol);
403 mappedGroup.addSequence(seq, false);
410 mappedGroup.setStartRes(minStartCol < 0 ? 0 : minStartCol);
411 mappedGroup.setEndRes(maxEndCol < 0 ? 0 : maxEndCol);
416 * Returns an OrderCommand equivalent to the given one, but acting on mapped
417 * sequences as described by the mappings, or null if no mapping can be made.
420 * the original order command
422 * if true, the action is to undo the sort
424 * the alignment we are mapping to
426 * the mappings available
429 public static CommandI mapOrderCommand(OrderCommand command,
430 boolean undo, AlignmentI mapTo, List<AlignedCodonFrame> mappings)
432 SequenceI[] sortOrder = command.getSequenceOrder(undo);
433 List<SequenceI> mappedOrder = new ArrayList<SequenceI>();
437 * Assumption: we are only interested in a cDNA/protein mapping; refactor in
438 * future if we want to support sorting (c)dna as (c)dna or protein as
441 boolean mappingToNucleotide = mapTo.isNucleotide();
442 for (SequenceI seq : sortOrder)
444 for (AlignedCodonFrame acf : mappings)
446 SequenceI mappedSeq = mappingToNucleotide ? acf.getDnaForAaSeq(seq)
447 : acf.getAaForDnaSeq(seq);
448 if (mappedSeq != null)
450 for (SequenceI seq2 : mapTo.getSequences())
452 if (seq2.getDatasetSequence() == mappedSeq)
454 mappedOrder.add(seq2);
464 * Return null if no mappings made.
472 * Add any unmapped sequences on the end of the sort in their original
475 if (j < mapTo.getHeight())
477 for (SequenceI seq : mapTo.getSequences())
479 if (!mappedOrder.contains(seq))
481 mappedOrder.add(seq);
487 * Have to sort the sequences before constructing the OrderCommand - which
488 * then resorts them?!?
490 final SequenceI[] mappedOrderArray = mappedOrder
491 .toArray(new SequenceI[mappedOrder.size()]);
492 SequenceI[] oldOrder = mapTo.getSequencesArray();
493 AlignmentSorter.sortBy(mapTo, new AlignmentOrder(mappedOrderArray));
494 final OrderCommand result = new OrderCommand(command.getDescription(),
500 * Returns a ColumnSelection in the 'mapTo' view which corresponds to the
501 * given selection in the 'mapFrom' view. We assume one is nucleotide, the
502 * other is protein (and holds the mappings from codons to protein residues).
509 public static ColumnSelection mapColumnSelection(ColumnSelection colsel,
510 AlignViewportI mapFrom, AlignViewportI mapTo)
512 boolean targetIsNucleotide = mapTo.isNucleotide();
513 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
514 List<AlignedCodonFrame> codonFrames = protein.getAlignment()
516 ColumnSelection mappedColumns = new ColumnSelection();
520 return mappedColumns;
523 char fromGapChar = mapFrom.getAlignment().getGapCharacter();
526 * For each mapped column, find the range of columns that residues in that
529 List<SequenceI> fromSequences = mapFrom.getAlignment().getSequences();
530 List<SequenceI> toSequences = mapTo.getAlignment().getSequences();
532 for (Integer sel : colsel.getSelected())
534 mapColumn(sel.intValue(), codonFrames, mappedColumns, fromSequences,
535 toSequences, fromGapChar);
538 for (int[] hidden : colsel.getHiddenColumns())
540 mapHiddenColumns(hidden, codonFrames, mappedColumns, fromSequences,
541 toSequences, fromGapChar);
543 return mappedColumns;
547 * Helper method that maps a [start, end] hidden column range to its mapped
552 * @param mappedColumns
553 * @param fromSequences
557 protected static void mapHiddenColumns(int[] hidden,
558 List<AlignedCodonFrame> mappings,
559 ColumnSelection mappedColumns, List<SequenceI> fromSequences,
560 List<SequenceI> toSequences, char fromGapChar)
562 for (int col = hidden[0]; col <= hidden[1]; col++)
564 int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
565 toSequences, fromGapChar);
568 * Add the range of hidden columns to the mapped selection (converting
571 if (mappedTo != null)
573 mappedColumns.hideColumns(mappedTo[0] - 1, mappedTo[1] - 1);
579 * Helper method to map one column selection
582 * the column number (base 0)
584 * the sequence mappings
585 * @param mappedColumns
586 * the mapped column selections to add to
587 * @param fromSequences
591 protected static void mapColumn(int col,
592 List<AlignedCodonFrame> mappings,
593 ColumnSelection mappedColumns, List<SequenceI> fromSequences,
594 List<SequenceI> toSequences, char fromGapChar)
596 int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
597 toSequences, fromGapChar);
600 * Add the range of mapped columns to the mapped selection (converting
601 * base 1 to base 0). Note that this may include intron-only regions which
602 * lie between the start and end ranges of the selection.
604 if (mappedTo != null)
606 for (int i = mappedTo[0]; i <= mappedTo[1]; i++)
608 mappedColumns.addElement(i - 1);
614 * Helper method to find the range of columns mapped to from one column.
615 * Returns the maximal range of columns mapped to from all sequences in the
616 * source column, or null if no mappings were found.
620 * @param fromSequences
625 protected static int[] findMappedColumns(int col,
626 List<AlignedCodonFrame> mappings, List<SequenceI> fromSequences,
627 List<SequenceI> toSequences, char fromGapChar)
629 int[] mappedTo = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE };
630 boolean found = false;
633 * For each sequence in the 'from' alignment
635 for (SequenceI fromSeq : fromSequences)
638 * Ignore gaps (unmapped anyway)
640 if (fromSeq.getCharAt(col) == fromGapChar)
646 * Get the residue position and find the mapped position.
648 int residuePos = fromSeq.findPosition(col);
649 SearchResults sr = buildSearchResults(fromSeq, residuePos,
651 for (Match m : sr.getResults())
653 int mappedStartResidue = m.getStart();
654 int mappedEndResidue = m.getEnd();
655 SequenceI mappedSeq = m.getSequence();
658 * Locate the aligned sequence whose dataset is mappedSeq. TODO a
659 * datamodel that can do this efficiently.
661 for (SequenceI toSeq : toSequences)
663 if (toSeq.getDatasetSequence() == mappedSeq)
665 int mappedStartCol = toSeq.findIndex(mappedStartResidue);
666 int mappedEndCol = toSeq.findIndex(mappedEndResidue);
667 mappedTo[0] = Math.min(mappedTo[0], mappedStartCol);
668 mappedTo[1] = Math.max(mappedTo[1], mappedEndCol);
671 // note: remove break if we ever want to map one to many sequences
676 return found ? mappedTo : null;
680 * Returns the mapped codon or codons for a given aligned sequence column
684 * an aligned peptide sequence
686 * an aligned column position (base 0)
688 * a set of codon mappings
689 * @return the bases of the mapped codon(s) in the cDNA dataset sequence(s),
690 * or an empty list if none found
692 public static List<char[]> findCodonsFor(SequenceI seq, int col,
693 List<AlignedCodonFrame> mappings)
695 List<char[]> result = new ArrayList<char[]>();
696 int dsPos = seq.findPosition(col);
697 for (AlignedCodonFrame mapping : mappings)
699 if (mapping.involvesSequence(seq))
701 List<char[]> codons = mapping.getMappedCodons(
702 seq.getDatasetSequence(), dsPos);
705 result.addAll(codons);
713 * Converts a series of [start, end] range pairs into an array of individual
714 * positions. This also caters for 'reverse strand' (start > end) cases.
719 public static int[] flattenRanges(int[] ranges)
722 * Count how many positions altogether
725 for (int i = 0; i < ranges.length - 1; i += 2)
727 count += Math.abs(ranges[i + 1] - ranges[i]) + 1;
730 int[] result = new int[count];
732 for (int i = 0; i < ranges.length - 1; i += 2)
734 int from = ranges[i];
735 final int to = ranges[i + 1];
736 int step = from <= to ? 1 : -1;
741 } while (from != to + step);
747 * Returns a list of any mappings that are from or to the given (aligned or
754 public static List<AlignedCodonFrame> findMappingsForSequence(
755 SequenceI sequence, List<AlignedCodonFrame> mappings)
757 return findMappingsForSequenceAndOthers(sequence, mappings, null);
761 * Returns a list of any mappings that are from or to the given (aligned or
762 * dataset) sequence, optionally limited to mappings involving one of a given
770 public static List<AlignedCodonFrame> findMappingsForSequenceAndOthers(
771 SequenceI sequence, List<AlignedCodonFrame> mappings,
772 List<SequenceI> filterList)
774 List<AlignedCodonFrame> result = new ArrayList<AlignedCodonFrame>();
775 if (sequence == null || mappings == null)
779 for (AlignedCodonFrame mapping : mappings)
781 if (mapping.involvesSequence(sequence))
783 if (filterList != null)
785 for (SequenceI otherseq : filterList)
787 SequenceI otherDataset = otherseq.getDatasetSequence();
788 if (otherseq == sequence
789 || otherseq == sequence.getDatasetSequence()
790 || (otherDataset != null && (otherDataset == sequence || otherDataset == sequence
791 .getDatasetSequence())))
793 // skip sequences in subset which directly relate to sequence
796 if (mapping.involvesSequence(otherseq))
798 // selected a mapping contained in subselect alignment
814 * Returns the total length of the supplied ranges, which may be as single
815 * [start, end] or multiple [start, end, start, end ...]
820 public static int getLength(List<int[]> ranges)
827 for (int[] range : ranges)
829 if (range.length % 2 != 0)
831 System.err.println("Error unbalance start/end ranges: "
832 + ranges.toString());
835 for (int i = 0; i < range.length - 1; i += 2)
837 length += Math.abs(range[i + 1] - range[i]) + 1;
844 * Answers true if any range includes the given value
850 public static boolean contains(List<int[]> ranges, int value)
856 for (int[] range : ranges)
858 if (range[1] >= range[0] && value >= range[0] && value <= range[1])
861 * value within ascending range
865 if (range[1] < range[0] && value <= range[0] && value >= range[1])
868 * value within descending range
877 * Removes a specified number of positions from the start of a ranges list.
878 * For example, could be used to adjust cds ranges to allow for an incomplete
879 * start codon. Subranges are removed completely, or their start positions
880 * adjusted, until the required number of positions has been removed from the
881 * range. Reverse strand ranges are supported. The input array is not
886 * an array of [start, end, start, end...] positions
887 * @return a new array with the first removeCount positions removed
889 public static int[] removeStartPositions(int removeCount,
892 if (removeCount <= 0)
897 int[] copy = Arrays.copyOf(ranges, ranges.length);
900 for (int x = 0; x < copy.length && sxpos == -1; x += 2)
902 cdspos += Math.abs(copy[x + 1] - copy[x]) + 1;
903 if (removeCount < cdspos)
906 * we have removed enough, time to finish
911 * increment start of first exon, or decrement if reverse strand
913 if (copy[x] <= copy[x + 1])
915 copy[x] = copy[x + 1] - cdspos + removeCount + 1;
919 copy[x] = copy[x + 1] + cdspos - removeCount - 1;
928 * we dropped at least one entire sub-range - compact the array
930 int[] nxon = new int[copy.length - sxpos];
931 System.arraycopy(copy, sxpos, nxon, 0, copy.length - sxpos);