2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.analysis.AlignmentSorter;
24 import jalview.api.AlignViewportI;
25 import jalview.commands.CommandI;
26 import jalview.commands.EditCommand;
27 import jalview.commands.EditCommand.Action;
28 import jalview.commands.EditCommand.Edit;
29 import jalview.commands.OrderCommand;
30 import jalview.datamodel.AlignedCodonFrame;
31 import jalview.datamodel.AlignmentI;
32 import jalview.datamodel.AlignmentOrder;
33 import jalview.datamodel.ColumnSelection;
34 import jalview.datamodel.SearchResults;
35 import jalview.datamodel.SearchResults.Match;
36 import jalview.datamodel.Sequence;
37 import jalview.datamodel.SequenceGroup;
38 import jalview.datamodel.SequenceI;
40 import java.util.ArrayList;
41 import java.util.Arrays;
42 import java.util.HashMap;
43 import java.util.Iterator;
44 import java.util.List;
48 * Helper methods for manipulations involving sequence mappings.
53 public final class MappingUtils
57 * Helper method to map a CUT or PASTE command.
60 * the original command
62 * if true, the command is to be undone
64 * the mapped sequences to apply the mapped command to
66 * the mapped EditCommand to add to
69 protected static void mapCutOrPaste(Edit edit, boolean undo,
70 List<SequenceI> targetSeqs, EditCommand result,
71 List<AlignedCodonFrame> mappings)
73 Action action = edit.getAction();
76 action = action.getUndoAction();
79 System.err.println("MappingUtils.mapCutOrPaste not yet implemented");
83 * Returns a new EditCommand representing the given command as mapped to the
84 * given sequences. If there is no mapping, returns null.
93 public static EditCommand mapEditCommand(EditCommand command,
94 boolean undo, final AlignmentI mapTo, char gapChar,
95 List<AlignedCodonFrame> mappings)
98 * For now, only support mapping from protein edits to cDna
100 if (!mapTo.isNucleotide())
106 * Cache a copy of the target sequences so we can mimic successive edits on
107 * them. This lets us compute mappings for all edits in the set.
109 Map<SequenceI, SequenceI> targetCopies = new HashMap<SequenceI, SequenceI>();
110 for (SequenceI seq : mapTo.getSequences())
112 SequenceI ds = seq.getDatasetSequence();
115 final SequenceI copy = new Sequence(seq);
116 copy.setDatasetSequence(ds);
117 targetCopies.put(ds, copy);
122 * Compute 'source' sequences as they were before applying edits:
124 Map<SequenceI, SequenceI> originalSequences = command.priorState(undo);
126 EditCommand result = new EditCommand();
127 Iterator<Edit> edits = command.getEditIterator(!undo);
128 while (edits.hasNext())
130 Edit edit = edits.next();
131 if (edit.getAction() == Action.CUT
132 || edit.getAction() == Action.PASTE)
134 mapCutOrPaste(edit, undo, mapTo.getSequences(), result, mappings);
136 else if (edit.getAction() == Action.INSERT_GAP
137 || edit.getAction() == Action.DELETE_GAP)
139 mapInsertOrDelete(edit, undo, originalSequences,
140 mapTo.getSequences(), targetCopies, gapChar, result,
144 return result.getSize() > 0 ? result : null;
148 * Helper method to map an edit command to insert or delete gaps.
151 * the original command
153 * if true, the action is to undo the command
154 * @param originalSequences
155 * the sequences the command acted on
157 * @param targetCopies
160 * the new EditCommand to add mapped commands to
163 protected static void mapInsertOrDelete(Edit edit, boolean undo,
164 Map<SequenceI, SequenceI> originalSequences,
165 final List<SequenceI> targetSeqs,
166 Map<SequenceI, SequenceI> targetCopies, char gapChar,
167 EditCommand result, List<AlignedCodonFrame> mappings)
169 Action action = edit.getAction();
172 * Invert sense of action if an Undo.
176 action = action.getUndoAction();
178 final int count = edit.getNumber();
179 final int editPos = edit.getPosition();
180 for (SequenceI seq : edit.getSequences())
183 * Get residue position at (or to right of) edit location. Note we use our
184 * 'copy' of the sequence before editing for this.
186 SequenceI ds = seq.getDatasetSequence();
191 final SequenceI actedOn = originalSequences.get(ds);
192 final int seqpos = actedOn.findPosition(editPos);
195 * Determine all mappings from this position to mapped sequences.
197 SearchResults sr = buildSearchResults(seq, seqpos, mappings);
201 for (SequenceI targetSeq : targetSeqs)
203 ds = targetSeq.getDatasetSequence();
208 SequenceI copyTarget = targetCopies.get(ds);
209 final int[] match = sr.getResults(copyTarget, 0,
210 copyTarget.getLength());
213 final int ratio = 3; // TODO: compute this - how?
214 final int mappedCount = count * ratio;
217 * Shift Delete start position left, as it acts on positions to its
220 int mappedEditPos = action == Action.DELETE_GAP ? match[0]
221 - mappedCount : match[0];
222 Edit e = result.new Edit(action, new SequenceI[] { targetSeq },
223 mappedEditPos, mappedCount, gapChar);
227 * and 'apply' the edit to our copy of its target sequence
229 if (action == Action.INSERT_GAP)
231 copyTarget.setSequence(new String(StringUtils.insertCharAt(
232 copyTarget.getSequence(), mappedEditPos, mappedCount,
235 else if (action == Action.DELETE_GAP)
237 copyTarget.setSequence(new String(StringUtils.deleteChars(
238 copyTarget.getSequence(), mappedEditPos,
239 mappedEditPos + mappedCount)));
245 * and 'apply' the edit to our copy of its source sequence
247 if (action == Action.INSERT_GAP)
249 actedOn.setSequence(new String(StringUtils.insertCharAt(
250 actedOn.getSequence(), editPos, count, gapChar)));
252 else if (action == Action.DELETE_GAP)
254 actedOn.setSequence(new String(StringUtils.deleteChars(
255 actedOn.getSequence(), editPos, editPos + count)));
261 * Returns a SearchResults object describing the mapped region corresponding
262 * to the specified sequence position.
269 public static SearchResults buildSearchResults(SequenceI seq, int index,
270 List<AlignedCodonFrame> seqmappings)
272 SearchResults results = new SearchResults();
273 addSearchResults(results, seq, index, seqmappings);
278 * Adds entries to a SearchResults object describing the mapped region
279 * corresponding to the specified sequence position.
286 public static void addSearchResults(SearchResults results, SequenceI seq,
287 int index, List<AlignedCodonFrame> seqmappings)
289 if (index >= seq.getStart() && index <= seq.getEnd())
291 for (AlignedCodonFrame acf : seqmappings)
293 acf.markMappedRegion(seq, index, results);
299 * Returns a (possibly empty) SequenceGroup containing any sequences in the
300 * mapped viewport corresponding to the given group in the source viewport.
307 public static SequenceGroup mapSequenceGroup(final SequenceGroup sg,
308 final AlignViewportI mapFrom, final AlignViewportI mapTo)
311 * Note the SequenceGroup holds aligned sequences, the mappings hold dataset
314 boolean targetIsNucleotide = mapTo.isNucleotide();
315 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
316 List<AlignedCodonFrame> codonFrames = protein.getAlignment()
319 * Copy group name, colours etc, but not sequences or sequence colour scheme
321 SequenceGroup mappedGroup = new SequenceGroup(sg);
322 mappedGroup.cs = mapTo.getGlobalColourScheme();
325 int minStartCol = -1;
327 final int selectionStartRes = sg.getStartRes();
328 final int selectionEndRes = sg.getEndRes();
329 for (SequenceI selected : sg.getSequences())
332 * Find the widest range of non-gapped positions in the selection range
334 int firstUngappedPos = selectionStartRes;
335 while (firstUngappedPos <= selectionEndRes
336 && Comparison.isGap(selected.getCharAt(firstUngappedPos)))
342 * If this sequence is only gaps in the selected range, skip it
344 if (firstUngappedPos > selectionEndRes)
349 int lastUngappedPos = selectionEndRes;
350 while (lastUngappedPos >= selectionStartRes
351 && Comparison.isGap(selected.getCharAt(lastUngappedPos)))
357 * Find the selected start/end residue positions in sequence
359 int startResiduePos = selected.findPosition(firstUngappedPos);
360 int endResiduePos = selected.findPosition(lastUngappedPos);
362 for (AlignedCodonFrame acf : codonFrames)
364 SequenceI mappedSequence = targetIsNucleotide ? acf
365 .getDnaForAaSeq(selected) : acf.getAaForDnaSeq(selected);
366 if (mappedSequence != null)
368 for (SequenceI seq : mapTo.getAlignment().getSequences())
370 int mappedStartResidue = 0;
371 int mappedEndResidue = 0;
372 if (seq.getDatasetSequence() == mappedSequence)
375 * Found a sequence mapping. Locate the start/end mapped residues.
377 List<AlignedCodonFrame> mapping = Arrays
378 .asList(new AlignedCodonFrame[] { acf });
379 SearchResults sr = buildSearchResults(selected,
380 startResiduePos, mapping);
381 for (Match m : sr.getResults())
383 mappedStartResidue = m.getStart();
384 mappedEndResidue = m.getEnd();
386 sr = buildSearchResults(selected, endResiduePos, mapping);
387 for (Match m : sr.getResults())
389 mappedStartResidue = Math.min(mappedStartResidue,
391 mappedEndResidue = Math.max(mappedEndResidue, m.getEnd());
395 * Find the mapped aligned columns, save the range. Note findIndex
396 * returns a base 1 position, SequenceGroup uses base 0
398 int mappedStartCol = seq.findIndex(mappedStartResidue) - 1;
399 minStartCol = minStartCol == -1 ? mappedStartCol : Math.min(
400 minStartCol, mappedStartCol);
401 int mappedEndCol = seq.findIndex(mappedEndResidue) - 1;
402 maxEndCol = maxEndCol == -1 ? mappedEndCol : Math.max(
403 maxEndCol, mappedEndCol);
404 mappedGroup.addSequence(seq, false);
411 mappedGroup.setStartRes(minStartCol < 0 ? 0 : minStartCol);
412 mappedGroup.setEndRes(maxEndCol < 0 ? 0 : maxEndCol);
417 * Returns an OrderCommand equivalent to the given one, but acting on mapped
418 * sequences as described by the mappings, or null if no mapping can be made.
421 * the original order command
423 * if true, the action is to undo the sort
425 * the alignment we are mapping to
427 * the mappings available
430 public static CommandI mapOrderCommand(OrderCommand command,
431 boolean undo, AlignmentI mapTo, List<AlignedCodonFrame> mappings)
433 SequenceI[] sortOrder = command.getSequenceOrder(undo);
434 List<SequenceI> mappedOrder = new ArrayList<SequenceI>();
438 * Assumption: we are only interested in a cDNA/protein mapping; refactor in
439 * future if we want to support sorting (c)dna as (c)dna or protein as
442 boolean mappingToNucleotide = mapTo.isNucleotide();
443 for (SequenceI seq : sortOrder)
445 for (AlignedCodonFrame acf : mappings)
447 SequenceI mappedSeq = mappingToNucleotide ? acf.getDnaForAaSeq(seq)
448 : acf.getAaForDnaSeq(seq);
449 if (mappedSeq != null)
451 for (SequenceI seq2 : mapTo.getSequences())
453 if (seq2.getDatasetSequence() == mappedSeq)
455 mappedOrder.add(seq2);
465 * Return null if no mappings made.
473 * Add any unmapped sequences on the end of the sort in their original
476 if (j < mapTo.getHeight())
478 for (SequenceI seq : mapTo.getSequences())
480 if (!mappedOrder.contains(seq))
482 mappedOrder.add(seq);
488 * Have to sort the sequences before constructing the OrderCommand - which
489 * then resorts them?!?
491 final SequenceI[] mappedOrderArray = mappedOrder
492 .toArray(new SequenceI[mappedOrder.size()]);
493 SequenceI[] oldOrder = mapTo.getSequencesArray();
494 AlignmentSorter.sortBy(mapTo, new AlignmentOrder(mappedOrderArray));
495 final OrderCommand result = new OrderCommand(command.getDescription(),
501 * Returns a ColumnSelection in the 'mapTo' view which corresponds to the
502 * given selection in the 'mapFrom' view. We assume one is nucleotide, the
503 * other is protein (and holds the mappings from codons to protein residues).
510 public static ColumnSelection mapColumnSelection(ColumnSelection colsel,
511 AlignViewportI mapFrom, AlignViewportI mapTo)
513 boolean targetIsNucleotide = mapTo.isNucleotide();
514 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
515 List<AlignedCodonFrame> codonFrames = protein.getAlignment()
517 ColumnSelection mappedColumns = new ColumnSelection();
521 return mappedColumns;
524 char fromGapChar = mapFrom.getAlignment().getGapCharacter();
527 * For each mapped column, find the range of columns that residues in that
530 List<SequenceI> fromSequences = mapFrom.getAlignment().getSequences();
531 List<SequenceI> toSequences = mapTo.getAlignment().getSequences();
533 for (Integer sel : colsel.getSelected())
535 mapColumn(sel.intValue(), codonFrames, mappedColumns, fromSequences,
536 toSequences, fromGapChar);
539 for (int[] hidden : colsel.getHiddenColumns())
541 mapHiddenColumns(hidden, codonFrames, mappedColumns, fromSequences,
542 toSequences, fromGapChar);
544 return mappedColumns;
548 * Helper method that maps a [start, end] hidden column range to its mapped
553 * @param mappedColumns
554 * @param fromSequences
558 protected static void mapHiddenColumns(int[] hidden,
559 List<AlignedCodonFrame> mappings, ColumnSelection mappedColumns,
560 List<SequenceI> fromSequences, List<SequenceI> toSequences,
563 for (int col = hidden[0]; col <= hidden[1]; col++)
565 int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
566 toSequences, fromGapChar);
569 * Add the range of hidden columns to the mapped selection (converting
572 if (mappedTo != null)
574 mappedColumns.hideColumns(mappedTo[0] - 1, mappedTo[1] - 1);
580 * Helper method to map one column selection
583 * the column number (base 0)
585 * the sequence mappings
586 * @param mappedColumns
587 * the mapped column selections to add to
588 * @param fromSequences
592 protected static void mapColumn(int col,
593 List<AlignedCodonFrame> mappings, ColumnSelection mappedColumns,
594 List<SequenceI> fromSequences, List<SequenceI> toSequences,
597 int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
598 toSequences, fromGapChar);
601 * Add the range of mapped columns to the mapped selection (converting
602 * base 1 to base 0). Note that this may include intron-only regions which
603 * lie between the start and end ranges of the selection.
605 if (mappedTo != null)
607 for (int i = mappedTo[0]; i <= mappedTo[1]; i++)
609 mappedColumns.addElement(i - 1);
615 * Helper method to find the range of columns mapped to from one column.
616 * Returns the maximal range of columns mapped to from all sequences in the
617 * source column, or null if no mappings were found.
621 * @param fromSequences
626 protected static int[] findMappedColumns(int col,
627 List<AlignedCodonFrame> mappings, List<SequenceI> fromSequences,
628 List<SequenceI> toSequences, char fromGapChar)
630 int[] mappedTo = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE };
631 boolean found = false;
634 * For each sequence in the 'from' alignment
636 for (SequenceI fromSeq : fromSequences)
639 * Ignore gaps (unmapped anyway)
641 if (fromSeq.getCharAt(col) == fromGapChar)
647 * Get the residue position and find the mapped position.
649 int residuePos = fromSeq.findPosition(col);
650 SearchResults sr = buildSearchResults(fromSeq, residuePos, mappings);
651 for (Match m : sr.getResults())
653 int mappedStartResidue = m.getStart();
654 int mappedEndResidue = m.getEnd();
655 SequenceI mappedSeq = m.getSequence();
658 * Locate the aligned sequence whose dataset is mappedSeq. TODO a
659 * datamodel that can do this efficiently.
661 for (SequenceI toSeq : toSequences)
663 if (toSeq.getDatasetSequence() == mappedSeq)
665 int mappedStartCol = toSeq.findIndex(mappedStartResidue);
666 int mappedEndCol = toSeq.findIndex(mappedEndResidue);
667 mappedTo[0] = Math.min(mappedTo[0], mappedStartCol);
668 mappedTo[1] = Math.max(mappedTo[1], mappedEndCol);
671 // note: remove break if we ever want to map one to many sequences
676 return found ? mappedTo : null;
680 * Returns the mapped codon or codons for a given aligned sequence column
684 * an aligned peptide sequence
686 * an aligned column position (base 0)
688 * a set of codon mappings
689 * @return the bases of the mapped codon(s) in the cDNA dataset sequence(s),
690 * or an empty list if none found
692 public static List<char[]> findCodonsFor(SequenceI seq, int col,
693 List<AlignedCodonFrame> mappings)
695 List<char[]> result = new ArrayList<char[]>();
696 int dsPos = seq.findPosition(col);
697 for (AlignedCodonFrame mapping : mappings)
699 if (mapping.involvesSequence(seq))
701 List<char[]> codons = mapping.getMappedCodons(
702 seq.getDatasetSequence(), dsPos);
705 result.addAll(codons);
713 * Converts a series of [start, end] range pairs into an array of individual
714 * positions. This also caters for 'reverse strand' (start > end) cases.
719 public static int[] flattenRanges(int[] ranges)
722 * Count how many positions altogether
725 for (int i = 0; i < ranges.length - 1; i += 2)
727 count += Math.abs(ranges[i + 1] - ranges[i]) + 1;
730 int[] result = new int[count];
732 for (int i = 0; i < ranges.length - 1; i += 2)
734 int from = ranges[i];
735 final int to = ranges[i + 1];
736 int step = from <= to ? 1 : -1;
741 } while (from != to + step);
747 * Returns a list of any mappings that are from or to the given (aligned or
754 public static List<AlignedCodonFrame> findMappingsForSequence(
755 SequenceI sequence, List<AlignedCodonFrame> mappings)
757 return findMappingsForSequenceAndOthers(sequence, mappings, null);
761 * Returns a list of any mappings that are from or to the given (aligned or
762 * dataset) sequence, optionally limited to mappings involving one of a given
770 public static List<AlignedCodonFrame> findMappingsForSequenceAndOthers(
771 SequenceI sequence, List<AlignedCodonFrame> mappings,
772 List<SequenceI> filterList)
774 List<AlignedCodonFrame> result = new ArrayList<AlignedCodonFrame>();
775 if (sequence == null || mappings == null)
779 for (AlignedCodonFrame mapping : mappings)
781 if (mapping.involvesSequence(sequence))
783 if (filterList != null)
785 for (SequenceI otherseq : filterList)
787 SequenceI otherDataset = otherseq.getDatasetSequence();
788 if (otherseq == sequence
789 || otherseq == sequence.getDatasetSequence()
790 || (otherDataset != null && (otherDataset == sequence || otherDataset == sequence
791 .getDatasetSequence())))
793 // skip sequences in subset which directly relate to sequence
796 if (mapping.involvesSequence(otherseq))
798 // selected a mapping contained in subselect alignment
814 * Returns the total length of the supplied ranges, which may be as single
815 * [start, end] or multiple [start, end, start, end ...]
820 public static int getLength(List<int[]> ranges)
827 for (int[] range : ranges)
829 if (range.length % 2 != 0)
831 System.err.println("Error unbalance start/end ranges: "
832 + ranges.toString());
835 for (int i = 0; i < range.length - 1; i += 2)
837 length += Math.abs(range[i + 1] - range[i]) + 1;
844 * Answers true if any range includes the given value
850 public static boolean contains(List<int[]> ranges, int value)
856 for (int[] range : ranges)
858 if (range[1] >= range[0] && value >= range[0] && value <= range[1])
861 * value within ascending range
865 if (range[1] < range[0] && value <= range[0] && value >= range[1])
868 * value within descending range
877 * Removes a specified number of positions from the start of a ranges list.
878 * For example, could be used to adjust cds ranges to allow for an incomplete
879 * start codon. Subranges are removed completely, or their start positions
880 * adjusted, until the required number of positions has been removed from the
881 * range. Reverse strand ranges are supported. The input array is not
886 * an array of [start, end, start, end...] positions
887 * @return a new array with the first removeCount positions removed
889 public static int[] removeStartPositions(int removeCount,
892 if (removeCount <= 0)
897 int[] copy = Arrays.copyOf(ranges, ranges.length);
900 for (int x = 0; x < copy.length && sxpos == -1; x += 2)
902 cdspos += Math.abs(copy[x + 1] - copy[x]) + 1;
903 if (removeCount < cdspos)
906 * we have removed enough, time to finish
911 * increment start of first exon, or decrement if reverse strand
913 if (copy[x] <= copy[x + 1])
915 copy[x] = copy[x + 1] - cdspos + removeCount + 1;
919 copy[x] = copy[x + 1] + cdspos - removeCount - 1;
928 * we dropped at least one entire sub-range - compact the array
930 int[] nxon = new int[copy.length - sxpos];
931 System.arraycopy(copy, sxpos, nxon, 0, copy.length - sxpos);