2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.analysis.AlignmentSorter;
24 import jalview.api.AlignViewportI;
25 import jalview.commands.CommandI;
26 import jalview.commands.EditCommand;
27 import jalview.commands.EditCommand.Action;
28 import jalview.commands.EditCommand.Edit;
29 import jalview.commands.OrderCommand;
30 import jalview.datamodel.AlignedCodonFrame;
31 import jalview.datamodel.AlignmentI;
32 import jalview.datamodel.AlignmentOrder;
33 import jalview.datamodel.ColumnSelection;
34 import jalview.datamodel.HiddenColumns;
35 import jalview.datamodel.SearchResultMatchI;
36 import jalview.datamodel.SearchResults;
37 import jalview.datamodel.SearchResultsI;
38 import jalview.datamodel.Sequence;
39 import jalview.datamodel.SequenceGroup;
40 import jalview.datamodel.SequenceI;
42 import java.util.ArrayList;
43 import java.util.Arrays;
44 import java.util.HashMap;
45 import java.util.Iterator;
46 import java.util.List;
50 * Helper methods for manipulations involving sequence mappings.
55 public final class MappingUtils
59 * Helper method to map a CUT or PASTE command.
62 * the original command
64 * if true, the command is to be undone
66 * the mapped sequences to apply the mapped command to
68 * the mapped EditCommand to add to
71 protected static void mapCutOrPaste(Edit edit, boolean undo,
72 List<SequenceI> targetSeqs, EditCommand result,
73 List<AlignedCodonFrame> mappings)
75 Action action = edit.getAction();
78 action = action.getUndoAction();
81 System.err.println("MappingUtils.mapCutOrPaste not yet implemented");
85 * Returns a new EditCommand representing the given command as mapped to the
86 * given sequences. If there is no mapping, returns null.
95 public static EditCommand mapEditCommand(EditCommand command,
96 boolean undo, final AlignmentI mapTo, char gapChar,
97 List<AlignedCodonFrame> mappings)
100 * For now, only support mapping from protein edits to cDna
102 if (!mapTo.isNucleotide())
108 * Cache a copy of the target sequences so we can mimic successive edits on
109 * them. This lets us compute mappings for all edits in the set.
111 Map<SequenceI, SequenceI> targetCopies = new HashMap<SequenceI, SequenceI>();
112 for (SequenceI seq : mapTo.getSequences())
114 SequenceI ds = seq.getDatasetSequence();
117 final SequenceI copy = new Sequence(seq);
118 copy.setDatasetSequence(ds);
119 targetCopies.put(ds, copy);
124 * Compute 'source' sequences as they were before applying edits:
126 Map<SequenceI, SequenceI> originalSequences = command.priorState(undo);
128 EditCommand result = new EditCommand();
129 Iterator<Edit> edits = command.getEditIterator(!undo);
130 while (edits.hasNext())
132 Edit edit = edits.next();
133 if (edit.getAction() == Action.CUT
134 || edit.getAction() == Action.PASTE)
136 mapCutOrPaste(edit, undo, mapTo.getSequences(), result, mappings);
138 else if (edit.getAction() == Action.INSERT_GAP
139 || edit.getAction() == Action.DELETE_GAP)
141 mapInsertOrDelete(edit, undo, originalSequences,
142 mapTo.getSequences(), targetCopies, gapChar, result,
146 return result.getSize() > 0 ? result : null;
150 * Helper method to map an edit command to insert or delete gaps.
153 * the original command
155 * if true, the action is to undo the command
156 * @param originalSequences
157 * the sequences the command acted on
159 * @param targetCopies
162 * the new EditCommand to add mapped commands to
165 protected static void mapInsertOrDelete(Edit edit, boolean undo,
166 Map<SequenceI, SequenceI> originalSequences,
167 final List<SequenceI> targetSeqs,
168 Map<SequenceI, SequenceI> targetCopies, char gapChar,
169 EditCommand result, List<AlignedCodonFrame> mappings)
171 Action action = edit.getAction();
174 * Invert sense of action if an Undo.
178 action = action.getUndoAction();
180 final int count = edit.getNumber();
181 final int editPos = edit.getPosition();
182 for (SequenceI seq : edit.getSequences())
185 * Get residue position at (or to right of) edit location. Note we use our
186 * 'copy' of the sequence before editing for this.
188 SequenceI ds = seq.getDatasetSequence();
193 final SequenceI actedOn = originalSequences.get(ds);
194 final int seqpos = actedOn.findPosition(editPos);
197 * Determine all mappings from this position to mapped sequences.
199 SearchResultsI sr = buildSearchResults(seq, seqpos, mappings);
203 for (SequenceI targetSeq : targetSeqs)
205 ds = targetSeq.getDatasetSequence();
210 SequenceI copyTarget = targetCopies.get(ds);
211 final int[] match = sr.getResults(copyTarget, 0,
212 copyTarget.getLength());
215 final int ratio = 3; // TODO: compute this - how?
216 final int mappedCount = count * ratio;
219 * Shift Delete start position left, as it acts on positions to its
222 int mappedEditPos = action == Action.DELETE_GAP ? match[0]
223 - mappedCount : match[0];
224 Edit e = result.new Edit(action, new SequenceI[] { targetSeq },
225 mappedEditPos, mappedCount, gapChar);
229 * and 'apply' the edit to our copy of its target sequence
231 if (action == Action.INSERT_GAP)
233 copyTarget.setSequence(new String(StringUtils.insertCharAt(
234 copyTarget.getSequence(), mappedEditPos, mappedCount,
237 else if (action == Action.DELETE_GAP)
239 copyTarget.setSequence(new String(StringUtils.deleteChars(
240 copyTarget.getSequence(), mappedEditPos,
241 mappedEditPos + mappedCount)));
247 * and 'apply' the edit to our copy of its source sequence
249 if (action == Action.INSERT_GAP)
251 actedOn.setSequence(new String(StringUtils.insertCharAt(
252 actedOn.getSequence(), editPos, count, gapChar)));
254 else if (action == Action.DELETE_GAP)
256 actedOn.setSequence(new String(StringUtils.deleteChars(
257 actedOn.getSequence(), editPos, editPos + count)));
263 * Returns a SearchResults object describing the mapped region corresponding
264 * to the specified sequence position.
271 public static SearchResultsI buildSearchResults(SequenceI seq, int index,
272 List<AlignedCodonFrame> seqmappings)
274 SearchResultsI results = new SearchResults();
275 addSearchResults(results, seq, index, seqmappings);
280 * Adds entries to a SearchResults object describing the mapped region
281 * corresponding to the specified sequence position.
288 public static void addSearchResults(SearchResultsI results, SequenceI seq,
289 int index, List<AlignedCodonFrame> seqmappings)
291 if (index >= seq.getStart() && index <= seq.getEnd())
293 for (AlignedCodonFrame acf : seqmappings)
295 acf.markMappedRegion(seq, index, results);
301 * Returns a (possibly empty) SequenceGroup containing any sequences in the
302 * mapped viewport corresponding to the given group in the source viewport.
309 public static SequenceGroup mapSequenceGroup(final SequenceGroup sg,
310 final AlignViewportI mapFrom, final AlignViewportI mapTo)
313 * Note the SequenceGroup holds aligned sequences, the mappings hold dataset
316 boolean targetIsNucleotide = mapTo.isNucleotide();
317 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
318 List<AlignedCodonFrame> codonFrames = protein.getAlignment()
321 * Copy group name, colours etc, but not sequences or sequence colour scheme
323 SequenceGroup mappedGroup = new SequenceGroup(sg);
324 mappedGroup.setColourScheme(mapTo.getGlobalColourScheme());
327 int minStartCol = -1;
329 final int selectionStartRes = sg.getStartRes();
330 final int selectionEndRes = sg.getEndRes();
331 for (SequenceI selected : sg.getSequences())
334 * Find the widest range of non-gapped positions in the selection range
336 int firstUngappedPos = selectionStartRes;
337 while (firstUngappedPos <= selectionEndRes
338 && Comparison.isGap(selected.getCharAt(firstUngappedPos)))
344 * If this sequence is only gaps in the selected range, skip it
346 if (firstUngappedPos > selectionEndRes)
351 int lastUngappedPos = selectionEndRes;
352 while (lastUngappedPos >= selectionStartRes
353 && Comparison.isGap(selected.getCharAt(lastUngappedPos)))
359 * Find the selected start/end residue positions in sequence
361 int startResiduePos = selected.findPosition(firstUngappedPos);
362 int endResiduePos = selected.findPosition(lastUngappedPos);
364 for (AlignedCodonFrame acf : codonFrames)
366 SequenceI mappedSequence = targetIsNucleotide ? acf
367 .getDnaForAaSeq(selected) : acf.getAaForDnaSeq(selected);
368 if (mappedSequence != null)
370 for (SequenceI seq : mapTo.getAlignment().getSequences())
372 int mappedStartResidue = 0;
373 int mappedEndResidue = 0;
374 if (seq.getDatasetSequence() == mappedSequence)
377 * Found a sequence mapping. Locate the start/end mapped residues.
379 List<AlignedCodonFrame> mapping = Arrays
380 .asList(new AlignedCodonFrame[] { acf });
381 SearchResultsI sr = buildSearchResults(selected,
382 startResiduePos, mapping);
383 for (SearchResultMatchI m : sr.getResults())
385 mappedStartResidue = m.getStart();
386 mappedEndResidue = m.getEnd();
388 sr = buildSearchResults(selected, endResiduePos, mapping);
389 for (SearchResultMatchI m : sr.getResults())
391 mappedStartResidue = Math.min(mappedStartResidue,
393 mappedEndResidue = Math.max(mappedEndResidue, m.getEnd());
397 * Find the mapped aligned columns, save the range. Note findIndex
398 * returns a base 1 position, SequenceGroup uses base 0
400 int mappedStartCol = seq.findIndex(mappedStartResidue) - 1;
401 minStartCol = minStartCol == -1 ? mappedStartCol : Math.min(
402 minStartCol, mappedStartCol);
403 int mappedEndCol = seq.findIndex(mappedEndResidue) - 1;
404 maxEndCol = maxEndCol == -1 ? mappedEndCol : Math.max(
405 maxEndCol, mappedEndCol);
406 mappedGroup.addSequence(seq, false);
413 mappedGroup.setStartRes(minStartCol < 0 ? 0 : minStartCol);
414 mappedGroup.setEndRes(maxEndCol < 0 ? 0 : maxEndCol);
419 * Returns an OrderCommand equivalent to the given one, but acting on mapped
420 * sequences as described by the mappings, or null if no mapping can be made.
423 * the original order command
425 * if true, the action is to undo the sort
427 * the alignment we are mapping to
429 * the mappings available
432 public static CommandI mapOrderCommand(OrderCommand command,
433 boolean undo, AlignmentI mapTo, List<AlignedCodonFrame> mappings)
435 SequenceI[] sortOrder = command.getSequenceOrder(undo);
436 List<SequenceI> mappedOrder = new ArrayList<SequenceI>();
440 * Assumption: we are only interested in a cDNA/protein mapping; refactor in
441 * future if we want to support sorting (c)dna as (c)dna or protein as
444 boolean mappingToNucleotide = mapTo.isNucleotide();
445 for (SequenceI seq : sortOrder)
447 for (AlignedCodonFrame acf : mappings)
449 SequenceI mappedSeq = mappingToNucleotide ? acf.getDnaForAaSeq(seq)
450 : acf.getAaForDnaSeq(seq);
451 if (mappedSeq != null)
453 for (SequenceI seq2 : mapTo.getSequences())
455 if (seq2.getDatasetSequence() == mappedSeq)
457 mappedOrder.add(seq2);
467 * Return null if no mappings made.
475 * Add any unmapped sequences on the end of the sort in their original
478 if (j < mapTo.getHeight())
480 for (SequenceI seq : mapTo.getSequences())
482 if (!mappedOrder.contains(seq))
484 mappedOrder.add(seq);
490 * Have to sort the sequences before constructing the OrderCommand - which
491 * then resorts them?!?
493 final SequenceI[] mappedOrderArray = mappedOrder
494 .toArray(new SequenceI[mappedOrder.size()]);
495 SequenceI[] oldOrder = mapTo.getSequencesArray();
496 AlignmentSorter.sortBy(mapTo, new AlignmentOrder(mappedOrderArray));
497 final OrderCommand result = new OrderCommand(command.getDescription(),
503 * Returns a ColumnSelection in the 'mapTo' view which corresponds to the
504 * given selection in the 'mapFrom' view. We assume one is nucleotide, the
505 * other is protein (and holds the mappings from codons to protein residues).
512 public static ColumnSelection mapColumnSelection(ColumnSelection colsel,
513 AlignViewportI mapFrom, AlignViewportI mapTo)
515 boolean targetIsNucleotide = mapTo.isNucleotide();
516 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
517 List<AlignedCodonFrame> codonFrames = protein.getAlignment()
519 ColumnSelection mappedColumns = new ColumnSelection();
523 return mappedColumns;
526 char fromGapChar = mapFrom.getAlignment().getGapCharacter();
529 * For each mapped column, find the range of columns that residues in that
532 List<SequenceI> fromSequences = mapFrom.getAlignment().getSequences();
533 List<SequenceI> toSequences = mapTo.getAlignment().getSequences();
535 for (Integer sel : colsel.getSelected())
537 mapColumn(sel.intValue(), codonFrames, mappedColumns, fromSequences,
538 toSequences, fromGapChar);
541 for (int[] hidden : colsel.getListOfCols())
543 mapHiddenColumns(hidden, codonFrames, mappedColumns, fromSequences,
544 toSequences, fromGapChar);
546 return mappedColumns;
550 * Helper method that maps a [start, end] hidden column range to its mapped
555 * @param mappedColumns
556 * @param fromSequences
560 protected static void mapHiddenColumns(int[] hidden,
561 List<AlignedCodonFrame> mappings, HiddenColumns mappedColumns,
562 List<SequenceI> fromSequences, List<SequenceI> toSequences,
565 for (int col = hidden[0]; col <= hidden[1]; col++)
567 int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
568 toSequences, fromGapChar);
571 * Add the range of hidden columns to the mapped selection (converting
574 if (mappedTo != null)
576 mappedColumns.hideColumns(mappedTo[0] - 1, mappedTo[1] - 1);
582 * Helper method to map one column selection
585 * the column number (base 0)
587 * the sequence mappings
588 * @param mappedColumns
589 * the mapped column selections to add to
590 * @param fromSequences
594 protected static void mapColumn(int col,
595 List<AlignedCodonFrame> mappings, ColumnSelection mappedColumns,
596 List<SequenceI> fromSequences, List<SequenceI> toSequences,
599 int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
600 toSequences, fromGapChar);
603 * Add the range of mapped columns to the mapped selection (converting
604 * base 1 to base 0). Note that this may include intron-only regions which
605 * lie between the start and end ranges of the selection.
607 if (mappedTo != null)
609 for (int i = mappedTo[0]; i <= mappedTo[1]; i++)
611 mappedColumns.addElement(i - 1);
617 * Helper method to find the range of columns mapped to from one column.
618 * Returns the maximal range of columns mapped to from all sequences in the
619 * source column, or null if no mappings were found.
623 * @param fromSequences
628 protected static int[] findMappedColumns(int col,
629 List<AlignedCodonFrame> mappings, List<SequenceI> fromSequences,
630 List<SequenceI> toSequences, char fromGapChar)
632 int[] mappedTo = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE };
633 boolean found = false;
636 * For each sequence in the 'from' alignment
638 for (SequenceI fromSeq : fromSequences)
641 * Ignore gaps (unmapped anyway)
643 if (fromSeq.getCharAt(col) == fromGapChar)
649 * Get the residue position and find the mapped position.
651 int residuePos = fromSeq.findPosition(col);
652 SearchResultsI sr = buildSearchResults(fromSeq, residuePos, mappings);
653 for (SearchResultMatchI m : sr.getResults())
655 int mappedStartResidue = m.getStart();
656 int mappedEndResidue = m.getEnd();
657 SequenceI mappedSeq = m.getSequence();
660 * Locate the aligned sequence whose dataset is mappedSeq. TODO a
661 * datamodel that can do this efficiently.
663 for (SequenceI toSeq : toSequences)
665 if (toSeq.getDatasetSequence() == mappedSeq)
667 int mappedStartCol = toSeq.findIndex(mappedStartResidue);
668 int mappedEndCol = toSeq.findIndex(mappedEndResidue);
669 mappedTo[0] = Math.min(mappedTo[0], mappedStartCol);
670 mappedTo[1] = Math.max(mappedTo[1], mappedEndCol);
673 // note: remove break if we ever want to map one to many sequences
678 return found ? mappedTo : null;
682 * Returns the mapped codon or codons for a given aligned sequence column
686 * an aligned peptide sequence
688 * an aligned column position (base 0)
690 * a set of codon mappings
691 * @return the bases of the mapped codon(s) in the cDNA dataset sequence(s),
692 * or an empty list if none found
694 public static List<char[]> findCodonsFor(SequenceI seq, int col,
695 List<AlignedCodonFrame> mappings)
697 List<char[]> result = new ArrayList<char[]>();
698 int dsPos = seq.findPosition(col);
699 for (AlignedCodonFrame mapping : mappings)
701 if (mapping.involvesSequence(seq))
703 List<char[]> codons = mapping.getMappedCodons(
704 seq.getDatasetSequence(), dsPos);
707 result.addAll(codons);
715 * Converts a series of [start, end] range pairs into an array of individual
716 * positions. This also caters for 'reverse strand' (start > end) cases.
721 public static int[] flattenRanges(int[] ranges)
724 * Count how many positions altogether
727 for (int i = 0; i < ranges.length - 1; i += 2)
729 count += Math.abs(ranges[i + 1] - ranges[i]) + 1;
732 int[] result = new int[count];
734 for (int i = 0; i < ranges.length - 1; i += 2)
736 int from = ranges[i];
737 final int to = ranges[i + 1];
738 int step = from <= to ? 1 : -1;
743 } while (from != to + step);
749 * Returns a list of any mappings that are from or to the given (aligned or
756 public static List<AlignedCodonFrame> findMappingsForSequence(
757 SequenceI sequence, List<AlignedCodonFrame> mappings)
759 return findMappingsForSequenceAndOthers(sequence, mappings, null);
763 * Returns a list of any mappings that are from or to the given (aligned or
764 * dataset) sequence, optionally limited to mappings involving one of a given
772 public static List<AlignedCodonFrame> findMappingsForSequenceAndOthers(
773 SequenceI sequence, List<AlignedCodonFrame> mappings,
774 List<SequenceI> filterList)
776 List<AlignedCodonFrame> result = new ArrayList<AlignedCodonFrame>();
777 if (sequence == null || mappings == null)
781 for (AlignedCodonFrame mapping : mappings)
783 if (mapping.involvesSequence(sequence))
785 if (filterList != null)
787 for (SequenceI otherseq : filterList)
789 SequenceI otherDataset = otherseq.getDatasetSequence();
790 if (otherseq == sequence
791 || otherseq == sequence.getDatasetSequence()
792 || (otherDataset != null && (otherDataset == sequence || otherDataset == sequence
793 .getDatasetSequence())))
795 // skip sequences in subset which directly relate to sequence
798 if (mapping.involvesSequence(otherseq))
800 // selected a mapping contained in subselect alignment
816 * Returns the total length of the supplied ranges, which may be as single
817 * [start, end] or multiple [start, end, start, end ...]
822 public static int getLength(List<int[]> ranges)
829 for (int[] range : ranges)
831 if (range.length % 2 != 0)
833 System.err.println("Error unbalance start/end ranges: "
834 + ranges.toString());
837 for (int i = 0; i < range.length - 1; i += 2)
839 length += Math.abs(range[i + 1] - range[i]) + 1;
846 * Answers true if any range includes the given value
852 public static boolean contains(List<int[]> ranges, int value)
858 for (int[] range : ranges)
860 if (range[1] >= range[0] && value >= range[0] && value <= range[1])
863 * value within ascending range
867 if (range[1] < range[0] && value <= range[0] && value >= range[1])
870 * value within descending range
879 * Removes a specified number of positions from the start of a ranges list.
880 * For example, could be used to adjust cds ranges to allow for an incomplete
881 * start codon. Subranges are removed completely, or their start positions
882 * adjusted, until the required number of positions has been removed from the
883 * range. Reverse strand ranges are supported. The input array is not
888 * an array of [start, end, start, end...] positions
889 * @return a new array with the first removeCount positions removed
891 public static int[] removeStartPositions(int removeCount,
894 if (removeCount <= 0)
899 int[] copy = Arrays.copyOf(ranges, ranges.length);
902 for (int x = 0; x < copy.length && sxpos == -1; x += 2)
904 cdspos += Math.abs(copy[x + 1] - copy[x]) + 1;
905 if (removeCount < cdspos)
908 * we have removed enough, time to finish
913 * increment start of first exon, or decrement if reverse strand
915 if (copy[x] <= copy[x + 1])
917 copy[x] = copy[x + 1] - cdspos + removeCount + 1;
921 copy[x] = copy[x + 1] + cdspos - removeCount - 1;
930 * we dropped at least one entire sub-range - compact the array
932 int[] nxon = new int[copy.length - sxpos];
933 System.arraycopy(copy, sxpos, nxon, 0, copy.length - sxpos);