2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.9)
3 * Copyright (C) 2015 The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.analysis.AlignmentSorter;
24 import jalview.api.AlignViewportI;
25 import jalview.commands.CommandI;
26 import jalview.commands.EditCommand;
27 import jalview.commands.EditCommand.Action;
28 import jalview.commands.EditCommand.Edit;
29 import jalview.commands.OrderCommand;
30 import jalview.datamodel.AlignedCodonFrame;
31 import jalview.datamodel.AlignmentI;
32 import jalview.datamodel.AlignmentOrder;
33 import jalview.datamodel.ColumnSelection;
34 import jalview.datamodel.SearchResults;
35 import jalview.datamodel.SearchResults.Match;
36 import jalview.datamodel.Sequence;
37 import jalview.datamodel.SequenceGroup;
38 import jalview.datamodel.SequenceI;
40 import java.util.ArrayList;
41 import java.util.Collections;
42 import java.util.HashMap;
43 import java.util.Iterator;
44 import java.util.List;
49 * Helper methods for manipulations involving sequence mappings.
54 public final class MappingUtils
58 * Helper method to map a CUT or PASTE command.
61 * the original command
63 * if true, the command is to be undone
65 * the mapped sequences to apply the mapped command to
67 * the mapped EditCommand to add to
70 protected static void mapCutOrPaste(Edit edit, boolean undo,
71 List<SequenceI> targetSeqs, EditCommand result,
72 Set<AlignedCodonFrame> mappings)
74 Action action = edit.getAction();
77 action = action.getUndoAction();
80 System.err.println("MappingUtils.mapCutOrPaste not yet implemented");
84 * Returns a new EditCommand representing the given command as mapped to the
85 * given sequences. If there is no mapping, returns null.
94 public static EditCommand mapEditCommand(EditCommand command,
95 boolean undo, final AlignmentI mapTo, char gapChar,
96 Set<AlignedCodonFrame> mappings)
99 * For now, only support mapping from protein edits to cDna
101 if (!mapTo.isNucleotide())
107 * Cache a copy of the target sequences so we can mimic successive edits on
108 * them. This lets us compute mappings for all edits in the set.
110 Map<SequenceI, SequenceI> targetCopies = new HashMap<SequenceI, SequenceI>();
111 for (SequenceI seq : mapTo.getSequences())
113 SequenceI ds = seq.getDatasetSequence();
116 final SequenceI copy = new Sequence(seq);
117 copy.setDatasetSequence(ds);
118 targetCopies.put(ds, copy);
123 * Compute 'source' sequences as they were before applying edits:
125 Map<SequenceI, SequenceI> originalSequences = command.priorState(undo);
127 EditCommand result = new EditCommand();
128 Iterator<Edit> edits = command.getEditIterator(!undo);
129 while (edits.hasNext())
131 Edit edit = edits.next();
132 if (edit.getAction() == Action.CUT
133 || edit.getAction() == Action.PASTE)
135 mapCutOrPaste(edit, undo, mapTo.getSequences(), result, mappings);
137 else if (edit.getAction() == Action.INSERT_GAP
138 || edit.getAction() == Action.DELETE_GAP)
140 mapInsertOrDelete(edit, undo, originalSequences,
141 mapTo.getSequences(), targetCopies, gapChar, result,
145 return result.getSize() > 0 ? result : null;
149 * Helper method to map an edit command to insert or delete gaps.
152 * the original command
154 * if true, the action is to undo the command
155 * @param originalSequences
156 * the sequences the command acted on
158 * @param targetCopies
161 * the new EditCommand to add mapped commands to
164 protected static void mapInsertOrDelete(Edit edit, boolean undo,
165 Map<SequenceI, SequenceI> originalSequences,
166 final List<SequenceI> targetSeqs,
167 Map<SequenceI, SequenceI> targetCopies, char gapChar,
168 EditCommand result, Set<AlignedCodonFrame> mappings)
170 Action action = edit.getAction();
173 * Invert sense of action if an Undo.
177 action = action.getUndoAction();
179 final int count = edit.getNumber();
180 final int editPos = edit.getPosition();
181 for (SequenceI seq : edit.getSequences())
184 * Get residue position at (or to right of) edit location. Note we use our
185 * 'copy' of the sequence before editing for this.
187 SequenceI ds = seq.getDatasetSequence();
192 final SequenceI actedOn = originalSequences.get(ds);
193 final int seqpos = actedOn.findPosition(editPos);
196 * Determine all mappings from this position to mapped sequences.
198 SearchResults sr = buildSearchResults(seq, seqpos, mappings);
202 for (SequenceI targetSeq : targetSeqs)
204 ds = targetSeq.getDatasetSequence();
209 SequenceI copyTarget = targetCopies.get(ds);
210 final int[] match = sr.getResults(copyTarget, 0,
211 copyTarget.getLength());
214 final int ratio = 3; // TODO: compute this - how?
215 final int mappedCount = count * ratio;
218 * Shift Delete start position left, as it acts on positions to its
221 int mappedEditPos = action == Action.DELETE_GAP ? match[0]
222 - mappedCount : match[0];
223 Edit e = result.new Edit(action, new SequenceI[] { targetSeq },
224 mappedEditPos, mappedCount, gapChar);
228 * and 'apply' the edit to our copy of its target sequence
230 if (action == Action.INSERT_GAP)
232 copyTarget.setSequence(new String(StringUtils.insertCharAt(
233 copyTarget.getSequence(), mappedEditPos, mappedCount,
236 else if (action == Action.DELETE_GAP)
238 copyTarget.setSequence(new String(StringUtils.deleteChars(
239 copyTarget.getSequence(), mappedEditPos,
240 mappedEditPos + mappedCount)));
246 * and 'apply' the edit to our copy of its source sequence
248 if (action == Action.INSERT_GAP)
250 actedOn.setSequence(new String(StringUtils.insertCharAt(
251 actedOn.getSequence(), editPos, count, gapChar)));
253 else if (action == Action.DELETE_GAP)
255 actedOn.setSequence(new String(StringUtils.deleteChars(
256 actedOn.getSequence(), editPos, editPos + count)));
262 * Returns a SearchResults object describing the mapped region corresponding
263 * to the specified sequence position.
270 public static SearchResults buildSearchResults(SequenceI seq, int index,
271 Set<AlignedCodonFrame> seqmappings)
273 SearchResults results = new SearchResults();
274 addSearchResults(results, seq, index, seqmappings);
279 * Adds entries to a SearchResults object describing the mapped region
280 * corresponding to the specified sequence position.
287 public static void addSearchResults(SearchResults results, SequenceI seq,
288 int index, Set<AlignedCodonFrame> seqmappings)
290 if (index >= seq.getStart() && index <= seq.getEnd())
292 for (AlignedCodonFrame acf : seqmappings)
294 acf.markMappedRegion(seq, index, results);
300 * Returns a (possibly empty) SequenceGroup containing any sequences in the
301 * mapped viewport corresponding to the given group in the source viewport.
308 public static SequenceGroup mapSequenceGroup(final SequenceGroup sg,
309 final AlignViewportI mapFrom, final AlignViewportI mapTo)
312 * Note the SequenceGroup holds aligned sequences, the mappings hold dataset
315 boolean targetIsNucleotide = mapTo.isNucleotide();
316 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
317 Set<AlignedCodonFrame> codonFrames = protein.getAlignment()
320 * Copy group name, colours etc, but not sequences or sequence colour scheme
322 SequenceGroup mappedGroup = new SequenceGroup(sg);
323 mappedGroup.cs = mapTo.getGlobalColourScheme();
326 int minStartCol = -1;
328 final int selectionStartRes = sg.getStartRes();
329 final int selectionEndRes = sg.getEndRes();
330 for (SequenceI selected : sg.getSequences())
333 * Find the widest range of non-gapped positions in the selection range
335 int firstUngappedPos = selectionStartRes;
336 while (firstUngappedPos <= selectionEndRes
337 && Comparison.isGap(selected.getCharAt(firstUngappedPos)))
343 * If this sequence is only gaps in the selected range, skip it
345 if (firstUngappedPos > selectionEndRes)
350 int lastUngappedPos = selectionEndRes;
351 while (lastUngappedPos >= selectionStartRes
352 && Comparison.isGap(selected.getCharAt(lastUngappedPos)))
358 * Find the selected start/end residue positions in sequence
360 int startResiduePos = selected.findPosition(firstUngappedPos);
361 int endResiduePos = selected.findPosition(lastUngappedPos);
363 for (AlignedCodonFrame acf : codonFrames)
365 SequenceI mappedSequence = targetIsNucleotide ? acf
366 .getDnaForAaSeq(selected) : acf.getAaForDnaSeq(selected);
367 if (mappedSequence != null)
369 for (SequenceI seq : mapTo.getAlignment().getSequences())
371 int mappedStartResidue = 0;
372 int mappedEndResidue = 0;
373 if (seq.getDatasetSequence() == mappedSequence)
376 * Found a sequence mapping. Locate the start/end mapped residues.
378 SearchResults sr = buildSearchResults(selected,
379 startResiduePos, Collections.singleton(acf));
380 for (Match m : sr.getResults())
382 mappedStartResidue = m.getStart();
383 mappedEndResidue = m.getEnd();
385 sr = buildSearchResults(selected, endResiduePos,
386 Collections.singleton(acf));
387 for (Match m : sr.getResults())
389 mappedStartResidue = Math.min(mappedStartResidue,
391 mappedEndResidue = Math.max(mappedEndResidue, m.getEnd());
395 * Find the mapped aligned columns, save the range. Note findIndex
396 * returns a base 1 position, SequenceGroup uses base 0
398 int mappedStartCol = seq.findIndex(mappedStartResidue) - 1;
399 minStartCol = minStartCol == -1 ? mappedStartCol : Math.min(
400 minStartCol, mappedStartCol);
401 int mappedEndCol = seq.findIndex(mappedEndResidue) - 1;
402 maxEndCol = maxEndCol == -1 ? mappedEndCol : Math.max(
403 maxEndCol, mappedEndCol);
404 mappedGroup.addSequence(seq, false);
411 mappedGroup.setStartRes(minStartCol < 0 ? 0 : minStartCol);
412 mappedGroup.setEndRes(maxEndCol < 0 ? 0 : maxEndCol);
417 * Returns an OrderCommand equivalent to the given one, but acting on mapped
418 * sequences as described by the mappings, or null if no mapping can be made.
421 * the original order command
423 * if true, the action is to undo the sort
425 * the alignment we are mapping to
427 * the mappings available
430 public static CommandI mapOrderCommand(OrderCommand command,
431 boolean undo, AlignmentI mapTo, Set<AlignedCodonFrame> mappings)
433 SequenceI[] sortOrder = command.getSequenceOrder(undo);
434 List<SequenceI> mappedOrder = new ArrayList<SequenceI>();
438 * Assumption: we are only interested in a cDNA/protein mapping; refactor in
439 * future if we want to support sorting (c)dna as (c)dna or protein as
442 boolean mappingToNucleotide = mapTo.isNucleotide();
443 for (SequenceI seq : sortOrder)
445 for (AlignedCodonFrame acf : mappings)
447 SequenceI mappedSeq = mappingToNucleotide ? acf.getDnaForAaSeq(seq)
448 : acf.getAaForDnaSeq(seq);
449 if (mappedSeq != null)
451 for (SequenceI seq2 : mapTo.getSequences())
453 if (seq2.getDatasetSequence() == mappedSeq)
455 mappedOrder.add(seq2);
465 * Return null if no mappings made.
473 * Add any unmapped sequences on the end of the sort in their original
476 if (j < mapTo.getHeight())
478 for (SequenceI seq : mapTo.getSequences())
480 if (!mappedOrder.contains(seq))
482 mappedOrder.add(seq);
488 * Have to sort the sequences before constructing the OrderCommand - which
489 * then resorts them?!?
491 final SequenceI[] mappedOrderArray = mappedOrder
492 .toArray(new SequenceI[mappedOrder.size()]);
493 SequenceI[] oldOrder = mapTo.getSequencesArray();
494 AlignmentSorter.sortBy(mapTo, new AlignmentOrder(mappedOrderArray));
495 final OrderCommand result = new OrderCommand(command.getDescription(),
501 * Returns a ColumnSelection in the 'mapTo' view which corresponds to the
502 * given selection in the 'mapFrom' view. We assume one is nucleotide, the
503 * other is protein (and holds the mappings from codons to protein residues).
510 public static ColumnSelection mapColumnSelection(ColumnSelection colsel,
511 AlignViewportI mapFrom, AlignViewportI mapTo)
513 boolean targetIsNucleotide = mapTo.isNucleotide();
514 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
515 Set<AlignedCodonFrame> codonFrames = protein.getAlignment()
517 ColumnSelection mappedColumns = new ColumnSelection();
521 return mappedColumns;
524 char fromGapChar = mapFrom.getAlignment().getGapCharacter();
526 // FIXME allow for hidden columns
529 * For each mapped column, find the range of columns that residues in that
532 for (Object obj : colsel.getSelected())
534 int col = ((Integer) obj).intValue();
535 int mappedToMin = Integer.MAX_VALUE;
536 int mappedToMax = Integer.MIN_VALUE;
539 * For each sequence in the 'from' alignment
541 for (SequenceI fromSeq : mapFrom.getAlignment().getSequences())
544 * Ignore gaps (unmapped anyway)
546 if (fromSeq.getCharAt(col) == fromGapChar)
552 * Get the residue position and find the mapped position.
554 int residuePos = fromSeq.findPosition(col);
555 SearchResults sr = buildSearchResults(fromSeq, residuePos,
557 for (Match m : sr.getResults())
559 int mappedStartResidue = m.getStart();
560 int mappedEndResidue = m.getEnd();
561 SequenceI mappedSeq = m.getSequence();
564 * Locate the aligned sequence whose dataset is mappedSeq. TODO a
565 * datamodel that can do this efficiently.
567 for (SequenceI toSeq : mapTo.getAlignment().getSequences())
569 if (toSeq.getDatasetSequence() == mappedSeq)
571 int mappedStartCol = toSeq.findIndex(mappedStartResidue);
572 int mappedEndCol = toSeq.findIndex(mappedEndResidue);
573 mappedToMin = Math.min(mappedToMin, mappedStartCol);
574 mappedToMax = Math.max(mappedToMax, mappedEndCol);
575 // System.out.println(fromSeq.getName() + " mapped to cols "
576 // + mappedStartCol + ":" + mappedEndCol);
578 // note: remove break if we ever want to map one to many sequences
584 * Add the range of mapped columns to the mapped selection (converting
585 * base 1 to base 0). Note that this may include intron-only regions which
586 * lie between the start and end ranges of the selection.
588 for (int i = mappedToMin; i <= mappedToMax; i++)
590 mappedColumns.addElement(i - 1);
593 return mappedColumns;
597 * Returns the mapped codon for a given aligned sequence column position (base
601 * an aligned peptide sequence
603 * an aligned column position (base 0)
605 * a set of codon mappings
606 * @return the bases of the mapped codon in the cDNA dataset sequence, or null
609 public static char[] findCodonFor(SequenceI seq, int col,
610 Set<AlignedCodonFrame> mappings)
612 int dsPos = seq.findPosition(col);
613 for (AlignedCodonFrame mapping : mappings)
615 if (mapping.involvesSequence(seq))
617 return mapping.getMappedCodon(seq.getDatasetSequence(), dsPos);
624 * Converts a series of [start, end] ranges into an array of individual
630 public static int[] flattenRanges(int[] ranges)
633 * Count how many positions altogether
636 for (int i = 0; i < ranges.length - 1; i += 2)
638 count += ranges[i + 1] - ranges[i] + 1;
641 int[] result = new int[count];
643 for (int i = 0; i < ranges.length - 1; i += 2)
645 for (int j = ranges[i]; j <= ranges[i + 1]; j++)
654 * Returns a list of any mappings that are from or to the given (aligned or
661 public static List<AlignedCodonFrame> findMappingsForSequence(
662 SequenceI sequence, Set<AlignedCodonFrame> mappings)
664 List<AlignedCodonFrame> result = new ArrayList<AlignedCodonFrame>();
665 if (sequence == null || mappings == null)
669 for (AlignedCodonFrame mapping : mappings)
671 if (mapping.involvesSequence(sequence))