3 import java.util.ArrayList;
4 import java.util.Collections;
5 import java.util.HashMap;
6 import java.util.Iterator;
11 import jalview.analysis.AlignmentSorter;
12 import jalview.api.AlignViewportI;
13 import jalview.commands.CommandI;
14 import jalview.commands.EditCommand;
15 import jalview.commands.EditCommand.Action;
16 import jalview.commands.EditCommand.Edit;
17 import jalview.commands.OrderCommand;
18 import jalview.datamodel.AlignedCodonFrame;
19 import jalview.datamodel.AlignmentI;
20 import jalview.datamodel.AlignmentOrder;
21 import jalview.datamodel.ColumnSelection;
22 import jalview.datamodel.SearchResults;
23 import jalview.datamodel.SearchResults.Match;
24 import jalview.datamodel.Sequence;
25 import jalview.datamodel.SequenceGroup;
26 import jalview.datamodel.SequenceI;
29 * Helper methods for manipulations involving sequence mappings.
34 public final class MappingUtils
38 * Helper method to map a CUT or PASTE command.
41 * the original command
43 * if true, the command is to be undone
45 * the mapped sequences to apply the mapped command to
47 * the mapped EditCommand to add to
50 protected static void mapCutOrPaste(Edit edit, boolean undo,
51 List<SequenceI> targetSeqs, EditCommand result,
52 Set<AlignedCodonFrame> mappings)
54 Action action = edit.getAction();
57 action = action.getUndoAction();
60 System.err.println("MappingUtils.mapCutOrPaste not yet implemented");
64 * Returns a new EditCommand representing the given command as mapped to the
65 * given sequences. If there is no mapping, returns null.
74 public static EditCommand mapEditCommand(EditCommand command,
75 boolean undo, final AlignmentI mapTo, char gapChar,
76 Set<AlignedCodonFrame> mappings)
79 * For now, only support mapping from protein edits to cDna
81 if (!mapTo.isNucleotide())
87 * Cache a copy of the target sequences so we can mimic successive edits on
88 * them. This lets us compute mappings for all edits in the set.
90 Map<SequenceI, SequenceI> targetCopies = new HashMap<SequenceI, SequenceI>();
91 for (SequenceI seq : mapTo.getSequences())
93 SequenceI ds = seq.getDatasetSequence();
96 final SequenceI copy = new Sequence(seq);
97 copy.setDatasetSequence(ds);
98 targetCopies.put(ds, copy);
103 * Compute 'source' sequences as they were before applying edits:
105 Map<SequenceI, SequenceI> originalSequences = command.priorState(undo);
107 EditCommand result = new EditCommand();
108 Iterator<Edit> edits = command.getEditIterator(!undo);
109 while (edits.hasNext())
111 Edit edit = edits.next();
112 if (edit.getAction() == Action.CUT
113 || edit.getAction() == Action.PASTE)
115 mapCutOrPaste(edit, undo, mapTo.getSequences(), result, mappings);
117 else if (edit.getAction() == Action.INSERT_GAP
118 || edit.getAction() == Action.DELETE_GAP)
120 mapInsertOrDelete(edit, undo, originalSequences,
121 mapTo.getSequences(), targetCopies, gapChar, result,
125 return result.getSize() > 0 ? result : null;
129 * Helper method to map an edit command to insert or delete gaps.
132 * the original command
134 * if true, the action is to undo the command
135 * @param originalSequences
136 * the sequences the command acted on
138 * @param targetCopies
141 * the new EditCommand to add mapped commands to
144 protected static void mapInsertOrDelete(Edit edit, boolean undo,
145 Map<SequenceI, SequenceI> originalSequences,
146 final List<SequenceI> targetSeqs,
147 Map<SequenceI, SequenceI> targetCopies, char gapChar,
148 EditCommand result, Set<AlignedCodonFrame> mappings)
150 Action action = edit.getAction();
153 * Invert sense of action if an Undo.
157 action = action.getUndoAction();
159 final int count = edit.getNumber();
160 final int editPos = edit.getPosition();
161 for (SequenceI seq : edit.getSequences())
164 * Get residue position at (or to right of) edit location. Note we use our
165 * 'copy' of the sequence before editing for this.
167 SequenceI ds = seq.getDatasetSequence();
172 final SequenceI actedOn = originalSequences.get(ds);
173 final int seqpos = actedOn.findPosition(editPos);
176 * Determine all mappings from this position to mapped sequences.
178 SearchResults sr = buildSearchResults(seq, seqpos, mappings);
182 for (SequenceI targetSeq : targetSeqs)
184 ds = targetSeq.getDatasetSequence();
189 SequenceI copyTarget = targetCopies.get(ds);
190 final int[] match = sr.getResults(copyTarget, 0,
191 copyTarget.getLength());
194 final int ratio = 3; // TODO: compute this - how?
195 final int mappedCount = count * ratio;
198 * Shift Delete start position left, as it acts on positions to its
201 int mappedEditPos = action == Action.DELETE_GAP ? match[0]
202 - mappedCount : match[0];
203 Edit e = result.new Edit(action, new SequenceI[]
204 { targetSeq }, mappedEditPos, mappedCount, gapChar);
208 * and 'apply' the edit to our copy of its target sequence
210 if (action == Action.INSERT_GAP)
212 copyTarget.setSequence(new String(StringUtils.insertCharAt(
213 copyTarget.getSequence(), mappedEditPos, mappedCount,
216 else if (action == Action.DELETE_GAP)
218 copyTarget.setSequence(new String(StringUtils.deleteChars(
219 copyTarget.getSequence(), mappedEditPos,
220 mappedEditPos + mappedCount)));
226 * and 'apply' the edit to our copy of its source sequence
228 if (action == Action.INSERT_GAP)
230 actedOn.setSequence(new String(StringUtils.insertCharAt(
231 actedOn.getSequence(), editPos, count, gapChar)));
233 else if (action == Action.DELETE_GAP)
235 actedOn.setSequence(new String(StringUtils.deleteChars(
236 actedOn.getSequence(), editPos, editPos + count)));
242 * Returns a SearchResults object describing the mapped region corresponding
243 * to the specified sequence position.
250 public static SearchResults buildSearchResults(SequenceI seq, int index,
251 Set<AlignedCodonFrame> seqmappings)
253 SearchResults results = new SearchResults();
254 addSearchResults(results, seq, index, seqmappings);
259 * Adds entries to a SearchResults object describing the mapped region
260 * corresponding to the specified sequence position.
267 public static void addSearchResults(SearchResults results, SequenceI seq,
268 int index, Set<AlignedCodonFrame> seqmappings)
270 if (index >= seq.getStart() && index <= seq.getEnd())
272 for (AlignedCodonFrame acf : seqmappings)
274 acf.markMappedRegion(seq, index, results);
280 * Returns a (possibly empty) SequenceGroup containing any sequences in the
281 * mapped viewport corresponding to the given group in the source viewport.
288 public static SequenceGroup mapSequenceGroup(final SequenceGroup sg,
289 final AlignViewportI mapFrom, final AlignViewportI mapTo)
292 * Note the SequenceGroup holds aligned sequences, the mappings hold dataset
295 boolean targetIsNucleotide = mapTo.isNucleotide();
296 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
297 Set<AlignedCodonFrame> codonFrames = protein.getAlignment()
300 * Copy group name, colours etc, but not sequences or sequence colour scheme
302 SequenceGroup mappedGroup = new SequenceGroup(sg);
303 mappedGroup.cs = mapTo.getGlobalColourScheme();
306 int minStartCol = -1;
308 final int selectionStartRes = sg.getStartRes();
309 final int selectionEndRes = sg.getEndRes();
310 for (SequenceI selected : sg.getSequences())
313 * Find the widest range of non-gapped positions in the selection range
315 int firstUngappedPos = selectionStartRes;
316 while (firstUngappedPos <= selectionEndRes
317 && Comparison.isGap(selected.getCharAt(firstUngappedPos)))
323 * If this sequence is only gaps in the selected range, skip it
325 if (firstUngappedPos > selectionEndRes)
330 int lastUngappedPos = selectionEndRes;
331 while (lastUngappedPos >= selectionStartRes
332 && Comparison.isGap(selected.getCharAt(lastUngappedPos)))
338 * Find the selected start/end residue positions in sequence
340 int startResiduePos = selected.findPosition(firstUngappedPos);
341 int endResiduePos = selected.findPosition(lastUngappedPos);
343 for (AlignedCodonFrame acf : codonFrames)
345 SequenceI mappedSequence = targetIsNucleotide ? acf
346 .getDnaForAaSeq(selected) : acf.getAaForDnaSeq(selected);
347 if (mappedSequence != null)
349 for (SequenceI seq : mapTo.getAlignment().getSequences())
351 int mappedStartResidue = 0;
352 int mappedEndResidue = 0;
353 if (seq.getDatasetSequence() == mappedSequence)
356 * Found a sequence mapping. Locate the start/end mapped residues.
358 SearchResults sr = buildSearchResults(selected,
359 startResiduePos, Collections.singleton(acf));
360 for (Match m : sr.getResults())
362 mappedStartResidue = m.getStart();
363 mappedEndResidue = m.getEnd();
365 sr = buildSearchResults(selected, endResiduePos,
366 Collections.singleton(acf));
367 for (Match m : sr.getResults())
369 mappedStartResidue = Math.min(mappedStartResidue,
371 mappedEndResidue = Math.max(mappedEndResidue, m.getEnd());
375 * Find the mapped aligned columns, save the range. Note findIndex
376 * returns a base 1 position, SequenceGroup uses base 0
378 int mappedStartCol = seq.findIndex(mappedStartResidue) - 1;
379 minStartCol = minStartCol == -1 ? mappedStartCol : Math.min(
380 minStartCol, mappedStartCol);
381 int mappedEndCol = seq.findIndex(mappedEndResidue) - 1;
382 maxEndCol = maxEndCol == -1 ? mappedEndCol : Math.max(
383 maxEndCol, mappedEndCol);
384 mappedGroup.addSequence(seq, false);
391 mappedGroup.setStartRes(minStartCol < 0 ? 0 : minStartCol);
392 mappedGroup.setEndRes(maxEndCol < 0 ? 0 : maxEndCol);
397 * Returns an OrderCommand equivalent to the given one, but acting on mapped
398 * sequences as described by the mappings, or null if no mapping can be made.
401 * the original order command
403 * if true, the action is to undo the sort
405 * the alignment we are mapping to
407 * the mappings available
410 public static CommandI mapOrderCommand(OrderCommand command,
411 boolean undo, AlignmentI mapTo, Set<AlignedCodonFrame> mappings)
413 SequenceI[] sortOrder = command.getSequenceOrder(undo);
414 List<SequenceI> mappedOrder = new ArrayList<SequenceI>();
418 * Assumption: we are only interested in a cDNA/protein mapping; refactor in
419 * future if we want to support sorting (c)dna as (c)dna or protein as
422 boolean mappingToNucleotide = mapTo.isNucleotide();
423 for (SequenceI seq : sortOrder)
425 for (AlignedCodonFrame acf : mappings)
427 SequenceI mappedSeq = mappingToNucleotide ? acf.getDnaForAaSeq(seq) : acf.getAaForDnaSeq(seq);
428 if (mappedSeq != null)
430 for (SequenceI seq2 : mapTo.getSequences())
432 if (seq2.getDatasetSequence() == mappedSeq)
434 mappedOrder.add(seq2);
444 * Return null if no mappings made.
452 * Add any unmapped sequences on the end of the sort in their original
455 if (j < mapTo.getHeight())
457 for (SequenceI seq : mapTo.getSequences())
459 if (!mappedOrder.contains(seq))
461 mappedOrder.add(seq);
467 * Have to sort the sequences before constructing the OrderCommand - which
468 * then resorts them?!?
470 final SequenceI[] mappedOrderArray = mappedOrder
471 .toArray(new SequenceI[mappedOrder.size()]);
472 SequenceI[] oldOrder = mapTo.getSequencesArray();
473 AlignmentSorter.sortBy(mapTo, new AlignmentOrder(mappedOrderArray));
474 final OrderCommand result = new OrderCommand(command.getDescription(),
480 * Returns a ColumnSelection in the 'mapTo' view which corresponds to the
481 * given selection in the 'mapFrom' view. We assume one is nucleotide, the
482 * other is protein (and holds the mappings from codons to protein residues).
489 public static ColumnSelection mapColumnSelection(ColumnSelection colsel,
490 AlignViewportI mapFrom, AlignViewportI mapTo)
492 boolean targetIsNucleotide = mapTo.isNucleotide();
493 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
494 Set<AlignedCodonFrame> codonFrames = protein.getAlignment()
496 ColumnSelection mappedColumns = new ColumnSelection();
500 return mappedColumns;
503 char fromGapChar = mapFrom.getAlignment().getGapCharacter();
505 // FIXME allow for hidden columns
508 * For each mapped column, find the range of columns that residues in that
511 for (Object obj : colsel.getSelected())
513 int col = ((Integer) obj).intValue();
514 int mappedToMin = Integer.MAX_VALUE;
515 int mappedToMax = Integer.MIN_VALUE;
518 * For each sequence in the 'from' alignment
520 for (SequenceI fromSeq : mapFrom.getAlignment().getSequences())
523 * Ignore gaps (unmapped anyway)
525 if (fromSeq.getCharAt(col) == fromGapChar)
531 * Get the residue position and find the mapped position.
533 int residuePos = fromSeq.findPosition(col);
534 SearchResults sr = buildSearchResults(fromSeq, residuePos,
536 for (Match m : sr.getResults())
538 int mappedStartResidue = m.getStart();
539 int mappedEndResidue = m.getEnd();
540 SequenceI mappedSeq = m.getSequence();
543 * Locate the aligned sequence whose dataset is mappedSeq. TODO a
544 * datamodel that can do this efficiently.
546 for (SequenceI toSeq : mapTo.getAlignment().getSequences())
548 if (toSeq.getDatasetSequence() == mappedSeq)
550 int mappedStartCol = toSeq.findIndex(mappedStartResidue);
551 int mappedEndCol = toSeq.findIndex(mappedEndResidue);
552 mappedToMin = Math.min(mappedToMin, mappedStartCol);
553 mappedToMax = Math.max(mappedToMax, mappedEndCol);
554 // System.out.println(fromSeq.getName() + " mapped to cols "
555 // + mappedStartCol + ":" + mappedEndCol);
557 // note: remove break if we ever want to map one to many sequences
563 * Add the range of mapped columns to the mapped selection (converting
564 * base 1 to base 0). Note that this may include intron-only regions which
565 * lie between the start and end ranges of the selection.
567 for (int i = mappedToMin; i <= mappedToMax; i++)
569 mappedColumns.addElement(i - 1);
572 return mappedColumns;
576 * Returns the mapped codon for a given aligned sequence column position (base
580 * an aligned peptide sequence
582 * an aligned column position (base 0)
584 * a set of codon mappings
585 * @return the bases of the mapped codon in the cDNA dataset sequence, or null
588 public static char[] findCodonFor(SequenceI seq, int col,
589 Set<AlignedCodonFrame> mappings)
591 int dsPos = seq.findPosition(col);
592 for (AlignedCodonFrame mapping : mappings)
594 if (mapping.involvesSequence(seq))
596 return mapping.getMappedCodon(seq.getDatasetSequence(), dsPos);
603 * Converts a series of [start, end] ranges into an array of individual
609 public static int[] flattenRanges(int[] ranges)
612 * Count how many positions altogether
615 for (int i = 0; i < ranges.length - 1; i += 2)
617 count += ranges[i + 1] - ranges[i] + 1;
620 int[] result = new int[count];
622 for (int i = 0; i < ranges.length - 1; i += 2)
624 for (int j = ranges[i]; j <= ranges[i + 1]; j++)
633 * Returns a list of any mappings that are from or to the given (aligned or
640 public static List<AlignedCodonFrame> findMappingsForSequence(
641 SequenceI sequence, Set<AlignedCodonFrame> mappings)
643 List<AlignedCodonFrame> result = new ArrayList<AlignedCodonFrame>();
644 if (sequence == null || mappings == null)
648 for (AlignedCodonFrame mapping : mappings) {
649 if (mapping.involvesSequence(sequence)) {