3 import jalview.analysis.AlignmentSorter;
4 import jalview.api.AlignViewportI;
5 import jalview.commands.CommandI;
6 import jalview.commands.EditCommand;
7 import jalview.commands.EditCommand.Action;
8 import jalview.commands.EditCommand.Edit;
9 import jalview.commands.OrderCommand;
10 import jalview.datamodel.AlignedCodonFrame;
11 import jalview.datamodel.AlignmentI;
12 import jalview.datamodel.AlignmentOrder;
13 import jalview.datamodel.ColumnSelection;
14 import jalview.datamodel.SearchResults;
15 import jalview.datamodel.SearchResults.Match;
16 import jalview.datamodel.Sequence;
17 import jalview.datamodel.SequenceGroup;
18 import jalview.datamodel.SequenceI;
20 import java.util.ArrayList;
21 import java.util.HashMap;
22 import java.util.Iterator;
23 import java.util.List;
28 * Helper methods for manipulations involving sequence mappings.
33 public final class MappingUtils
37 * Helper method to map a CUT or PASTE command.
40 * the original command
42 * if true, the command is to be undone
44 * the mapped sequences to apply the mapped command to
46 * the mapped EditCommand to add to
49 protected static void mapCutOrPaste(Edit edit, boolean undo,
50 List<SequenceI> targetSeqs, EditCommand result,
51 Set<AlignedCodonFrame> mappings)
53 Action action = edit.getAction();
56 action = action.getUndoAction();
59 System.err.println("MappingUtils.mapCutOrPaste not yet implemented");
63 * Returns a new EditCommand representing the given command as mapped to the
64 * given sequences. If there is no mapping, returns null.
73 public static EditCommand mapEditCommand(EditCommand command,
74 boolean undo, final AlignmentI mapTo, char gapChar,
75 Set<AlignedCodonFrame> mappings)
78 * For now, only support mapping from protein edits to cDNA
80 if (!mapTo.isNucleotide())
86 * Cache a copy of the target sequences so we can mimic successive edits on
87 * them. This lets us compute mappings for all edits in the set.
89 Map<SequenceI, SequenceI> targetCopies = new HashMap<SequenceI, SequenceI>();
90 for (SequenceI seq : mapTo.getSequences())
92 SequenceI ds = seq.getDatasetSequence();
95 final SequenceI copy = new Sequence(seq);
96 copy.setDatasetSequence(ds);
97 targetCopies.put(ds, copy);
102 * Compute 'source' sequences as they were before applying edits:
104 Map<SequenceI, SequenceI> originalSequences = command.priorState(undo);
106 EditCommand result = new EditCommand();
107 Iterator<Edit> edits = command.getEditIterator(!undo);
108 while (edits.hasNext())
110 Edit edit = edits.next();
111 if (edit.getAction() == Action.CUT
112 || edit.getAction() == Action.PASTE)
114 mapCutOrPaste(edit, undo, mapTo.getSequences(), result, mappings);
116 else if (edit.getAction() == Action.INSERT_GAP
117 || edit.getAction() == Action.DELETE_GAP)
119 mapInsertOrDelete(edit, undo, originalSequences,
120 mapTo.getSequences(), targetCopies, gapChar, result,
124 return result.getSize() > 0 ? result : null;
128 * Helper method to map an edit command to insert or delete gaps.
131 * the original command
133 * if true, the action is to undo the command
134 * @param originalSequences
135 * the sequences the command acted on
137 * @param targetCopies
140 * the new EditCommand to add mapped commands to
143 protected static void mapInsertOrDelete(Edit edit, boolean undo,
144 Map<SequenceI, SequenceI> originalSequences,
145 final List<SequenceI> targetSeqs,
146 Map<SequenceI, SequenceI> targetCopies, char gapChar,
147 EditCommand result, Set<AlignedCodonFrame> mappings)
149 Action action = edit.getAction();
152 * Invert sense of action if an Undo.
156 action = action.getUndoAction();
158 final int count = edit.getNumber();
159 final int editPos = edit.getPosition();
160 for (SequenceI seq : edit.getSequences())
163 * Get residue position at (or to right of) edit location. Note we use our
164 * 'copy' of the sequence before editing for this.
166 SequenceI ds = seq.getDatasetSequence();
171 final SequenceI actedOn = originalSequences.get(ds);
172 final int seqpos = actedOn.findPosition(editPos);
175 * Determine all mappings from this position to mapped sequences.
177 SearchResults sr = buildSearchResults(seq, seqpos, mappings);
181 for (SequenceI targetSeq : targetSeqs)
183 ds = targetSeq.getDatasetSequence();
188 SequenceI copyTarget = targetCopies.get(ds);
189 final int[] match = sr.getResults(copyTarget, 0,
190 copyTarget.getLength());
193 final int ratio = 3; // TODO: compute this - how?
194 final int mappedCount = count * ratio;
197 * Shift Delete start position left, as it acts on positions to its
200 int mappedEditPos = action == Action.DELETE_GAP ? match[0]
201 - mappedCount : match[0];
202 Edit e = result.new Edit(action, new SequenceI[]
203 { targetSeq }, mappedEditPos, mappedCount, gapChar);
207 * and 'apply' the edit to our copy of its target sequence
209 if (action == Action.INSERT_GAP)
211 copyTarget.setSequence(new String(StringUtils.insertCharAt(
212 copyTarget.getSequence(), mappedEditPos, mappedCount,
215 else if (action == Action.DELETE_GAP)
217 copyTarget.setSequence(new String(StringUtils.deleteChars(
218 copyTarget.getSequence(), mappedEditPos,
219 mappedEditPos + mappedCount)));
225 * and 'apply' the edit to our copy of its source sequence
227 if (action == Action.INSERT_GAP)
229 actedOn.setSequence(new String(StringUtils.insertCharAt(
230 actedOn.getSequence(), editPos, count, gapChar)));
232 else if (action == Action.DELETE_GAP)
234 actedOn.setSequence(new String(StringUtils.deleteChars(
235 actedOn.getSequence(), editPos, editPos + count)));
241 * Returns a SearchResults object describing the mapped region corresponding
242 * to the specified sequence position.
249 public static SearchResults buildSearchResults(SequenceI seq, int index,
250 Set<AlignedCodonFrame> seqmappings)
252 SearchResults results;
253 results = new SearchResults();
254 if (index >= seq.getStart() && index <= seq.getEnd())
256 for (AlignedCodonFrame acf : seqmappings)
258 acf.markMappedRegion(seq, index, results);
265 * Returns a (possibly empty) SequenceGroup containing any sequences in the
266 * mapped viewport corresponding to the given group in the source viewport.
273 public static SequenceGroup mapSequenceGroup(SequenceGroup sg,
274 AlignViewportI mapFrom, AlignViewportI mapTo)
277 * Note the SequenceGroup holds aligned sequences, the mappings hold dataset
280 boolean targetIsNucleotide = mapTo.isNucleotide();
281 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
282 Set<AlignedCodonFrame> codonFrames = protein.getAlignment()
286 * Copy group name, name colours, but not sequences or sequence colour
289 SequenceGroup mappedGroup = new SequenceGroup(sg);
290 mappedGroup.cs = mapTo.getGlobalColourScheme();
292 // TODO set width of mapped group
294 for (SequenceI selected : sg.getSequences())
296 for (AlignedCodonFrame acf : codonFrames)
298 SequenceI mappedSequence = targetIsNucleotide ? acf
299 .getDnaForAaSeq(selected) : acf.getAaForDnaSeq(selected);
300 if (mappedSequence != null)
302 for (SequenceI seq : mapTo.getAlignment().getSequences())
304 if (seq.getDatasetSequence() == mappedSequence)
306 mappedGroup.addSequence(seq, false);
317 * Returns an OrderCommand equivalent to the given one, but acting on mapped
318 * sequences as described by the mappings, or null if no mapping can be made.
321 * the original order command
323 * if true, the action is to undo the sort
325 * the alignment we are mapping to
327 * the mappings available
330 public static CommandI mapOrderCommand(OrderCommand command,
331 boolean undo, AlignmentI mapTo, Set<AlignedCodonFrame> mappings)
333 SequenceI[] sortOrder = command.getSequenceOrder(undo);
334 List<SequenceI> mappedOrder = new ArrayList<SequenceI>();
336 for (SequenceI seq : sortOrder)
338 for (AlignedCodonFrame acf : mappings)
341 * Try protein-to-DNA; failing that, try DNA-to-protein
343 SequenceI mappedSeq = acf.getDnaForAaSeq(seq);
344 if (mappedSeq == null)
346 mappedSeq = acf.getAaForDnaSeq(seq);
348 if (mappedSeq != null)
350 for (SequenceI seq2 : mapTo.getSequences())
352 if (seq2.getDatasetSequence() == mappedSeq)
354 mappedOrder.add(seq2);
364 * Return null if no mappings made.
372 * Add any unmapped sequences on the end of the sort in their original
375 if (j < mapTo.getHeight())
377 for (SequenceI seq : mapTo.getSequences())
379 if (!mappedOrder.contains(seq))
381 mappedOrder.add(seq);
387 * Have to sort the sequences before constructing the OrderCommand - which
388 * then re-sorts them?!?
390 final SequenceI[] mappedOrderArray = mappedOrder
391 .toArray(new SequenceI[mappedOrder.size()]);
392 SequenceI[] oldOrder = mapTo.getSequencesArray();
393 AlignmentSorter.sortBy(mapTo, new AlignmentOrder(mappedOrderArray));
394 final OrderCommand result = new OrderCommand(command.getDescription(),
400 * Returns a ColumnSelection in the 'mapTo' view which corresponds to the
401 * given selection in the 'mapFrom' view. We assume one is nucleotide, the
402 * other is protein (and holds the mappings from codons to protein residues).
409 public static ColumnSelection mapColumnSelection(ColumnSelection colsel,
410 AlignViewportI mapFrom, AlignViewportI mapTo)
412 boolean targetIsNucleotide = mapTo.isNucleotide();
413 AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
414 Set<AlignedCodonFrame> codonFrames = protein.getAlignment()
416 ColumnSelection mappedColumns = new ColumnSelection();
417 char fromGapChar = mapFrom.getAlignment().getGapCharacter();
419 // FIXME allow for hidden columns
422 * For each mapped column, find the range of columns that residues in that
425 for (Object obj : colsel.getSelected())
427 int col = ((Integer) obj).intValue();
428 int mappedToMin = Integer.MAX_VALUE;
429 int mappedToMax = Integer.MIN_VALUE;
432 * For each sequence in the 'from' alignment
434 for (SequenceI fromSeq : mapFrom.getAlignment().getSequences())
437 * Ignore gaps (unmapped anyway)
439 if (fromSeq.getCharAt(col) == fromGapChar)
445 * Get the residue position and find the mapped position.
447 int residuePos = fromSeq.findPosition(col);
448 SearchResults sr = buildSearchResults(fromSeq, residuePos,
450 for (Match m : sr.getResults())
452 int mappedStartResidue = m.getStart();
453 int mappedEndResidue = m.getEnd();
454 SequenceI mappedSeq = m.getSequence();
457 * Locate the aligned sequence whose dataset is mappedSeq. TODO a
458 * datamodel that can do this efficiently.
460 for (SequenceI toSeq : mapTo.getAlignment().getSequences())
462 if (toSeq.getDatasetSequence() == mappedSeq)
464 int mappedStartCol = toSeq.findIndex(mappedStartResidue);
465 int mappedEndCol = toSeq.findIndex(mappedEndResidue);
466 mappedToMin = Math.min(mappedToMin, mappedStartCol);
467 mappedToMax = Math.max(mappedToMax, mappedEndCol);
468 // System.out.println(fromSeq.getName() + " mapped to cols "
469 // + mappedStartCol + ":" + mappedEndCol);
471 // note: remove break if we ever want to map one to many sequences
477 * Add the range of mapped columns to the mapped selection (converting
478 * base 1 to base 0). Note that this may include intron-only regions which
479 * lie between the start and end ranges of the selection.
481 for (int i = mappedToMin; i <= mappedToMax; i++)
483 mappedColumns.addElement(i - 1);
486 return mappedColumns;
490 * Returns the mapped codon for a given aligned sequence column position (base
494 * an aligned peptide sequence
496 * an aligned column position (base 0)
498 * a set of codon mappings
499 * @return the bases of the mapped codon in the cDNA dataset sequence, or null
502 public static char[] findCodonFor(SequenceI seq, int col,
503 Set<AlignedCodonFrame> mappings)
505 int dsPos = seq.findPosition(col);
506 for (AlignedCodonFrame mapping : mappings)
508 if (mapping.involvesSequence(seq))
510 return mapping.getMappedCodon(seq.getDatasetSequence(), dsPos);
517 * Converts a series of [start, end] ranges into an array of individual
523 public static int[] flattenRanges(int[] ranges)
526 * Count how many positions altogether
529 for (int i = 0; i < ranges.length - 1; i += 2)
531 count += ranges[i + 1] - ranges[i] + 1;
534 int[] result = new int[count];
536 for (int i = 0; i < ranges.length - 1; i += 2)
538 for (int j = ranges[i]; j <= ranges[i + 1]; j++)