/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see .
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.util;
import jalview.analysis.AlignmentSorter;
import jalview.api.AlignViewportI;
import jalview.commands.CommandI;
import jalview.commands.EditCommand;
import jalview.commands.EditCommand.Action;
import jalview.commands.EditCommand.Edit;
import jalview.commands.OrderCommand;
import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentOrder;
import jalview.datamodel.ColumnSelection;
import jalview.datamodel.SearchResultMatchI;
import jalview.datamodel.SearchResults;
import jalview.datamodel.SearchResultsI;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceGroup;
import jalview.datamodel.SequenceI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
/**
* Helper methods for manipulations involving sequence mappings.
*
* @author gmcarstairs
*
*/
public final class MappingUtils
{
/**
* Helper method to map a CUT or PASTE command.
*
* @param edit
* the original command
* @param undo
* if true, the command is to be undone
* @param targetSeqs
* the mapped sequences to apply the mapped command to
* @param result
* the mapped EditCommand to add to
* @param mappings
*/
protected static void mapCutOrPaste(Edit edit, boolean undo,
List targetSeqs, EditCommand result,
List mappings)
{
Action action = edit.getAction();
if (undo)
{
action = action.getUndoAction();
}
// TODO write this
System.err.println("MappingUtils.mapCutOrPaste not yet implemented");
}
/**
* Returns a new EditCommand representing the given command as mapped to the
* given sequences. If there is no mapping, returns null.
*
* @param command
* @param undo
* @param mapTo
* @param gapChar
* @param mappings
* @return
*/
public static EditCommand mapEditCommand(EditCommand command,
boolean undo, final AlignmentI mapTo, char gapChar,
List mappings)
{
/*
* For now, only support mapping from protein edits to cDna
*/
if (!mapTo.isNucleotide())
{
return null;
}
/*
* Cache a copy of the target sequences so we can mimic successive edits on
* them. This lets us compute mappings for all edits in the set.
*/
Map targetCopies = new HashMap();
for (SequenceI seq : mapTo.getSequences())
{
SequenceI ds = seq.getDatasetSequence();
if (ds != null)
{
final SequenceI copy = new Sequence(seq);
copy.setDatasetSequence(ds);
targetCopies.put(ds, copy);
}
}
/*
* Compute 'source' sequences as they were before applying edits:
*/
Map originalSequences = command.priorState(undo);
EditCommand result = new EditCommand();
Iterator edits = command.getEditIterator(!undo);
while (edits.hasNext())
{
Edit edit = edits.next();
if (edit.getAction() == Action.CUT
|| edit.getAction() == Action.PASTE)
{
mapCutOrPaste(edit, undo, mapTo.getSequences(), result, mappings);
}
else if (edit.getAction() == Action.INSERT_GAP
|| edit.getAction() == Action.DELETE_GAP)
{
mapInsertOrDelete(edit, undo, originalSequences,
mapTo.getSequences(), targetCopies, gapChar, result,
mappings);
}
}
return result.getSize() > 0 ? result : null;
}
/**
* Helper method to map an edit command to insert or delete gaps.
*
* @param edit
* the original command
* @param undo
* if true, the action is to undo the command
* @param originalSequences
* the sequences the command acted on
* @param targetSeqs
* @param targetCopies
* @param gapChar
* @param result
* the new EditCommand to add mapped commands to
* @param mappings
*/
protected static void mapInsertOrDelete(Edit edit, boolean undo,
Map originalSequences,
final List targetSeqs,
Map targetCopies, char gapChar,
EditCommand result, List mappings)
{
Action action = edit.getAction();
/*
* Invert sense of action if an Undo.
*/
if (undo)
{
action = action.getUndoAction();
}
final int count = edit.getNumber();
final int editPos = edit.getPosition();
for (SequenceI seq : edit.getSequences())
{
/*
* Get residue position at (or to right of) edit location. Note we use our
* 'copy' of the sequence before editing for this.
*/
SequenceI ds = seq.getDatasetSequence();
if (ds == null)
{
continue;
}
final SequenceI actedOn = originalSequences.get(ds);
final int seqpos = actedOn.findPosition(editPos);
/*
* Determine all mappings from this position to mapped sequences.
*/
SearchResultsI sr = buildSearchResults(seq, seqpos, mappings);
if (!sr.isEmpty())
{
for (SequenceI targetSeq : targetSeqs)
{
ds = targetSeq.getDatasetSequence();
if (ds == null)
{
continue;
}
SequenceI copyTarget = targetCopies.get(ds);
final int[] match = sr.getResults(copyTarget, 0,
copyTarget.getLength());
if (match != null)
{
final int ratio = 3; // TODO: compute this - how?
final int mappedCount = count * ratio;
/*
* Shift Delete start position left, as it acts on positions to its
* right.
*/
int mappedEditPos = action == Action.DELETE_GAP ? match[0]
- mappedCount : match[0];
Edit e = result.new Edit(action, new SequenceI[] { targetSeq },
mappedEditPos, mappedCount, gapChar);
result.addEdit(e);
/*
* and 'apply' the edit to our copy of its target sequence
*/
if (action == Action.INSERT_GAP)
{
copyTarget.setSequence(new String(StringUtils.insertCharAt(
copyTarget.getSequence(), mappedEditPos, mappedCount,
gapChar)));
}
else if (action == Action.DELETE_GAP)
{
copyTarget.setSequence(new String(StringUtils.deleteChars(
copyTarget.getSequence(), mappedEditPos,
mappedEditPos + mappedCount)));
}
}
}
}
/*
* and 'apply' the edit to our copy of its source sequence
*/
if (action == Action.INSERT_GAP)
{
actedOn.setSequence(new String(StringUtils.insertCharAt(
actedOn.getSequence(), editPos, count, gapChar)));
}
else if (action == Action.DELETE_GAP)
{
actedOn.setSequence(new String(StringUtils.deleteChars(
actedOn.getSequence(), editPos, editPos + count)));
}
}
}
/**
* Returns a SearchResults object describing the mapped region corresponding
* to the specified sequence position.
*
* @param seq
* @param index
* @param seqmappings
* @return
*/
public static SearchResultsI buildSearchResults(SequenceI seq, int index,
List seqmappings)
{
SearchResultsI results = new SearchResults();
addSearchResults(results, seq, index, seqmappings);
return results;
}
/**
* Adds entries to a SearchResults object describing the mapped region
* corresponding to the specified sequence position.
*
* @param results
* @param seq
* @param index
* @param seqmappings
*/
public static void addSearchResults(SearchResultsI results, SequenceI seq,
int index, List seqmappings)
{
if (index >= seq.getStart() && index <= seq.getEnd())
{
for (AlignedCodonFrame acf : seqmappings)
{
acf.markMappedRegion(seq, index, results);
}
}
}
/**
* Returns a (possibly empty) SequenceGroup containing any sequences in the
* mapped viewport corresponding to the given group in the source viewport.
*
* @param sg
* @param mapFrom
* @param mapTo
* @return
*/
public static SequenceGroup mapSequenceGroup(final SequenceGroup sg,
final AlignViewportI mapFrom, final AlignViewportI mapTo)
{
/*
* Note the SequenceGroup holds aligned sequences, the mappings hold dataset
* sequences.
*/
boolean targetIsNucleotide = mapTo.isNucleotide();
AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
List codonFrames = protein.getAlignment()
.getCodonFrames();
/*
* Copy group name, colours etc, but not sequences or sequence colour scheme
*/
SequenceGroup mappedGroup = new SequenceGroup(sg);
mappedGroup.setColourScheme(mapTo.getGlobalColourScheme());
mappedGroup.clear();
int minStartCol = -1;
int maxEndCol = -1;
final int selectionStartRes = sg.getStartRes();
final int selectionEndRes = sg.getEndRes();
for (SequenceI selected : sg.getSequences())
{
/*
* Find the widest range of non-gapped positions in the selection range
*/
int firstUngappedPos = selectionStartRes;
while (firstUngappedPos <= selectionEndRes
&& Comparison.isGap(selected.getCharAt(firstUngappedPos)))
{
firstUngappedPos++;
}
/*
* If this sequence is only gaps in the selected range, skip it
*/
if (firstUngappedPos > selectionEndRes)
{
continue;
}
int lastUngappedPos = selectionEndRes;
while (lastUngappedPos >= selectionStartRes
&& Comparison.isGap(selected.getCharAt(lastUngappedPos)))
{
lastUngappedPos--;
}
/*
* Find the selected start/end residue positions in sequence
*/
int startResiduePos = selected.findPosition(firstUngappedPos);
int endResiduePos = selected.findPosition(lastUngappedPos);
for (AlignedCodonFrame acf : codonFrames)
{
SequenceI mappedSequence = targetIsNucleotide ? acf
.getDnaForAaSeq(selected) : acf.getAaForDnaSeq(selected);
if (mappedSequence != null)
{
for (SequenceI seq : mapTo.getAlignment().getSequences())
{
int mappedStartResidue = 0;
int mappedEndResidue = 0;
if (seq.getDatasetSequence() == mappedSequence)
{
/*
* Found a sequence mapping. Locate the start/end mapped residues.
*/
List mapping = Arrays
.asList(new AlignedCodonFrame[] { acf });
SearchResultsI sr = buildSearchResults(selected,
startResiduePos, mapping);
for (SearchResultMatchI m : sr.getResults())
{
mappedStartResidue = m.getStart();
mappedEndResidue = m.getEnd();
}
sr = buildSearchResults(selected, endResiduePos, mapping);
for (SearchResultMatchI m : sr.getResults())
{
mappedStartResidue = Math.min(mappedStartResidue,
m.getStart());
mappedEndResidue = Math.max(mappedEndResidue, m.getEnd());
}
/*
* Find the mapped aligned columns, save the range. Note findIndex
* returns a base 1 position, SequenceGroup uses base 0
*/
int mappedStartCol = seq.findIndex(mappedStartResidue) - 1;
minStartCol = minStartCol == -1 ? mappedStartCol : Math.min(
minStartCol, mappedStartCol);
int mappedEndCol = seq.findIndex(mappedEndResidue) - 1;
maxEndCol = maxEndCol == -1 ? mappedEndCol : Math.max(
maxEndCol, mappedEndCol);
mappedGroup.addSequence(seq, false);
break;
}
}
}
}
}
mappedGroup.setStartRes(minStartCol < 0 ? 0 : minStartCol);
mappedGroup.setEndRes(maxEndCol < 0 ? 0 : maxEndCol);
return mappedGroup;
}
/**
* Returns an OrderCommand equivalent to the given one, but acting on mapped
* sequences as described by the mappings, or null if no mapping can be made.
*
* @param command
* the original order command
* @param undo
* if true, the action is to undo the sort
* @param mapTo
* the alignment we are mapping to
* @param mappings
* the mappings available
* @return
*/
public static CommandI mapOrderCommand(OrderCommand command,
boolean undo, AlignmentI mapTo, List mappings)
{
SequenceI[] sortOrder = command.getSequenceOrder(undo);
List mappedOrder = new ArrayList();
int j = 0;
/*
* Assumption: we are only interested in a cDNA/protein mapping; refactor in
* future if we want to support sorting (c)dna as (c)dna or protein as
* protein
*/
boolean mappingToNucleotide = mapTo.isNucleotide();
for (SequenceI seq : sortOrder)
{
for (AlignedCodonFrame acf : mappings)
{
SequenceI mappedSeq = mappingToNucleotide ? acf.getDnaForAaSeq(seq)
: acf.getAaForDnaSeq(seq);
if (mappedSeq != null)
{
for (SequenceI seq2 : mapTo.getSequences())
{
if (seq2.getDatasetSequence() == mappedSeq)
{
mappedOrder.add(seq2);
j++;
break;
}
}
}
}
}
/*
* Return null if no mappings made.
*/
if (j == 0)
{
return null;
}
/*
* Add any unmapped sequences on the end of the sort in their original
* ordering.
*/
if (j < mapTo.getHeight())
{
for (SequenceI seq : mapTo.getSequences())
{
if (!mappedOrder.contains(seq))
{
mappedOrder.add(seq);
}
}
}
/*
* Have to sort the sequences before constructing the OrderCommand - which
* then resorts them?!?
*/
final SequenceI[] mappedOrderArray = mappedOrder
.toArray(new SequenceI[mappedOrder.size()]);
SequenceI[] oldOrder = mapTo.getSequencesArray();
AlignmentSorter.sortBy(mapTo, new AlignmentOrder(mappedOrderArray));
final OrderCommand result = new OrderCommand(command.getDescription(),
oldOrder, mapTo);
return result;
}
/**
* Returns a ColumnSelection in the 'mapTo' view which corresponds to the
* given selection in the 'mapFrom' view. We assume one is nucleotide, the
* other is protein (and holds the mappings from codons to protein residues).
*
* @param colsel
* @param mapFrom
* @param mapTo
* @return
*/
public static ColumnSelection mapColumnSelection(ColumnSelection colsel,
AlignViewportI mapFrom, AlignViewportI mapTo)
{
boolean targetIsNucleotide = mapTo.isNucleotide();
AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
List codonFrames = protein.getAlignment()
.getCodonFrames();
ColumnSelection mappedColumns = new ColumnSelection();
if (colsel == null)
{
return mappedColumns;
}
char fromGapChar = mapFrom.getAlignment().getGapCharacter();
/*
* For each mapped column, find the range of columns that residues in that
* column map to.
*/
List fromSequences = mapFrom.getAlignment().getSequences();
List toSequences = mapTo.getAlignment().getSequences();
for (Integer sel : colsel.getSelected())
{
mapColumn(sel.intValue(), codonFrames, mappedColumns, fromSequences,
toSequences, fromGapChar);
}
for (int[] hidden : colsel.getHiddenColumns())
{
mapHiddenColumns(hidden, codonFrames, mappedColumns, fromSequences,
toSequences, fromGapChar);
}
return mappedColumns;
}
/**
* Helper method that maps a [start, end] hidden column range to its mapped
* equivalent
*
* @param hidden
* @param mappings
* @param mappedColumns
* @param fromSequences
* @param toSequences
* @param fromGapChar
*/
protected static void mapHiddenColumns(int[] hidden,
List mappings, ColumnSelection mappedColumns,
List fromSequences, List toSequences,
char fromGapChar)
{
for (int col = hidden[0]; col <= hidden[1]; col++)
{
int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
toSequences, fromGapChar);
/*
* Add the range of hidden columns to the mapped selection (converting
* base 1 to base 0).
*/
if (mappedTo != null)
{
mappedColumns.hideColumns(mappedTo[0] - 1, mappedTo[1] - 1);
}
}
}
/**
* Helper method to map one column selection
*
* @param col
* the column number (base 0)
* @param mappings
* the sequence mappings
* @param mappedColumns
* the mapped column selections to add to
* @param fromSequences
* @param toSequences
* @param fromGapChar
*/
protected static void mapColumn(int col,
List mappings, ColumnSelection mappedColumns,
List fromSequences, List toSequences,
char fromGapChar)
{
int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
toSequences, fromGapChar);
/*
* Add the range of mapped columns to the mapped selection (converting
* base 1 to base 0). Note that this may include intron-only regions which
* lie between the start and end ranges of the selection.
*/
if (mappedTo != null)
{
for (int i = mappedTo[0]; i <= mappedTo[1]; i++)
{
mappedColumns.addElement(i - 1);
}
}
}
/**
* Helper method to find the range of columns mapped to from one column.
* Returns the maximal range of columns mapped to from all sequences in the
* source column, or null if no mappings were found.
*
* @param col
* @param mappings
* @param fromSequences
* @param toSequences
* @param fromGapChar
* @return
*/
protected static int[] findMappedColumns(int col,
List mappings, List fromSequences,
List toSequences, char fromGapChar)
{
int[] mappedTo = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE };
boolean found = false;
/*
* For each sequence in the 'from' alignment
*/
for (SequenceI fromSeq : fromSequences)
{
/*
* Ignore gaps (unmapped anyway)
*/
if (fromSeq.getCharAt(col) == fromGapChar)
{
continue;
}
/*
* Get the residue position and find the mapped position.
*/
int residuePos = fromSeq.findPosition(col);
SearchResultsI sr = buildSearchResults(fromSeq, residuePos, mappings);
for (SearchResultMatchI m : sr.getResults())
{
int mappedStartResidue = m.getStart();
int mappedEndResidue = m.getEnd();
SequenceI mappedSeq = m.getSequence();
/*
* Locate the aligned sequence whose dataset is mappedSeq. TODO a
* datamodel that can do this efficiently.
*/
for (SequenceI toSeq : toSequences)
{
if (toSeq.getDatasetSequence() == mappedSeq)
{
int mappedStartCol = toSeq.findIndex(mappedStartResidue);
int mappedEndCol = toSeq.findIndex(mappedEndResidue);
mappedTo[0] = Math.min(mappedTo[0], mappedStartCol);
mappedTo[1] = Math.max(mappedTo[1], mappedEndCol);
found = true;
break;
// note: remove break if we ever want to map one to many sequences
}
}
}
}
return found ? mappedTo : null;
}
/**
* Returns the mapped codon or codons for a given aligned sequence column
* position (base 0).
*
* @param seq
* an aligned peptide sequence
* @param col
* an aligned column position (base 0)
* @param mappings
* a set of codon mappings
* @return the bases of the mapped codon(s) in the cDNA dataset sequence(s),
* or an empty list if none found
*/
public static List findCodonsFor(SequenceI seq, int col,
List mappings)
{
List result = new ArrayList();
int dsPos = seq.findPosition(col);
for (AlignedCodonFrame mapping : mappings)
{
if (mapping.involvesSequence(seq))
{
List codons = mapping.getMappedCodons(
seq.getDatasetSequence(), dsPos);
if (codons != null)
{
result.addAll(codons);
}
}
}
return result;
}
/**
* Converts a series of [start, end] range pairs into an array of individual
* positions. This also caters for 'reverse strand' (start > end) cases.
*
* @param ranges
* @return
*/
public static int[] flattenRanges(int[] ranges)
{
/*
* Count how many positions altogether
*/
int count = 0;
for (int i = 0; i < ranges.length - 1; i += 2)
{
count += Math.abs(ranges[i + 1] - ranges[i]) + 1;
}
int[] result = new int[count];
int k = 0;
for (int i = 0; i < ranges.length - 1; i += 2)
{
int from = ranges[i];
final int to = ranges[i + 1];
int step = from <= to ? 1 : -1;
do
{
result[k++] = from;
from += step;
} while (from != to + step);
}
return result;
}
/**
* Returns a list of any mappings that are from or to the given (aligned or
* dataset) sequence.
*
* @param sequence
* @param mappings
* @return
*/
public static List findMappingsForSequence(
SequenceI sequence, List mappings)
{
return findMappingsForSequenceAndOthers(sequence, mappings, null);
}
/**
* Returns a list of any mappings that are from or to the given (aligned or
* dataset) sequence, optionally limited to mappings involving one of a given
* list of sequences.
*
* @param sequence
* @param mappings
* @param filterList
* @return
*/
public static List findMappingsForSequenceAndOthers(
SequenceI sequence, List mappings,
List filterList)
{
List result = new ArrayList();
if (sequence == null || mappings == null)
{
return result;
}
for (AlignedCodonFrame mapping : mappings)
{
if (mapping.involvesSequence(sequence))
{
if (filterList != null)
{
for (SequenceI otherseq : filterList)
{
SequenceI otherDataset = otherseq.getDatasetSequence();
if (otherseq == sequence
|| otherseq == sequence.getDatasetSequence()
|| (otherDataset != null && (otherDataset == sequence || otherDataset == sequence
.getDatasetSequence())))
{
// skip sequences in subset which directly relate to sequence
continue;
}
if (mapping.involvesSequence(otherseq))
{
// selected a mapping contained in subselect alignment
result.add(mapping);
break;
}
}
}
else
{
result.add(mapping);
}
}
}
return result;
}
/**
* Returns the total length of the supplied ranges, which may be as single
* [start, end] or multiple [start, end, start, end ...]
*
* @param ranges
* @return
*/
public static int getLength(List ranges)
{
if (ranges == null)
{
return 0;
}
int length = 0;
for (int[] range : ranges)
{
if (range.length % 2 != 0)
{
System.err.println("Error unbalance start/end ranges: "
+ ranges.toString());
return 0;
}
for (int i = 0; i < range.length - 1; i += 2)
{
length += Math.abs(range[i + 1] - range[i]) + 1;
}
}
return length;
}
/**
* Answers true if any range includes the given value
*
* @param ranges
* @param value
* @return
*/
public static boolean contains(List ranges, int value)
{
if (ranges == null)
{
return false;
}
for (int[] range : ranges)
{
if (range[1] >= range[0] && value >= range[0] && value <= range[1])
{
/*
* value within ascending range
*/
return true;
}
if (range[1] < range[0] && value <= range[0] && value >= range[1])
{
/*
* value within descending range
*/
return true;
}
}
return false;
}
/**
* Removes a specified number of positions from the start of a ranges list.
* For example, could be used to adjust cds ranges to allow for an incomplete
* start codon. Subranges are removed completely, or their start positions
* adjusted, until the required number of positions has been removed from the
* range. Reverse strand ranges are supported. The input array is not
* modified.
*
* @param removeCount
* @param ranges
* an array of [start, end, start, end...] positions
* @return a new array with the first removeCount positions removed
*/
public static int[] removeStartPositions(int removeCount,
final int[] ranges)
{
if (removeCount <= 0)
{
return ranges;
}
int[] copy = Arrays.copyOf(ranges, ranges.length);
int sxpos = -1;
int cdspos = 0;
for (int x = 0; x < copy.length && sxpos == -1; x += 2)
{
cdspos += Math.abs(copy[x + 1] - copy[x]) + 1;
if (removeCount < cdspos)
{
/*
* we have removed enough, time to finish
*/
sxpos = x;
/*
* increment start of first exon, or decrement if reverse strand
*/
if (copy[x] <= copy[x + 1])
{
copy[x] = copy[x + 1] - cdspos + removeCount + 1;
}
else
{
copy[x] = copy[x + 1] + cdspos - removeCount - 1;
}
break;
}
}
if (sxpos > 0)
{
/*
* we dropped at least one entire sub-range - compact the array
*/
int[] nxon = new int[copy.length - sxpos];
System.arraycopy(copy, sxpos, nxon, 0, copy.length - sxpos);
return nxon;
}
return copy;
}
}