import jalview.datamodel.SequenceI;
import java.util.ArrayList;
-import java.util.Collections;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import java.util.Set;
/**
* Helper methods for manipulations involving sequence mappings.
*/
protected static void mapCutOrPaste(Edit edit, boolean undo,
List<SequenceI> targetSeqs, EditCommand result,
- Set<AlignedCodonFrame> mappings)
+ List<AlignedCodonFrame> mappings)
{
Action action = edit.getAction();
if (undo)
*/
public static EditCommand mapEditCommand(EditCommand command,
boolean undo, final AlignmentI mapTo, char gapChar,
- Set<AlignedCodonFrame> mappings)
+ List<AlignedCodonFrame> mappings)
{
/*
* For now, only support mapping from protein edits to cDna
Map<SequenceI, SequenceI> originalSequences,
final List<SequenceI> targetSeqs,
Map<SequenceI, SequenceI> targetCopies, char gapChar,
- EditCommand result, Set<AlignedCodonFrame> mappings)
+ EditCommand result, List<AlignedCodonFrame> mappings)
{
Action action = edit.getAction();
* @return
*/
public static SearchResults buildSearchResults(SequenceI seq, int index,
- Set<AlignedCodonFrame> seqmappings)
+ List<AlignedCodonFrame> seqmappings)
{
SearchResults results = new SearchResults();
addSearchResults(results, seq, index, seqmappings);
* @param seqmappings
*/
public static void addSearchResults(SearchResults results, SequenceI seq,
- int index, Set<AlignedCodonFrame> seqmappings)
+ int index, List<AlignedCodonFrame> seqmappings)
{
if (index >= seq.getStart() && index <= seq.getEnd())
{
*/
boolean targetIsNucleotide = mapTo.isNucleotide();
AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
- Set<AlignedCodonFrame> codonFrames = protein.getAlignment()
+ List<AlignedCodonFrame> codonFrames = protein.getAlignment()
.getCodonFrames();
/*
* Copy group name, colours etc, but not sequences or sequence colour scheme
/*
* Found a sequence mapping. Locate the start/end mapped residues.
*/
+ List<AlignedCodonFrame> mapping = Arrays.asList(new AlignedCodonFrame[] { acf });
SearchResults sr = buildSearchResults(selected,
- startResiduePos, Collections.singleton(acf));
+ startResiduePos, mapping);
for (Match m : sr.getResults())
{
mappedStartResidue = m.getStart();
mappedEndResidue = m.getEnd();
}
- sr = buildSearchResults(selected, endResiduePos,
- Collections.singleton(acf));
+ sr = buildSearchResults(selected, endResiduePos, mapping);
for (Match m : sr.getResults())
{
mappedStartResidue = Math.min(mappedStartResidue,
* @return
*/
public static CommandI mapOrderCommand(OrderCommand command,
- boolean undo, AlignmentI mapTo, Set<AlignedCodonFrame> mappings)
+ boolean undo, AlignmentI mapTo, List<AlignedCodonFrame> mappings)
{
SequenceI[] sortOrder = command.getSequenceOrder(undo);
List<SequenceI> mappedOrder = new ArrayList<SequenceI>();
{
boolean targetIsNucleotide = mapTo.isNucleotide();
AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
- Set<AlignedCodonFrame> codonFrames = protein.getAlignment()
+ List<AlignedCodonFrame> codonFrames = protein.getAlignment()
.getCodonFrames();
ColumnSelection mappedColumns = new ColumnSelection();
char fromGapChar = mapFrom.getAlignment().getGapCharacter();
- // FIXME allow for hidden columns
-
/*
* For each mapped column, find the range of columns that residues in that
* column map to.
*/
- for (Object obj : colsel.getSelected())
+ List<SequenceI> fromSequences = mapFrom.getAlignment().getSequences();
+ List<SequenceI> toSequences = mapTo.getAlignment().getSequences();
+
+ for (Integer sel : colsel.getSelected())
+ {
+ mapColumn(sel.intValue(), codonFrames, mappedColumns, fromSequences,
+ toSequences, fromGapChar);
+ }
+
+ for (int[] hidden : colsel.getHiddenColumns())
+ {
+ mapHiddenColumns(hidden, codonFrames, mappedColumns, fromSequences,
+ toSequences, fromGapChar);
+ }
+ return mappedColumns;
+ }
+
+ /**
+ * Helper method that maps a [start, end] hidden column range to its mapped
+ * equivalent. Each column in the range is mapped separately via
+ * findMappedColumns, and every non-null result is hidden on mappedColumns;
+ * columns for which no mapping is found are skipped.
+ *
+ * @param hidden
+ * the hidden range; hidden[0] is the first and hidden[1] the last
+ * column (inclusive)
+ * @param mappings
+ * the sequence mappings, passed on to findMappedColumns
+ * @param mappedColumns
+ * the mapped column selection on which the mapped ranges are hidden
+ * @param fromSequences
+ * the sequences being mapped from, passed on to findMappedColumns
+ * @param toSequences
+ * the sequences being mapped to, passed on to findMappedColumns
+ * @param fromGapChar
+ * the gap character to test for in fromSequences
+ */
+ protected static void mapHiddenColumns(int[] hidden,
+ List<AlignedCodonFrame> mappings,
+ ColumnSelection mappedColumns, List<SequenceI> fromSequences,
+ List<SequenceI> toSequences, char fromGapChar)
+ {
+ for (int col = hidden[0]; col <= hidden[1]; col++)
{
- int col = ((Integer) obj).intValue();
- int mappedToMin = Integer.MAX_VALUE;
- int mappedToMax = Integer.MIN_VALUE;
+ int[] mappedTo = findMappedColumns(col, mappings, fromSequences,
+ toSequences, fromGapChar);
/*
- * For each sequence in the 'from' alignment
+ * Add the range of hidden columns to the mapped selection (converting
+ * base 1 to base 0).
*/
- for (SequenceI fromSeq : mapFrom.getAlignment().getSequences())
+ if (mappedTo != null)
{
- /*
- * Ignore gaps (unmapped anyway)
- */
- if (fromSeq.getCharAt(col) == fromGapChar)
- {
- continue;
- }
+ mappedColumns.hideColumns(mappedTo[0] - 1, mappedTo[1] - 1);
+ }
+ }
+ }
+
+ /**
+  * Maps a single selected column and adds every column of the resulting
+  * range to the mapped selection. Does nothing if the column has no mapping.
+  *
+  * @param col
+  *          the column number (base 0)
+  * @param mappings
+  *          the sequence mappings
+  * @param mappedColumns
+  *          the mapped column selections to add to
+  * @param fromSequences
+  *          the sequences being mapped from
+  * @param toSequences
+  *          the sequences being mapped to
+  * @param fromGapChar
+  *          the gap character to test for in fromSequences
+  */
+ protected static void mapColumn(int col,
+         List<AlignedCodonFrame> mappings,
+         ColumnSelection mappedColumns, List<SequenceI> fromSequences,
+         List<SequenceI> toSequences, char fromGapChar)
+ {
+   final int[] range = findMappedColumns(col, mappings, fromSequences,
+           toSequences, fromGapChar);
+   if (range == null)
+   {
+     return;
+   }
+
+   /*
+    * The range found is base 1; the selection wants base 0. Everything
+    * between the start and end is added, so intron-only regions lying
+    * inside the range may be included as well.
+    */
+   for (int oneBased = range[0]; oneBased <= range[1]; oneBased++)
+   {
+     int zeroBased = oneBased - 1;
+     mappedColumns.addElement(zeroBased);
+   }
+ }
+
+ /**
+ * Helper method to find the range of columns mapped to from one column.
+ * Returns the maximal range of columns mapped to from all sequences in the
+ * source column, or null if no mappings were found.
+ *
+ * @param col
+ * the column to map from, as accepted by SequenceI.getCharAt and
+ * SequenceI.findPosition
+ * @param mappings
+ * the sequence mappings used to build the search results
+ * @param fromSequences
+ * the sequences whose residues at col are mapped; a sequence with
+ * fromGapChar at col is skipped
+ * @param toSequences
+ * the aligned sequences searched for one whose dataset sequence is
+ * the mapped sequence (only the first such sequence is used)
+ * @param fromGapChar
+ * the character treated as a gap in fromSequences
+ * @return a two element array holding the smallest start column and largest
+ * end column found (values as returned by SequenceI.findIndex), or
+ * null if nothing was mapped
+ */
+ protected static int[] findMappedColumns(int col,
+ List<AlignedCodonFrame> mappings, List<SequenceI> fromSequences,
+ List<SequenceI> toSequences, char fromGapChar)
+ {
+ int[] mappedTo = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE };
+ boolean found = false;
+
+ /*
+ * For each sequence in the 'from' alignment
+ */
+ for (SequenceI fromSeq : fromSequences)
+ {
+ /*
+ * Ignore gaps (unmapped anyway)
+ */
+ if (fromSeq.getCharAt(col) == fromGapChar)
+ {
+ continue;
+ }
+
+ /*
+ * Get the residue position and find the mapped position.
+ */
+ int residuePos = fromSeq.findPosition(col);
+ SearchResults sr = buildSearchResults(fromSeq, residuePos,
+ mappings);
+ for (Match m : sr.getResults())
+ {
+ int mappedStartResidue = m.getStart();
+ int mappedEndResidue = m.getEnd();
+ SequenceI mappedSeq = m.getSequence();
/*
- * Get the residue position and find the mapped position.
+ * Locate the aligned sequence whose dataset is mappedSeq. TODO a
+ * datamodel that can do this efficiently.
*/
- int residuePos = fromSeq.findPosition(col);
- SearchResults sr = buildSearchResults(fromSeq, residuePos,
- codonFrames);
- for (Match m : sr.getResults())
+ for (SequenceI toSeq : toSequences)
{
- int mappedStartResidue = m.getStart();
- int mappedEndResidue = m.getEnd();
- SequenceI mappedSeq = m.getSequence();
-
- /*
- * Locate the aligned sequence whose dataset is mappedSeq. TODO a
- * datamodel that can do this efficiently.
- */
- for (SequenceI toSeq : mapTo.getAlignment().getSequences())
+ if (toSeq.getDatasetSequence() == mappedSeq)
{
- if (toSeq.getDatasetSequence() == mappedSeq)
- {
- int mappedStartCol = toSeq.findIndex(mappedStartResidue);
- int mappedEndCol = toSeq.findIndex(mappedEndResidue);
- mappedToMin = Math.min(mappedToMin, mappedStartCol);
- mappedToMax = Math.max(mappedToMax, mappedEndCol);
- // System.out.println(fromSeq.getName() + " mapped to cols "
- // + mappedStartCol + ":" + mappedEndCol);
- break;
- // note: remove break if we ever want to map one to many sequences
- }
+ int mappedStartCol = toSeq.findIndex(mappedStartResidue);
+ int mappedEndCol = toSeq.findIndex(mappedEndResidue);
+ mappedTo[0] = Math.min(mappedTo[0], mappedStartCol);
+ mappedTo[1] = Math.max(mappedTo[1], mappedEndCol);
+ found = true;
+ break;
+ // note: remove break if we ever want to map one to many sequences
}
}
}
- /*
- * Add the range of mapped columns to the mapped selection (converting
- * base 1 to base 0). Note that this may include intron-only regions which
- * lie between the start and end ranges of the selection.
- */
- for (int i = mappedToMin; i <= mappedToMax; i++)
- {
- mappedColumns.addElement(i - 1);
- }
}
- return mappedColumns;
+ return found ? mappedTo : null;
}
/**
- * Returns the mapped codon for a given aligned sequence column position (base
- * 0).
+ * Returns the mapped codon or codons for a given aligned sequence column
+ * position (base 0).
+ * Codons are collected from every mapping that involves the sequence; a
+ * mapping that returns null contributes nothing.
*
* @param seq
* an aligned peptide sequence
+ * @param col
* an aligned column position (base 0)
* @param mappings
* a set of codon mappings
- * @return the bases of the mapped codon in the cDNA dataset sequence, or null
- * if not found
+ * @return the bases of the mapped codon(s) in the cDNA dataset sequence(s),
+ * or an empty list if none found
*/
- public static char[] findCodonFor(SequenceI seq, int col,
- Set<AlignedCodonFrame> mappings)
+ public static List<char[]> findCodonsFor(SequenceI seq, int col,
+ List<AlignedCodonFrame> mappings)
{
+ List<char[]> result = new ArrayList<char[]>();
int dsPos = seq.findPosition(col);
for (AlignedCodonFrame mapping : mappings)
{
if (mapping.involvesSequence(seq))
{
- return mapping.getMappedCodon(seq.getDatasetSequence(), dsPos);
+ List<char[]> codons = mapping.getMappedCodons(
+ seq.getDatasetSequence(), dsPos);
+ if (codons != null)
+ {
+ result.addAll(codons);
+ }
}
}
- return null;
+ return result;
}
/**
- * Converts a series of [start, end] ranges into an array of individual
- * positions.
+ * Converts a series of [start, end] range pairs into an array of individual
+ * positions. This also caters for 'reverse strand' (start > end) cases.
*
* @param ranges
* @return
int count = 0;
for (int i = 0; i < ranges.length - 1; i += 2)
{
- count += ranges[i + 1] - ranges[i] + 1;
+ count += Math.abs(ranges[i + 1] - ranges[i]) + 1;
}
int[] result = new int[count];
int k = 0;
for (int i = 0; i < ranges.length - 1; i += 2)
{
- for (int j = ranges[i]; j <= ranges[i + 1]; j++)
+ int from = ranges[i];
+ final int to = ranges[i + 1];
+ int step = from <= to ? 1 : -1;
+ do
{
- result[k++] = j;
- }
+ result[k++] = from;
+ from += step;
+ } while (from != to + step);
}
return result;
}
* @return
*/
public static List<AlignedCodonFrame> findMappingsForSequence(
- SequenceI sequence, Set<AlignedCodonFrame> mappings)
+ SequenceI sequence, List<AlignedCodonFrame> mappings)
{
List<AlignedCodonFrame> result = new ArrayList<AlignedCodonFrame>();
if (sequence == null || mappings == null)
}
return result;
}
+
+ /**
+  * Removes the last 3 mapped positions from the given ranges. The list and
+  * the [start, end] arrays it holds are modified in place: ranges are kept
+  * from the front until (mappedLength - 3) positions are covered, the range
+  * that straddles that limit is shortened, and all later ranges are removed.
+  * Reverse ranges (start > end) are shortened towards their start.
+  * <p>
+  * If mappedLength is less than 3 nothing is changed. If it is exactly 3 no
+  * positions remain, so every range is removed (rather than leaving a
+  * [start, start - 1] range, which would read as a two position reverse
+  * range).
+  *
+  * @param ranges
+  *          the [start, end] ranges to trim; both ends are inclusive
+  * @param mappedLength
+  *          the total number of positions covered by ranges
+  */
+ public static void unmapStopCodon(List<int[]> ranges,
+         int mappedLength)
+ {
+   if (mappedLength < 3)
+   {
+     return;
+   }
+   final int targetLength = mappedLength - 3;
+   int mapped = 0;
+   Iterator<int[]> it = ranges.iterator();
+   while (it.hasNext())
+   {
+     int[] range = it.next();
+     if (mapped >= targetLength)
+     {
+       /*
+        * everything to be kept has been seen - drop trailing ranges
+        */
+       it.remove();
+       continue;
+     }
+
+     int length = Math.abs(range[1] - range[0]) + 1;
+     int needed = targetLength - mapped;
+     if (length > needed)
+     {
+       /*
+        * need just a bit of this range (needed is at least 1 here)
+        */
+       int sense = range[1] >= range[0] ? 1 : -1;
+       range[1] = range[0] + (sense * (needed - 1));
+       length = needed;
+     }
+     mapped += length;
+   }
+ }
}