import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentOrder;
import jalview.datamodel.ColumnSelection;
+import jalview.datamodel.HiddenColumns;
+import jalview.datamodel.SearchResultMatchI;
import jalview.datamodel.SearchResults;
-import jalview.datamodel.SearchResults.Match;
+import jalview.datamodel.SearchResultsI;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceGroup;
import jalview.datamodel.SequenceI;
import java.util.ArrayList;
-import java.util.Collections;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import java.util.Set;
/**
* Helper methods for manipulations involving sequence mappings.
*/
protected static void mapCutOrPaste(Edit edit, boolean undo,
List<SequenceI> targetSeqs, EditCommand result,
- Set<AlignedCodonFrame> mappings)
+ List<AlignedCodonFrame> mappings)
{
Action action = edit.getAction();
if (undo)
*/
public static EditCommand mapEditCommand(EditCommand command,
boolean undo, final AlignmentI mapTo, char gapChar,
- Set<AlignedCodonFrame> mappings)
+ List<AlignedCodonFrame> mappings)
{
/*
* For now, only support mapping from protein edits to cDna
* Cache a copy of the target sequences so we can mimic successive edits on
* them. This lets us compute mappings for all edits in the set.
*/
- Map<SequenceI, SequenceI> targetCopies = new HashMap<SequenceI, SequenceI>();
+ Map<SequenceI, SequenceI> targetCopies = new HashMap<>();
for (SequenceI seq : mapTo.getSequences())
{
SequenceI ds = seq.getDatasetSequence();
Map<SequenceI, SequenceI> originalSequences,
final List<SequenceI> targetSeqs,
Map<SequenceI, SequenceI> targetCopies, char gapChar,
- EditCommand result, Set<AlignedCodonFrame> mappings)
+ EditCommand result, List<AlignedCodonFrame> mappings)
{
Action action = edit.getAction();
/*
* Determine all mappings from this position to mapped sequences.
*/
- SearchResults sr = buildSearchResults(seq, seqpos, mappings);
+ SearchResultsI sr = buildSearchResults(seq, seqpos, mappings);
if (!sr.isEmpty())
{
* Shift Delete start position left, as it acts on positions to its
* right.
*/
- int mappedEditPos = action == Action.DELETE_GAP ? match[0]
- - mappedCount : match[0];
+ int mappedEditPos = action == Action.DELETE_GAP
+ ? match[0] - mappedCount
+ : match[0];
Edit e = result.new Edit(action, new SequenceI[] { targetSeq },
mappedEditPos, mappedCount, gapChar);
result.addEdit(e);
*/
if (action == Action.INSERT_GAP)
{
- copyTarget.setSequence(new String(StringUtils.insertCharAt(
- copyTarget.getSequence(), mappedEditPos, mappedCount,
- gapChar)));
+ copyTarget.setSequence(new String(
+ StringUtils.insertCharAt(copyTarget.getSequence(),
+ mappedEditPos, mappedCount, gapChar)));
}
else if (action == Action.DELETE_GAP)
{
- copyTarget.setSequence(new String(StringUtils.deleteChars(
- copyTarget.getSequence(), mappedEditPos,
- mappedEditPos + mappedCount)));
+ copyTarget.setSequence(new String(
+ StringUtils.deleteChars(copyTarget.getSequence(),
+ mappedEditPos, mappedEditPos + mappedCount)));
}
}
}
* @param seqmappings
* @return
*/
- public static SearchResults buildSearchResults(SequenceI seq, int index,
- Set<AlignedCodonFrame> seqmappings)
+ public static SearchResultsI buildSearchResults(SequenceI seq, int index,
+ List<AlignedCodonFrame> seqmappings)
{
- SearchResults results = new SearchResults();
+ SearchResultsI results = new SearchResults();
addSearchResults(results, seq, index, seqmappings);
return results;
}
* @param index
* @param seqmappings
*/
- public static void addSearchResults(SearchResults results, SequenceI seq,
- int index, Set<AlignedCodonFrame> seqmappings)
+ public static void addSearchResults(SearchResultsI results, SequenceI seq,
+ int index, List<AlignedCodonFrame> seqmappings)
{
if (index >= seq.getStart() && index <= seq.getEnd())
{
*/
boolean targetIsNucleotide = mapTo.isNucleotide();
AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
- Set<AlignedCodonFrame> codonFrames = protein.getAlignment()
+ List<AlignedCodonFrame> codonFrames = protein.getAlignment()
.getCodonFrames();
/*
* Copy group name, colours etc, but not sequences or sequence colour scheme
*/
SequenceGroup mappedGroup = new SequenceGroup(sg);
- mappedGroup.cs = mapTo.getGlobalColourScheme();
+ mappedGroup.setColourScheme(mapTo.getGlobalColourScheme());
mappedGroup.clear();
int minStartCol = -1;
for (AlignedCodonFrame acf : codonFrames)
{
- SequenceI mappedSequence = targetIsNucleotide ? acf
- .getDnaForAaSeq(selected) : acf.getAaForDnaSeq(selected);
+ SequenceI mappedSequence = targetIsNucleotide
+ ? acf.getDnaForAaSeq(selected)
+ : acf.getAaForDnaSeq(selected);
if (mappedSequence != null)
{
for (SequenceI seq : mapTo.getAlignment().getSequences())
/*
* Found a sequence mapping. Locate the start/end mapped residues.
*/
- SearchResults sr = buildSearchResults(selected,
- startResiduePos, Collections.singleton(acf));
- for (Match m : sr.getResults())
+ List<AlignedCodonFrame> mapping = Arrays
+ .asList(new AlignedCodonFrame[]
+ { acf });
+ SearchResultsI sr = buildSearchResults(selected,
+ startResiduePos, mapping);
+ for (SearchResultMatchI m : sr.getResults())
{
mappedStartResidue = m.getStart();
mappedEndResidue = m.getEnd();
}
- sr = buildSearchResults(selected, endResiduePos,
- Collections.singleton(acf));
- for (Match m : sr.getResults())
+ sr = buildSearchResults(selected, endResiduePos, mapping);
+ for (SearchResultMatchI m : sr.getResults())
{
mappedStartResidue = Math.min(mappedStartResidue,
m.getStart());
* returns a base 1 position, SequenceGroup uses base 0
*/
int mappedStartCol = seq.findIndex(mappedStartResidue) - 1;
- minStartCol = minStartCol == -1 ? mappedStartCol : Math.min(
- minStartCol, mappedStartCol);
+ minStartCol = minStartCol == -1 ? mappedStartCol
+ : Math.min(minStartCol, mappedStartCol);
int mappedEndCol = seq.findIndex(mappedEndResidue) - 1;
- maxEndCol = maxEndCol == -1 ? mappedEndCol : Math.max(
- maxEndCol, mappedEndCol);
+ maxEndCol = maxEndCol == -1 ? mappedEndCol
+ : Math.max(maxEndCol, mappedEndCol);
mappedGroup.addSequence(seq, false);
break;
}
* the mappings available
* @return
*/
- public static CommandI mapOrderCommand(OrderCommand command,
- boolean undo, AlignmentI mapTo, Set<AlignedCodonFrame> mappings)
+ public static CommandI mapOrderCommand(OrderCommand command, boolean undo,
+ AlignmentI mapTo, List<AlignedCodonFrame> mappings)
{
SequenceI[] sortOrder = command.getSequenceOrder(undo);
- List<SequenceI> mappedOrder = new ArrayList<SequenceI>();
+ List<SequenceI> mappedOrder = new ArrayList<>();
int j = 0;
/*
* @param mapTo
* @return
*/
- public static ColumnSelection mapColumnSelection(ColumnSelection colsel,
- AlignViewportI mapFrom, AlignViewportI mapTo)
+ public static void mapColumnSelection(ColumnSelection colsel,
+ HiddenColumns hiddencols, AlignViewportI mapFrom,
+ AlignViewportI mapTo, ColumnSelection newColSel,
+ HiddenColumns newHidden)
{
boolean targetIsNucleotide = mapTo.isNucleotide();
AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
- Set<AlignedCodonFrame> codonFrames = protein.getAlignment()
+ List<AlignedCodonFrame> codonFrames = protein.getAlignment()
.getCodonFrames();
- ColumnSelection mappedColumns = new ColumnSelection();
if (colsel == null)
{
- return mappedColumns;
+ return; // mappedColumns;
}
char fromGapChar = mapFrom.getAlignment().getGapCharacter();
- // FIXME allow for hidden columns
-
/*
* For each mapped column, find the range of columns that residues in that
* column map to.
for (Integer sel : colsel.getSelected())
{
- mapColumn(sel.intValue(), codonFrames, mappedColumns, fromSequences,
+ mapColumn(sel.intValue(), codonFrames, newColSel, fromSequences,
toSequences, fromGapChar);
}
- for (int[] hidden : colsel.getHiddenColumns())
+ Iterator<int[]> regions = hiddencols.iterator();
+ while (regions.hasNext())
{
- mapHiddenColumns(hidden, codonFrames, mappedColumns, fromSequences,
+ mapHiddenColumns(regions.next(), codonFrames, newHidden,
+ fromSequences,
toSequences, fromGapChar);
}
- return mappedColumns;
+ return; // mappedColumns;
}
/**
* @param fromGapChar
*/
protected static void mapHiddenColumns(int[] hidden,
- Set<AlignedCodonFrame> mappings,
- ColumnSelection mappedColumns, List<SequenceI> fromSequences,
- List<SequenceI> toSequences, char fromGapChar)
+ List<AlignedCodonFrame> mappings, HiddenColumns mappedColumns,
+ List<SequenceI> fromSequences, List<SequenceI> toSequences,
+ char fromGapChar)
{
for (int col = hidden[0]; col <= hidden[1]; col++)
{
* @param toSequences
* @param fromGapChar
*/
- protected static void mapColumn(int col, Set<AlignedCodonFrame> mappings,
+ protected static void mapColumn(int col, List<AlignedCodonFrame> mappings,
ColumnSelection mappedColumns, List<SequenceI> fromSequences,
List<SequenceI> toSequences, char fromGapChar)
{
* @return
*/
protected static int[] findMappedColumns(int col,
- Set<AlignedCodonFrame> mappings, List<SequenceI> fromSequences,
+ List<AlignedCodonFrame> mappings, List<SequenceI> fromSequences,
List<SequenceI> toSequences, char fromGapChar)
{
int[] mappedTo = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE };
* Get the residue position and find the mapped position.
*/
int residuePos = fromSeq.findPosition(col);
- SearchResults sr = buildSearchResults(fromSeq, residuePos,
- mappings);
- for (Match m : sr.getResults())
+ SearchResultsI sr = buildSearchResults(fromSeq, residuePos, mappings);
+ for (SearchResultMatchI m : sr.getResults())
{
int mappedStartResidue = m.getStart();
int mappedEndResidue = m.getEnd();
}
/**
- * Returns the mapped codon for a given aligned sequence column position (base
- * 0).
+ * Returns the mapped codon or codons for a given aligned sequence column
+ * position (base 0).
*
* @param seq
* an aligned peptide sequence
* an aligned column position (base 0)
* @param mappings
* a set of codon mappings
- * @return the bases of the mapped codon in the cDNA dataset sequence, or null
- * if not found
+ * @return the bases of the mapped codon(s) in the cDNA dataset sequence(s),
+ * or an empty list if none found
*/
- public static char[] findCodonFor(SequenceI seq, int col,
- Set<AlignedCodonFrame> mappings)
+ public static List<char[]> findCodonsFor(SequenceI seq, int col,
+ List<AlignedCodonFrame> mappings)
{
+ List<char[]> result = new ArrayList<>();
int dsPos = seq.findPosition(col);
for (AlignedCodonFrame mapping : mappings)
{
if (mapping.involvesSequence(seq))
{
- return mapping.getMappedCodon(seq.getDatasetSequence(), dsPos);
+ List<char[]> codons = mapping
+ .getMappedCodons(seq.getDatasetSequence(), dsPos);
+ if (codons != null)
+ {
+ result.addAll(codons);
+ }
}
}
- return null;
+ return result;
}
/**
- * Converts a series of [start, end] ranges into an array of individual
- * positions.
+ * Converts a series of [start, end] range pairs into an array of individual
+ * positions. This also caters for 'reverse strand' (start > end) cases.
*
* @param ranges
* @return
int count = 0;
for (int i = 0; i < ranges.length - 1; i += 2)
{
- count += ranges[i + 1] - ranges[i] + 1;
+ count += Math.abs(ranges[i + 1] - ranges[i]) + 1;
}
int[] result = new int[count];
int k = 0;
for (int i = 0; i < ranges.length - 1; i += 2)
{
- for (int j = ranges[i]; j <= ranges[i + 1]; j++)
+ int from = ranges[i];
+ final int to = ranges[i + 1];
+ int step = from <= to ? 1 : -1;
+ do
{
- result[k++] = j;
- }
+ result[k++] = from;
+ from += step;
+ } while (from != to + step);
}
return result;
}
* @return
*/
public static List<AlignedCodonFrame> findMappingsForSequence(
- SequenceI sequence, Set<AlignedCodonFrame> mappings)
+ SequenceI sequence, List<AlignedCodonFrame> mappings)
+ {
+ return findMappingsForSequenceAndOthers(sequence, mappings, null);
+ }
+
+ /**
+ * Returns a list of any mappings that are from or to the given (aligned or
+ * dataset) sequence, optionally limited to mappings involving one of a given
+ * list of sequences.
+ *
+ * @param sequence
+ * @param mappings
+ * @param filterList
+ * @return
+ */
+ public static List<AlignedCodonFrame> findMappingsForSequenceAndOthers(
+ SequenceI sequence, List<AlignedCodonFrame> mappings,
+ List<SequenceI> filterList)
{
- List<AlignedCodonFrame> result = new ArrayList<AlignedCodonFrame>();
+ List<AlignedCodonFrame> result = new ArrayList<>();
if (sequence == null || mappings == null)
{
return result;
{
if (mapping.involvesSequence(sequence))
{
- result.add(mapping);
+ if (filterList != null)
+ {
+ for (SequenceI otherseq : filterList)
+ {
+ SequenceI otherDataset = otherseq.getDatasetSequence();
+ if (otherseq == sequence
+ || otherseq == sequence.getDatasetSequence()
+ || (otherDataset != null && (otherDataset == sequence
+ || otherDataset == sequence
+ .getDatasetSequence())))
+ {
+ // skip sequences in subset which directly relate to sequence
+ continue;
+ }
+ if (mapping.involvesSequence(otherseq))
+ {
+ // selected a mapping contained in subselect alignment
+ result.add(mapping);
+ break;
+ }
+ }
+ }
+ else
+ {
+ result.add(mapping);
+ }
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Returns the total length of the supplied ranges, which may be as single
+ * [start, end] or multiple [start, end, start, end ...]
+ *
+ * @param ranges
+ * @return
+ */
+ public static int getLength(List<int[]> ranges)
+ {
+ if (ranges == null)
+ {
+ return 0;
+ }
+ int length = 0;
+ for (int[] range : ranges)
+ {
+ if (range.length % 2 != 0)
+ {
+ System.err.println(
+ "Error unbalance start/end ranges: " + ranges.toString());
+ return 0;
+ }
+ for (int i = 0; i < range.length - 1; i += 2)
+ {
+ length += Math.abs(range[i + 1] - range[i]) + 1;
+ }
+ }
+ return length;
+ }
+
+ /**
+ * Answers true if any range includes the given value
+ *
+ * @param ranges
+ * @param value
+ * @return
+ */
+ public static boolean contains(List<int[]> ranges, int value)
+ {
+ if (ranges == null)
+ {
+ return false;
+ }
+ for (int[] range : ranges)
+ {
+ if (range[1] >= range[0] && value >= range[0] && value <= range[1])
+ {
+ /*
+ * value within ascending range
+ */
+ return true;
+ }
+ if (range[1] < range[0] && value <= range[0] && value >= range[1])
+ {
+ /*
+ * value within descending range
+ */
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Removes a specified number of positions from the start of a ranges list.
+ * For example, could be used to adjust cds ranges to allow for an incomplete
+ * start codon. Subranges are removed completely, or their start positions
+ * adjusted, until the required number of positions has been removed from the
+ * range. Reverse strand ranges are supported. The input array is not
+ * modified.
+ *
+ * @param removeCount
+ * @param ranges
+ * an array of [start, end, start, end...] positions
+ * @return a new array with the first removeCount positions removed
+ */
+ public static int[] removeStartPositions(int removeCount,
+ final int[] ranges)
+ {
+ if (removeCount <= 0)
+ {
+ return ranges;
+ }
+
+ int[] copy = Arrays.copyOf(ranges, ranges.length);
+ int sxpos = -1;
+ int cdspos = 0;
+ for (int x = 0; x < copy.length && sxpos == -1; x += 2)
+ {
+ cdspos += Math.abs(copy[x + 1] - copy[x]) + 1;
+ if (removeCount < cdspos)
+ {
+ /*
+ * we have removed enough, time to finish
+ */
+ sxpos = x;
+
+ /*
+ * increment start of first exon, or decrement if reverse strand
+ */
+ if (copy[x] <= copy[x + 1])
+ {
+ copy[x] = copy[x + 1] - cdspos + removeCount + 1;
+ }
+ else
+ {
+ copy[x] = copy[x + 1] + cdspos - removeCount - 1;
+ }
+ break;
}
}
+
+ if (sxpos > 0)
+ {
+ /*
+ * we dropped at least one entire sub-range - compact the array
+ */
+ int[] nxon = new int[copy.length - sxpos];
+ System.arraycopy(copy, sxpos, nxon, 0, copy.length - sxpos);
+ return nxon;
+ }
+ return copy;
+ }
+
+ /**
+ * Answers true if range's start-end positions include those of queryRange,
+ * where either range might be in reverse direction, else false
+ *
+ * @param range
+ * a start-end range
+ * @param queryRange
+ * a candidate subrange of range (start2-end2)
+ * @return
+ */
+ public static boolean rangeContains(int[] range, int[] queryRange)
+ {
+ if (range == null || queryRange == null || range.length != 2
+ || queryRange.length != 2)
+ {
+ /*
+ * invalid arguments
+ */
+ return false;
+ }
+
+ int min = Math.min(range[0], range[1]);
+ int max = Math.max(range[0], range[1]);
+
+ return (min <= queryRange[0] && max >= queryRange[0]
+ && min <= queryRange[1] && max >= queryRange[1]);
+ }
+
+ /**
+ * Removes the specified number of positions from the given ranges. Provided
+ * to allow a stop codon to be stripped from a CDS sequence so that it matches
+ * the peptide translation length.
+ *
+ * @param positions
+ * @param ranges
+ * a list of (single) [start, end] ranges
+ * @return
+ */
+ public static void removeEndPositions(int positions,
+ List<int[]> ranges)
+ {
+ int toRemove = positions;
+ Iterator<int[]> it = new ReverseListIterator<>(ranges);
+ while (toRemove > 0)
+ {
+ int[] endRange = it.next();
+ if (endRange.length != 2)
+ {
+ /*
+ * not coded for [start1, end1, start2, end2, ...]
+ */
+ System.err
+ .println("MappingUtils.removeEndPositions doesn't handle multiple ranges");
+ return;
+ }
+
+ int length = endRange[1] - endRange[0] + 1;
+ if (length <= 0)
+ {
+ /*
+ * not coded for a reverse strand range (end < start)
+ */
+ System.err
+ .println("MappingUtils.removeEndPositions doesn't handle reverse strand");
+ return;
+ }
+ if (length > toRemove)
+ {
+ endRange[1] -= toRemove;
+ toRemove = 0;
+ }
+ else
+ {
+ toRemove -= length;
+ it.remove();
+ }
+ }
+ }
+
+ /**
+ * Converts a list of [start, end] ranges to a single array of [start, end,
+ * start, end ...]
+ *
+ * @param ranges
+ * @return
+ */
+ public static int[] listToArray(List<int[]> ranges)
+ {
+ int[] result = new int[ranges.size() * 2];
+ int i = 0;
+ for (int[] range : ranges)
+ {
+ result[i++] = range[0];
+ result[i++] = range[1];
+ }
return result;
}
}