*/
package jalview.util;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
import jalview.analysis.AlignmentSorter;
import jalview.api.AlignViewportI;
+import jalview.bin.Console;
import jalview.commands.CommandI;
import jalview.commands.EditCommand;
import jalview.commands.EditCommand.Action;
import jalview.commands.EditCommand.Edit;
import jalview.commands.OrderCommand;
import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentOrder;
import jalview.datamodel.ColumnSelection;
+import jalview.datamodel.HiddenColumns;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.SearchResultMatchI;
import jalview.datamodel.SearchResults;
-import jalview.datamodel.SearchResults.Match;
+import jalview.datamodel.SearchResultsI;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceGroup;
import jalview.datamodel.SequenceI;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
/**
* Helper methods for manipulations involving sequence mappings.
*
action = action.getUndoAction();
}
// TODO write this
- System.err.println("MappingUtils.mapCutOrPaste not yet implemented");
+ Console.error("MappingUtils.mapCutOrPaste not yet implemented");
}
/**
* Cache a copy of the target sequences so we can mimic successive edits on
* them. This lets us compute mappings for all edits in the set.
*/
- Map<SequenceI, SequenceI> targetCopies = new HashMap<SequenceI, SequenceI>();
+ Map<SequenceI, SequenceI> targetCopies = new HashMap<>();
for (SequenceI seq : mapTo.getSequences())
{
SequenceI ds = seq.getDatasetSequence();
/*
* Determine all mappings from this position to mapped sequences.
*/
- SearchResults sr = buildSearchResults(seq, seqpos, mappings);
+ SearchResultsI sr = buildSearchResults(seq, seqpos, mappings);
if (!sr.isEmpty())
{
* Shift Delete start position left, as it acts on positions to its
* right.
*/
- int mappedEditPos = action == Action.DELETE_GAP ? match[0]
- - mappedCount : match[0];
+ int mappedEditPos = action == Action.DELETE_GAP
+ ? match[0] - mappedCount
+ : match[0];
Edit e = result.new Edit(action, new SequenceI[] { targetSeq },
mappedEditPos, mappedCount, gapChar);
result.addEdit(e);
*/
if (action == Action.INSERT_GAP)
{
- copyTarget.setSequence(new String(StringUtils.insertCharAt(
- copyTarget.getSequence(), mappedEditPos, mappedCount,
- gapChar)));
+ copyTarget.setSequence(new String(
+ StringUtils.insertCharAt(copyTarget.getSequence(),
+ mappedEditPos, mappedCount, gapChar)));
}
else if (action == Action.DELETE_GAP)
{
- copyTarget.setSequence(new String(StringUtils.deleteChars(
- copyTarget.getSequence(), mappedEditPos,
- mappedEditPos + mappedCount)));
+ copyTarget.setSequence(new String(
+ StringUtils.deleteChars(copyTarget.getSequence(),
+ mappedEditPos, mappedEditPos + mappedCount)));
}
}
}
* @param seqmappings
* @return
*/
- public static SearchResults buildSearchResults(SequenceI seq, int index,
+ public static SearchResultsI buildSearchResults(SequenceI seq, int index,
List<AlignedCodonFrame> seqmappings)
{
- SearchResults results = new SearchResults();
+ SearchResultsI results = new SearchResults();
addSearchResults(results, seq, index, seqmappings);
return results;
}
* @param index
* @param seqmappings
*/
- public static void addSearchResults(SearchResults results, SequenceI seq,
+ public static void addSearchResults(SearchResultsI results, SequenceI seq,
int index, List<AlignedCodonFrame> seqmappings)
{
if (index >= seq.getStart() && index <= seq.getEnd())
* Copy group name, colours etc, but not sequences or sequence colour scheme
*/
SequenceGroup mappedGroup = new SequenceGroup(sg);
- mappedGroup.cs = mapTo.getGlobalColourScheme();
+ mappedGroup.setColourScheme(mapTo.getGlobalColourScheme());
mappedGroup.clear();
int minStartCol = -1;
*/
int startResiduePos = selected.findPosition(firstUngappedPos);
int endResiduePos = selected.findPosition(lastUngappedPos);
-
- for (AlignedCodonFrame acf : codonFrames)
+ for (SequenceI seq : mapTo.getAlignment().getSequences())
{
- SequenceI mappedSequence = targetIsNucleotide ? acf
- .getDnaForAaSeq(selected) : acf.getAaForDnaSeq(selected);
- if (mappedSequence != null)
+ int mappedStartResidue = 0;
+ int mappedEndResidue = 0;
+ for (AlignedCodonFrame acf : codonFrames)
{
- for (SequenceI seq : mapTo.getAlignment().getSequences())
+ // rather than use acf.getCoveringMapping() we iterate through all
+ // mappings to make sure all CDS are selected for a protein
+ for (SequenceToSequenceMapping map : acf.getMappings())
{
- int mappedStartResidue = 0;
- int mappedEndResidue = 0;
- if (seq.getDatasetSequence() == mappedSequence)
+ if (map.covers(selected) && map.covers(seq))
{
/*
* Found a sequence mapping. Locate the start/end mapped residues.
*/
- List<AlignedCodonFrame> mapping = Arrays.asList(new AlignedCodonFrame[] { acf });
- SearchResults sr = buildSearchResults(selected,
+ List<AlignedCodonFrame> mapping = Arrays
+ .asList(new AlignedCodonFrame[]
+ { acf });
+ // locate start
+ SearchResultsI sr = buildSearchResults(selected,
startResiduePos, mapping);
- for (Match m : sr.getResults())
+ for (SearchResultMatchI m : sr.getResults())
{
mappedStartResidue = m.getStart();
mappedEndResidue = m.getEnd();
}
+ // locate end - allowing for adjustment of start range
sr = buildSearchResults(selected, endResiduePos, mapping);
- for (Match m : sr.getResults())
+ for (SearchResultMatchI m : sr.getResults())
{
mappedStartResidue = Math.min(mappedStartResidue,
m.getStart());
* returns a base 1 position, SequenceGroup uses base 0
*/
int mappedStartCol = seq.findIndex(mappedStartResidue) - 1;
- minStartCol = minStartCol == -1 ? mappedStartCol : Math.min(
- minStartCol, mappedStartCol);
+ minStartCol = minStartCol == -1 ? mappedStartCol
+ : Math.min(minStartCol, mappedStartCol);
int mappedEndCol = seq.findIndex(mappedEndResidue) - 1;
- maxEndCol = maxEndCol == -1 ? mappedEndCol : Math.max(
- maxEndCol, mappedEndCol);
+ maxEndCol = maxEndCol == -1 ? mappedEndCol
+ : Math.max(maxEndCol, mappedEndCol);
mappedGroup.addSequence(seq, false);
break;
}
* the mappings available
* @return
*/
- public static CommandI mapOrderCommand(OrderCommand command,
- boolean undo, AlignmentI mapTo, List<AlignedCodonFrame> mappings)
+ public static CommandI mapOrderCommand(OrderCommand command, boolean undo,
+ AlignmentI mapTo, List<AlignedCodonFrame> mappings)
{
SequenceI[] sortOrder = command.getSequenceOrder(undo);
- List<SequenceI> mappedOrder = new ArrayList<SequenceI>();
+ List<SequenceI> mappedOrder = new ArrayList<>();
int j = 0;
/*
{
for (AlignedCodonFrame acf : mappings)
{
- SequenceI mappedSeq = mappingToNucleotide ? acf.getDnaForAaSeq(seq)
- : acf.getAaForDnaSeq(seq);
- if (mappedSeq != null)
+ for (SequenceI seq2 : mapTo.getSequences())
{
- for (SequenceI seq2 : mapTo.getSequences())
+ /*
+ * the corresponding peptide / CDS is the one for which there is
+ * a complete ('covering') mapping to 'seq'
+ */
+ SequenceI peptide = mappingToNucleotide ? seq2 : seq;
+ SequenceI cds = mappingToNucleotide ? seq : seq2;
+ SequenceToSequenceMapping s2s = acf.getCoveringMapping(cds,
+ peptide);
+ if (s2s != null)
{
- if (seq2.getDatasetSequence() == mappedSeq)
- {
- mappedOrder.add(seq2);
- j++;
- break;
- }
+ mappedOrder.add(seq2);
+ j++;
+ break;
}
}
}
* @param mapTo
* @return
*/
- public static ColumnSelection mapColumnSelection(ColumnSelection colsel,
- AlignViewportI mapFrom, AlignViewportI mapTo)
+ public static void mapColumnSelection(ColumnSelection colsel,
+ HiddenColumns hiddencols, AlignViewportI mapFrom,
+ AlignViewportI mapTo, ColumnSelection newColSel,
+ HiddenColumns newHidden)
{
boolean targetIsNucleotide = mapTo.isNucleotide();
AlignViewportI protein = targetIsNucleotide ? mapFrom : mapTo;
List<AlignedCodonFrame> codonFrames = protein.getAlignment()
.getCodonFrames();
- ColumnSelection mappedColumns = new ColumnSelection();
if (colsel == null)
{
- return mappedColumns;
+ return;
}
char fromGapChar = mapFrom.getAlignment().getGapCharacter();
for (Integer sel : colsel.getSelected())
{
- mapColumn(sel.intValue(), codonFrames, mappedColumns, fromSequences,
+ mapColumn(sel.intValue(), codonFrames, newColSel, fromSequences,
toSequences, fromGapChar);
}
- for (int[] hidden : colsel.getHiddenColumns())
+ Iterator<int[]> regions = hiddencols.iterator();
+ while (regions.hasNext())
{
- mapHiddenColumns(hidden, codonFrames, mappedColumns, fromSequences,
- toSequences, fromGapChar);
+ mapHiddenColumns(regions.next(), codonFrames, newHidden,
+ fromSequences, toSequences, fromGapChar);
}
- return mappedColumns;
+ return;
}
/**
* @param fromGapChar
*/
protected static void mapHiddenColumns(int[] hidden,
- List<AlignedCodonFrame> mappings,
- ColumnSelection mappedColumns, List<SequenceI> fromSequences,
- List<SequenceI> toSequences, char fromGapChar)
+ List<AlignedCodonFrame> mappings, HiddenColumns mappedColumns,
+ List<SequenceI> fromSequences, List<SequenceI> toSequences,
+ char fromGapChar)
{
for (int col = hidden[0]; col <= hidden[1]; col++)
{
* @param toSequences
* @param fromGapChar
*/
- protected static void mapColumn(int col,
- List<AlignedCodonFrame> mappings,
+ protected static void mapColumn(int col, List<AlignedCodonFrame> mappings,
ColumnSelection mappedColumns, List<SequenceI> fromSequences,
List<SequenceI> toSequences, char fromGapChar)
{
* Get the residue position and find the mapped position.
*/
int residuePos = fromSeq.findPosition(col);
- SearchResults sr = buildSearchResults(fromSeq, residuePos,
- mappings);
- for (Match m : sr.getResults())
+ SearchResultsI sr = buildSearchResults(fromSeq, residuePos, mappings);
+ for (SearchResultMatchI m : sr.getResults())
{
int mappedStartResidue = m.getStart();
int mappedEndResidue = m.getEnd();
*/
for (SequenceI toSeq : toSequences)
{
- if (toSeq.getDatasetSequence() == mappedSeq)
+ if (toSeq.getDatasetSequence() == mappedSeq
+ && mappedStartResidue >= toSeq.getStart()
+ && mappedEndResidue <= toSeq.getEnd())
{
int mappedStartCol = toSeq.findIndex(mappedStartResidue);
int mappedEndCol = toSeq.findIndex(mappedEndResidue);
public static List<char[]> findCodonsFor(SequenceI seq, int col,
List<AlignedCodonFrame> mappings)
{
- List<char[]> result = new ArrayList<char[]>();
+ List<char[]> result = new ArrayList<>();
int dsPos = seq.findPosition(col);
for (AlignedCodonFrame mapping : mappings)
{
if (mapping.involvesSequence(seq))
{
- List<char[]> codons = mapping.getMappedCodons(
- seq.getDatasetSequence(), dsPos);
+ List<char[]> codons = mapping
+ .getMappedCodons(seq.getDatasetSequence(), dsPos);
if (codons != null)
{
result.addAll(codons);
public static List<AlignedCodonFrame> findMappingsForSequence(
SequenceI sequence, List<AlignedCodonFrame> mappings)
{
- List<AlignedCodonFrame> result = new ArrayList<AlignedCodonFrame>();
+ return findMappingsForSequenceAndOthers(sequence, mappings, null); // null filterList: mappings are not restricted to those involving other sequences
+ }
+
+ /**
+ * Returns a list of any mappings that are from or to the given (aligned or
+ * dataset) sequence, optionally limited to mappings involving one of a given
+ * list of sequences.
+ *
+ * @param sequence
+ * @param mappings
+ * @param filterList
+ * @return
+ */
+ public static List<AlignedCodonFrame> findMappingsForSequenceAndOthers(
+ SequenceI sequence, List<AlignedCodonFrame> mappings,
+ List<SequenceI> filterList)
+ {
+ List<AlignedCodonFrame> result = new ArrayList<>();
if (sequence == null || mappings == null)
{
return result;
{
if (mapping.involvesSequence(sequence))
{
- result.add(mapping);
+ if (filterList != null)
+ {
+ for (SequenceI otherseq : filterList)
+ {
+ SequenceI otherDataset = otherseq.getDatasetSequence();
+ if (otherseq == sequence
+ || otherseq == sequence.getDatasetSequence()
+ || (otherDataset != null && (otherDataset == sequence
+ || otherDataset == sequence
+ .getDatasetSequence())))
+ {
+ // skip sequences in subset which directly relate to sequence
+ continue;
+ }
+ if (mapping.involvesSequence(otherseq))
+ {
+ // selected a mapping contained in subselect alignment
+ result.add(mapping);
+ break;
+ }
+ }
+ }
+ else
+ {
+ result.add(mapping);
+ }
+ }
+ }
+ return result;
+ }
+
+ /**
+  * Returns the total length of the supplied ranges, which may be as single
+  * [start, end] or multiple [start, end, start, end ...]. Each start-end pair
+  * may be ascending or descending, and both of its end points are counted.
+  * <p>
+  * Answers 0 if ranges is null. Also answers 0, after logging an error that
+  * lists the range values, if any range holds an odd number of elements.
+  *
+  * @param ranges
+  *          a list of [start, end, ...] arrays; may be null
+  * @return the summed length of all start-end pairs, or 0 as described above
+  */
+ public static int getLength(List<int[]> ranges)
+ {
+   if (ranges == null)
+   {
+     return 0;
+   }
+   int length = 0;
+   for (int[] range : ranges)
+   {
+     if (range.length % 2 != 0)
+     {
+       /*
+        * report the actual positions; List.toString() on a list of int[]
+        * would print only array identity strings
+        */
+       Console.error("Error unbalance start/end ranges: "
+               + Arrays.deepToString(ranges.toArray()));
+       return 0;
+     }
+     for (int i = 0; i < range.length - 1; i += 2)
+     {
+       length += Math.abs(range[i + 1] - range[i]) + 1;
+     }
+   }
+   return length;
+ }
+
+ /**
+ * Answers true if any range includes the given value. Only the first two
+ * elements of each range are inspected, as a [start, end] pair which may be
+ * ascending ({@code start <= end}) or descending ({@code start > end}); both
+ * end points are inclusive.
+ *
+ * @param ranges
+ * a list of [start, end] ranges; may be null
+ * @param value
+ * the position to look for
+ * @return true if value lies within at least one range, false if no range
+ * includes it or if ranges is null
+ */
+ public static boolean contains(List<int[]> ranges, int value)
+ {
+ if (ranges == null)
+ {
+ return false;
+ }
+ for (int[] range : ranges)
+ {
+ if (range[1] >= range[0] && value >= range[0] && value <= range[1])
+ {
+ /*
+ * value within ascending range
+ */
+ return true;
+ }
+ if (range[1] < range[0] && value <= range[0] && value >= range[1])
+ {
+ /*
+ * value within descending range
+ */
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Removes a specified number of positions from the start of a ranges list.
+ * For example, could be used to adjust cds ranges to allow for an incomplete
+ * start codon. Subranges are removed completely, or their start positions
+ * adjusted, until the required number of positions has been removed from the
+ * range. Reverse strand ranges are supported. The input array is not
+ * modified.
+ * <p>
+ * Two cases return the ranges unaltered: if removeCount is zero or negative
+ * the input array itself (not a copy) is returned, and if removeCount is not
+ * less than the total length of the ranges then no sub-range satisfies the
+ * test in the loop and an unchanged copy of the input is returned.
+ * <p>
+ * NOTE(review): the loop reads elements in pairs, so an array of odd length
+ * looks likely to fail with an index out of bounds error - confirm callers
+ * only pass balanced [start, end, ...] arrays.
+ *
+ * @param removeCount
+ * the number of positions to remove from the start of the ranges
+ * @param ranges
+ * an array of [start, end, start, end...] positions
+ * @return an array with the first removeCount positions removed, or the
+ * unaltered ranges in the two cases described above
+ */
+ public static int[] removeStartPositions(int removeCount,
+ final int[] ranges)
+ {
+ if (removeCount <= 0)
+ {
+ return ranges;
+ }
+
+ int[] copy = Arrays.copyOf(ranges, ranges.length);
+ int sxpos = -1;
+ int cdspos = 0;
+ for (int x = 0; x < copy.length && sxpos == -1; x += 2)
+ {
+ cdspos += Math.abs(copy[x + 1] - copy[x]) + 1;
+ if (removeCount < cdspos)
+ {
+ /*
+ * this sub-range extends beyond the positions to be removed, so it is
+ * the first one to keep (cdspos is the cumulative length up to and
+ * including it) - time to finish
+ */
+ sxpos = x;
+
+ /*
+ * move the start of this sub-range past the removed positions:
+ * increment it, or decrement if reverse strand
+ */
+ if (copy[x] <= copy[x + 1])
+ {
+ copy[x] = copy[x + 1] - cdspos + removeCount + 1;
+ }
+ else
+ {
+ copy[x] = copy[x + 1] + cdspos - removeCount - 1;
+ }
+ break;
}
}
+
+ if (sxpos > 0)
+ {
+ /*
+ * we dropped at least one entire sub-range - compact the array
+ * by copying only the elements from index sxpos onwards
+ */
+ int[] nxon = new int[copy.length - sxpos];
+ System.arraycopy(copy, sxpos, nxon, 0, copy.length - sxpos);
+ return nxon;
+ }
+ return copy;
+ }
+
+ /**
+ * Answers true if range's start-end positions include those of queryRange,
+ * where either range might be in reverse direction, else false. Both end
+ * points of range are inclusive. Also answers false if either argument is
+ * null or does not have exactly two elements.
+ *
+ * @param range
+ * a start-end range
+ * @param queryRange
+ * a candidate subrange of range (start2-end2)
+ * @return true if both positions of queryRange lie within range
+ */
+ public static boolean rangeContains(int[] range, int[] queryRange)
+ {
+ if (range == null || queryRange == null || range.length != 2
+ || queryRange.length != 2)
+ {
+ /*
+ * invalid arguments
+ */
+ return false;
+ }
+
+ int min = Math.min(range[0], range[1]);
+ int max = Math.max(range[0], range[1]);
+
+ return (min <= queryRange[0] && max >= queryRange[0]
+ && min <= queryRange[1] && max >= queryRange[1]);
+ }
+
+ /**
+  * Removes the specified number of positions from the end of the given
+  * ranges, modifying the list (and possibly its last remaining range) in
+  * place. Provided to allow a stop codon to be stripped from a CDS sequence
+  * so that it matches the peptide translation length.
+  * <p>
+  * Ranges are taken from the end of the list: a range no longer than the
+  * outstanding count is removed entirely, otherwise its end position is
+  * reduced. If the ranges hold fewer positions than requested, all of them
+  * are removed and the method returns normally. Processing stops, with an
+  * error logged, on meeting a range that is not a single [start, end] pair
+  * or that is on the reverse strand ({@code end < start}).
+  *
+  * @param positions
+  *          the number of positions to remove; nothing is done if this is
+  *          not positive
+  * @param ranges
+  *          a list of (single) [start, end] ranges
+  */
+ public static void removeEndPositions(int positions, List<int[]> ranges)
+ {
+   int toRemove = positions;
+   Iterator<int[]> it = new ReverseListIterator<>(ranges);
+
+   /*
+    * stop when enough has been removed, or when the ranges are exhausted
+    * (rather than calling next() on a spent iterator)
+    */
+   while (toRemove > 0 && it.hasNext())
+   {
+     int[] endRange = it.next();
+     if (endRange.length != 2)
+     {
+       /*
+        * not coded for [start1, end1, start2, end2, ...]
+        */
+       Console.error(
+               "MappingUtils.removeEndPositions doesn't handle multiple ranges");
+       return;
+     }
+
+     int length = endRange[1] - endRange[0] + 1;
+     if (length <= 0)
+     {
+       /*
+        * not coded for a reverse strand range (end < start)
+        */
+       Console.error(
+               "MappingUtils.removeEndPositions doesn't handle reverse strand");
+       return;
+     }
+     if (length > toRemove)
+     {
+       /*
+        * this range outlasts the removal: just shorten it
+        */
+       endRange[1] -= toRemove;
+       toRemove = 0;
+     }
+     else
+     {
+       /*
+        * the whole range goes; carry on with the one before it
+        */
+       toRemove -= length;
+       it.remove();
+     }
+   }
+ }
+
+ /**
+ * Converts a list of {@code start-end} ranges to a single array of
+ * {@code start1, end1, start2, ... } ranges. Only the first two elements of
+ * each range are copied, so the result always has twice as many elements as
+ * the list has ranges.
+ *
+ * @param ranges
+ * a list of [start, end] ranges
+ * @return a new array holding the start and end of each range, in list order
+ */
+ public static int[] rangeListToArray(List<int[]> ranges)
+ {
+ int rangeCount = ranges.size();
+ int[] result = new int[rangeCount * 2];
+ int j = 0;
+ for (int i = 0; i < rangeCount; i++)
+ {
+ int[] range = ranges.get(i);
+ result[j++] = range[0];
+ result[j++] = range[1];
+ }
return result;
}
+
+ /**
+ * Returns the maximal start-end positions in the given (ordered) list of
+ * ranges which is overlapped by the given begin-end range, or null if there
+ * is no overlap.
+ *
+ * <pre>
+ * Examples:
+ * if ranges is {[4, 8], [10, 12], [16, 19]}
+ * then
+ * findOverlap(ranges, 1, 20) == [4, 19]
+ * findOverlap(ranges, 6, 11) == [6, 11]
+ * findOverlap(ranges, 9, 15) == [10, 12]
+ * findOverlap(ranges, 13, 15) == null
+ * </pre>
+ *
+ * @param ranges
+ * a list of [start, end] ranges, traversed in list order
+ * @param begin
+ * first position of the query range
+ * @param end
+ * last position of the query range
+ * @return a new [from, to] array, or null if no overlap was found
+ */
+ protected static int[] findOverlap(List<int[]> ranges, final int begin,
+ final int end)
+ {
+ boolean foundStart = false;
+ int from = 0;
+ int to = 0;
+
+ /*
+ * traverse the ranges to find the first position (if any) >= begin,
+ * and the last position (if any) <= end
+ */
+ for (int[] range : ranges)
+ {
+ if (!foundStart)
+ {
+ if (range[0] >= begin)
+ {
+ /*
+ * first range that starts with, or follows, begin
+ */
+ foundStart = true;
+ from = Math.max(range[0], begin);
+ }
+ else if (range[1] >= begin)
+ {
+ /*
+ * first range that contains begin
+ */
+ foundStart = true;
+ from = begin;
+ }
+ }
+
+ if (range[0] <= end)
+ {
+ to = Math.min(end, range[1]);
+ }
+ }
+
+ return foundStart && to >= from ? new int[] { from, to } : null;
+ }
}