import java.util.Locale;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.NoSuchElementException;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import jalview.bin.Cache;
import jalview.commands.RemoveGapColCommand;
import jalview.datamodel.AlignedCodon;
import jalview.datamodel.AlignedCodonFrame;
import jalview.util.MapList;
import jalview.util.MappingUtils;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.NoSuchElementException;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.TreeMap;
-
/**
* grab bag of useful alignment manipulation operations Expect these to be
* refactored elsewhere at some point.
SequenceI newSeq = null;
- final MapList maplist = mapping.getMap();
- if (maplist.isContiguous() && maplist.isFromForwardStrand())
- {
- /*
- * just a subsequence, keep same dataset sequence
- */
- int start = maplist.getFromLowest();
- int end = maplist.getFromHighest();
- newSeq = seq.getSubSequence(start - 1, end);
- newSeq.setName(seqId);
- }
- else
- {
- /*
- * construct by splicing mapped from ranges
- */
- char[] seqChars = seq.getSequence();
- List<int[]> fromRanges = maplist.getFromRanges();
- int cdsWidth = MappingUtils.getLength(fromRanges);
- char[] newSeqChars = new char[cdsWidth];
+ /*
+ * construct CDS sequence by splicing mapped from ranges
+ */
+ char[] seqChars = seq.getSequence();
+ List<int[]> fromRanges = mapping.getMap().getFromRanges();
+ int cdsWidth = MappingUtils.getLength(fromRanges);
+ char[] newSeqChars = new char[cdsWidth];
- int newPos = 0;
- for (int[] range : fromRanges)
+ int newPos = 0;
+ for (int[] range : fromRanges)
+ {
+ if (range[0] <= range[1])
{
- if (range[0] <= range[1])
- {
- // forward strand mapping - just copy the range
- int length = range[1] - range[0] + 1;
- System.arraycopy(seqChars, range[0] - 1, newSeqChars, newPos,
- length);
- newPos += length;
- }
- else
+ // forward strand mapping - just copy the range
+ int length = range[1] - range[0] + 1;
+ System.arraycopy(seqChars, range[0] - 1, newSeqChars, newPos,
+ length);
+ newPos += length;
+ }
+ else
+ {
+ // reverse strand mapping - copy and complement one by one
+ for (int i = range[0]; i >= range[1]; i--)
{
- // reverse strand mapping - copy and complement one by one
- for (int i = range[0]; i >= range[1]; i--)
- {
- newSeqChars[newPos++] = Dna.getComplement(seqChars[i - 1]);
- }
+ newSeqChars[newPos++] = Dna.getComplement(seqChars[i - 1]);
}
}
}
else
{
- System.err.println(
- "JAL-2154 regression: warning - found (and ignnored a duplicate CDS sequence):"
- + mtch.toString());
+ Cache.log.error(
+ "JAL-2154 regression: warning - found (and ignored) a duplicate CDS sequence:" + mtch.toString());
}
}
}
*/
package jalview.datamodel;
-import jalview.util.MapList;
-import jalview.util.MappingUtils;
-
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.List;
+import jalview.util.MapList;
+import jalview.util.MappingUtils;
+
/**
* Stores mapping between the columns of a protein alignment and a DNA alignment
* and a list of individual codon to amino acid mappings between sequences.
{
return mapping;
}
+
+  /**
+   * Returns true if the mapping covers the full length of the given sequence.
+   * This allows us to distinguish the CDS that codes for a protein from
+   * another overlapping CDS in the parent dna sequence.
+   * <p>
+   * The sequence is matched by identity, either directly or via its dataset
+   * sequence, against the 'from' and then the 'to' end of the mapping. Every
+   * mapped range at the matched end must lie within the sequence's start-end
+   * range, and the total mapped length may fall short of the sequence length
+   * by at most one position.
+   *
+   * @param seq
+   *          the sequence to test (may be null)
+   * @return true if seq is at one end of this mapping and is covered by the
+   *         mapped ranges at that end; false otherwise, including when seq is
+   *         null
+   */
+  public boolean covers(SequenceI seq)
+  {
+    if (seq == null)
+    {
+      // a null sequence cannot be at either end of the mapping
+      return false;
+    }
+
+    MapList mapList = mapping.getMap();
+    List<int[]> mappedRanges;
+    if (fromSeq == seq || fromSeq == seq.getDatasetSequence())
+    {
+      mappedRanges = mapList.getFromRanges();
+    }
+    else if (mapping.to == seq || mapping.to == seq.getDatasetSequence())
+    {
+      mappedRanges = mapList.getToRanges();
+    }
+    else
+    {
+      return false;
+    }
+
+    /*
+     * check that each mapped range lies within the sequence range
+     * (necessary for circular CDS - example EMBL:J03321:AAA91567)
+     * and mapped length covers (at least) sequence length
+     */
+    final int seqStart = seq.getStart();
+    final int seqEnd = seq.getEnd();
+    int length = 0;
+    for (int[] range : mappedRanges)
+    {
+      // ranges may be stored high-to-low (reverse strand), so normalise
+      int from = Math.min(range[0], range[1]);
+      int to = Math.max(range[0], range[1]);
+      if (from < seqStart || to > seqEnd)
+      {
+        return false;
+      }
+      length += (to - from + 1);
+    }
+
+    // add 1 to mapped length to allow for a mapped stop codon
+    return length + 1 >= (seqEnd - seqStart + 1);
+  }
+
+  /**
+   * Locates the range(s) at the other end of this mapping that correspond to
+   * position {@code pos} of sequence {@code seq}, and adds each one to the
+   * given search results. Nothing is added if neither {@code seq} nor its
+   * dataset sequence is at either end of the mapping, or if no range is
+   * located for the position.
+   *
+   * @param seq
+   *          an aligned or dataset sequence
+   * @param pos
+   *          the position in seq to map from
+   * @param sr
+   *          the search results to add any located ranges to
+   */
+  public void markMappedRegion(SequenceI seq, int pos, SearchResultsI sr)
+  {
+    // fall back to the sequence itself when it has no dataset sequence
+    SequenceI dataset = seq.getDatasetSequence();
+    if (dataset == null)
+    {
+      dataset = seq;
+    }
+
+    final int[] located;
+    final SequenceI target;
+    if (fromSeq == seq || fromSeq == dataset)
+    {
+      located = mapping.map.locateInTo(pos, pos);
+      target = mapping.to;
+    }
+    else if (mapping.to == seq || mapping.to == dataset)
+    {
+      located = mapping.map.locateInFrom(pos, pos);
+      target = fromSeq;
+    }
+    else
+    {
+      // the sequence is not involved in this mapping
+      return;
+    }
+
+    if (located == null)
+    {
+      return;
+    }
+
+    // located holds consecutive [start, end] pairs
+    int i = 0;
+    while (i < located.length)
+    {
+      sr.addResult(target, located[i], located[i + 1]);
+      i += 2;
+    }
+  }
}
private List<SequenceToSequenceMapping> mappings;
}
/**
+ * Return the corresponding aligned or dataset dna sequence for given amino
+ * acid sequence, or null if not found. Returns the sequence from the first
+ * mapping found that involves the protein sequence.
*
- * @param sequenceRef
- * @return null or corresponding aaSeq entry for dnaSeq entry
+ * @param aaSeqRef
+ * @return
*/
public SequenceI getDnaForAaSeq(SequenceI aaSeqRef)
{
/**
* Add search results for regions in other sequences that translate or are
- * translated from a particular position in seq
+ * translated from a particular position in seq (which may be an aligned or
+ * dataset sequence)
*
* @param seq
* @param index
public void markMappedRegion(SequenceI seq, int index,
SearchResultsI results)
{
- int[] codon;
SequenceI ds = seq.getDatasetSequence();
- for (SequenceToSequenceMapping ssm : mappings)
+ if (ds == null)
{
- if (ssm.fromSeq == seq || ssm.fromSeq == ds)
- {
- codon = ssm.mapping.map.locateInTo(index, index);
- if (codon != null)
- {
- for (int i = 0; i < codon.length; i += 2)
- {
- results.addResult(ssm.mapping.to, codon[i], codon[i + 1]);
- }
- }
- }
- else if (ssm.mapping.to == seq || ssm.mapping.to == ds)
- {
- {
- codon = ssm.mapping.map.locateInFrom(index, index);
- if (codon != null)
- {
- for (int i = 0; i < codon.length; i += 2)
- {
- results.addResult(ssm.fromSeq, codon[i], codon[i + 1]);
- }
- }
- }
- }
+ ds = seq;
}
- }
-
- /**
- * Returns the DNA codon positions (base 1) for the given position (base 1) in
- * a mapped protein sequence, or null if no mapping is found.
- *
- * Intended for use in aligning cDNA to match aligned protein. Only the first
- * mapping found is returned, so not suitable for use if multiple protein
- * sequences are mapped to the same cDNA (but aligning cDNA as protein is
- * ill-defined for this case anyway).
- *
- * @param seq
- * the DNA dataset sequence
- * @param aaPos
- * residue position (base 1) in a protein sequence
- * @return
- */
- public int[] getDnaPosition(SequenceI seq, int aaPos)
- {
- /*
- * Adapted from markMappedRegion().
- */
- MapList ml = null;
- int i = 0;
for (SequenceToSequenceMapping ssm : mappings)
{
- if (ssm.fromSeq == seq)
- {
- ml = getdnaToProt()[i];
- break;
- }
- i++;
+ ssm.markMappedRegion(ds, index, results);
}
- return ml == null ? null : ml.locateInFrom(aaPos, aaPos);
}
/**
* Two AlignedCodonFrame objects are equal if they hold the same ordered list
* of mappings
*
- * @see SequenceToSequenceMapping#
+ * @see SequenceToSequenceMapping#equals
*/
@Override
public boolean equals(Object obj)
{
return mappings;
}
+
+  /**
+   * Searches the mappings in order and answers the first one that covers the
+   * full extent of both of the given sequences (as determined by
+   * {@code SequenceToSequenceMapping.covers}).
+   *
+   * @param seq1
+   *          one of the two sequences
+   * @param seq2
+   *          the other sequence
+   * @return the first mapping that covers both sequences, or null if there is
+   *         none
+   */
+  public SequenceToSequenceMapping getCoveringMapping(SequenceI seq1,
+          SequenceI seq2)
+  {
+    for (SequenceToSequenceMapping candidate : mappings)
+    {
+      // seq2 is tested first; seq1 only if seq2 is covered
+      if (!candidate.covers(seq2) || !candidate.covers(seq1))
+      {
+        continue;
+      }
+      return candidate;
+    }
+    return null;
+  }
+
+  /**
+   * Searches the mappings in order and answers the first one which is a
+   * triplet mapping (3:1 or 1:3), has the given dataset sequence at one end,
+   * and covers the full extent of both the given sequence and the sequence at
+   * the other end of the mapping.
+   *
+   * @param seq
+   *          a dataset sequence
+   * @return the first such mapping, or null if there is none
+   */
+  public SequenceToSequenceMapping getCoveringCodonMapping(SequenceI seq)
+  {
+    for (SequenceToSequenceMapping candidate : mappings)
+    {
+      if (!candidate.getMapping().getMap().isTripletMap()
+              || !candidate.covers(seq))
+      {
+        continue;
+      }
+
+      // seq is the 'from' end: the 'to' sequence must be covered as well
+      boolean coveredAsFrom = candidate.fromSeq == seq
+              && candidate.covers(candidate.getMapping().getTo());
+
+      // otherwise seq may be the 'to' end: check the 'from' sequence
+      if (coveredAsFrom || (candidate.getMapping().getTo() == seq
+              && candidate.covers(candidate.fromSeq)))
+      {
+        return candidate;
+      }
+    }
+    return null;
+  }
}
}
/**
- * Two SearchResults are considered equal if they contain the same matches in
- * the same order.
+ * Two SearchResults are considered equal if they contain the same matches
+ * (Sequence, start position, end position) in the same order
+ *
+ * @see Match#equals(Object)
*/
@Override
public boolean equals(Object obj)
* show local rather than linked feature coordinates
*/
int[] beginRange = mf.getMappedPositions(start, start);
- start = beginRange[0];
int[] endRange = mf.getMappedPositions(end, end);
+ if (beginRange == null || endRange == null)
+ {
+ // e.g. variant extending to stop codon so not mappable
+ return;
+ }
+ start = beginRange[0];
end = endRange[endRange.length - 1];
}
StringBuilder desc = new StringBuilder();
import jalview.analysis.AlignmentSorter;
import jalview.api.AlignViewportI;
+import jalview.bin.Cache;
import jalview.commands.CommandI;
import jalview.commands.EditCommand;
import jalview.commands.EditCommand.Action;
import jalview.commands.EditCommand.Edit;
import jalview.commands.OrderCommand;
import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentOrder;
import jalview.datamodel.ColumnSelection;
action = action.getUndoAction();
}
// TODO write this
- System.err.println("MappingUtils.mapCutOrPaste not yet implemented");
+ Cache.log.error("MappingUtils.mapCutOrPaste not yet implemented");
}
/**
for (AlignedCodonFrame acf : codonFrames)
{
- SequenceI mappedSequence = targetIsNucleotide
- ? acf.getDnaForAaSeq(selected)
- : acf.getAaForDnaSeq(selected);
- if (mappedSequence != null)
+ for (SequenceI seq : mapTo.getAlignment().getSequences())
{
- for (SequenceI seq : mapTo.getAlignment().getSequences())
+ SequenceI peptide = targetIsNucleotide ? selected : seq;
+ SequenceI cds = targetIsNucleotide ? seq : selected;
+ SequenceToSequenceMapping s2s = acf.getCoveringMapping(cds,
+ peptide);
+ if (s2s == null)
{
- int mappedStartResidue = 0;
- int mappedEndResidue = 0;
- if (seq.getDatasetSequence() == mappedSequence)
- {
- /*
- * Found a sequence mapping. Locate the start/end mapped residues.
- */
- List<AlignedCodonFrame> mapping = Arrays
- .asList(new AlignedCodonFrame[]
- { acf });
- SearchResultsI sr = buildSearchResults(selected,
- startResiduePos, mapping);
- for (SearchResultMatchI m : sr.getResults())
- {
- mappedStartResidue = m.getStart();
- mappedEndResidue = m.getEnd();
- }
- sr = buildSearchResults(selected, endResiduePos, mapping);
- for (SearchResultMatchI m : sr.getResults())
- {
- mappedStartResidue = Math.min(mappedStartResidue,
- m.getStart());
- mappedEndResidue = Math.max(mappedEndResidue, m.getEnd());
- }
-
- /*
- * Find the mapped aligned columns, save the range. Note findIndex
- * returns a base 1 position, SequenceGroup uses base 0
- */
- int mappedStartCol = seq.findIndex(mappedStartResidue) - 1;
- minStartCol = minStartCol == -1 ? mappedStartCol
- : Math.min(minStartCol, mappedStartCol);
- int mappedEndCol = seq.findIndex(mappedEndResidue) - 1;
- maxEndCol = maxEndCol == -1 ? mappedEndCol
- : Math.max(maxEndCol, mappedEndCol);
- mappedGroup.addSequence(seq, false);
- break;
- }
+ continue;
+ }
+ int mappedStartResidue = 0;
+ int mappedEndResidue = 0;
+ List<AlignedCodonFrame> mapping = Arrays.asList(acf);
+ SearchResultsI sr = buildSearchResults(selected, startResiduePos,
+ mapping);
+ for (SearchResultMatchI m : sr.getResults())
+ {
+ mappedStartResidue = m.getStart();
+ mappedEndResidue = m.getEnd();
}
+ sr = buildSearchResults(selected, endResiduePos, mapping);
+ for (SearchResultMatchI m : sr.getResults())
+ {
+ mappedStartResidue = Math.min(mappedStartResidue, m.getStart());
+ mappedEndResidue = Math.max(mappedEndResidue, m.getEnd());
+ }
+
+ /*
+ * Find the mapped aligned columns, save the range. Note findIndex
+ * returns a base 1 position, SequenceGroup uses base 0
+ */
+ int mappedStartCol = seq.findIndex(mappedStartResidue) - 1;
+ minStartCol = minStartCol == -1 ? mappedStartCol
+ : Math.min(minStartCol, mappedStartCol);
+ int mappedEndCol = seq.findIndex(mappedEndResidue) - 1;
+ maxEndCol = maxEndCol == -1 ? mappedEndCol
+ : Math.max(maxEndCol, mappedEndCol);
+ mappedGroup.addSequence(seq, false);
+ break;
}
}
}
{
for (AlignedCodonFrame acf : mappings)
{
- SequenceI mappedSeq = mappingToNucleotide ? acf.getDnaForAaSeq(seq)
- : acf.getAaForDnaSeq(seq);
- if (mappedSeq != null)
- {
for (SequenceI seq2 : mapTo.getSequences())
{
- if (seq2.getDatasetSequence() == mappedSeq)
+ /*
+ * the corresponding peptide / CDS is the one for which there is
+ * a complete ('covering') mapping to 'seq'
+ */
+ SequenceI peptide = mappingToNucleotide ? seq2 : seq;
+ SequenceI cds = mappingToNucleotide ? seq : seq2;
+ SequenceToSequenceMapping s2s = acf.getCoveringMapping(cds,
+ peptide);
+ if (s2s != null)
{
mappedOrder.add(seq2);
j++;
break;
}
}
- }
}
}
if (colsel == null)
{
- return; // mappedColumns;
+ return;
}
char fromGapChar = mapFrom.getAlignment().getGapCharacter();
mapHiddenColumns(regions.next(), codonFrames, newHidden,
fromSequences, toSequences, fromGapChar);
}
- return; // mappedColumns;
+ return;
}
/**
*/
for (SequenceI toSeq : toSequences)
{
- if (toSeq.getDatasetSequence() == mappedSeq)
+ if (toSeq.getDatasetSequence() == mappedSeq
+ && mappedStartResidue >= toSeq.getStart()
+ && mappedEndResidue <= toSeq.getEnd())
{
int mappedStartCol = toSeq.findIndex(mappedStartResidue);
int mappedEndCol = toSeq.findIndex(mappedEndResidue);
import jalview.api.FeatureColourI;
import jalview.api.FeaturesDisplayedI;
import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.MappedFeatures;
-import jalview.datamodel.Mapping;
import jalview.datamodel.SearchResultMatchI;
import jalview.datamodel.SearchResults;
import jalview.datamodel.SearchResultsI;
* todo: direct lookup of CDS for peptide and vice-versa; for now,
* have to search through an unordered list of mappings for a candidate
*/
- Mapping mapping = null;
+ SequenceToSequenceMapping mapping = null;
SequenceI mapFrom = null;
for (AlignedCodonFrame acf : mappings)
{
- mapping = acf.getMappingForSequence(sequence);
- if (mapping == null || !mapping.getMap().isTripletMap())
+ mapping = acf.getCoveringCodonMapping(ds);
+ if (mapping == null)
{
- continue; // we are only looking for 3:1 or 1:3 mappings
+ continue;
}
SearchResultsI sr = new SearchResults();
- acf.markMappedRegion(ds, pos, sr);
+ mapping.markMappedRegion(ds, pos, sr);
for (SearchResultMatchI match : sr.getResults())
{
int fromRes = match.getStart();
}
}
- return new MappedFeatures(mapping, mapFrom, pos, residue, result);
+ return new MappedFeatures(mapping.getMapping(), mapFrom, pos, residue, result);
}
@Override
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertNotNull;
import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertSame;
import static org.testng.AssertJUnit.assertTrue;
import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
-import jalview.gui.JvOptionPane;
-import jalview.util.MapList;
-
import java.util.Arrays;
import java.util.List;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
+import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
+import jalview.gui.JvOptionPane;
+import jalview.util.MapList;
+
public class AlignedCodonFrameTest
{
public void testGetMappedRegion()
{
// introns lower case, exons upper case
- final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T");
- seq1.createDatasetSequence();
- final Sequence seq2 = new Sequence("Seq2", "-TA-gG-Gg-CG-a");
- seq2.createDatasetSequence();
+ final Sequence dna1 = new Sequence("Seq1/10-18", "c-G-TA-gC-gT-T");
+ dna1.createDatasetSequence();
+ final Sequence dna2 = new Sequence("Seq2/20-28", "-TA-gG-Gg-CG-a");
+ dna2.createDatasetSequence();
- final Sequence aseq1 = new Sequence("Seq1", "-P-R");
- aseq1.createDatasetSequence();
- final Sequence aseq2 = new Sequence("Seq2", "-LY-Q");
- aseq2.createDatasetSequence();
+ final Sequence pep1 = new Sequence("Seq1/3-4", "-P-R");
+ pep1.createDatasetSequence();
+ final Sequence pep2 = new Sequence("Seq2/7-9", "-LY-Q");
+ pep2.createDatasetSequence();
/*
* First with no mappings
*/
AlignedCodonFrame acf = new AlignedCodonFrame();
- assertNull(acf.getMappedRegion(seq1, aseq1, 1));
+ assertNull(acf.getMappedRegion(dna1, pep1, 3));
/*
* Set up the mappings for the exons (upper-case bases)
* Note residue Q is unmapped
*/
- MapList map = new MapList(new int[] { 2, 4, 6, 6, 8, 9 }, new int[] {
- 1, 2 }, 3, 1);
- acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map);
- map = new MapList(new int[] { 1, 2, 4, 5, 7, 8 }, new int[] { 1, 2 },
+ MapList map1 = new MapList(new int[] { 11, 13, 15, 15, 17, 18 }, new int[] {
+ 3, 4 }, 3, 1);
+ acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map1);
+ MapList map2 = new MapList(new int[] { 20, 21, 23, 24, 26, 27 }, new int[] { 7, 8 },
3, 1);
- acf.addMap(seq2.getDatasetSequence(), aseq2.getDatasetSequence(), map);
+ acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map2);
- assertArrayEquals(new int[] { 2, 4 },
- acf.getMappedRegion(seq1, aseq1, 1));
- assertArrayEquals(new int[] { 6, 6, 8, 9 },
- acf.getMappedRegion(seq1, aseq1, 2));
- assertArrayEquals(new int[] { 1, 2, 4, 4 },
- acf.getMappedRegion(seq2, aseq2, 1));
- assertArrayEquals(new int[] { 5, 5, 7, 8 },
- acf.getMappedRegion(seq2, aseq2, 2));
+ /*
+ * get codon positions for peptide position
+ */
+ assertArrayEquals(new int[] { 11, 13 },
+ acf.getMappedRegion(dna1, pep1, 3));
+ assertArrayEquals(new int[] { 15, 15, 17, 18 },
+ acf.getMappedRegion(dna1, pep1, 4));
+ assertArrayEquals(new int[] { 20, 21, 23, 23 },
+ acf.getMappedRegion(dna2, pep2, 7));
+ assertArrayEquals(new int[] { 24, 24, 26, 27 },
+ acf.getMappedRegion(dna2, pep2, 8));
/*
- * No mapping from seq2 to Q
+ * No mapping from dna2 to Q
*/
- assertNull(acf.getMappedRegion(seq2, aseq2, 3));
+ assertNull(acf.getMappedRegion(dna2, pep2, 9));
/*
- * No mapping from sequence 1 to sequence 2
+ * No mapping from dna1 to pep2
*/
- assertNull(acf.getMappedRegion(seq1, aseq2, 1));
+ assertNull(acf.getMappedRegion(dna1, pep2, 7));
+
+ /*
+ * get peptide position for codon position
+ */
+ assertArrayEquals(new int[] { 3, 3 },
+ acf.getMappedRegion(pep1, dna1, 11));
+ assertArrayEquals(new int[] { 3, 3 },
+ acf.getMappedRegion(pep1, dna1, 12));
+ assertArrayEquals(new int[] { 3, 3 },
+ acf.getMappedRegion(pep1, dna1, 13));
+ assertNull(acf.getMappedRegion(pep1, dna1, 14)); // intron base, not mapped
+
}
@Test(groups = { "Functional" })
assertEquals(1, acf.getMappingsFromSequence(seq1).size());
assertSame(before, acf.getMappingsFromSequence(seq1).get(0));
}
+
+  /**
+   * Checks that getCoveringMapping only answers a mapping when both of the
+   * supplied sequences are covered by it
+   */
+  @Test(groups = { "Functional" })
+  public void testGetCoveringMapping()
+  {
+    SequenceI gene = new Sequence("dna", "acttcaATGGCGGACtaattt");
+    SequenceI codingSeq = new Sequence("cds/7-15", "ATGGCGGAC");
+    codingSeq.setDatasetSequence(gene);
+    SequenceI peptide = new Sequence("pep", "MAD");
+
+    // a frame holding no mappings finds nothing, whatever the arguments
+    AlignedCodonFrame frame = new AlignedCodonFrame();
+    assertNull(frame.getCoveringMapping(null, null));
+    assertNull(frame.getCoveringMapping(gene, null));
+    assertNull(frame.getCoveringMapping(null, peptide));
+    assertNull(frame.getCoveringMapping(gene, peptide));
+
+    // non-covering mapping (e.g. overlapping exon): first codon only
+    frame.addMap(gene, peptide,
+            new MapList(new int[] { 7, 9 }, new int[] { 1, 1 }, 3, 1));
+    assertNull(frame.getCoveringMapping(gene, peptide));
+
+    // non-covering mapping: a different sub-range of the dna
+    frame = new AlignedCodonFrame();
+    frame.addMap(gene, peptide,
+            new MapList(new int[] { 13, 18 }, new int[] { 2, 2 }, 3, 1));
+    assertNull(frame.getCoveringMapping(gene, peptide));
+
+    // mapping of exactly the CDS extent: found for the CDS, not the whole dna
+    frame = new AlignedCodonFrame();
+    frame.addMap(gene, peptide,
+            new MapList(new int[] { 7, 15 }, new int[] { 1, 3 }, 3, 1));
+    assertNull(frame.getCoveringMapping(gene, peptide));
+    SequenceToSequenceMapping found = frame.getCoveringMapping(codingSeq,
+            peptide);
+    assertNotNull(found);
+
+    // mapping that extends to the stop codon: only a CDS which also
+    // includes the stop codon is covered by it
+    frame = new AlignedCodonFrame();
+    frame.addMap(gene, peptide,
+            new MapList(new int[] { 7, 18 }, new int[] { 1, 3 }, 3, 1));
+    assertNull(frame.getCoveringMapping(gene, peptide));
+    assertNull(frame.getCoveringMapping(codingSeq, peptide));
+    SequenceI cdsWithStop = new Sequence("cds/7-18", "ATGGCGGACtaa");
+    cdsWithStop.setDatasetSequence(gene);
+    found = frame.getCoveringMapping(cdsWithStop, peptide);
+    assertNotNull(found);
+  }
+
+  /**
+   * Test the method that adds mapped positions to SearchResults
+   */
+  @Test(groups = { "Functional" })
+  public void testMarkMappedRegion()
+  {
+    // introns lower case, exons upper case
+    final Sequence dna1 = new Sequence("Seq1/10-18", "c-G-TA-gC-gT-T");
+    dna1.createDatasetSequence();
+    final Sequence dna2 = new Sequence("Seq2/20-28", "-TA-gG-Gg-CG-a");
+    dna2.createDatasetSequence();
+
+    final Sequence pep1 = new Sequence("Seq1/3-4", "-P-R");
+    pep1.createDatasetSequence();
+    final Sequence pep2 = new Sequence("Seq2/7-9", "-LY-Q");
+    pep2.createDatasetSequence();
+
+    /*
+     * First with no mappings
+     */
+    AlignedCodonFrame acf = new AlignedCodonFrame();
+    SearchResults sr = new SearchResults();
+    acf.markMappedRegion(dna1, 12, sr);
+    assertTrue(sr.isEmpty());
+
+    /*
+     * Set up the mappings for the exons (upper-case bases)
+     * Note residue Q is unmapped
+     */
+    MapList map1 = new MapList(new int[] { 11, 13, 15, 15, 17, 18 },
+            new int[] { 3, 4 }, 3, 1);
+    acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map1);
+    MapList map2 = new MapList(new int[] { 20, 21, 23, 24, 26, 27 },
+            new int[] { 7, 8 }, 3, 1);
+    acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map2);
+
+    /*
+     * intron bases are not mapped
+     */
+    acf.markMappedRegion(dna1, 10, sr);
+    assertTrue(sr.isEmpty());
+
+    /*
+     * Q is not mapped
+     */
+    acf.markMappedRegion(pep2, 9, sr);
+    assertTrue(sr.isEmpty());
+
+    /*
+     * mark peptide position for exon position (of aligned sequence)
+     */
+    acf.markMappedRegion(dna1, 11, sr);
+    SearchResults expected = new SearchResults();
+    expected.addResult(pep1.getDatasetSequence(), 3, 3);
+    // AssertJUnit argument order is (expected, actual)
+    assertEquals(expected, sr);
+
+    /*
+     * mark peptide position for exon position of dataset sequence - same result
+     */
+    sr = new SearchResults();
+    acf.markMappedRegion(dna1.getDatasetSequence(), 11, sr);
+    assertEquals(expected, sr);
+
+    /*
+     * marking another position in the same codon maps to the same peptide
+     * position, and should not create a duplicate match
+     */
+    acf.markMappedRegion(dna1.getDatasetSequence(), 12, sr);
+    assertEquals(expected, sr);
+
+    /*
+     * mark exon positions for peptide position (of aligned sequence)
+     */
+    sr = new SearchResults();
+    acf.markMappedRegion(pep2, 7, sr); // codon positions 20, 21, 23
+    expected = new SearchResults();
+    expected.addResult(dna2.getDatasetSequence(), 20, 21);
+    expected.addResult(dna2.getDatasetSequence(), 23, 23);
+    assertEquals(expected, sr);
+
+    /*
+     * add another codon to the same SearchResults
+     */
+    acf.markMappedRegion(pep1.getDatasetSequence(), 4, sr); // codon positions 15, 17, 18
+    expected.addResult(dna1.getDatasetSequence(), 15, 15);
+    expected.addResult(dna1.getDatasetSequence(), 17, 18);
+    assertEquals(expected, sr);
+  }
+
+  /**
+   * Checks that getCoveringCodonMapping only answers a mapping when the given
+   * sequence, and the sequence at the other end of the mapping, are both
+   * covered by it
+   */
+  @Test(groups = { "Functional" })
+  public void testGetCoveringCodonMapping()
+  {
+    SequenceI gene = new Sequence("dna/10-30", "acttcaATGGCGGACtaattt");
+    // the CDS here has a dataset sequence of its own (JAL-3763)
+    SequenceI codingSeq = new Sequence("cds/1-9", "-A--TGGC-GGAC");
+    codingSeq.createDatasetSequence();
+    SequenceI peptide = new Sequence("pep/1-3", "MAD");
+
+    // a frame holding no mappings finds nothing, whatever the argument
+    AlignedCodonFrame frame = new AlignedCodonFrame();
+    assertNull(frame.getCoveringCodonMapping(null));
+    assertNull(frame.getCoveringCodonMapping(gene));
+    assertNull(frame.getCoveringCodonMapping(peptide));
+
+    // non-covering mapping (e.g. overlapping exon): one codon only
+    frame.addMap(gene, peptide,
+            new MapList(new int[] { 16, 18 }, new int[] { 1, 1 }, 3, 1));
+    assertNull(frame.getCoveringCodonMapping(gene));
+    assertNull(frame.getCoveringCodonMapping(peptide));
+
+    // non-covering mapping: a different sub-range of the dna
+    frame = new AlignedCodonFrame();
+    frame.addMap(gene, peptide,
+            new MapList(new int[] { 13, 18 }, new int[] { 2, 2 }, 3, 1));
+    assertNull(frame.getCoveringCodonMapping(gene));
+    assertNull(frame.getCoveringCodonMapping(peptide));
+
+    // covering mapping from the CDS dataset sequence to the protein:
+    // the same mapping is found from either end
+    frame = new AlignedCodonFrame();
+    frame.addMap(codingSeq.getDatasetSequence(), peptide,
+            new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3, 1));
+    assertNull(frame.getCoveringCodonMapping(gene));
+    SequenceToSequenceMapping viaPeptide = frame
+            .getCoveringCodonMapping(peptide);
+    assertNotNull(viaPeptide);
+    SequenceToSequenceMapping viaCds = frame
+            .getCoveringCodonMapping(codingSeq.getDatasetSequence());
+    assertSame(viaPeptide, viaCds);
+
+    // mapping that extends to the stop codon (EMBL CDS location often
+    // includes it): getCoveringCodonMapping is lenient and does not require
+    // an exact length match
+    SequenceI cdsWithStop = new Sequence("cds/1-12", "-A--TGGC-GGACTAA");
+    cdsWithStop.createDatasetSequence();
+    frame = new AlignedCodonFrame();
+    frame.addMap(cdsWithStop, peptide,
+            new MapList(new int[] { 1, 12 }, new int[] { 1, 3 }, 3, 1));
+    viaCds = frame.getCoveringCodonMapping(cdsWithStop.getDatasetSequence());
+    assertNotNull(viaCds);
+    viaPeptide = frame.getCoveringCodonMapping(peptide);
+    assertSame(viaCds, viaPeptide);
+  }
}
{
Cache.initLogger();
}
-
+
@BeforeClass(alwaysRun = true)
public void setUpJvOptionPane()
{
assertArrayEquals(new int[] { 5, 6 }, merged.get(1));
assertArrayEquals(new int[] { 12, 8 }, merged.get(2));
assertArrayEquals(new int[] { 8, 7 }, merged.get(3));
-
+
// 'subsumed' ranges are preserved
ranges.clear();
ranges.add(new int[] { 10, 30 });
- ranges.add(new int[] { 15, 25 });
+ ranges.add(new int[] { 15, 25 });
+
merged = MapList.coalesceRanges(ranges);
assertEquals(2, merged.size());
assertArrayEquals(new int[] { 10, 30 }, merged.get(0));
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
+import java.awt.Color;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
import jalview.api.AlignViewportI;
import jalview.bin.Cache;
import jalview.commands.EditCommand;
protein.setCodonFrames(acfList);
/*
- * Select Seq1 and Seq3 in the protein (startRes=endRes=0)
+ * Select Seq1 and Seq3 in the protein
*/
SequenceGroup sg = new SequenceGroup();
sg.setColourText(true);
sg.setOutlineColour(Color.LIGHT_GRAY);
sg.addSequence(protein.getSequenceAt(0), false);
sg.addSequence(protein.getSequenceAt(2), false);
+ sg.setEndRes(protein.getWidth() - 1);
/*
* Verify the mapped sequence group in dna
assertSame(cdna.getSequenceAt(0), mappedGroup.getSequences().get(0));
assertSame(cdna.getSequenceAt(2), mappedGroup.getSequences().get(1));
assertEquals(0, mappedGroup.getStartRes());
- assertEquals(2, mappedGroup.getEndRes());
+ assertEquals(2, mappedGroup.getEndRes()); // 3 columns (1 codon)
/*
* Verify mapping sequence group from dna to protein
// expected
}
}
+
+  /**
+   * Test mapping a sequence group where sequences in and outside the group
+   * share a dataset sequence (e.g. alternative CDS for the same gene)
+   * <p>
+   * This scenario doesn't arise after JAL-3763 changes, but the test is kept
+   * as it is still valid
+   * 
+   * @throws IOException
+   */
+  @Test(groups = { "Functional" })
+  public void testMapSequenceGroup_sharedDataset() throws IOException
+  {
+    /*
+     * Set up dna and protein Seq1/2/3 with mappings (held on the protein
+     * viewport). CDS sequences share the same 'gene' dataset sequence.
+     */
+    SequenceI dna = new Sequence("dna", "aaatttgggcccaaatttgggccc");
+    SequenceI cds1 = new Sequence("cds1/1-6", "aaattt");
+    SequenceI cds2 = new Sequence("cds2/4-9", "tttggg");
+    SequenceI cds3 = new Sequence("cds3/19-24", "gggccc");
+
+    cds1.setDatasetSequence(dna);
+    cds2.setDatasetSequence(dna);
+    cds3.setDatasetSequence(dna);
+
+    SequenceI pep1 = new Sequence("pep1", "KF");
+    SequenceI pep2 = new Sequence("pep2", "FG");
+    SequenceI pep3 = new Sequence("pep3", "GP");
+    pep1.createDatasetSequence();
+    pep2.createDatasetSequence();
+    pep3.createDatasetSequence();
+
+    /*
+     * add mappings from coding positions of dna to respective peptides
+     */
+    AlignedCodonFrame acf = new AlignedCodonFrame();
+    acf.addMap(dna, pep1,
+            new MapList(new int[]
+            { 1, 6 }, new int[] { 1, 2 }, 3, 1));
+    acf.addMap(dna, pep2,
+            new MapList(new int[]
+            { 4, 9 }, new int[] { 1, 2 }, 3, 1));
+    acf.addMap(dna, pep3,
+            new MapList(new int[]
+            { 19, 24 }, new int[] { 1, 2 }, 3, 1));
+
+    List<AlignedCodonFrame> acfList = Arrays.asList(acf);
+
+    AlignmentI cdna = new Alignment(new SequenceI[] { cds1, cds2, cds3 });
+    AlignmentI protein = new Alignment(
+            new SequenceI[]
+            { pep1, pep2, pep3 });
+    AlignViewportI cdnaView = new AlignViewport(cdna);
+    AlignViewportI peptideView = new AlignViewport(protein);
+    protein.setCodonFrames(acfList);
+
+    /*
+     * Select pep1 and pep3 in the protein alignment
+     */
+    SequenceGroup sg = new SequenceGroup();
+    sg.setColourText(true);
+    sg.setIdColour(Color.GREEN);
+    sg.setOutlineColour(Color.LIGHT_GRAY);
+    sg.addSequence(pep1, false);
+    sg.addSequence(pep3, false);
+    sg.setEndRes(protein.getWidth() - 1);
+
+    /*
+     * Verify the mapped sequence group in dna is cds1 and cds3
+     */
+    SequenceGroup mappedGroup = MappingUtils.mapSequenceGroup(sg,
+            peptideView, cdnaView);
+    assertTrue(mappedGroup.getColourText());
+    assertSame(sg.getIdColour(), mappedGroup.getIdColour());
+    assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour());
+    assertEquals(2, mappedGroup.getSequences().size());
+    assertSame(cds1, mappedGroup.getSequences().get(0));
+    assertSame(cds3, mappedGroup.getSequences().get(1));
+    // columns 1-6 selected (0-5 base zero)
+    assertEquals(0, mappedGroup.getStartRes());
+    assertEquals(5, mappedGroup.getEndRes());
+
+    /*
+     * Verify mapping sequence group from dna to protein
+     */
+    sg.clear();
+    sg.addSequence(cds2, false);
+    sg.addSequence(cds1, false);
+    sg.setStartRes(0);
+    sg.setEndRes(cdna.getWidth() - 1);
+    mappedGroup = MappingUtils.mapSequenceGroup(sg, cdnaView, peptideView);
+    assertTrue(mappedGroup.getColourText());
+    assertSame(sg.getIdColour(), mappedGroup.getIdColour());
+    assertSame(sg.getOutlineColour(), mappedGroup.getOutlineColour());
+    assertEquals(2, mappedGroup.getSequences().size());
+    assertSame(protein.getSequenceAt(1), mappedGroup.getSequences().get(0));
+    assertSame(protein.getSequenceAt(0), mappedGroup.getSequences().get(1));
+    assertEquals(0, mappedGroup.getStartRes());
+    assertEquals(1, mappedGroup.getEndRes()); // two columns
+  }
}