import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertNotNull;
import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertSame;
import static org.testng.AssertJUnit.assertTrue;
import jalview.datamodel.Annotation;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.Mapping;
-import jalview.datamodel.SearchResults;
-import jalview.datamodel.SearchResults.Match;
+import jalview.datamodel.SearchResultMatchI;
+import jalview.datamodel.SearchResultsI;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
+import jalview.datamodel.features.SequenceFeatures;
+import jalview.gui.JvOptionPane;
import jalview.io.AppletFormatAdapter;
+import jalview.io.DataSourceType;
+import jalview.io.FileFormat;
+import jalview.io.FileFormatI;
import jalview.io.FormatAdapter;
import jalview.util.MapList;
import jalview.util.MappingUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
-import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
public class AlignmentUtilsTests
{
+
+ @BeforeClass(alwaysRun = true)
+ public void setUpJvOptionPane()
+ {
+ JvOptionPane.setInteractiveMode(false);
+ JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
+ }
+
public static Sequence ts = new Sequence("short",
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm");
SequenceI s1 = ts.deriveSequence().getSubSequence(i, i + 7);
al.addSequence(s1);
}
- System.out.println(new AppletFormatAdapter().formatSequences("Clustal",
+ System.out.println(new AppletFormatAdapter().formatSequences(
+ FileFormat.Clustal,
al, true));
for (int flnk = -1; flnk < 25; flnk++)
{
AlignmentI exp = AlignmentUtils.expandContext(al, flnk);
System.out.println("\nFlank size: " + flnk);
System.out.println(new AppletFormatAdapter().formatSequences(
- "Clustal", exp, true));
+ FileFormat.Clustal, exp, true));
if (flnk == -1)
{
/*
{
final String data = ">Seq1Name\nKQYL\n" + ">Seq2Name\nRFPW\n"
+ ">Seq1Name\nABCD\n";
- AlignmentI al = loadAlignment(data, "FASTA");
+ AlignmentI al = loadAlignment(data, FileFormat.Fasta);
Map<String, List<SequenceI>> map = AlignmentUtils
.getSequencesByName(al);
assertEquals(2, map.keySet().size());
* @return
* @throws IOException
*/
- protected AlignmentI loadAlignment(final String data, String format)
+ protected AlignmentI loadAlignment(final String data, FileFormatI format)
throws IOException
{
AlignmentI a = new FormatAdapter().readFile(data,
- AppletFormatAdapter.PASTE, format);
+ DataSourceType.PASTE, format);
a.setDataset(null);
return a;
}
SequenceI alignFrom = new Sequence("Seq2", alignModel);
alignFrom.createDatasetSequence();
AlignedCodonFrame acf = new AlignedCodonFrame();
- acf.addMap(alignMe.getDatasetSequence(), alignFrom.getDatasetSequence(), map);
+ acf.addMap(alignMe.getDatasetSequence(),
+ alignFrom.getDatasetSequence(), map);
AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "---", '-',
preserveMappedGaps, preserveUnmappedGaps);
@Test(groups = { "Functional" })
public void testMakeCdsAlignment()
{
+ /*
+ * scenario:
+ * dna1 --> [4, 6] [10,12] --> pep1
+ * dna2 --> [1, 3] [7, 9] [13,15] --> pep2
+ */
SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
SequenceI pep1 = new Sequence("pep1", "GF");
SequenceI pep2 = new Sequence("pep2", "GFP");
+ pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "pep1"));
+ pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "pep2"));
dna1.createDatasetSequence();
dna2.createDatasetSequence();
pep1.createDatasetSequence();
pep2.createDatasetSequence();
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds5", 13, 15, 0f,
- null));
AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
dna.setDataset(null);
- MapList map = new MapList(new int[] { 4, 6, 10, 12 },
- new int[] { 1, 2 }, 3, 1);
+ /*
+ * put a variant feature on dna2 base 8
+ * - should transfer to cds2 base 5
+ */
+ dna2.addSequenceFeature(new SequenceFeature("variant", "hgmd", 8, 8,
+ 0f, null));
+
+ /*
+ * need a sourceDbRef if we are to construct dbrefs to the CDS
+ * sequence from the dna contig sequences
+ */
+ DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1");
+ dna1.getDatasetSequence().addDBRef(dbref);
+ org.testng.Assert.assertEquals(dbref, dna1.getPrimaryDBRefs().get(0));
+ dbref = new DBRefEntry("ENSEMBL", "0", "dna2");
+ dna2.getDatasetSequence().addDBRef(dbref);
+ org.testng.Assert.assertEquals(dbref, dna2.getPrimaryDBRefs().get(0));
+
+ /*
+ * CDS sequences are 'discovered' from dna-to-protein mappings on the alignment
+ * dataset (e.g. added from dbrefs by CrossRef.findXrefSequences)
+ */
+ MapList mapfordna1 = new MapList(new int[] { 4, 6, 10, 12 }, new int[] {
+ 1, 2 }, 3, 1);
AlignedCodonFrame acf = new AlignedCodonFrame();
- acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
+ acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(),
+ mapfordna1);
dna.addCodonFrame(acf);
- map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 },
- 3, 1);
+ MapList mapfordna2 = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },
+ new int[] { 1, 3 }, 3, 1);
acf = new AlignedCodonFrame();
- acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
+ acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(),
+ mapfordna2);
dna.addCodonFrame(acf);
/*
+ * In this case, mappings originally came from matching Uniprot accessions - so need an xref on dna involving those regions. These are normally constructed from CDS annotation
+ */
+ DBRefEntry dna1xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep1",
+ new Mapping(mapfordna1));
+ dna1.getDatasetSequence().addDBRef(dna1xref);
+ DBRefEntry dna2xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep2",
+ new Mapping(mapfordna2));
+ dna2.getDatasetSequence().addDBRef(dna2xref);
+
+ /*
* execute method under test:
*/
AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
- dna1, dna2 }, dna.getDataset());
+ dna1, dna2 }, dna.getDataset(), null);
+ /*
+ * verify cds sequences
+ */
assertEquals(2, cds.getSequences().size());
- assertEquals("GGGTTT", cds.getSequenceAt(0)
- .getSequenceAsString());
- assertEquals("GGGTTTCCC", cds.getSequenceAt(1)
- .getSequenceAsString());
+ assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
+ assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
/*
* verify shared, extended alignment dataset
*/
assertSame(dna.getDataset(), cds.getDataset());
- assertTrue(dna.getDataset().getSequences()
- .contains(cds.getSequenceAt(0).getDatasetSequence()));
- assertTrue(dna.getDataset().getSequences()
- .contains(cds.getSequenceAt(1).getDatasetSequence()));
+ SequenceI cds1Dss = cds.getSequenceAt(0).getDatasetSequence();
+ SequenceI cds2Dss = cds.getSequenceAt(1).getDatasetSequence();
+ assertTrue(dna.getDataset().getSequences().contains(cds1Dss));
+ assertTrue(dna.getDataset().getSequences().contains(cds2Dss));
+
+ /*
+ * verify CDS has a dbref with mapping to peptide
+ */
+ assertNotNull(cds1Dss.getDBRefs());
+ assertEquals(2, cds1Dss.getDBRefs().length);
+ dbref = cds1Dss.getDBRefs()[0];
+ assertEquals(dna1xref.getSource(), dbref.getSource());
+ // version is via ensembl's primary ref
+ assertEquals(dna1xref.getVersion(), dbref.getVersion());
+ assertEquals(dna1xref.getAccessionId(), dbref.getAccessionId());
+ assertNotNull(dbref.getMap());
+ assertSame(pep1.getDatasetSequence(), dbref.getMap().getTo());
+ MapList cdsMapping = new MapList(new int[] { 1, 6 },
+ new int[] { 1, 2 }, 3, 1);
+ assertEquals(cdsMapping, dbref.getMap().getMap());
/*
- * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
- * the mappings are on the shared alignment dataset
+ * verify peptide has added a dbref with reverse mapping to CDS
*/
- List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
+ assertNotNull(pep1.getDBRefs());
+ // FIXME pep1.getDBRefs() is 1 - is that the correct behaviour ?
+ assertEquals(2, pep1.getDBRefs().length);
+ dbref = pep1.getDBRefs()[1];
+ assertEquals("ENSEMBL", dbref.getSource());
+ assertEquals("0", dbref.getVersion());
+ assertEquals("CDS|dna1", dbref.getAccessionId());
+ assertNotNull(dbref.getMap());
+ assertSame(cds1Dss, dbref.getMap().getTo());
+ assertEquals(cdsMapping.getInverse(), dbref.getMap().getMap());
+
/*
+ * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
+ * the mappings are on the shared alignment dataset
* 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)
*/
+ List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
assertEquals(6, cdsMappings.size());
+
/*
- * verify that mapping sets for dna and cds alignments are different
+ * verify that mapping sets for dna and cds alignments are different
+ * [not current behaviour - all mappings are on the alignment dataset]
*/
- Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());
- assertEquals(4, dna.getCodonFrames());
- assertEquals(4, cds.getCodonFrames());
+ // select -> subselect type to test.
+ // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());
+ // assertEquals(4, dna.getCodonFrames().size());
+ // assertEquals(4, cds.getCodonFrames().size());
+
/*
+ * Two mappings involve pep1 (dna to pep1, cds to pep1)
* Mapping from pep1 to GGGTTT in first new exon sequence
*/
- List<AlignedCodonFrame> pep1Mapping = MappingUtils
- .findMappingsForSequence(pep1, cds.getCodonFrames());
- assertEquals(1, pep1Mapping.size()); // CDS->Pep
+ List<AlignedCodonFrame> pep1Mappings = MappingUtils
+ .findMappingsForSequence(pep1, cdsMappings);
+ assertEquals(2, pep1Mappings.size());
+ List<AlignedCodonFrame> mappings = MappingUtils
+ .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings);
+ assertEquals(1, mappings.size());
// map G to GGG
- SearchResults sr = MappingUtils
- .buildSearchResults(pep1, 1, cdsMappings);
+ SearchResultsI sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
assertEquals(1, sr.getResults().size());
- Match m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(0).getDatasetSequence(),
- m.getSequence());
+ SearchResultMatchI m = sr.getResults().get(0);
+ assertSame(cds1Dss, m.getSequence());
assertEquals(1, m.getStart());
assertEquals(3, m.getEnd());
// map F to TTT
- sr = MappingUtils.buildSearchResults(pep1, 2, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(0).getDatasetSequence(),
- m.getSequence());
+ assertSame(cds1Dss, m.getSequence());
assertEquals(4, m.getStart());
assertEquals(6, m.getEnd());
/*
- * Mapping from pep2 to GGGTTTCCC in second new exon sequence
+ * Two mappings involve pep2 (dna to pep2, cds to pep2)
+ * Verify mapping from pep2 to GGGTTTCCC in second new exon sequence
*/
- List<AlignedCodonFrame> pep2Mapping = MappingUtils
+ List<AlignedCodonFrame> pep2Mappings = MappingUtils
.findMappingsForSequence(pep2, cdsMappings);
- assertEquals(1, pep2Mapping.size());
+ assertEquals(2, pep2Mappings.size());
+ mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1),
+ pep2Mappings);
+ assertEquals(1, mappings.size());
// map G to GGG
- sr = MappingUtils.buildSearchResults(pep2, 1, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
assertEquals(1, sr.getResults().size());
m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(1).getDatasetSequence(),
- m.getSequence());
+ assertSame(cds2Dss, m.getSequence());
assertEquals(1, m.getStart());
assertEquals(3, m.getEnd());
// map F to TTT
- sr = MappingUtils.buildSearchResults(pep2, 2, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(1).getDatasetSequence(),
- m.getSequence());
+ assertSame(cds2Dss, m.getSequence());
assertEquals(4, m.getStart());
assertEquals(6, m.getEnd());
// map P to CCC
- sr = MappingUtils.buildSearchResults(pep2, 3, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
m = sr.getResults().get(0);
- assertSame(cds.getSequenceAt(1).getDatasetSequence(),
- m.getSequence());
+ assertSame(cds2Dss, m.getSequence());
assertEquals(7, m.getStart());
assertEquals(9, m.getEnd());
+
+ /*
+ * check cds2 acquired a variant feature in position 5
+ */
+ List<SequenceFeature> sfs = cds2Dss.getSequenceFeatures();
+ assertNotNull(sfs);
+ assertEquals(1, sfs.size());
+ assertEquals("variant", sfs.get(0).type);
+ assertEquals(5, sfs.get(0).begin);
+ assertEquals(5, sfs.get(0).end);
}
/**
pep1.createDatasetSequence();
pep2.createDatasetSequence();
pep3.createDatasetSequence();
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds5", 1, 3, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds6", 10, 12, 0f,
- null));
pep1.getDatasetSequence().addDBRef(
new DBRefEntry("EMBLCDS", "2", "A12345"));
pep2.getDatasetSequence().addDBRef(
* execute method under test
*/
AlignmentI cdsal = AlignmentUtils.makeCdsAlignment(
- new SequenceI[] { dna1 }, dna);
+ new SequenceI[] { dna1 }, dna.getDataset(), null);
/*
* Verify we have 3 cds sequences, mapped to pep1/2/3 respectively
SequenceI cdsSeq = cds.get(0);
assertEquals("GGGTTT", cdsSeq.getSequenceAsString());
// assertEquals("dna1|A12345", cdsSeq.getName());
- assertEquals("dna1|pep1", cdsSeq.getName());
+ assertEquals("CDS|dna1", cdsSeq.getName());
// assertEquals(1, cdsSeq.getDBRefs().length);
// DBRefEntry cdsRef = cdsSeq.getDBRefs()[0];
// assertEquals("EMBLCDS", cdsRef.getSource());
cdsSeq = cds.get(1);
assertEquals("aaaccc", cdsSeq.getSequenceAsString());
// assertEquals("dna1|A12346", cdsSeq.getName());
- assertEquals("dna1|pep2", cdsSeq.getName());
+ assertEquals("CDS|dna1", cdsSeq.getName());
// assertEquals(1, cdsSeq.getDBRefs().length);
// cdsRef = cdsSeq.getDBRefs()[0];
// assertEquals("EMBLCDS", cdsRef.getSource());
cdsSeq = cds.get(2);
assertEquals("aaaTTT", cdsSeq.getSequenceAsString());
// assertEquals("dna1|A12347", cdsSeq.getName());
- assertEquals("dna1|pep3", cdsSeq.getName());
+ assertEquals("CDS|dna1", cdsSeq.getName());
// assertEquals(1, cdsSeq.getDBRefs().length);
// cdsRef = cdsSeq.getDBRefs()[0];
// assertEquals("EMBLCDS", cdsRef.getSource());
* Verify there are mappings from each cds sequence to its protein product
* and also to its dna source
*/
- Iterator<AlignedCodonFrame> newMappingsIterator = cdsal
- .getCodonFrames().iterator();
-
- // mappings for dna1 - exon1 - pep1
- AlignedCodonFrame cdsMapping = newMappingsIterator.next();
- List<Mapping> dnaMappings = cdsMapping.getMappingsFromSequence(dna1);
- assertEquals(3, dnaMappings.size());
- assertSame(cds.get(0).getDatasetSequence(), dnaMappings.get(0)
- .getTo());
- assertEquals("G(1) in CDS should map to G(4) in DNA", 4, dnaMappings
- .get(0).getMap().getToPosition(1));
- List<Mapping> peptideMappings = cdsMapping.getMappingsFromSequence(cds
- .get(0).getDatasetSequence());
- assertEquals(1, peptideMappings.size());
- assertSame(pep1.getDatasetSequence(), peptideMappings.get(0).getTo());
-
- // mappings for dna1 - cds2 - pep2
- assertSame(cds.get(1).getDatasetSequence(), dnaMappings.get(1)
- .getTo());
- assertEquals("c(4) in CDS should map to c(7) in DNA", 7, dnaMappings
- .get(1).getMap().getToPosition(4));
- peptideMappings = cdsMapping.getMappingsFromSequence(cds.get(1)
- .getDatasetSequence());
- assertEquals(1, peptideMappings.size());
- assertSame(pep2.getDatasetSequence(), peptideMappings.get(0).getTo());
-
- // mappings for dna1 - cds3 - pep3
- assertSame(cds.get(2).getDatasetSequence(), dnaMappings.get(2)
- .getTo());
- assertEquals("T(4) in CDS should map to T(10) in DNA", 10, dnaMappings
- .get(2).getMap().getToPosition(4));
- peptideMappings = cdsMapping.getMappingsFromSequence(cds.get(2)
- .getDatasetSequence());
- assertEquals(1, peptideMappings.size());
- assertSame(pep3.getDatasetSequence(), peptideMappings.get(0).getTo());
+ List<AlignedCodonFrame> newMappings = cdsal.getCodonFrames();
+
+ /*
+ * 6 mappings involve dna1 (to pep1/2/3, cds1/2/3)
+ */
+ List<AlignedCodonFrame> dnaMappings = MappingUtils
+ .findMappingsForSequence(dna1, newMappings);
+ assertEquals(6, dnaMappings.size());
+
+ /*
+ * dna1 to pep1
+ */
+ List<AlignedCodonFrame> mappings = MappingUtils
+ .findMappingsForSequence(pep1, dnaMappings);
+ assertEquals(1, mappings.size());
+ assertEquals(1, mappings.get(0).getMappings().size());
+ assertSame(pep1.getDatasetSequence(), mappings.get(0).getMappings()
+ .get(0).getMapping().getTo());
+
+ /*
+ * dna1 to cds1
+ */
+ List<AlignedCodonFrame> dnaToCds1Mappings = MappingUtils
+ .findMappingsForSequence(cds.get(0), dnaMappings);
+ Mapping mapping = dnaToCds1Mappings.get(0).getMappings().get(0)
+ .getMapping();
+ assertSame(cds.get(0).getDatasetSequence(), mapping.getTo());
+ assertEquals("G(1) in CDS should map to G(4) in DNA", 4, mapping
+ .getMap().getToPosition(1));
+
+ /*
+ * dna1 to pep2
+ */
+ mappings = MappingUtils.findMappingsForSequence(pep2, dnaMappings);
+ assertEquals(1, mappings.size());
+ assertEquals(1, mappings.get(0).getMappings().size());
+ assertSame(pep2.getDatasetSequence(), mappings.get(0).getMappings()
+ .get(0).getMapping().getTo());
+
+ /*
+ * dna1 to cds2
+ */
+ List<AlignedCodonFrame> dnaToCds2Mappings = MappingUtils
+ .findMappingsForSequence(cds.get(1), dnaMappings);
+ mapping = dnaToCds2Mappings.get(0).getMappings().get(0).getMapping();
+ assertSame(cds.get(1).getDatasetSequence(), mapping.getTo());
+ assertEquals("c(4) in CDS should map to c(7) in DNA", 7, mapping
+ .getMap().getToPosition(4));
+
+ /*
+ * dna1 to pep3
+ */
+ mappings = MappingUtils.findMappingsForSequence(pep3, dnaMappings);
+ assertEquals(1, mappings.size());
+ assertEquals(1, mappings.get(0).getMappings().size());
+ assertSame(pep3.getDatasetSequence(), mappings.get(0).getMappings()
+ .get(0).getMapping().getTo());
+
+ /*
+ * dna1 to cds3
+ */
+ List<AlignedCodonFrame> dnaToCds3Mappings = MappingUtils
+ .findMappingsForSequence(cds.get(2), dnaMappings);
+ mapping = dnaToCds3Mappings.get(0).getMappings().get(0).getMapping();
+ assertSame(cds.get(2).getDatasetSequence(), mapping.getTo());
+ assertEquals("T(4) in CDS should map to T(10) in DNA", 10, mapping
+ .getMap().getToPosition(4));
}
@Test(groups = { "Functional" })
* @throws IOException
*/
@Test(groups = { "Functional" })
- public void testMapCdnaToProtein_forSubsequence()
- throws IOException
+ public void testMapCdnaToProtein_forSubsequence() throws IOException
{
SequenceI prot = new Sequence("UNIPROT|V12345", "E-I--Q", 10, 12);
prot.createDatasetSequence();
@Test(groups = { "Functional" })
public void testAlignSequenceAs_mappedProteinProtein()
{
-
+
SequenceI alignMe = new Sequence("Match", "MGAASEV");
alignMe.createDatasetSequence();
SequenceI alignFrom = new Sequence("Query", "LQTGYMGAASEVMFSPTRR");
MapList map = new MapList(new int[] { 6, 12 }, new int[] { 1, 7 }, 1, 1);
acf.addMap(alignFrom.getDatasetSequence(),
alignMe.getDatasetSequence(), map);
-
+
AlignmentUtils.alignSequenceAs(alignMe, alignFrom, acf, "-", '-', true,
true);
assertEquals("-----MGAASEV-------", alignMe.getSequenceAsString());
{
// map first 3 codons to KPF; G is a trailing unmapped residue
MapList map = new MapList(new int[] { 1, 9 }, new int[] { 1, 3 }, 3, 1);
-
+
checkAlignSequenceAs("AAACCCTTT", "K-PFG", true, true, map,
"AAA---CCCTTT---");
}
* that partially overlap 5' or 3' (start or end) of target sequence
*/
AlignmentUtils.transferFeatures(dna, cds, map, null);
- SequenceFeature[] sfs = cds.getSequenceFeatures();
- assertEquals(6, sfs.length);
+ List<SequenceFeature> sfs = cds.getSequenceFeatures();
+ assertEquals(6, sfs.size());
- SequenceFeature sf = sfs[0];
+ SequenceFeature sf = sfs.get(0);
assertEquals("type2", sf.getType());
assertEquals("desc2", sf.getDescription());
assertEquals(2f, sf.getScore());
assertEquals(1, sf.getBegin());
assertEquals(1, sf.getEnd());
- sf = sfs[1];
+ sf = sfs.get(1);
assertEquals("type3", sf.getType());
assertEquals("desc3", sf.getDescription());
assertEquals(3f, sf.getScore());
assertEquals(1, sf.getBegin());
assertEquals(3, sf.getEnd());
- sf = sfs[2];
+ sf = sfs.get(2);
assertEquals("type4", sf.getType());
assertEquals(2, sf.getBegin());
assertEquals(5, sf.getEnd());
- sf = sfs[3];
+ sf = sfs.get(3);
assertEquals("type5", sf.getType());
assertEquals(1, sf.getBegin());
assertEquals(6, sf.getEnd());
- sf = sfs[4];
+ sf = sfs.get(4);
assertEquals("type8", sf.getType());
assertEquals(6, sf.getBegin());
assertEquals(6, sf.getEnd());
- sf = sfs[5];
+ sf = sfs.get(5);
assertEquals("type9", sf.getType());
assertEquals(6, sf.getBegin());
assertEquals(6, sf.getEnd());
MapList map = new MapList(new int[] { 4, 6, 10, 12 },
new int[] { 1, 6 }, 1, 1);
-
+
// [5, 11] maps to [2, 5]
dna.addSequenceFeature(new SequenceFeature("type4", "desc4", 5, 11, 4f,
null));
// [12, 12] maps to [6, 6]
dna.addSequenceFeature(new SequenceFeature("type8", "desc8", 12, 12,
8f, null));
-
+
// desc4 and desc8 are the 'omit these' varargs
AlignmentUtils.transferFeatures(dna, cds, map, null, "type4", "type8");
- SequenceFeature[] sfs = cds.getSequenceFeatures();
- assertEquals(1, sfs.length);
-
- SequenceFeature sf = sfs[0];
+ List<SequenceFeature> sfs = cds.getSequenceFeatures();
+ assertEquals(1, sfs.size());
+
+ SequenceFeature sf = sfs.get(0);
assertEquals("type5", sf.getType());
assertEquals(1, sf.getBegin());
assertEquals(6, sf.getEnd());
{
SequenceI dna = new Sequence("dna/20-34", "acgTAGcaaGCCcgt");
SequenceI cds = new Sequence("cds/10-15", "TAGGCC");
-
+
MapList map = new MapList(new int[] { 4, 6, 10, 12 },
new int[] { 1, 6 }, 1, 1);
-
+
// [5, 11] maps to [2, 5]
dna.addSequenceFeature(new SequenceFeature("type4", "desc4", 5, 11, 4f,
null));
// [12, 12] maps to [6, 6]
dna.addSequenceFeature(new SequenceFeature("type8", "desc8", 12, 12,
8f, null));
-
+
// "type5" is the 'select this type' argument
AlignmentUtils.transferFeatures(dna, cds, map, "type5");
- SequenceFeature[] sfs = cds.getSequenceFeatures();
- assertEquals(1, sfs.length);
-
- SequenceFeature sf = sfs[0];
+ List<SequenceFeature> sfs = cds.getSequenceFeatures();
+ assertEquals(1, sfs.size());
+
+ SequenceFeature sf = sfs.get(0);
assertEquals("type5", sf.getType());
assertEquals(1, sf.getBegin());
assertEquals(6, sf.getEnd());
dna3.createDatasetSequence();
pep1.createDatasetSequence();
pep2.createDatasetSequence();
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 8, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 9, 12, 0f,
- null));
- dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 16, 18, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 4, 8, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 12, 12, 0f,
- null));
- dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 16, 18, 0f,
- null));
AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
dna.setDataset(null);
-
+
MapList map = new MapList(new int[] { 4, 12, 16, 18 },
new int[] { 1, 4 }, 3, 1);
AlignedCodonFrame acf = new AlignedCodonFrame();
acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
dna.addCodonFrame(acf);
map = new MapList(new int[] { 4, 8, 12, 12, 16, 18 },
- new int[] { 1, 3 },
- 3, 1);
+ new int[] { 1, 3 }, 3, 1);
acf = new AlignedCodonFrame();
acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
dna.addCodonFrame(acf);
-
+
AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
- dna1, dna2, dna3 }, dna);
+ dna1, dna2, dna3 }, dna.getDataset(), null);
List<SequenceI> cdsSeqs = cds.getSequences();
assertEquals(2, cdsSeqs.size());
assertEquals("GGGCCCTTTGGG", cdsSeqs.get(0).getSequenceAsString());
assertEquals("GGGCCTGGG", cdsSeqs.get(1).getSequenceAsString());
-
+
/*
* verify shared, extended alignment dataset
*/
.contains(cdsSeqs.get(1).getDatasetSequence()));
/*
- * Verify updated mappings
+ * Verify 6 mappings: dna1 to cds1, cds1 to pep1, dna1 to pep1
+ * and the same for dna2/cds2/pep2
*/
- List<AlignedCodonFrame> cdsMappings = cds.getCodonFrames();
- assertEquals(2, cdsMappings.size());
-
+ List<AlignedCodonFrame> mappings = cds.getCodonFrames();
+ assertEquals(6, mappings.size());
+
/*
- * Mapping from pep1 to GGGTTT in first new CDS sequence
+ * 2 mappings involve pep1
*/
- List<AlignedCodonFrame> pep1Mapping = MappingUtils
- .findMappingsForSequence(pep1, cdsMappings);
- assertEquals(1, pep1Mapping.size());
+ List<AlignedCodonFrame> pep1Mappings = MappingUtils
+ .findMappingsForSequence(pep1, mappings);
+ assertEquals(2, pep1Mappings.size());
+
/*
+ * Get mapping of pep1 to cds1 and verify it
* maps GPFG to 1-3,4-6,7-9,10-12
*/
- SearchResults sr = MappingUtils
- .buildSearchResults(pep1, 1, cdsMappings);
+ List<AlignedCodonFrame> pep1CdsMappings = MappingUtils
+ .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings);
+ assertEquals(1, pep1CdsMappings.size());
+ SearchResultsI sr = MappingUtils.buildSearchResults(pep1, 1,
+ pep1CdsMappings);
assertEquals(1, sr.getResults().size());
- Match m = sr.getResults().get(0);
- assertEquals(cds.getSequenceAt(0).getDatasetSequence(),
- m.getSequence());
+ SearchResultMatchI m = sr.getResults().get(0);
+ assertEquals(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
assertEquals(1, m.getStart());
assertEquals(3, m.getEnd());
- sr = MappingUtils.buildSearchResults(pep1, 2, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep1, 2, pep1CdsMappings);
m = sr.getResults().get(0);
assertEquals(4, m.getStart());
assertEquals(6, m.getEnd());
- sr = MappingUtils.buildSearchResults(pep1, 3, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep1, 3, pep1CdsMappings);
m = sr.getResults().get(0);
assertEquals(7, m.getStart());
assertEquals(9, m.getEnd());
- sr = MappingUtils.buildSearchResults(pep1, 4, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep1, 4, pep1CdsMappings);
m = sr.getResults().get(0);
assertEquals(10, m.getStart());
assertEquals(12, m.getEnd());
-
+
/*
- * GPG in pep2 map to 1-3,4-6,7-9 in second CDS sequence
+ * Get mapping of pep2 to cds2 and verify it
+ * maps GPG in pep2 to 1-3,4-6,7-9 in second CDS sequence
*/
- List<AlignedCodonFrame> pep2Mapping = MappingUtils
- .findMappingsForSequence(pep2, cdsMappings);
- assertEquals(1, pep2Mapping.size());
- sr = MappingUtils.buildSearchResults(pep2, 1, cdsMappings);
+ List<AlignedCodonFrame> pep2Mappings = MappingUtils
+ .findMappingsForSequence(pep2, mappings);
+ assertEquals(2, pep2Mappings.size());
+ List<AlignedCodonFrame> pep2CdsMappings = MappingUtils
+ .findMappingsForSequence(cds.getSequenceAt(1), pep2Mappings);
+ assertEquals(1, pep2CdsMappings.size());
+ sr = MappingUtils.buildSearchResults(pep2, 1, pep2CdsMappings);
assertEquals(1, sr.getResults().size());
m = sr.getResults().get(0);
- assertEquals(cds.getSequenceAt(1).getDatasetSequence(),
- m.getSequence());
+ assertEquals(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
assertEquals(1, m.getStart());
assertEquals(3, m.getEnd());
- sr = MappingUtils.buildSearchResults(pep2, 2, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep2, 2, pep2CdsMappings);
m = sr.getResults().get(0);
assertEquals(4, m.getStart());
assertEquals(6, m.getEnd());
- sr = MappingUtils.buildSearchResults(pep2, 3, cdsMappings);
+ sr = MappingUtils.buildSearchResults(pep2, 3, pep2CdsMappings);
m = sr.getResults().get(0);
assertEquals(7, m.getStart());
assertEquals(9, m.getEnd());
SequenceI dna3 = new Sequence("Seq3", "ccaaa-ttt-GGG-");
AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
dna.setDataset(null);
-
+
// prot1 has 'X' for incomplete start codon (not mapped)
SequenceI prot1 = new Sequence("Seq1", "XKFG"); // X for incomplete start
SequenceI prot2 = new Sequence("Seq2", "NG");
AlignmentI protein = new Alignment(new SequenceI[] { prot1, prot2,
prot3 });
protein.setDataset(null);
-
+
// map dna1 [3, 11] to prot1 [2, 4] KFG
MapList map = new MapList(new int[] { 3, 11 }, new int[] { 2, 4 }, 3, 1);
AlignedCodonFrame acf = new AlignedCodonFrame();
SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt");
dnaSeq.createDatasetSequence();
SequenceI ds = dnaSeq.getDatasetSequence();
-
+
// CDS for dna 5-6 (incomplete codon), 7-9
SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null);
sf.setPhase("2"); // skip 2 bases to start of next codon
// CDS for dna 13-15
sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null);
ds.addSequenceFeature(sf);
-
+
List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
-
+
/*
* check the mapping starts with the first complete codon
*/
SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt");
dnaSeq.createDatasetSequence();
SequenceI ds = dnaSeq.getDatasetSequence();
-
+
// CDS for dna 10-12
SequenceFeature sf = new SequenceFeature("CDS_predicted", "", 10, 12,
0f, null);
// exon feature should be ignored here
sf = new SequenceFeature("exon", "", 7, 9, 0f, null);
ds.addSequenceFeature(sf);
-
+
List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
/*
* verify ranges { [4-6], [12-10] }
public void testComputePeptideVariants()
{
/*
- * scenario: AAATTTCCC codes for KFP, with variants
- * GAA -> E
- * CAA -> Q
- * AAG synonymous
- * AAT -> N
- * TTC synonymous
- * CAC,CGC -> H,R (as one variant)
+ * scenario: AAATTTCCC codes for KFP
+ * variants:
+ * GAA -> E source: Ensembl
+ * CAA -> Q source: dbSNP
+ * AAG synonymous source: COSMIC
+ * AAT -> N source: Ensembl
+ * ...TTC synonymous source: dbSNP
+ * ......CAC,CGC -> H,R source: COSMIC
+ * (one variant with two alleles)
*/
SequenceI peptide = new Sequence("pep/10-12", "KFP");
* two distinct variants for codon 1 position 1
* second one has clinical significance
*/
+ String ensembl = "Ensembl";
+ String dbSnp = "dbSNP";
+ String cosmic = "COSMIC";
SequenceFeature sf1 = new SequenceFeature("sequence_variant", "", 1, 1,
- 0f, null);
+ 0f, ensembl);
sf1.setValue("alleles", "A,G"); // GAA -> E
sf1.setValue("ID", "var1.125A>G");
SequenceFeature sf2 = new SequenceFeature("sequence_variant", "", 1, 1,
- 0f, null);
+ 0f, dbSnp);
sf2.setValue("alleles", "A,C"); // CAA -> Q
sf2.setValue("ID", "var2");
sf2.setValue("clinical_significance", "Dodgy");
SequenceFeature sf3 = new SequenceFeature("sequence_variant", "", 3, 3,
- 0f, null);
+ 0f, cosmic);
sf3.setValue("alleles", "A,G"); // synonymous
sf3.setValue("ID", "var3");
sf3.setValue("clinical_significance", "None");
SequenceFeature sf4 = new SequenceFeature("sequence_variant", "", 3, 3,
- 0f, null);
+ 0f, ensembl);
sf4.setValue("alleles", "A,T"); // AAT -> N
sf4.setValue("ID", "sequence_variant:var4"); // prefix gets stripped off
sf4.setValue("clinical_significance", "Benign");
SequenceFeature sf5 = new SequenceFeature("sequence_variant", "", 6, 6,
- 0f, null);
+ 0f, dbSnp);
sf5.setValue("alleles", "T,C"); // synonymous
sf5.setValue("ID", "var5");
sf5.setValue("clinical_significance", "Bad");
SequenceFeature sf6 = new SequenceFeature("sequence_variant", "", 8, 8,
- 0f, null);
+ 0f, cosmic);
sf6.setValue("alleles", "C,A,G"); // CAC,CGC -> H,R
sf6.setValue("ID", "var6");
sf6.setValue("clinical_significance", "Good");
/*
* verify added sequence features for
- * var1 K -> E
- * var2 K -> Q
- * var4 K -> N
- * var6 P -> H
- * var6 P -> R
- */
- SequenceFeature[] sfs = peptide.getSequenceFeatures();
- assertEquals(5, sfs.length);
- SequenceFeature sf = sfs[0];
+ * var1 K -> E Ensembl
+ * var2 K -> Q dbSNP
+ * var4 K -> N Ensembl
+ * var6 P -> H COSMIC
+ * var6 P -> R COSMIC
+ */
+ List<SequenceFeature> sfs = peptide.getSequenceFeatures();
+ SequenceFeatures.sortFeatures(sfs, true);
+ assertEquals(5, sfs.size());
+
+ /*
+ * features are sorted by start position ascending, but in no
+ * particular order where start positions match; asserts here
+ * simply match the data returned (the order is not important)
+ */
+ SequenceFeature sf = sfs.get(0);
assertEquals(1, sf.getBegin());
assertEquals(1, sf.getEnd());
- assertEquals("p.Lys1Glu", sf.getDescription());
- assertEquals("var1.125A>G", sf.getValue("ID"));
- assertNull(sf.getValue("clinical_significance"));
- assertEquals("ID=var1.125A>G", sf.getAttributes());
+ assertEquals("p.Lys1Asn", sf.getDescription());
+ assertEquals("var4", sf.getValue("ID"));
+ assertEquals("Benign", sf.getValue("clinical_significance"));
+ assertEquals("ID=var4;clinical_significance=Benign", sf.getAttributes());
assertEquals(1, sf.links.size());
- // link to variation is urlencoded
assertEquals(
- "p.Lys1Glu var1.125A>G|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var1.125A%3EG",
+ "p.Lys1Asn var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4",
sf.links.get(0));
- assertEquals("Jalview", sf.getFeatureGroup());
- sf = sfs[1];
+ assertEquals(ensembl, sf.getFeatureGroup());
+
+ sf = sfs.get(1);
assertEquals(1, sf.getBegin());
assertEquals(1, sf.getEnd());
assertEquals("p.Lys1Gln", sf.getDescription());
assertEquals(
"p.Lys1Gln var2|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var2",
sf.links.get(0));
- assertEquals("Jalview", sf.getFeatureGroup());
- sf = sfs[2];
+ assertEquals(dbSnp, sf.getFeatureGroup());
+
+ sf = sfs.get(2);
assertEquals(1, sf.getBegin());
assertEquals(1, sf.getEnd());
- assertEquals("p.Lys1Asn", sf.getDescription());
- assertEquals("var4", sf.getValue("ID"));
- assertEquals("Benign", sf.getValue("clinical_significance"));
- assertEquals("ID=var4;clinical_significance=Benign", sf.getAttributes());
+ assertEquals("p.Lys1Glu", sf.getDescription());
+ assertEquals("var1.125A>G", sf.getValue("ID"));
+ assertNull(sf.getValue("clinical_significance"));
+ assertEquals("ID=var1.125A>G", sf.getAttributes());
assertEquals(1, sf.links.size());
+ // link to variation is urlencoded
assertEquals(
- "p.Lys1Asn var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4",
+ "p.Lys1Glu var1.125A>G|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var1.125A%3EG",
sf.links.get(0));
- assertEquals("Jalview", sf.getFeatureGroup());
- sf = sfs[3];
+ assertEquals(ensembl, sf.getFeatureGroup());
+
+ sf = sfs.get(3);
assertEquals(3, sf.getBegin());
assertEquals(3, sf.getEnd());
- assertEquals("p.Pro3His", sf.getDescription());
+ assertEquals("p.Pro3Arg", sf.getDescription());
assertEquals("var6", sf.getValue("ID"));
assertEquals("Good", sf.getValue("clinical_significance"));
assertEquals("ID=var6;clinical_significance=Good", sf.getAttributes());
assertEquals(1, sf.links.size());
assertEquals(
- "p.Pro3His var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6",
+ "p.Pro3Arg var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6",
sf.links.get(0));
+ assertEquals(cosmic, sf.getFeatureGroup());
+
// var5 generates two distinct protein variant features
- assertEquals("Jalview", sf.getFeatureGroup());
- sf = sfs[4];
+ sf = sfs.get(4);
assertEquals(3, sf.getBegin());
assertEquals(3, sf.getEnd());
- assertEquals("p.Pro3Arg", sf.getDescription());
+ assertEquals("p.Pro3His", sf.getDescription());
assertEquals("var6", sf.getValue("ID"));
assertEquals("Good", sf.getValue("clinical_significance"));
assertEquals("ID=var6;clinical_significance=Good", sf.getAttributes());
assertEquals(1, sf.links.size());
assertEquals(
- "p.Pro3Arg var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6",
+ "p.Pro3His var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6",
sf.links.get(0));
- assertEquals("Jalview", sf.getFeatureGroup());
+ assertEquals(cosmic, sf.getFeatureGroup());
}
/**
SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt");
dnaSeq.createDatasetSequence();
SequenceI ds = dnaSeq.getDatasetSequence();
-
+
// CDS for dna 4-6
SequenceFeature sf = new SequenceFeature("CDS", "", 4, 6, 0f, null);
sf.setStrand("-");
sf = new SequenceFeature("CDS_predicted", "", 10, 12, 0f, null);
sf.setStrand("-");
ds.addSequenceFeature(sf);
-
+
List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
/*
* verify ranges { [12-10], [6-4] }
SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt");
dnaSeq.createDatasetSequence();
SequenceI ds = dnaSeq.getDatasetSequence();
-
+
// CDS for dna 5-9
SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null);
sf.setStrand("-");
sf.setStrand("-");
sf.setPhase("2"); // skip 2 bases to start of next codon
ds.addSequenceFeature(sf);
-
+
List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
-
+
/*
* check the mapping starts with the first complete codon
* expect ranges [13, 13], [9, 5]
from.createDatasetSequence();
seq1.createDatasetSequence();
Mapping mapping = new Mapping(seq1, new MapList(
- new int[] { 3, 6, 9, 10 },
- new int[] { 1, 6 }, 1, 1));
+ new int[] { 3, 6, 9, 10 }, new int[] { 1, 6 }, 1, 1));
Map<Integer, Map<SequenceI, Character>> map = new TreeMap<Integer, Map<SequenceI, Character>>();
AlignmentUtils.addMappedPositions(seq1, from, mapping, map);
from.createDatasetSequence();
seq1.createDatasetSequence();
Mapping mapping = new Mapping(seq1, new MapList(
- new int[] { 3, 6, 9, 10 },
- new int[] { 1, 6 }, 1, 1));
+ new int[] { 3, 6, 9, 10 }, new int[] { 1, 6 }, 1, 1));
Map<Integer, Map<SequenceI, Character>> map = new TreeMap<Integer, Map<SequenceI, Character>>();
AlignmentUtils.addMappedPositions(seq1, from, mapping, map);
-
+
/*
* verify map has seq1 residues in columns 3,4,6,7,11,12
*/
assertEquals('T', map.get(11).get(seq1).charValue());
assertEquals('T', map.get(12).get(seq1).charValue());
}
+
+ /**
+ * Test for the case where the products for which we want CDS are specified.
+ * This is to represent the case where EMBL has CDS mappings to both Uniprot
+ * and EMBLCDSPROTEIN. makeCdsAlignment() should only return the mappings for
+ * the protein sequences specified.
+ */
+ @Test(groups = { "Functional" })
+ public void testMakeCdsAlignment_filterProducts()
+ {
+ SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
+ SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
+ SequenceI pep1 = new Sequence("Uniprot|pep1", "GF");
+ SequenceI pep2 = new Sequence("Uniprot|pep2", "GFP");
+ SequenceI pep3 = new Sequence("EMBL|pep3", "GF");
+ SequenceI pep4 = new Sequence("EMBL|pep4", "GFP");
+ dna1.createDatasetSequence();
+ dna2.createDatasetSequence();
+ pep1.createDatasetSequence();
+ pep2.createDatasetSequence();
+ pep3.createDatasetSequence();
+ pep4.createDatasetSequence();
+ AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
+ dna.setDataset(null);
+ AlignmentI emblPeptides = new Alignment(new SequenceI[] { pep3, pep4 });
+ emblPeptides.setDataset(null);
+
+ AlignedCodonFrame acf = new AlignedCodonFrame();
+ MapList map = new MapList(new int[] { 4, 6, 10, 12 },
+ new int[] { 1, 2 }, 3, 1);
+ acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
+ acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
+ dna.addCodonFrame(acf);
+
+ acf = new AlignedCodonFrame();
+ map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 },
+ 3, 1);
+ acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
+ acf.addMap(dna2.getDatasetSequence(), pep4.getDatasetSequence(), map);
+ dna.addCodonFrame(acf);
+
+ /*
+ * execute method under test to find CDS for EMBL peptides only
+ */
+ AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
+ dna1, dna2 }, dna.getDataset(), emblPeptides.getSequencesArray());
+
+ assertEquals(2, cds.getSequences().size());
+ assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString());
+ assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString());
+
+ /*
+ * verify shared, extended alignment dataset
+ */
+ assertSame(dna.getDataset(), cds.getDataset());
+ assertTrue(dna.getDataset().getSequences()
+ .contains(cds.getSequenceAt(0).getDatasetSequence()));
+ assertTrue(dna.getDataset().getSequences()
+ .contains(cds.getSequenceAt(1).getDatasetSequence()));
+
+ /*
+ * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
+ * the mappings are on the shared alignment dataset
+ */
+ List<AlignedCodonFrame> cdsMappings = cds.getDataset().getCodonFrames();
+ /*
+ * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep)
+ */
+ assertEquals(6, cdsMappings.size());
+
+ /*
+ * verify that mapping sets for dna and cds alignments are different
+ * [not current behaviour - all mappings are on the alignment dataset]
+ */
+ // select -> subselect type to test.
+ // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames());
+ // assertEquals(4, dna.getCodonFrames().size());
+ // assertEquals(4, cds.getCodonFrames().size());
+
+ /*
+ * Two mappings involve pep3 (dna to pep3, cds to pep3)
+ * Mapping from pep3 to GGGTTT in first new exon sequence
+ */
+ List<AlignedCodonFrame> pep3Mappings = MappingUtils
+ .findMappingsForSequence(pep3, cdsMappings);
+ assertEquals(2, pep3Mappings.size());
+ List<AlignedCodonFrame> mappings = MappingUtils
+ .findMappingsForSequence(cds.getSequenceAt(0), pep3Mappings);
+ assertEquals(1, mappings.size());
+
+ // map G to GGG
+ SearchResultsI sr = MappingUtils.buildSearchResults(pep3, 1, mappings);
+ assertEquals(1, sr.getResults().size());
+ SearchResultMatchI m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
+ assertEquals(1, m.getStart());
+ assertEquals(3, m.getEnd());
+ // map F to TTT
+ sr = MappingUtils.buildSearchResults(pep3, 2, mappings);
+ m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
+ assertEquals(4, m.getStart());
+ assertEquals(6, m.getEnd());
+
+ /*
+ * Two mappings involve pep4 (dna to pep4, cds to pep4)
+ * Verify mapping from pep4 to GGGTTTCCC in second new exon sequence
+ */
+ List<AlignedCodonFrame> pep4Mappings = MappingUtils
+ .findMappingsForSequence(pep4, cdsMappings);
+ assertEquals(2, pep4Mappings.size());
+ mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1),
+ pep4Mappings);
+ assertEquals(1, mappings.size());
+ // map G to GGG
+ sr = MappingUtils.buildSearchResults(pep4, 1, mappings);
+ assertEquals(1, sr.getResults().size());
+ m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
+ assertEquals(1, m.getStart());
+ assertEquals(3, m.getEnd());
+ // map F to TTT
+ sr = MappingUtils.buildSearchResults(pep4, 2, mappings);
+ m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
+ assertEquals(4, m.getStart());
+ assertEquals(6, m.getEnd());
+ // map P to CCC
+ sr = MappingUtils.buildSearchResults(pep4, 3, mappings);
+ m = sr.getResults().get(0);
+ assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence());
+ assertEquals(7, m.getStart());
+ assertEquals(9, m.getEnd());
+ }
+
+ /**
+ * Test the method that just copies aligned sequences, provided all sequences
+ * to be aligned share the aligned sequence's dataset
+ */
+ @Test(groups = "Functional")
+ public void testAlignAsSameSequences()
+ {
+ SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
+ SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
+ AlignmentI al1 = new Alignment(new SequenceI[] { dna1, dna2 });
+ ((Alignment) al1).createDatasetAlignment();
+
+ SequenceI dna3 = new Sequence(dna1);
+ SequenceI dna4 = new Sequence(dna2);
+ assertSame(dna3.getDatasetSequence(), dna1.getDatasetSequence());
+ assertSame(dna4.getDatasetSequence(), dna2.getDatasetSequence());
+ String seq1 = "-cc-GG-GT-TT--aaa";
+ dna3.setSequence(seq1);
+ String seq2 = "C--C-Cgg--gtt-tAA-A-";
+ dna4.setSequence(seq2);
+ AlignmentI al2 = new Alignment(new SequenceI[] { dna3, dna4 });
+ ((Alignment) al2).createDatasetAlignment();
+
+ assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2));
+ assertEquals(seq1, al1.getSequenceAt(0).getSequenceAsString());
+ assertEquals(seq2, al1.getSequenceAt(1).getSequenceAsString());
+
+ /*
+ * add another sequence to 'aligned' - should still succeed, since
+ * unaligned sequences still share a dataset with aligned sequences
+ */
+ SequenceI dna5 = new Sequence("dna5", "CCCgggtttAAA");
+ dna5.createDatasetSequence();
+ al2.addSequence(dna5);
+ assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2));
+ assertEquals(seq1, al1.getSequenceAt(0).getSequenceAsString());
+ assertEquals(seq2, al1.getSequenceAt(1).getSequenceAsString());
+
+ /*
+ * add another sequence to 'unaligned' - should fail, since now not
+ * all unaligned sequences share a dataset with aligned sequences
+ */
+ SequenceI dna6 = new Sequence("dna6", "CCCgggtttAAA");
+ dna6.createDatasetSequence();
+ al1.addSequence(dna6);
+ // JAL-2110 JBP Comment: what's the use case for this behaviour ?
+ assertFalse(AlignmentUtils.alignAsSameSequences(al1, al2));
+ }
+
+ @Test(groups = "Functional")
+ public void testAlignAsSameSequencesMultipleSubSeq()
+ {
+ SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa");
+ SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA");
+ SequenceI as1 = dna1.deriveSequence();
+ SequenceI as2 = dna1.deriveSequence().getSubSequence(3, 7);
+ SequenceI as3 = dna2.deriveSequence();
+ as1.insertCharAt(6, 5, '-');
+ String s_as1 = as1.getSequenceAsString();
+ as2.insertCharAt(6, 5, '-');
+ String s_as2 = as2.getSequenceAsString();
+ as3.insertCharAt(6, 5, '-');
+ String s_as3 = as3.getSequenceAsString();
+ AlignmentI aligned = new Alignment(new SequenceI[] { as1, as2, as3 });
+
+ // why do we need to cast this still ?
+ ((Alignment) aligned).createDatasetAlignment();
+ SequenceI uas1 = dna1.deriveSequence();
+ SequenceI uas2 = dna1.deriveSequence().getSubSequence(3, 7);
+ SequenceI uas3 = dna2.deriveSequence();
+ AlignmentI tobealigned = new Alignment(new SequenceI[] { uas1, uas2,
+ uas3 });
+ ((Alignment) tobealigned).createDatasetAlignment();
+
+ assertTrue(AlignmentUtils.alignAsSameSequences(tobealigned, aligned));
+ assertEquals(s_as1, uas1.getSequenceAsString());
+ assertEquals(s_as2, uas2.getSequenceAsString());
+ assertEquals(s_as3, uas3.getSequenceAsString());
+ }
+
+ /**
+ * Tests for the method that maps nucleotide to protein based on CDS features
+ */
+ @Test(groups = "Functional")
+ public void testMapCdsToProtein()
+ {
+ SequenceI peptide = new Sequence("pep", "KLQ");
+
+ /*
+ * Case 1: CDS 3 times length of peptide
+ * NB method only checks lengths match, not translation
+ */
+ SequenceI dna = new Sequence("dna", "AACGacgtCTCCT");
+ dna.createDatasetSequence();
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 13, null));
+ MapList ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
+ assertEquals(3, ml.getFromRatio());
+ assertEquals(1, ml.getToRatio());
+ assertEquals("[[1, 3]]",
+ Arrays.deepToString(ml.getToRanges().toArray()));
+ assertEquals("[[1, 4], [9, 13]]",
+ Arrays.deepToString(ml.getFromRanges().toArray()));
+
+ /*
+ * Case 2: CDS 3 times length of peptide + stop codon
+ * (note code does not currently check trailing codon is a stop codon)
+ */
+ dna = new Sequence("dna", "AACGacgtCTCCTTGA");
+ dna.createDatasetSequence();
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 16, null));
+ ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
+ assertEquals(3, ml.getFromRatio());
+ assertEquals(1, ml.getToRatio());
+ assertEquals("[[1, 3]]",
+ Arrays.deepToString(ml.getToRanges().toArray()));
+ assertEquals("[[1, 4], [9, 13]]",
+ Arrays.deepToString(ml.getFromRanges().toArray()));
+
+ /*
+ * Case 3: CDS not 3 times length of peptide - no mapping is made
+ */
+ dna = new Sequence("dna", "AACGacgtCTCCTTG");
+ dna.createDatasetSequence();
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null));
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, null));
+ ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
+ assertNull(ml);
+
+ /*
+ * Case 4: incomplete start codon corresponding to X in peptide
+ */
+ dna = new Sequence("dna", "ACGacgtCTCCTTGG");
+ dna.createDatasetSequence();
+ SequenceFeature sf = new SequenceFeature("CDS", "", 1, 3, null);
+ sf.setPhase("2"); // skip 2 positions (AC) to start of next codon (GCT)
+ dna.addSequenceFeature(sf);
+ dna.addSequenceFeature(new SequenceFeature("CDS", "", 8, 15, null));
+ peptide = new Sequence("pep", "XLQ");
+ ml = AlignmentUtils.mapCdsToProtein(dna, peptide);
+ assertEquals("[[2, 3]]",
+ Arrays.deepToString(ml.getToRanges().toArray()));
+ assertEquals("[[3, 3], [8, 12]]",
+ Arrays.deepToString(ml.getFromRanges().toArray()));
+ }
+
}