X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fanalysis%2FAlignmentUtilsTests.java;h=be6ba60d74f0c0bb3e434103cb532de2c064ed76;hb=537da9c65518c52985308b67c499fe6a60a607ce;hp=7f7ec31b38fbbecb748a8e4c3de821eb65935998;hpb=7d67fb613ec026dc9a265e351e7fab542e3f1d61;p=jalview.git diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 7f7ec31..be6ba60 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -34,12 +34,15 @@ import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.Annotation; import jalview.datamodel.DBRefEntry; +import jalview.datamodel.GeneLociI; import jalview.datamodel.Mapping; import jalview.datamodel.SearchResultMatchI; import jalview.datamodel.SearchResultsI; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; +import jalview.datamodel.features.SequenceFeatures; +import jalview.gui.JvOptionPane; import jalview.io.AppletFormatAdapter; import jalview.io.DataSourceType; import jalview.io.FileFormat; @@ -56,13 +59,21 @@ import java.util.List; import java.util.Map; import java.util.TreeMap; +import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; public class AlignmentUtilsTests { - public static Sequence ts = new Sequence("short", + private static Sequence ts = new Sequence("short", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm"); + @BeforeClass(alwaysRun = true) + public void setUpJvOptionPane() + { + JvOptionPane.setInteractiveMode(false); + JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION); + } + @Test(groups = { "Functional" }) public void testExpandContext() { @@ -252,14 +263,14 @@ public class AlignmentUtilsTests @Test(groups = { "Functional" }) public void testMapProteinAlignmentToCdna_noXrefs() throws IOException { - List protseqs = new ArrayList(); + List protseqs = new ArrayList<>(); protseqs.add(new Sequence("UNIPROT|V12345", "EIQ")); protseqs.add(new Sequence("UNIPROT|V12346", "EIQ")); protseqs.add(new Sequence("UNIPROT|V12347", "SAR")); AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3])); protein.setDataset(null); - List dnaseqs = new ArrayList(); + List dnaseqs = new ArrayList<>(); dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAA")); // = EIQ dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ @@ -496,7 +507,7 @@ public class AlignmentUtilsTests acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map); acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map); acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map); - ArrayList acfs = new ArrayList(); + ArrayList acfs = new ArrayList<>(); acfs.add(acf); protein.setCodonFrames(acfs); @@ -594,14 +605,14 @@ public class AlignmentUtilsTests public void testMapProteinAlignmentToCdna_withStartAndStopCodons() throws IOException { - List protseqs = new ArrayList(); + List protseqs = new ArrayList<>(); protseqs.add(new Sequence("UNIPROT|V12345", "EIQ")); protseqs.add(new Sequence("UNIPROT|V12346", "EIQ")); protseqs.add(new Sequence("UNIPROT|V12347", "SAR")); AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3])); protein.setDataset(null); - List dnaseqs = new ArrayList(); + List dnaseqs = new ArrayList<>(); // start + SAR: dnaseqs.add(new Sequence("EMBL|A11111", "ATGTCAGCACGC")); // = EIQ + stop @@ -686,14 +697,14 @@ public class AlignmentUtilsTests @Test(groups = { "Functional" }) public void testMapProteinAlignmentToCdna_withXrefs() throws IOException { - List protseqs = new ArrayList(); + List protseqs = new ArrayList<>(); protseqs.add(new Sequence("UNIPROT|V12345", "EIQ")); protseqs.add(new Sequence("UNIPROT|V12346", "EIQ")); protseqs.add(new Sequence("UNIPROT|V12347", "SAR")); AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3])); protein.setDataset(null); - List dnaseqs = new ArrayList(); + List dnaseqs = new ArrayList<>(); dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR dnaseqs.add(new Sequence("EMBL|A22222", "ATGGAGATACAA")); // = start + EIQ dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ @@ -763,14 +774,14 @@ public class AlignmentUtilsTests public void testMapProteinAlignmentToCdna_prioritiseXrefs() throws IOException { - List protseqs = new ArrayList(); + List protseqs = new ArrayList<>(); protseqs.add(new Sequence("UNIPROT|V12345", "EIQ")); protseqs.add(new Sequence("UNIPROT|V12346", "EIQ")); AlignmentI protein = new Alignment( protseqs.toArray(new SequenceI[protseqs.size()])); protein.setDataset(null); - List dnaseqs = new ArrayList(); + List dnaseqs = new ArrayList<>(); dnaseqs.add(new Sequence("EMBL|A11111", "GAAATCCAG")); // = EIQ dnaseqs.add(new Sequence("EMBL|A22222", "GAAATTCAG")); // = EIQ AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[dnaseqs @@ -837,8 +848,8 @@ public class AlignmentUtilsTests al.addAnnotation(ann4); // Temp for seq1 al.addAnnotation(ann5); // Temp for seq2 al.addAnnotation(ann6); // Temp for no sequence - List types = new ArrayList(); - List scope = new ArrayList(); + List types = new ArrayList<>(); + List scope = new ArrayList<>(); /* * Set all sequence related Structure to hidden (ann1, ann2) @@ -1033,14 +1044,18 @@ public class AlignmentUtilsTests dna.addCodonFrame(acf); /* - * In this case, mappings originally came from matching Uniprot accessions - so need an xref on dna involving those regions. These are normally constructed from CDS annotation + * In this case, mappings originally came from matching Uniprot accessions + * - so need an xref on dna involving those regions. + * These are normally constructed from CDS annotation */ DBRefEntry dna1xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep1", new Mapping(mapfordna1)); - dna1.getDatasetSequence().addDBRef(dna1xref); + dna1.addDBRef(dna1xref); + assertEquals(2, dna1.getDBRefs().length); // to self and to pep1 DBRefEntry dna2xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep2", new Mapping(mapfordna2)); - dna2.getDatasetSequence().addDBRef(dna2xref); + dna2.addDBRef(dna2xref); + assertEquals(2, dna2.getDBRefs().length); // to self and to pep2 /* * execute method under test: @@ -1095,6 +1110,38 @@ public class AlignmentUtilsTests assertEquals(cdsMapping.getInverse(), dbref.getMap().getMap()); /* + * verify cDNA has added a dbref with mapping to CDS + */ + assertEquals(3, dna1.getDBRefs().length); + DBRefEntry dbRefEntry = dna1.getDBRefs()[2]; + assertSame(cds1Dss, dbRefEntry.getMap().getTo()); + MapList dnaToCdsMapping = new MapList(new int[] { 4, 6, 10, 12 }, + new int[] { 1, 6 }, 1, 1); + assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap()); + assertEquals(3, dna2.getDBRefs().length); + dbRefEntry = dna2.getDBRefs()[2]; + assertSame(cds2Dss, dbRefEntry.getMap().getTo()); + dnaToCdsMapping = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, + new int[] { 1, 9 }, 1, 1); + assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap()); + + /* + * verify CDS has added a dbref with mapping to cDNA + */ + assertEquals(2, cds1Dss.getDBRefs().length); + dbRefEntry = cds1Dss.getDBRefs()[1]; + assertSame(dna1.getDatasetSequence(), dbRefEntry.getMap().getTo()); + MapList cdsToDnaMapping = new MapList(new int[] { 1, 6 }, new int[] { + 4, 6, 10, 12 }, 1, 1); + assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap()); + assertEquals(2, cds2Dss.getDBRefs().length); + dbRefEntry = cds2Dss.getDBRefs()[1]; + assertSame(dna2.getDatasetSequence(), dbRefEntry.getMap().getTo()); + cdsToDnaMapping = new MapList(new int[] { 1, 9 }, new int[] { 1, 3, 7, + 9, 13, 15 }, 1, 1); + assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap()); + + /* * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide * the mappings are on the shared alignment dataset * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep) @@ -1169,12 +1216,12 @@ public class AlignmentUtilsTests /* * check cds2 acquired a variant feature in position 5 */ - SequenceFeature[] sfs = cds2Dss.getSequenceFeatures(); + List sfs = cds2Dss.getSequenceFeatures(); assertNotNull(sfs); - assertEquals(1, sfs.length); - assertEquals("variant", sfs[0].type); - assertEquals(5, sfs[0].begin); - assertEquals(5, sfs[0].end); + assertEquals(1, sfs.size()); + assertEquals("variant", sfs.get(0).type); + assertEquals(5, sfs.get(0).begin); + assertEquals(5, sfs.get(0).end); } /** @@ -1479,39 +1526,39 @@ public class AlignmentUtilsTests * that partially overlap 5' or 3' (start or end) of target sequence */ AlignmentUtils.transferFeatures(dna, cds, map, null); - SequenceFeature[] sfs = cds.getSequenceFeatures(); - assertEquals(6, sfs.length); + List sfs = cds.getSequenceFeatures(); + assertEquals(6, sfs.size()); - SequenceFeature sf = sfs[0]; + SequenceFeature sf = sfs.get(0); assertEquals("type2", sf.getType()); assertEquals("desc2", sf.getDescription()); assertEquals(2f, sf.getScore()); assertEquals(1, sf.getBegin()); assertEquals(1, sf.getEnd()); - sf = sfs[1]; + sf = sfs.get(1); assertEquals("type3", sf.getType()); assertEquals("desc3", sf.getDescription()); assertEquals(3f, sf.getScore()); assertEquals(1, sf.getBegin()); assertEquals(3, sf.getEnd()); - sf = sfs[2]; + sf = sfs.get(2); assertEquals("type4", sf.getType()); assertEquals(2, sf.getBegin()); assertEquals(5, sf.getEnd()); - sf = sfs[3]; + sf = sfs.get(3); assertEquals("type5", sf.getType()); assertEquals(1, sf.getBegin()); assertEquals(6, sf.getEnd()); - sf = sfs[4]; + sf = sfs.get(4); assertEquals("type8", sf.getType()); assertEquals(6, sf.getBegin()); assertEquals(6, sf.getEnd()); - sf = sfs[5]; + sf = sfs.get(5); assertEquals("type9", sf.getType()); assertEquals(6, sf.getBegin()); assertEquals(6, sf.getEnd()); @@ -1541,10 +1588,10 @@ public class AlignmentUtilsTests // desc4 and desc8 are the 'omit these' varargs AlignmentUtils.transferFeatures(dna, cds, map, null, "type4", "type8"); - SequenceFeature[] sfs = cds.getSequenceFeatures(); - assertEquals(1, sfs.length); + List sfs = cds.getSequenceFeatures(); + assertEquals(1, sfs.size()); - SequenceFeature sf = sfs[0]; + SequenceFeature sf = sfs.get(0); assertEquals("type5", sf.getType()); assertEquals(1, sf.getBegin()); assertEquals(6, sf.getEnd()); @@ -1574,10 +1621,10 @@ public class AlignmentUtilsTests // "type5" is the 'select this type' argument AlignmentUtils.transferFeatures(dna, cds, map, "type5"); - SequenceFeature[] sfs = cds.getSequenceFeatures(); - assertEquals(1, sfs.length); + List sfs = cds.getSequenceFeatures(); + assertEquals(1, sfs.size()); - SequenceFeature sf = sfs[0]; + SequenceFeature sf = sfs.get(0); assertEquals("type5", sf.getType()); assertEquals(1, sf.getBegin()); assertEquals(6, sf.getEnd()); @@ -1736,7 +1783,7 @@ public class AlignmentUtilsTests map = new MapList(new int[] { 9, 11 }, new int[] { 2, 2 }, 3, 1); acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map); - ArrayList acfs = new ArrayList(); + ArrayList acfs = new ArrayList<>(); acfs.add(acf); protein.setCodonFrames(acfs); @@ -1885,6 +1932,7 @@ public class AlignmentUtilsTests sf6.setValue("alleles", "g, a"); // should force to upper-case sf6.setValue("ID", "sequence_variant:rs758803216"); dna.addSequenceFeature(sf6); + SequenceFeature sf7 = new SequenceFeature("sequence_variant", "", 15, 15, 0f, null); sf7.setValue("alleles", "A, T"); @@ -1974,6 +2022,7 @@ public class AlignmentUtilsTests * variants: * GAA -> E source: Ensembl * CAA -> Q source: dbSNP + * TAA -> STOP source: dnSNP * AAG synonymous source: COSMIC * AAT -> N source: Ensembl * ...TTC synonymous source: dbSNP @@ -1989,39 +2038,50 @@ public class AlignmentUtilsTests String ensembl = "Ensembl"; String dbSnp = "dbSNP"; String cosmic = "COSMIC"; + SequenceFeature sf1 = new SequenceFeature("sequence_variant", "", 1, 1, 0f, ensembl); - sf1.setValue("alleles", "A,G"); // GAA -> E + sf1.setValue("alleles", "A,G"); // AAA -> GAA -> K/E sf1.setValue("ID", "var1.125A>G"); + SequenceFeature sf2 = new SequenceFeature("sequence_variant", "", 1, 1, 0f, dbSnp); - sf2.setValue("alleles", "A,C"); // CAA -> Q + sf2.setValue("alleles", "A,C"); // AAA -> CAA -> K/Q sf2.setValue("ID", "var2"); sf2.setValue("clinical_significance", "Dodgy"); - SequenceFeature sf3 = new SequenceFeature("sequence_variant", "", 3, 3, - 0f, cosmic); - sf3.setValue("alleles", "A,G"); // synonymous + + SequenceFeature sf3 = new SequenceFeature("sequence_variant", "", 1, 1, + 0f, dbSnp); + sf3.setValue("alleles", "A,T"); // AAA -> TAA -> stop codon sf3.setValue("ID", "var3"); - sf3.setValue("clinical_significance", "None"); + sf3.setValue("clinical_significance", "Bad"); + SequenceFeature sf4 = new SequenceFeature("sequence_variant", "", 3, 3, + 0f, cosmic); + sf4.setValue("alleles", "A,G"); // AAA -> AAG synonymous + sf4.setValue("ID", "var4"); + sf4.setValue("clinical_significance", "None"); + + SequenceFeature sf5 = new SequenceFeature("sequence_variant", "", 3, 3, 0f, ensembl); - sf4.setValue("alleles", "A,T"); // AAT -> N - sf4.setValue("ID", "sequence_variant:var4"); // prefix gets stripped off - sf4.setValue("clinical_significance", "Benign"); - SequenceFeature sf5 = new SequenceFeature("sequence_variant", "", 6, 6, + sf5.setValue("alleles", "A,T"); // AAA -> AAT -> K/N + sf5.setValue("ID", "sequence_variant:var5"); // prefix gets stripped off + sf5.setValue("clinical_significance", "Benign"); + + SequenceFeature sf6 = new SequenceFeature("sequence_variant", "", 6, 6, 0f, dbSnp); - sf5.setValue("alleles", "T,C"); // synonymous - sf5.setValue("ID", "var5"); - sf5.setValue("clinical_significance", "Bad"); - SequenceFeature sf6 = new SequenceFeature("sequence_variant", "", 8, 8, - 0f, cosmic); - sf6.setValue("alleles", "C,A,G"); // CAC,CGC -> H,R + sf6.setValue("alleles", "T,C"); // TTT -> TTC synonymous sf6.setValue("ID", "var6"); - sf6.setValue("clinical_significance", "Good"); - List codon1Variants = new ArrayList(); - List codon2Variants = new ArrayList(); - List codon3Variants = new ArrayList(); + SequenceFeature sf7 = new SequenceFeature("sequence_variant", "", 8, 8, + 0f, cosmic); + sf7.setValue("alleles", "C,A,G"); // CCC -> CAC,CGC -> P/H/R + sf7.setValue("ID", "var7"); + sf7.setValue("clinical_significance", "Good"); + + List codon1Variants = new ArrayList<>(); + List codon2Variants = new ArrayList<>(); + List codon3Variants = new ArrayList<>(); List codonVariants[] = new ArrayList[3]; codonVariants[0] = codon1Variants; codonVariants[1] = codon2Variants; @@ -2032,10 +2092,11 @@ public class AlignmentUtilsTests */ codon1Variants.add(new DnaVariant("A", sf1)); codon1Variants.add(new DnaVariant("A", sf2)); + codon1Variants.add(new DnaVariant("A", sf3)); codon2Variants.add(new DnaVariant("A")); - codon2Variants.add(new DnaVariant("A")); - codon3Variants.add(new DnaVariant("A", sf3)); + // codon2Variants.add(new DnaVariant("A")); codon3Variants.add(new DnaVariant("A", sf4)); + codon3Variants.add(new DnaVariant("A", sf5)); AlignmentUtils.computePeptideVariants(peptide, 1, codonVariants); /* @@ -2046,7 +2107,7 @@ public class AlignmentUtilsTests codon3Variants.clear(); codon1Variants.add(new DnaVariant("T")); codon2Variants.add(new DnaVariant("T")); - codon3Variants.add(new DnaVariant("T", sf5)); + codon3Variants.add(new DnaVariant("T", sf6)); AlignmentUtils.computePeptideVariants(peptide, 2, codonVariants); /* @@ -2056,7 +2117,7 @@ public class AlignmentUtilsTests codon2Variants.clear(); codon3Variants.clear(); codon1Variants.add(new DnaVariant("C")); - codon2Variants.add(new DnaVariant("C", sf6)); + codon2Variants.add(new DnaVariant("C", sf7)); codon3Variants.add(new DnaVariant("C")); AlignmentUtils.computePeptideVariants(peptide, 3, codonVariants); @@ -2064,16 +2125,58 @@ public class AlignmentUtilsTests * verify added sequence features for * var1 K -> E Ensembl * var2 K -> Q dbSNP - * var4 K -> N Ensembl - * var6 P -> H COSMIC - * var6 P -> R COSMIC + * var3 K -> stop + * var4 synonymous + * var5 K -> N Ensembl + * var6 synonymous + * var7 P -> H COSMIC + * var8 P -> R COSMIC + */ + List sfs = peptide.getSequenceFeatures(); + SequenceFeatures.sortFeatures(sfs, true); + assertEquals(8, sfs.size()); + + /* + * features are sorted by start position ascending, but in no + * particular order where start positions match; asserts here + * simply match the data returned (the order is not important) */ - SequenceFeature[] sfs = peptide.getSequenceFeatures(); - assertEquals(5, sfs.length); + // AAA -> AAT -> K/N + SequenceFeature sf = sfs.get(0); + assertEquals(1, sf.getBegin()); + assertEquals(1, sf.getEnd()); + assertEquals("nonsynonymous_variant", sf.getType()); + assertEquals("p.Lys1Asn", sf.getDescription()); + assertEquals("var5", sf.getValue("ID")); + assertEquals("Benign", sf.getValue("clinical_significance")); + assertEquals("ID=var5;clinical_significance=Benign", + sf.getAttributes()); + assertEquals(1, sf.links.size()); + assertEquals( + "p.Lys1Asn var5|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var5", + sf.links.get(0)); + assertEquals(ensembl, sf.getFeatureGroup()); + + // AAA -> CAA -> K/Q + sf = sfs.get(1); + assertEquals(1, sf.getBegin()); + assertEquals(1, sf.getEnd()); + assertEquals("nonsynonymous_variant", sf.getType()); + assertEquals("p.Lys1Gln", sf.getDescription()); + assertEquals("var2", sf.getValue("ID")); + assertEquals("Dodgy", sf.getValue("clinical_significance")); + assertEquals("ID=var2;clinical_significance=Dodgy", sf.getAttributes()); + assertEquals(1, sf.links.size()); + assertEquals( + "p.Lys1Gln var2|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var2", + sf.links.get(0)); + assertEquals(dbSnp, sf.getFeatureGroup()); - SequenceFeature sf = sfs[0]; + // AAA -> GAA -> K/E + sf = sfs.get(2); assertEquals(1, sf.getBegin()); assertEquals(1, sf.getEnd()); + assertEquals("nonsynonymous_variant", sf.getType()); assertEquals("p.Lys1Glu", sf.getDescription()); assertEquals("var1.125A>G", sf.getValue("ID")); assertNull(sf.getValue("clinical_significance")); @@ -2085,56 +2188,79 @@ public class AlignmentUtilsTests sf.links.get(0)); assertEquals(ensembl, sf.getFeatureGroup()); - sf = sfs[1]; + // AAA -> TAA -> stop codon + sf = sfs.get(3); assertEquals(1, sf.getBegin()); assertEquals(1, sf.getEnd()); - assertEquals("p.Lys1Gln", sf.getDescription()); - assertEquals("var2", sf.getValue("ID")); - assertEquals("Dodgy", sf.getValue("clinical_significance")); - assertEquals("ID=var2;clinical_significance=Dodgy", sf.getAttributes()); + assertEquals("stop_gained", sf.getType()); + assertEquals("TAA", sf.getDescription()); + assertEquals("var3", sf.getValue("ID")); + assertEquals("Bad", sf.getValue("clinical_significance")); + assertEquals("ID=var3;clinical_significance=Bad", sf.getAttributes()); assertEquals(1, sf.links.size()); assertEquals( - "p.Lys1Gln var2|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var2", + "TAA var3|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var3", sf.links.get(0)); assertEquals(dbSnp, sf.getFeatureGroup()); - sf = sfs[2]; + // AAA -> AAG synonymous + sf = sfs.get(4); assertEquals(1, sf.getBegin()); assertEquals(1, sf.getEnd()); - assertEquals("p.Lys1Asn", sf.getDescription()); + assertEquals("synonymous_variant", sf.getType()); + assertEquals("AAG", sf.getDescription()); assertEquals("var4", sf.getValue("ID")); - assertEquals("Benign", sf.getValue("clinical_significance")); - assertEquals("ID=var4;clinical_significance=Benign", sf.getAttributes()); + assertEquals("None", sf.getValue("clinical_significance")); + assertEquals("ID=var4;clinical_significance=None", sf.getAttributes()); assertEquals(1, sf.links.size()); assertEquals( - "p.Lys1Asn var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4", + "AAG var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4", sf.links.get(0)); - assertEquals(ensembl, sf.getFeatureGroup()); + assertEquals(cosmic, sf.getFeatureGroup()); + + // TTT -> TTC synonymous + sf = sfs.get(5); + assertEquals(2, sf.getBegin()); + assertEquals(2, sf.getEnd()); + assertEquals("synonymous_variant", sf.getType()); + assertEquals("TTC", sf.getDescription()); + assertEquals("var6", sf.getValue("ID")); + assertNull(sf.getValue("clinical_significance")); + assertEquals("ID=var6", sf.getAttributes()); + assertEquals(1, sf.links.size()); + assertEquals( + "TTC var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6", + sf.links.get(0)); + assertEquals(dbSnp, sf.getFeatureGroup()); - // var5 generates two distinct protein variant features - sf = sfs[3]; + // var7 generates two distinct protein variant features (two alleles) + // CCC -> CGC -> P/R + sf = sfs.get(6); assertEquals(3, sf.getBegin()); assertEquals(3, sf.getEnd()); - assertEquals("p.Pro3His", sf.getDescription()); - assertEquals("var6", sf.getValue("ID")); + assertEquals("nonsynonymous_variant", sf.getType()); + assertEquals("p.Pro3Arg", sf.getDescription()); + assertEquals("var7", sf.getValue("ID")); assertEquals("Good", sf.getValue("clinical_significance")); - assertEquals("ID=var6;clinical_significance=Good", sf.getAttributes()); + assertEquals("ID=var7;clinical_significance=Good", sf.getAttributes()); assertEquals(1, sf.links.size()); assertEquals( - "p.Pro3His var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6", + "p.Pro3Arg var7|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var7", sf.links.get(0)); assertEquals(cosmic, sf.getFeatureGroup()); - sf = sfs[4]; + // CCC -> CAC -> P/H + sf = sfs.get(7); assertEquals(3, sf.getBegin()); assertEquals(3, sf.getEnd()); - assertEquals("p.Pro3Arg", sf.getDescription()); - assertEquals("var6", sf.getValue("ID")); + assertEquals("nonsynonymous_variant", sf.getType()); + assertEquals("p.Pro3His", sf.getDescription()); + assertEquals("var7", sf.getValue("ID")); assertEquals("Good", sf.getValue("clinical_significance")); - assertEquals("ID=var6;clinical_significance=Good", sf.getAttributes()); + assertEquals("ID=var7;clinical_significance=Good", sf.getAttributes()); assertEquals(1, sf.links.size()); assertEquals( - "p.Pro3Arg var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6", + "p.Pro3His var7|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var7", sf.links.get(0)); assertEquals(cosmic, sf.getFeatureGroup()); } @@ -2255,7 +2381,7 @@ public class AlignmentUtilsTests seq1.createDatasetSequence(); Mapping mapping = new Mapping(seq1, new MapList( new int[] { 3, 6, 9, 10 }, new int[] { 1, 6 }, 1, 1)); - Map> map = new TreeMap>(); + Map> map = new TreeMap<>(); AlignmentUtils.addMappedPositions(seq1, from, mapping, map); /* @@ -2287,7 +2413,7 @@ public class AlignmentUtilsTests seq1.createDatasetSequence(); Mapping mapping = new Mapping(seq1, new MapList( new int[] { 3, 6, 9, 10 }, new int[] { 1, 6 }, 1, 1)); - Map> map = new TreeMap>(); + Map> map = new TreeMap<>(); AlignmentUtils.addMappedPositions(seq1, from, mapping, map); /* @@ -2516,4 +2642,134 @@ public class AlignmentUtilsTests assertEquals(s_as3, uas3.getSequenceAsString()); } + @Test(groups = { "Functional" }) + public void testTransferGeneLoci() + { + SequenceI from = new Sequence("transcript", + "aaacccgggTTTAAACCCGGGtttaaacccgggttt"); + SequenceI to = new Sequence("CDS", "TTTAAACCCGGG"); + MapList map = new MapList(new int[] { 1, 12 }, new int[] { 10, 21 }, 1, + 1); + + /* + * first with nothing to transfer + */ + AlignmentUtils.transferGeneLoci(from, map, to); + assertNull(to.getGeneLoci()); + + /* + * next with gene loci set on 'from' sequence + */ + int[] exons = new int[] { 100, 105, 155, 164, 210, 229 }; + MapList geneMap = new MapList(new int[] { 1, 36 }, exons, 1, 1); + from.setGeneLoci("human", "GRCh38", "7", geneMap); + AlignmentUtils.transferGeneLoci(from, map, to); + + GeneLociI toLoci = to.getGeneLoci(); + assertNotNull(toLoci); + // DBRefEntry constructor upper-cases 'source' + assertEquals("HUMAN", toLoci.getSpeciesId()); + assertEquals("GRCh38", toLoci.getAssemblyId()); + assertEquals("7", toLoci.getChromosomeId()); + + /* + * transcript 'exons' are 1-6, 7-16, 17-36 + * CDS 1:12 is transcript 10-21 + * transcript 'CDS' is 10-16, 17-21 + * which is 'gene' 158-164, 210-214 + */ + MapList toMap = toLoci.getMap(); + assertEquals(1, toMap.getFromRanges().size()); + assertEquals(2, toMap.getFromRanges().get(0).length); + assertEquals(1, toMap.getFromRanges().get(0)[0]); + assertEquals(12, toMap.getFromRanges().get(0)[1]); + assertEquals(1, toMap.getToRanges().size()); + assertEquals(4, toMap.getToRanges().get(0).length); + assertEquals(158, toMap.getToRanges().get(0)[0]); + assertEquals(164, toMap.getToRanges().get(0)[1]); + assertEquals(210, toMap.getToRanges().get(0)[2]); + assertEquals(214, toMap.getToRanges().get(0)[3]); + // or summarised as (but toString might change in future): + assertEquals("[ [1, 12] ] 1:1 to [ [158, 164, 210, 214] ]", + toMap.toString()); + + /* + * an existing value is not overridden + */ + geneMap = new MapList(new int[] { 1, 36 }, new int[] { 36, 1 }, 1, 1); + from.setGeneLoci("inhuman", "GRCh37", "6", geneMap); + AlignmentUtils.transferGeneLoci(from, map, to); + assertEquals("GRCh38", toLoci.getAssemblyId()); + assertEquals("7", toLoci.getChromosomeId()); + toMap = toLoci.getMap(); + assertEquals("[ [1, 12] ] 1:1 to [ [158, 164, 210, 214] ]", + toMap.toString()); + } + + /** + * Tests for the method that maps nucleotide to protein based on CDS features + */ + @Test(groups = "Functional") + public void testMapCdsToProtein() + { + SequenceI peptide = new Sequence("pep", "KLQ"); + + /* + * Case 1: CDS 3 times length of peptide + * NB method only checks lengths match, not translation + */ + SequenceI dna = new Sequence("dna", "AACGacgtCTCCT"); + dna.createDatasetSequence(); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 13, null)); + MapList ml = AlignmentUtils.mapCdsToProtein(dna, peptide); + assertEquals(3, ml.getFromRatio()); + assertEquals(1, ml.getToRatio()); + assertEquals("[[1, 3]]", + Arrays.deepToString(ml.getToRanges().toArray())); + assertEquals("[[1, 4], [9, 13]]", + Arrays.deepToString(ml.getFromRanges().toArray())); + + /* + * Case 2: CDS 3 times length of peptide + stop codon + * (note code does not currently check trailing codon is a stop codon) + */ + dna = new Sequence("dna", "AACGacgtCTCCTTGA"); + dna.createDatasetSequence(); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 16, null)); + ml = AlignmentUtils.mapCdsToProtein(dna, peptide); + assertEquals(3, ml.getFromRatio()); + assertEquals(1, ml.getToRatio()); + assertEquals("[[1, 3]]", + Arrays.deepToString(ml.getToRanges().toArray())); + assertEquals("[[1, 4], [9, 13]]", + Arrays.deepToString(ml.getFromRanges().toArray())); + + /* + * Case 3: CDS not 3 times length of peptide - no mapping is made + */ + dna = new Sequence("dna", "AACGacgtCTCCTTG"); + dna.createDatasetSequence(); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, null)); + ml = AlignmentUtils.mapCdsToProtein(dna, peptide); + assertNull(ml); + + /* + * Case 4: incomplete start codon corresponding to X in peptide + */ + dna = new Sequence("dna", "ACGacgtCTCCTTGG"); + dna.createDatasetSequence(); + SequenceFeature sf = new SequenceFeature("CDS", "", 1, 3, null); + sf.setPhase("2"); // skip 2 positions (AC) to start of next codon (GCT) + dna.addSequenceFeature(sf); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 8, 15, null)); + peptide = new Sequence("pep", "XLQ"); + ml = AlignmentUtils.mapCdsToProtein(dna, peptide); + assertEquals("[[2, 3]]", + Arrays.deepToString(ml.getToRanges().toArray())); + assertEquals("[[3, 3], [8, 12]]", + Arrays.deepToString(ml.getFromRanges().toArray())); + } }