X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fanalysis%2FAlignmentUtilsTests.java;h=14559dde51668411008b52d0775c2daf7e9c9a05;hb=cbeb7ad59d51b468c54ca3db2a2a7693060a2509;hp=06b51e6fd3e90f7064885338c18ee6ab6bf117c5;hpb=c371d7d1dcac5bd74a3f693c656d8cff3168a57f;p=jalview.git diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 06b51e6..14559dd 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -34,6 +34,7 @@ import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.Annotation; import jalview.datamodel.DBRefEntry; +import jalview.datamodel.GeneLociI; import jalview.datamodel.Mapping; import jalview.datamodel.SearchResultMatchI; import jalview.datamodel.SearchResultsI; @@ -47,6 +48,7 @@ import jalview.io.DataSourceType; import jalview.io.FileFormat; import jalview.io.FileFormatI; import jalview.io.FormatAdapter; +import jalview.io.gff.SequenceOntologyI; import jalview.util.MapList; import jalview.util.MappingUtils; @@ -63,6 +65,8 @@ import org.testng.annotations.Test; public class AlignmentUtilsTests { + private static Sequence ts = new Sequence("short", + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm"); @BeforeClass(alwaysRun = true) public void setUpJvOptionPane() @@ -71,9 +75,6 @@ public class AlignmentUtilsTests JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION); } - public static Sequence ts = new Sequence("short", - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm"); - @Test(groups = { "Functional" }) public void testExpandContext() { @@ -263,14 +264,14 @@ public class AlignmentUtilsTests @Test(groups = { "Functional" }) public void testMapProteinAlignmentToCdna_noXrefs() throws IOException { - List protseqs = new ArrayList(); + List protseqs = new ArrayList<>(); protseqs.add(new Sequence("UNIPROT|V12345", "EIQ")); protseqs.add(new Sequence("UNIPROT|V12346", "EIQ")); protseqs.add(new Sequence("UNIPROT|V12347", "SAR")); AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3])); protein.setDataset(null); - List dnaseqs = new ArrayList(); + List dnaseqs = new ArrayList<>(); dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR dnaseqs.add(new Sequence("EMBL|A22222", "GAGATACAA")); // = EIQ dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ @@ -507,7 +508,7 @@ public class AlignmentUtilsTests acf.addMap(dna1.getDatasetSequence(), prot1.getDatasetSequence(), map); acf.addMap(dna2.getDatasetSequence(), prot2.getDatasetSequence(), map); acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map); - ArrayList acfs = new ArrayList(); + ArrayList acfs = new ArrayList<>(); acfs.add(acf); protein.setCodonFrames(acfs); @@ -605,14 +606,14 @@ public class AlignmentUtilsTests public void testMapProteinAlignmentToCdna_withStartAndStopCodons() throws IOException { - List protseqs = new ArrayList(); + List protseqs = new ArrayList<>(); protseqs.add(new Sequence("UNIPROT|V12345", "EIQ")); protseqs.add(new Sequence("UNIPROT|V12346", "EIQ")); protseqs.add(new Sequence("UNIPROT|V12347", "SAR")); AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3])); protein.setDataset(null); - List dnaseqs = new ArrayList(); + List dnaseqs = new ArrayList<>(); // start + SAR: dnaseqs.add(new Sequence("EMBL|A11111", "ATGTCAGCACGC")); // = EIQ + stop @@ -697,14 +698,14 @@ public class AlignmentUtilsTests @Test(groups = { "Functional" }) public void testMapProteinAlignmentToCdna_withXrefs() throws IOException { - List protseqs = new ArrayList(); + List protseqs = new ArrayList<>(); protseqs.add(new Sequence("UNIPROT|V12345", "EIQ")); protseqs.add(new Sequence("UNIPROT|V12346", "EIQ")); protseqs.add(new Sequence("UNIPROT|V12347", "SAR")); AlignmentI protein = new Alignment(protseqs.toArray(new SequenceI[3])); protein.setDataset(null); - List dnaseqs = new ArrayList(); + List dnaseqs = new ArrayList<>(); dnaseqs.add(new Sequence("EMBL|A11111", "TCAGCACGC")); // = SAR dnaseqs.add(new Sequence("EMBL|A22222", "ATGGAGATACAA")); // = start + EIQ dnaseqs.add(new Sequence("EMBL|A33333", "GAAATCCAG")); // = EIQ @@ -774,14 +775,14 @@ public class AlignmentUtilsTests public void testMapProteinAlignmentToCdna_prioritiseXrefs() throws IOException { - List protseqs = new ArrayList(); + List protseqs = new ArrayList<>(); protseqs.add(new Sequence("UNIPROT|V12345", "EIQ")); protseqs.add(new Sequence("UNIPROT|V12346", "EIQ")); AlignmentI protein = new Alignment( protseqs.toArray(new SequenceI[protseqs.size()])); protein.setDataset(null); - List dnaseqs = new ArrayList(); + List dnaseqs = new ArrayList<>(); dnaseqs.add(new Sequence("EMBL|A11111", "GAAATCCAG")); // = EIQ dnaseqs.add(new Sequence("EMBL|A22222", "GAAATTCAG")); // = EIQ AlignmentI cdna = new Alignment(dnaseqs.toArray(new SequenceI[dnaseqs @@ -848,8 +849,8 @@ public class AlignmentUtilsTests al.addAnnotation(ann4); // Temp for seq1 al.addAnnotation(ann5); // Temp for seq2 al.addAnnotation(ann6); // Temp for no sequence - List types = new ArrayList(); - List scope = new ArrayList(); + List types = new ArrayList<>(); + List scope = new ArrayList<>(); /* * Set all sequence related Structure to hidden (ann1, ann2) @@ -1044,14 +1045,18 @@ public class AlignmentUtilsTests dna.addCodonFrame(acf); /* - * In this case, mappings originally came from matching Uniprot accessions - so need an xref on dna involving those regions. These are normally constructed from CDS annotation + * In this case, mappings originally came from matching Uniprot accessions + * - so need an xref on dna involving those regions. + * These are normally constructed from CDS annotation */ DBRefEntry dna1xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep1", new Mapping(mapfordna1)); - dna1.getDatasetSequence().addDBRef(dna1xref); + dna1.addDBRef(dna1xref); + assertEquals(2, dna1.getDBRefs().length); // to self and to pep1 DBRefEntry dna2xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep2", new Mapping(mapfordna2)); - dna2.getDatasetSequence().addDBRef(dna2xref); + dna2.addDBRef(dna2xref); + assertEquals(2, dna2.getDBRefs().length); // to self and to pep2 /* * execute method under test: @@ -1106,6 +1111,38 @@ public class AlignmentUtilsTests assertEquals(cdsMapping.getInverse(), dbref.getMap().getMap()); /* + * verify cDNA has added a dbref with mapping to CDS + */ + assertEquals(3, dna1.getDBRefs().length); + DBRefEntry dbRefEntry = dna1.getDBRefs()[2]; + assertSame(cds1Dss, dbRefEntry.getMap().getTo()); + MapList dnaToCdsMapping = new MapList(new int[] { 4, 6, 10, 12 }, + new int[] { 1, 6 }, 1, 1); + assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap()); + assertEquals(3, dna2.getDBRefs().length); + dbRefEntry = dna2.getDBRefs()[2]; + assertSame(cds2Dss, dbRefEntry.getMap().getTo()); + dnaToCdsMapping = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, + new int[] { 1, 9 }, 1, 1); + assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap()); + + /* + * verify CDS has added a dbref with mapping to cDNA + */ + assertEquals(2, cds1Dss.getDBRefs().length); + dbRefEntry = cds1Dss.getDBRefs()[1]; + assertSame(dna1.getDatasetSequence(), dbRefEntry.getMap().getTo()); + MapList cdsToDnaMapping = new MapList(new int[] { 1, 6 }, new int[] { + 4, 6, 10, 12 }, 1, 1); + assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap()); + assertEquals(2, cds2Dss.getDBRefs().length); + dbRefEntry = cds2Dss.getDBRefs()[1]; + assertSame(dna2.getDatasetSequence(), dbRefEntry.getMap().getTo()); + cdsToDnaMapping = new MapList(new int[] { 1, 9 }, new int[] { 1, 3, 7, + 9, 13, 15 }, 1, 1); + assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap()); + + /* * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide * the mappings are on the shared alignment dataset * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep) @@ -1747,7 +1784,7 @@ public class AlignmentUtilsTests map = new MapList(new int[] { 9, 11 }, new int[] { 2, 2 }, 3, 1); acf.addMap(dna3.getDatasetSequence(), prot3.getDatasetSequence(), map); - ArrayList acfs = new ArrayList(); + ArrayList acfs = new ArrayList<>(); acfs.add(acf); protein.setCodonFrames(acfs); @@ -1896,6 +1933,7 @@ public class AlignmentUtilsTests sf6.setValue("alleles", "g, a"); // should force to upper-case sf6.setValue("ID", "sequence_variant:rs758803216"); dna.addSequenceFeature(sf6); + SequenceFeature sf7 = new SequenceFeature("sequence_variant", "", 15, 15, 0f, null); sf7.setValue("alleles", "A, T"); @@ -1985,6 +2023,7 @@ public class AlignmentUtilsTests * variants: * GAA -> E source: Ensembl * CAA -> Q source: dbSNP + * TAA -> STOP source: dnSNP * AAG synonymous source: COSMIC * AAT -> N source: Ensembl * ...TTC synonymous source: dbSNP @@ -2000,39 +2039,55 @@ public class AlignmentUtilsTests String ensembl = "Ensembl"; String dbSnp = "dbSNP"; String cosmic = "COSMIC"; + + /* + * NB setting "id" (as returned by Ensembl for features in JSON format); + * previously "ID" (as returned for GFF3 format) + */ SequenceFeature sf1 = new SequenceFeature("sequence_variant", "", 1, 1, 0f, ensembl); - sf1.setValue("alleles", "A,G"); // GAA -> E - sf1.setValue("ID", "var1.125A>G"); + sf1.setValue("alleles", "A,G"); // AAA -> GAA -> K/E + sf1.setValue("id", "var1.125A>G"); + SequenceFeature sf2 = new SequenceFeature("sequence_variant", "", 1, 1, 0f, dbSnp); - sf2.setValue("alleles", "A,C"); // CAA -> Q - sf2.setValue("ID", "var2"); + sf2.setValue("alleles", "A,C"); // AAA -> CAA -> K/Q + sf2.setValue("id", "var2"); sf2.setValue("clinical_significance", "Dodgy"); - SequenceFeature sf3 = new SequenceFeature("sequence_variant", "", 3, 3, - 0f, cosmic); - sf3.setValue("alleles", "A,G"); // synonymous - sf3.setValue("ID", "var3"); - sf3.setValue("clinical_significance", "None"); + + SequenceFeature sf3 = new SequenceFeature("sequence_variant", "", 1, 1, + 0f, dbSnp); + sf3.setValue("alleles", "A,T"); // AAA -> TAA -> stop codon + sf3.setValue("id", "var3"); + sf3.setValue("clinical_significance", "Bad"); + SequenceFeature sf4 = new SequenceFeature("sequence_variant", "", 3, 3, + 0f, cosmic); + sf4.setValue("alleles", "A,G"); // AAA -> AAG synonymous + sf4.setValue("id", "var4"); + sf4.setValue("clinical_significance", "None"); + + SequenceFeature sf5 = new SequenceFeature("sequence_variant", "", 3, 3, 0f, ensembl); - sf4.setValue("alleles", "A,T"); // AAT -> N - sf4.setValue("ID", "sequence_variant:var4"); // prefix gets stripped off - sf4.setValue("clinical_significance", "Benign"); - SequenceFeature sf5 = new SequenceFeature("sequence_variant", "", 6, 6, + sf5.setValue("alleles", "A,T"); // AAA -> AAT -> K/N + sf5.setValue("id", "sequence_variant:var5"); // prefix gets stripped off + sf5.setValue("clinical_significance", "Benign"); + + SequenceFeature sf6 = new SequenceFeature("sequence_variant", "", 6, 6, 0f, dbSnp); - sf5.setValue("alleles", "T,C"); // synonymous - sf5.setValue("ID", "var5"); - sf5.setValue("clinical_significance", "Bad"); - SequenceFeature sf6 = new SequenceFeature("sequence_variant", "", 8, 8, + sf6.setValue("alleles", "T,C"); // TTT -> TTC synonymous + sf6.setValue("id", "var6"); + + SequenceFeature sf7 = new SequenceFeature("sequence_variant", "", 8, 8, 0f, cosmic); - sf6.setValue("alleles", "C,A,G"); // CAC,CGC -> H,R - sf6.setValue("ID", "var6"); - sf6.setValue("clinical_significance", "Good"); + sf7.setValue("alleles", "C,A,G"); // CCC -> CAC,CGC -> P/H/R + sf7.setValue("id", "var7"); + sf7.setValue("clinical_significance", "Good"); + + List codon1Variants = new ArrayList<>(); + List codon2Variants = new ArrayList<>(); + List codon3Variants = new ArrayList<>(); - List codon1Variants = new ArrayList(); - List codon2Variants = new ArrayList(); - List codon3Variants = new ArrayList(); List codonVariants[] = new ArrayList[3]; codonVariants[0] = codon1Variants; codonVariants[1] = codon2Variants; @@ -2043,10 +2098,11 @@ public class AlignmentUtilsTests */ codon1Variants.add(new DnaVariant("A", sf1)); codon1Variants.add(new DnaVariant("A", sf2)); + codon1Variants.add(new DnaVariant("A", sf3)); codon2Variants.add(new DnaVariant("A")); - codon2Variants.add(new DnaVariant("A")); - codon3Variants.add(new DnaVariant("A", sf3)); + // codon2Variants.add(new DnaVariant("A")); codon3Variants.add(new DnaVariant("A", sf4)); + codon3Variants.add(new DnaVariant("A", sf5)); AlignmentUtils.computePeptideVariants(peptide, 1, codonVariants); /* @@ -2057,7 +2113,7 @@ public class AlignmentUtilsTests codon3Variants.clear(); codon1Variants.add(new DnaVariant("T")); codon2Variants.add(new DnaVariant("T")); - codon3Variants.add(new DnaVariant("T", sf5)); + codon3Variants.add(new DnaVariant("T", sf6)); AlignmentUtils.computePeptideVariants(peptide, 2, codonVariants); /* @@ -2067,7 +2123,7 @@ public class AlignmentUtilsTests codon2Variants.clear(); codon3Variants.clear(); codon1Variants.add(new DnaVariant("C")); - codon2Variants.add(new DnaVariant("C", sf6)); + codon2Variants.add(new DnaVariant("C", sf7)); codon3Variants.add(new DnaVariant("C")); AlignmentUtils.computePeptideVariants(peptide, 3, codonVariants); @@ -2075,52 +2131,62 @@ public class AlignmentUtilsTests * verify added sequence features for * var1 K -> E Ensembl * var2 K -> Q dbSNP - * var4 K -> N Ensembl - * var6 P -> H COSMIC - * var6 P -> R COSMIC + * var3 K -> stop + * var4 synonymous + * var5 K -> N Ensembl + * var6 synonymous + * var7 P -> H COSMIC + * var8 P -> R COSMIC */ List sfs = peptide.getSequenceFeatures(); SequenceFeatures.sortFeatures(sfs, true); - assertEquals(5, sfs.size()); + assertEquals(8, sfs.size()); /* * features are sorted by start position ascending, but in no * particular order where start positions match; asserts here * simply match the data returned (the order is not important) */ + // AAA -> AAT -> K/N SequenceFeature sf = sfs.get(0); assertEquals(1, sf.getBegin()); assertEquals(1, sf.getEnd()); + assertEquals("nonsynonymous_variant", sf.getType()); assertEquals("p.Lys1Asn", sf.getDescription()); - assertEquals("var4", sf.getValue("ID")); + assertEquals("var5", sf.getValue("id")); assertEquals("Benign", sf.getValue("clinical_significance")); - assertEquals("ID=var4;clinical_significance=Benign", sf.getAttributes()); + assertEquals("id=var5;clinical_significance=Benign", + sf.getAttributes()); assertEquals(1, sf.links.size()); assertEquals( - "p.Lys1Asn var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4", + "p.Lys1Asn var5|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var5", sf.links.get(0)); assertEquals(ensembl, sf.getFeatureGroup()); + // AAA -> CAA -> K/Q sf = sfs.get(1); assertEquals(1, sf.getBegin()); assertEquals(1, sf.getEnd()); + assertEquals("nonsynonymous_variant", sf.getType()); assertEquals("p.Lys1Gln", sf.getDescription()); - assertEquals("var2", sf.getValue("ID")); + assertEquals("var2", sf.getValue("id")); assertEquals("Dodgy", sf.getValue("clinical_significance")); - assertEquals("ID=var2;clinical_significance=Dodgy", sf.getAttributes()); + assertEquals("id=var2;clinical_significance=Dodgy", sf.getAttributes()); assertEquals(1, sf.links.size()); assertEquals( "p.Lys1Gln var2|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var2", sf.links.get(0)); assertEquals(dbSnp, sf.getFeatureGroup()); + // AAA -> GAA -> K/E sf = sfs.get(2); assertEquals(1, sf.getBegin()); assertEquals(1, sf.getEnd()); + assertEquals("nonsynonymous_variant", sf.getType()); assertEquals("p.Lys1Glu", sf.getDescription()); - assertEquals("var1.125A>G", sf.getValue("ID")); + assertEquals("var1.125A>G", sf.getValue("id")); assertNull(sf.getValue("clinical_significance")); - assertEquals("ID=var1.125A>G", sf.getAttributes()); + assertEquals("id=var1.125A>G", sf.getAttributes()); assertEquals(1, sf.links.size()); // link to variation is urlencoded assertEquals( @@ -2128,30 +2194,79 @@ public class AlignmentUtilsTests sf.links.get(0)); assertEquals(ensembl, sf.getFeatureGroup()); + // AAA -> TAA -> stop codon sf = sfs.get(3); + assertEquals(1, sf.getBegin()); + assertEquals(1, sf.getEnd()); + assertEquals("stop_gained", sf.getType()); + assertEquals("Aaa/Taa", sf.getDescription()); + assertEquals("var3", sf.getValue("id")); + assertEquals("Bad", sf.getValue("clinical_significance")); + assertEquals("id=var3;clinical_significance=Bad", sf.getAttributes()); + assertEquals(1, sf.links.size()); + assertEquals( + "Aaa/Taa var3|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var3", + sf.links.get(0)); + assertEquals(dbSnp, sf.getFeatureGroup()); + + // AAA -> AAG synonymous + sf = sfs.get(4); + assertEquals(1, sf.getBegin()); + assertEquals(1, sf.getEnd()); + assertEquals("synonymous_variant", sf.getType()); + assertEquals("aaA/aaG", sf.getDescription()); + assertEquals("var4", sf.getValue("id")); + assertEquals("None", sf.getValue("clinical_significance")); + assertEquals("id=var4;clinical_significance=None", sf.getAttributes()); + assertEquals(1, sf.links.size()); + assertEquals( + "aaA/aaG var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4", + sf.links.get(0)); + assertEquals(cosmic, sf.getFeatureGroup()); + + // TTT -> TTC synonymous + sf = sfs.get(5); + assertEquals(2, sf.getBegin()); + assertEquals(2, sf.getEnd()); + assertEquals("synonymous_variant", sf.getType()); + assertEquals("ttT/ttC", sf.getDescription()); + assertEquals("var6", sf.getValue("id")); + assertNull(sf.getValue("clinical_significance")); + assertEquals("id=var6", sf.getAttributes()); + assertEquals(1, sf.links.size()); + assertEquals( + "ttT/ttC var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6", + sf.links.get(0)); + assertEquals(dbSnp, sf.getFeatureGroup()); + + // var7 generates two distinct protein variant features (two alleles) + // CCC -> CGC -> P/R + sf = sfs.get(6); assertEquals(3, sf.getBegin()); assertEquals(3, sf.getEnd()); + assertEquals("nonsynonymous_variant", sf.getType()); assertEquals("p.Pro3Arg", sf.getDescription()); - assertEquals("var6", sf.getValue("ID")); + assertEquals("var7", sf.getValue("id")); assertEquals("Good", sf.getValue("clinical_significance")); - assertEquals("ID=var6;clinical_significance=Good", sf.getAttributes()); + assertEquals("id=var7;clinical_significance=Good", sf.getAttributes()); assertEquals(1, sf.links.size()); assertEquals( - "p.Pro3Arg var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6", + "p.Pro3Arg var7|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var7", sf.links.get(0)); assertEquals(cosmic, sf.getFeatureGroup()); - // var5 generates two distinct protein variant features - sf = sfs.get(4); + // CCC -> CAC -> P/H + sf = sfs.get(7); assertEquals(3, sf.getBegin()); assertEquals(3, sf.getEnd()); + assertEquals("nonsynonymous_variant", sf.getType()); assertEquals("p.Pro3His", sf.getDescription()); - assertEquals("var6", sf.getValue("ID")); + assertEquals("var7", sf.getValue("id")); assertEquals("Good", sf.getValue("clinical_significance")); - assertEquals("ID=var6;clinical_significance=Good", sf.getAttributes()); + assertEquals("id=var7;clinical_significance=Good", sf.getAttributes()); assertEquals(1, sf.links.size()); assertEquals( - "p.Pro3His var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6", + "p.Pro3His var7|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var7", sf.links.get(0)); assertEquals(cosmic, sf.getFeatureGroup()); } @@ -2272,7 +2387,7 @@ public class AlignmentUtilsTests seq1.createDatasetSequence(); Mapping mapping = new Mapping(seq1, new MapList( new int[] { 3, 6, 9, 10 }, new int[] { 1, 6 }, 1, 1)); - Map> map = new TreeMap>(); + Map> map = new TreeMap<>(); AlignmentUtils.addMappedPositions(seq1, from, mapping, map); /* @@ -2304,7 +2419,7 @@ public class AlignmentUtilsTests seq1.createDatasetSequence(); Mapping mapping = new Mapping(seq1, new MapList( new int[] { 3, 6, 9, 10 }, new int[] { 1, 6 }, 1, 1)); - Map> map = new TreeMap>(); + Map> map = new TreeMap<>(); AlignmentUtils.addMappedPositions(seq1, from, mapping, map); /* @@ -2533,6 +2648,70 @@ public class AlignmentUtilsTests assertEquals(s_as3, uas3.getSequenceAsString()); } + @Test(groups = { "Functional" }) + public void testTransferGeneLoci() + { + SequenceI from = new Sequence("transcript", + "aaacccgggTTTAAACCCGGGtttaaacccgggttt"); + SequenceI to = new Sequence("CDS", "TTTAAACCCGGG"); + MapList map = new MapList(new int[] { 1, 12 }, new int[] { 10, 21 }, 1, + 1); + + /* + * first with nothing to transfer + */ + AlignmentUtils.transferGeneLoci(from, map, to); + assertNull(to.getGeneLoci()); + + /* + * next with gene loci set on 'from' sequence + */ + int[] exons = new int[] { 100, 105, 155, 164, 210, 229 }; + MapList geneMap = new MapList(new int[] { 1, 36 }, exons, 1, 1); + from.setGeneLoci("human", "GRCh38", "7", geneMap); + AlignmentUtils.transferGeneLoci(from, map, to); + + GeneLociI toLoci = to.getGeneLoci(); + assertNotNull(toLoci); + // DBRefEntry constructor upper-cases 'source' + assertEquals("HUMAN", toLoci.getSpeciesId()); + assertEquals("GRCh38", toLoci.getAssemblyId()); + assertEquals("7", toLoci.getChromosomeId()); + + /* + * transcript 'exons' are 1-6, 7-16, 17-36 + * CDS 1:12 is transcript 10-21 + * transcript 'CDS' is 10-16, 17-21 + * which is 'gene' 158-164, 210-214 + */ + MapList toMap = toLoci.getMapping(); + assertEquals(1, toMap.getFromRanges().size()); + assertEquals(2, toMap.getFromRanges().get(0).length); + assertEquals(1, toMap.getFromRanges().get(0)[0]); + assertEquals(12, toMap.getFromRanges().get(0)[1]); + assertEquals(2, toMap.getToRanges().size()); + assertEquals(2, toMap.getToRanges().get(0).length); + assertEquals(158, toMap.getToRanges().get(0)[0]); + assertEquals(164, toMap.getToRanges().get(0)[1]); + assertEquals(210, toMap.getToRanges().get(1)[0]); + assertEquals(214, toMap.getToRanges().get(1)[1]); + // or summarised as (but toString might change in future): + assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]", + toMap.toString()); + + /* + * an existing value is not overridden + */ + geneMap = new MapList(new int[] { 1, 36 }, new int[] { 36, 1 }, 1, 1); + from.setGeneLoci("inhuman", "GRCh37", "6", geneMap); + AlignmentUtils.transferGeneLoci(from, map, to); + assertEquals("GRCh38", toLoci.getAssemblyId()); + assertEquals("7", toLoci.getChromosomeId()); + toMap = toLoci.getMapping(); + assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]", + toMap.toString()); + } + /** * Tests for the method that maps nucleotide to protein based on CDS features */ @@ -2561,7 +2740,7 @@ public class AlignmentUtilsTests * Case 2: CDS 3 times length of peptide + stop codon * (note code does not currently check trailing codon is a stop codon) */ - dna = new Sequence("dna", "AACGacgtCTCCTTGA"); + dna = new Sequence("dna", "AACGacgtCTCCTCCC"); dna.createDatasetSequence(); dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 16, null)); @@ -2574,17 +2753,42 @@ public class AlignmentUtilsTests Arrays.deepToString(ml.getFromRanges().toArray())); /* - * Case 3: CDS not 3 times length of peptide - no mapping is made + * Case 3: CDS longer than 3 * peptide + stop codon - no mapping is made + */ + dna = new Sequence("dna", "AACGacgtCTCCTTGATCA"); + dna.createDatasetSequence(); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 19, null)); + ml = AlignmentUtils.mapCdsToProtein(dna, peptide); + assertNull(ml); + + /* + * Case 4: CDS shorter than 3 * peptide - no mapping is made + */ + dna = new Sequence("dna", "AACGacgtCTCC"); + dna.createDatasetSequence(); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 12, null)); + ml = AlignmentUtils.mapCdsToProtein(dna, peptide); + assertNull(ml); + + /* + * Case 5: CDS 3 times length of peptide + part codon - mapping is truncated */ dna = new Sequence("dna", "AACGacgtCTCCTTG"); dna.createDatasetSequence(); dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, null)); ml = AlignmentUtils.mapCdsToProtein(dna, peptide); - assertNull(ml); + assertEquals(3, ml.getFromRatio()); + assertEquals(1, ml.getToRatio()); + assertEquals("[[1, 3]]", + Arrays.deepToString(ml.getToRanges().toArray())); + assertEquals("[[1, 4], [9, 13]]", + Arrays.deepToString(ml.getFromRanges().toArray())); /* - * Case 4: incomplete start codon corresponding to X in peptide + * Case 6: incomplete start codon corresponding to X in peptide */ dna = new Sequence("dna", "ACGacgtCTCCTTGG"); dna.createDatasetSequence(); @@ -2600,4 +2804,151 @@ public class AlignmentUtilsTests Arrays.deepToString(ml.getFromRanges().toArray())); } + /** + * Tests for the method that locates the CDS sequence that has a mapping to + * the given protein. That is, given a transcript-to-peptide mapping, find the + * cds-to-peptide mapping that relates to both, and return the CDS sequence. + */ + @Test + public void testFindCdsForProtein() + { + List mappings = new ArrayList<>(); + AlignedCodonFrame acf1 = new AlignedCodonFrame(); + mappings.add(acf1); + + SequenceI dna1 = new Sequence("dna1", "cgatATcgGCTATCTATGacg"); + dna1.createDatasetSequence(); + + // NB we currently exclude STOP codon from CDS sequences + // the test would need to change if this changes in future + SequenceI cds1 = new Sequence("cds1", "ATGCTATCT"); + cds1.createDatasetSequence(); + + SequenceI pep1 = new Sequence("pep1", "MLS"); + pep1.createDatasetSequence(); + List seqMappings = new ArrayList<>(); + MapList mapList = new MapList( + new int[] + { 5, 6, 9, 15 }, new int[] { 1, 3 }, 3, 1); + Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList); + + // add dna to peptide mapping + seqMappings.add(acf1); + acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), + mapList); + + /* + * first case - no dna-to-CDS mapping exists - search fails + */ + SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1, + seqMappings, dnaToPeptide); + assertNull(seq); + + /* + * second case - CDS-to-peptide mapping exists but no dna-to-CDS + * - search fails + */ + // todo this test fails if the mapping is added to acf1, not acf2 + // need to tidy up use of lists of mappings in AlignedCodonFrame + AlignedCodonFrame acf2 = new AlignedCodonFrame(); + mappings.add(acf2); + MapList cdsToPeptideMapping = new MapList(new int[] + { 1, 9 }, new int[] { 1, 3 }, 3, 1); + acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(), + cdsToPeptideMapping); + assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings, + dnaToPeptide)); + + /* + * third case - add dna-to-CDS mapping - CDS is now found! + */ + MapList dnaToCdsMapping = new MapList(new int[] { 5, 6, 9, 15 }, + new int[] + { 1, 9 }, 1, 1); + acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(), + dnaToCdsMapping); + seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings, + dnaToPeptide); + assertSame(seq, cds1.getDatasetSequence()); + } + + /** + * Tests for the method that locates the CDS sequence that has a mapping to + * the given protein. That is, given a transcript-to-peptide mapping, find the + * cds-to-peptide mapping that relates to both, and return the CDS sequence. + * This test is for the case where transcript and CDS are the same length. + */ + @Test + public void testFindCdsForProtein_noUTR() + { + List mappings = new ArrayList<>(); + AlignedCodonFrame acf1 = new AlignedCodonFrame(); + mappings.add(acf1); + + SequenceI dna1 = new Sequence("dna1", "ATGCTATCTTAA"); + dna1.createDatasetSequence(); + + // NB we currently exclude STOP codon from CDS sequences + // the test would need to change if this changes in future + SequenceI cds1 = new Sequence("cds1", "ATGCTATCT"); + cds1.createDatasetSequence(); + + SequenceI pep1 = new Sequence("pep1", "MLS"); + pep1.createDatasetSequence(); + List seqMappings = new ArrayList<>(); + MapList mapList = new MapList( + new int[] + { 1, 9 }, new int[] { 1, 3 }, 3, 1); + Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList); + + // add dna to peptide mapping + seqMappings.add(acf1); + acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), + mapList); + + /* + * first case - transcript lacks CDS features - it appears to be + * the CDS sequence and is returned + */ + SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1, + seqMappings, dnaToPeptide); + assertSame(seq, dna1.getDatasetSequence()); + + /* + * second case - transcript has CDS feature - this means it is + * not returned as a match for CDS (CDS sequences don't have CDS features) + */ + dna1.addSequenceFeature( + new SequenceFeature(SequenceOntologyI.CDS, "cds", 1, 12, null)); + seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings, + dnaToPeptide); + assertNull(seq); + + /* + * third case - CDS-to-peptide mapping exists but no dna-to-CDS + * - search fails + */ + // todo this test fails if the mapping is added to acf1, not acf2 + // need to tidy up use of lists of mappings in AlignedCodonFrame + AlignedCodonFrame acf2 = new AlignedCodonFrame(); + mappings.add(acf2); + MapList cdsToPeptideMapping = new MapList(new int[] + { 1, 9 }, new int[] { 1, 3 }, 3, 1); + acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(), + cdsToPeptideMapping); + assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings, + dnaToPeptide)); + + /* + * fourth case - add dna-to-CDS mapping - CDS is now found! + */ + MapList dnaToCdsMapping = new MapList(new int[] { 1, 9 }, + new int[] + { 1, 9 }, 1, 1); + acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(), + dnaToCdsMapping); + seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings, + dnaToPeptide); + assertSame(seq, cds1.getDatasetSequence()); + } }