X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fanalysis%2FAlignmentUtilsTests.java;h=70ae6a03bfecf2c7a3cfabf7bec98314ee11d534;hb=bc18effe68ba80213a6d03ca7e6175adc6be71d6;hp=37b93fda4bb49ecbdf8615c9b688e3a37428e1ba;hpb=9214cffc006412ed194565b82c8663df2ea641c6;p=jalview.git diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 37b93fd..70ae6a0 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -48,6 +48,7 @@ import jalview.io.DataSourceType; import jalview.io.FileFormat; import jalview.io.FileFormatI; import jalview.io.FormatAdapter; +import jalview.io.gff.SequenceOntologyI; import jalview.util.MapList; import jalview.util.MappingUtils; @@ -2039,49 +2040,54 @@ public class AlignmentUtilsTests String dbSnp = "dbSNP"; String cosmic = "COSMIC"; + /* + * NB setting "id" (as returned by Ensembl for features in JSON format); + * previously "ID" (as returned for GFF3 format) + */ SequenceFeature sf1 = new SequenceFeature("sequence_variant", "", 1, 1, 0f, ensembl); sf1.setValue("alleles", "A,G"); // AAA -> GAA -> K/E - sf1.setValue("ID", "var1.125A>G"); + sf1.setValue("id", "var1.125A>G"); SequenceFeature sf2 = new SequenceFeature("sequence_variant", "", 1, 1, 0f, dbSnp); sf2.setValue("alleles", "A,C"); // AAA -> CAA -> K/Q - sf2.setValue("ID", "var2"); + sf2.setValue("id", "var2"); sf2.setValue("clinical_significance", "Dodgy"); SequenceFeature sf3 = new SequenceFeature("sequence_variant", "", 1, 1, 0f, dbSnp); sf3.setValue("alleles", "A,T"); // AAA -> TAA -> stop codon - sf3.setValue("ID", "var3"); + sf3.setValue("id", "var3"); sf3.setValue("clinical_significance", "Bad"); SequenceFeature sf4 = new SequenceFeature("sequence_variant", "", 3, 3, 0f, cosmic); sf4.setValue("alleles", "A,G"); // AAA -> AAG synonymous - sf4.setValue("ID", "var4"); + sf4.setValue("id", "var4"); sf4.setValue("clinical_significance", "None"); SequenceFeature sf5 = new SequenceFeature("sequence_variant", "", 3, 3, 0f, ensembl); sf5.setValue("alleles", "A,T"); // AAA -> AAT -> K/N - sf5.setValue("ID", "sequence_variant:var5"); // prefix gets stripped off + sf5.setValue("id", "sequence_variant:var5"); // prefix gets stripped off sf5.setValue("clinical_significance", "Benign"); SequenceFeature sf6 = new SequenceFeature("sequence_variant", "", 6, 6, 0f, dbSnp); sf6.setValue("alleles", "T,C"); // TTT -> TTC synonymous - sf6.setValue("ID", "var6"); + sf6.setValue("id", "var6"); SequenceFeature sf7 = new SequenceFeature("sequence_variant", "", 8, 8, 0f, cosmic); sf7.setValue("alleles", "C,A,G"); // CCC -> CAC,CGC -> P/H/R - sf7.setValue("ID", "var7"); + sf7.setValue("id", "var7"); sf7.setValue("clinical_significance", "Good"); List codon1Variants = new ArrayList<>(); List codon2Variants = new ArrayList<>(); List codon3Variants = new ArrayList<>(); + List codonVariants[] = new ArrayList[3]; codonVariants[0] = codon1Variants; codonVariants[1] = codon2Variants; @@ -2147,9 +2153,9 @@ public class AlignmentUtilsTests assertEquals(1, sf.getEnd()); assertEquals("nonsynonymous_variant", sf.getType()); assertEquals("p.Lys1Asn", sf.getDescription()); - assertEquals("var5", sf.getValue("ID")); + assertEquals("var5", sf.getValue("id")); assertEquals("Benign", sf.getValue("clinical_significance")); - assertEquals("ID=var5;clinical_significance=Benign", + assertEquals("id=var5;clinical_significance=Benign", sf.getAttributes()); assertEquals(1, sf.links.size()); assertEquals( @@ -2163,9 +2169,9 @@ public class AlignmentUtilsTests assertEquals(1, sf.getEnd()); assertEquals("nonsynonymous_variant", sf.getType()); assertEquals("p.Lys1Gln", sf.getDescription()); - assertEquals("var2", sf.getValue("ID")); + assertEquals("var2", sf.getValue("id")); assertEquals("Dodgy", sf.getValue("clinical_significance")); - assertEquals("ID=var2;clinical_significance=Dodgy", sf.getAttributes()); + assertEquals("id=var2;clinical_significance=Dodgy", sf.getAttributes()); assertEquals(1, sf.links.size()); assertEquals( "p.Lys1Gln var2|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var2", @@ -2178,9 +2184,9 @@ public class AlignmentUtilsTests assertEquals(1, sf.getEnd()); assertEquals("nonsynonymous_variant", sf.getType()); assertEquals("p.Lys1Glu", sf.getDescription()); - assertEquals("var1.125A>G", sf.getValue("ID")); + assertEquals("var1.125A>G", sf.getValue("id")); assertNull(sf.getValue("clinical_significance")); - assertEquals("ID=var1.125A>G", sf.getAttributes()); + assertEquals("id=var1.125A>G", sf.getAttributes()); assertEquals(1, sf.links.size()); // link to variation is urlencoded assertEquals( @@ -2194,9 +2200,9 @@ public class AlignmentUtilsTests assertEquals(1, sf.getEnd()); assertEquals("stop_gained", sf.getType()); assertEquals("Aaa/Taa", sf.getDescription()); - assertEquals("var3", sf.getValue("ID")); + assertEquals("var3", sf.getValue("id")); assertEquals("Bad", sf.getValue("clinical_significance")); - assertEquals("ID=var3;clinical_significance=Bad", sf.getAttributes()); + assertEquals("id=var3;clinical_significance=Bad", sf.getAttributes()); assertEquals(1, sf.links.size()); assertEquals( "Aaa/Taa var3|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var3", @@ -2209,9 +2215,9 @@ public class AlignmentUtilsTests assertEquals(1, sf.getEnd()); assertEquals("synonymous_variant", sf.getType()); assertEquals("aaA/aaG", sf.getDescription()); - assertEquals("var4", sf.getValue("ID")); + assertEquals("var4", sf.getValue("id")); assertEquals("None", sf.getValue("clinical_significance")); - assertEquals("ID=var4;clinical_significance=None", sf.getAttributes()); + assertEquals("id=var4;clinical_significance=None", sf.getAttributes()); assertEquals(1, sf.links.size()); assertEquals( "aaA/aaG var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4", @@ -2224,9 +2230,9 @@ public class AlignmentUtilsTests assertEquals(2, sf.getEnd()); assertEquals("synonymous_variant", sf.getType()); assertEquals("ttT/ttC", sf.getDescription()); - assertEquals("var6", sf.getValue("ID")); + assertEquals("var6", sf.getValue("id")); assertNull(sf.getValue("clinical_significance")); - assertEquals("ID=var6", sf.getAttributes()); + assertEquals("id=var6", sf.getAttributes()); assertEquals(1, sf.links.size()); assertEquals( "ttT/ttC var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6", @@ -2240,9 +2246,9 @@ public class AlignmentUtilsTests assertEquals(3, sf.getEnd()); assertEquals("nonsynonymous_variant", sf.getType()); assertEquals("p.Pro3Arg", sf.getDescription()); - assertEquals("var7", sf.getValue("ID")); + assertEquals("var7", sf.getValue("id")); assertEquals("Good", sf.getValue("clinical_significance")); - assertEquals("ID=var7;clinical_significance=Good", sf.getAttributes()); + assertEquals("id=var7;clinical_significance=Good", sf.getAttributes()); assertEquals(1, sf.links.size()); assertEquals( "p.Pro3Arg var7|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var7", @@ -2255,9 +2261,9 @@ public class AlignmentUtilsTests assertEquals(3, sf.getEnd()); assertEquals("nonsynonymous_variant", sf.getType()); assertEquals("p.Pro3His", sf.getDescription()); - assertEquals("var7", sf.getValue("ID")); + assertEquals("var7", sf.getValue("id")); assertEquals("Good", sf.getValue("clinical_significance")); - assertEquals("ID=var7;clinical_significance=Good", sf.getAttributes()); + assertEquals("id=var7;clinical_significance=Good", sf.getAttributes()); assertEquals(1, sf.links.size()); assertEquals( "p.Pro3His var7|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var7", @@ -2683,14 +2689,14 @@ public class AlignmentUtilsTests assertEquals(2, toMap.getFromRanges().get(0).length); assertEquals(1, toMap.getFromRanges().get(0)[0]); assertEquals(12, toMap.getFromRanges().get(0)[1]); - assertEquals(1, toMap.getToRanges().size()); - assertEquals(4, toMap.getToRanges().get(0).length); + assertEquals(2, toMap.getToRanges().size()); + assertEquals(2, toMap.getToRanges().get(0).length); assertEquals(158, toMap.getToRanges().get(0)[0]); assertEquals(164, toMap.getToRanges().get(0)[1]); - assertEquals(210, toMap.getToRanges().get(0)[2]); - assertEquals(214, toMap.getToRanges().get(0)[3]); + assertEquals(210, toMap.getToRanges().get(1)[0]); + assertEquals(214, toMap.getToRanges().get(1)[1]); // or summarised as (but toString might change in future): - assertEquals("[ [1, 12] ] 1:1 to [ [158, 164, 210, 214] ]", + assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]", toMap.toString()); /* @@ -2702,7 +2708,7 @@ public class AlignmentUtilsTests assertEquals("GRCh38", toLoci.getAssemblyId()); assertEquals("7", toLoci.getChromosomeId()); toMap = toLoci.getMap(); - assertEquals("[ [1, 12] ] 1:1 to [ [158, 164, 210, 214] ]", + assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]", toMap.toString()); } @@ -2734,7 +2740,7 @@ public class AlignmentUtilsTests * Case 2: CDS 3 times length of peptide + stop codon * (note code does not currently check trailing codon is a stop codon) */ - dna = new Sequence("dna", "AACGacgtCTCCTTGA"); + dna = new Sequence("dna", "AACGacgtCTCCTCCC"); dna.createDatasetSequence(); dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 16, null)); @@ -2747,17 +2753,42 @@ public class AlignmentUtilsTests Arrays.deepToString(ml.getFromRanges().toArray())); /* - * Case 3: CDS not 3 times length of peptide - no mapping is made + * Case 3: CDS longer than 3 * peptide + stop codon - no mapping is made + */ + dna = new Sequence("dna", "AACGacgtCTCCTTGATCA"); + dna.createDatasetSequence(); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 19, null)); + ml = AlignmentUtils.mapCdsToProtein(dna, peptide); + assertNull(ml); + + /* + * Case 4: CDS shorter than 3 * peptide - no mapping is made + */ + dna = new Sequence("dna", "AACGacgtCTCC"); + dna.createDatasetSequence(); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 12, null)); + ml = AlignmentUtils.mapCdsToProtein(dna, peptide); + assertNull(ml); + + /* + * Case 5: CDS 3 times length of peptide + part codon - mapping is truncated */ dna = new Sequence("dna", "AACGacgtCTCCTTG"); dna.createDatasetSequence(); dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, null)); ml = AlignmentUtils.mapCdsToProtein(dna, peptide); - assertNull(ml); + assertEquals(3, ml.getFromRatio()); + assertEquals(1, ml.getToRatio()); + assertEquals("[[1, 3]]", + Arrays.deepToString(ml.getToRanges().toArray())); + assertEquals("[[1, 4], [9, 13]]", + Arrays.deepToString(ml.getFromRanges().toArray())); /* - * Case 4: incomplete start codon corresponding to X in peptide + * Case 6: incomplete start codon corresponding to X in peptide */ dna = new Sequence("dna", "ACGacgtCTCCTTGG"); dna.createDatasetSequence(); @@ -2772,4 +2803,152 @@ public class AlignmentUtilsTests assertEquals("[[3, 3], [8, 12]]", Arrays.deepToString(ml.getFromRanges().toArray())); } + + /** + * Tests for the method that locates the CDS sequence that has a mapping to + * the given protein. That is, given a transcript-to-peptide mapping, find the + * cds-to-peptide mapping that relates to both, and return the CDS sequence. + */ + @Test + public void testFindCdsForProtein() + { + List mappings = new ArrayList<>(); + AlignedCodonFrame acf1 = new AlignedCodonFrame(); + mappings.add(acf1); + + SequenceI dna1 = new Sequence("dna1", "cgatATcgGCTATCTATGacg"); + dna1.createDatasetSequence(); + + // NB we currently exclude STOP codon from CDS sequences + // the test would need to change if this changes in future + SequenceI cds1 = new Sequence("cds1", "ATGCTATCT"); + cds1.createDatasetSequence(); + + SequenceI pep1 = new Sequence("pep1", "MLS"); + pep1.createDatasetSequence(); + List seqMappings = new ArrayList<>(); + MapList mapList = new MapList( + new int[] + { 5, 6, 9, 15 }, new int[] { 1, 3 }, 3, 1); + Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList); + + // add dna to peptide mapping + seqMappings.add(acf1); + acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), + mapList); + + /* + * first case - no dna-to-CDS mapping exists - search fails + */ + SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1, + seqMappings, dnaToPeptide); + assertNull(seq); + + /* + * second case - CDS-to-peptide mapping exists but no dna-to-CDS + * - search fails + */ + // todo this test fails if the mapping is added to acf1, not acf2 + // need to tidy up use of lists of mappings in AlignedCodonFrame + AlignedCodonFrame acf2 = new AlignedCodonFrame(); + mappings.add(acf2); + MapList cdsToPeptideMapping = new MapList(new int[] + { 1, 9 }, new int[] { 1, 3 }, 3, 1); + acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(), + cdsToPeptideMapping); + assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings, + dnaToPeptide)); + + /* + * third case - add dna-to-CDS mapping - CDS is now found! + */ + MapList dnaToCdsMapping = new MapList(new int[] { 5, 6, 9, 15 }, + new int[] + { 1, 9 }, 1, 1); + acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(), + dnaToCdsMapping); + seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings, + dnaToPeptide); + assertSame(seq, cds1.getDatasetSequence()); + } + + /** + * Tests for the method that locates the CDS sequence that has a mapping to + * the given protein. That is, given a transcript-to-peptide mapping, find the + * cds-to-peptide mapping that relates to both, and return the CDS sequence. + * This test is for the case where transcript and CDS are the same length. + */ + @Test + public void testFindCdsForProtein_noUTR() + { + List mappings = new ArrayList<>(); + AlignedCodonFrame acf1 = new AlignedCodonFrame(); + mappings.add(acf1); + + SequenceI dna1 = new Sequence("dna1", "ATGCTATCTTAA"); + dna1.createDatasetSequence(); + + // NB we currently exclude STOP codon from CDS sequences + // the test would need to change if this changes in future + SequenceI cds1 = new Sequence("cds1", "ATGCTATCT"); + cds1.createDatasetSequence(); + + SequenceI pep1 = new Sequence("pep1", "MLS"); + pep1.createDatasetSequence(); + List seqMappings = new ArrayList<>(); + MapList mapList = new MapList( + new int[] + { 1, 9 }, new int[] { 1, 3 }, 3, 1); + Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList); + + // add dna to peptide mapping + seqMappings.add(acf1); + acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), + mapList); + + /* + * first case - transcript lacks CDS features - it appears to be + * the CDS sequence and is returned + */ + SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1, + seqMappings, dnaToPeptide); + assertSame(seq, dna1.getDatasetSequence()); + + /* + * second case - transcript has CDS feature - this means it is + * not returned as a match for CDS (CDS sequences don't have CDS features) + */ + dna1.addSequenceFeature( + new SequenceFeature(SequenceOntologyI.CDS, "cds", 1, 12, null)); + seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings, + dnaToPeptide); + assertNull(seq); + + /* + * third case - CDS-to-peptide mapping exists but no dna-to-CDS + * - search fails + */ + // todo this test fails if the mapping is added to acf1, not acf2 + // need to tidy up use of lists of mappings in AlignedCodonFrame + AlignedCodonFrame acf2 = new AlignedCodonFrame(); + mappings.add(acf2); + MapList cdsToPeptideMapping = new MapList(new int[] + { 1, 9 }, new int[] { 1, 3 }, 3, 1); + acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(), + cdsToPeptideMapping); + assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings, + dnaToPeptide)); + + /* + * fourth case - add dna-to-CDS mapping - CDS is now found! + */ + MapList dnaToCdsMapping = new MapList(new int[] { 1, 9 }, + new int[] + { 1, 9 }, 1, 1); + acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(), + dnaToCdsMapping); + seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings, + dnaToPeptide); + assertSame(seq, cds1.getDatasetSequence()); + } }