X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fanalysis%2FAlignmentUtilsTests.java;h=a7a7d34fd5d09c3b7292384467b135b05fa8702a;hb=b3e349b56ecd2c487a616c6c52288ab7c8f84654;hp=2b45eeecc362cd900cc069bc9a7df43e5d6f97c8;hpb=241bd0223b016b5ad5ec78520310a8de32842722;p=jalview.git diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 2b45eee..a7a7d34 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -48,6 +48,7 @@ import jalview.io.DataSourceType; import jalview.io.FileFormat; import jalview.io.FileFormatI; import jalview.io.FormatAdapter; +import jalview.io.gff.SequenceOntologyI; import jalview.util.MapList; import jalview.util.MappingUtils; @@ -2082,6 +2083,7 @@ public class AlignmentUtilsTests List codon1Variants = new ArrayList<>(); List codon2Variants = new ArrayList<>(); List codon3Variants = new ArrayList<>(); + List codonVariants[] = new ArrayList[3]; codonVariants[0] = codon1Variants; codonVariants[1] = codon2Variants; @@ -2193,13 +2195,13 @@ public class AlignmentUtilsTests assertEquals(1, sf.getBegin()); assertEquals(1, sf.getEnd()); assertEquals("stop_gained", sf.getType()); - assertEquals("TAA", sf.getDescription()); + assertEquals("Aaa/Taa", sf.getDescription()); assertEquals("var3", sf.getValue("ID")); assertEquals("Bad", sf.getValue("clinical_significance")); assertEquals("ID=var3;clinical_significance=Bad", sf.getAttributes()); assertEquals(1, sf.links.size()); assertEquals( - "TAA var3|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var3", + "Aaa/Taa var3|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var3", sf.links.get(0)); assertEquals(dbSnp, sf.getFeatureGroup()); @@ -2208,28 +2210,13 @@ public class AlignmentUtilsTests assertEquals(1, sf.getBegin()); assertEquals(1, sf.getEnd()); assertEquals("synonymous_variant", sf.getType()); - assertEquals("AAG", sf.getDescription()); - assertEquals("var4", sf.getValue("ID")); - assertEquals("None", sf.getValue("clinical_significance")); - assertEquals("ID=var4;clinical_significance=None", sf.getAttributes()); - assertEquals(1, sf.links.size()); - assertEquals( - "AAG var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4", - sf.links.get(0)); - assertEquals(cosmic, sf.getFeatureGroup()); - - // AAA -> AAG synonymous - sf = sfs.get(4); - assertEquals(1, sf.getBegin()); - assertEquals(1, sf.getEnd()); - assertEquals("synonymous_variant", sf.getType()); - assertEquals("AAG", sf.getDescription()); + assertEquals("aaA/aaG", sf.getDescription()); assertEquals("var4", sf.getValue("ID")); assertEquals("None", sf.getValue("clinical_significance")); assertEquals("ID=var4;clinical_significance=None", sf.getAttributes()); assertEquals(1, sf.links.size()); assertEquals( - "AAG var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4", + "aaA/aaG var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4", sf.links.get(0)); assertEquals(cosmic, sf.getFeatureGroup()); @@ -2238,13 +2225,13 @@ public class AlignmentUtilsTests assertEquals(2, sf.getBegin()); assertEquals(2, sf.getEnd()); assertEquals("synonymous_variant", sf.getType()); - assertEquals("TTC", sf.getDescription()); + assertEquals("ttT/ttC", sf.getDescription()); assertEquals("var6", sf.getValue("ID")); assertNull(sf.getValue("clinical_significance")); assertEquals("ID=var6", sf.getAttributes()); assertEquals(1, sf.links.size()); assertEquals( - "TTC var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6", + "ttT/ttC var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6", sf.links.get(0)); assertEquals(dbSnp, sf.getFeatureGroup()); @@ -2698,14 +2685,14 @@ public class AlignmentUtilsTests assertEquals(2, toMap.getFromRanges().get(0).length); assertEquals(1, toMap.getFromRanges().get(0)[0]); assertEquals(12, toMap.getFromRanges().get(0)[1]); - assertEquals(1, toMap.getToRanges().size()); - assertEquals(4, toMap.getToRanges().get(0).length); + assertEquals(2, toMap.getToRanges().size()); + assertEquals(2, toMap.getToRanges().get(0).length); assertEquals(158, toMap.getToRanges().get(0)[0]); assertEquals(164, toMap.getToRanges().get(0)[1]); - assertEquals(210, toMap.getToRanges().get(0)[2]); - assertEquals(214, toMap.getToRanges().get(0)[3]); + assertEquals(210, toMap.getToRanges().get(1)[0]); + assertEquals(214, toMap.getToRanges().get(1)[1]); // or summarised as (but toString might change in future): - assertEquals("[ [1, 12] ] 1:1 to [ [158, 164, 210, 214] ]", + assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]", toMap.toString()); /* @@ -2717,7 +2704,7 @@ public class AlignmentUtilsTests assertEquals("GRCh38", toLoci.getAssemblyId()); assertEquals("7", toLoci.getChromosomeId()); toMap = toLoci.getMap(); - assertEquals("[ [1, 12] ] 1:1 to [ [158, 164, 210, 214] ]", + assertEquals("[ [1, 12] ] 1:1 to [ [158, 164] [210, 214] ]", toMap.toString()); } @@ -2749,7 +2736,7 @@ public class AlignmentUtilsTests * Case 2: CDS 3 times length of peptide + stop codon * (note code does not currently check trailing codon is a stop codon) */ - dna = new Sequence("dna", "AACGacgtCTCCTTGA"); + dna = new Sequence("dna", "AACGacgtCTCCTCCC"); dna.createDatasetSequence(); dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 16, null)); @@ -2762,17 +2749,42 @@ public class AlignmentUtilsTests Arrays.deepToString(ml.getFromRanges().toArray())); /* - * Case 3: CDS not 3 times length of peptide - no mapping is made + * Case 3: CDS longer than 3 * peptide + stop codon - no mapping is made + */ + dna = new Sequence("dna", "AACGacgtCTCCTTGATCA"); + dna.createDatasetSequence(); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 19, null)); + ml = AlignmentUtils.mapCdsToProtein(dna, peptide); + assertNull(ml); + + /* + * Case 4: CDS shorter than 3 * peptide - no mapping is made + */ + dna = new Sequence("dna", "AACGacgtCTCC"); + dna.createDatasetSequence(); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); + dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 12, null)); + ml = AlignmentUtils.mapCdsToProtein(dna, peptide); + assertNull(ml); + + /* + * Case 5: CDS 3 times length of peptide + part codon - mapping is truncated */ dna = new Sequence("dna", "AACGacgtCTCCTTG"); dna.createDatasetSequence(); dna.addSequenceFeature(new SequenceFeature("CDS", "", 1, 4, null)); dna.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, null)); ml = AlignmentUtils.mapCdsToProtein(dna, peptide); - assertNull(ml); + assertEquals(3, ml.getFromRatio()); + assertEquals(1, ml.getToRatio()); + assertEquals("[[1, 3]]", + Arrays.deepToString(ml.getToRanges().toArray())); + assertEquals("[[1, 4], [9, 13]]", + Arrays.deepToString(ml.getFromRanges().toArray())); /* - * Case 4: incomplete start codon corresponding to X in peptide + * Case 6: incomplete start codon corresponding to X in peptide */ dna = new Sequence("dna", "ACGacgtCTCCTTGG"); dna.createDatasetSequence(); @@ -2787,4 +2799,152 @@ public class AlignmentUtilsTests assertEquals("[[3, 3], [8, 12]]", Arrays.deepToString(ml.getFromRanges().toArray())); } + + /** + * Tests for the method that locates the CDS sequence that has a mapping to + * the given protein. That is, given a transcript-to-peptide mapping, find the + * cds-to-peptide mapping that relates to both, and return the CDS sequence. + */ + @Test + public void testFindCdsForProtein() + { + List mappings = new ArrayList<>(); + AlignedCodonFrame acf1 = new AlignedCodonFrame(); + mappings.add(acf1); + + SequenceI dna1 = new Sequence("dna1", "cgatATcgGCTATCTATGacg"); + dna1.createDatasetSequence(); + + // NB we currently exclude STOP codon from CDS sequences + // the test would need to change if this changes in future + SequenceI cds1 = new Sequence("cds1", "ATGCTATCT"); + cds1.createDatasetSequence(); + + SequenceI pep1 = new Sequence("pep1", "MLS"); + pep1.createDatasetSequence(); + List seqMappings = new ArrayList<>(); + MapList mapList = new MapList( + new int[] + { 5, 6, 9, 15 }, new int[] { 1, 3 }, 3, 1); + Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList); + + // add dna to peptide mapping + seqMappings.add(acf1); + acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), + mapList); + + /* + * first case - no dna-to-CDS mapping exists - search fails + */ + SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1, + seqMappings, dnaToPeptide); + assertNull(seq); + + /* + * second case - CDS-to-peptide mapping exists but no dna-to-CDS + * - search fails + */ + // todo this test fails if the mapping is added to acf1, not acf2 + // need to tidy up use of lists of mappings in AlignedCodonFrame + AlignedCodonFrame acf2 = new AlignedCodonFrame(); + mappings.add(acf2); + MapList cdsToPeptideMapping = new MapList(new int[] + { 1, 9 }, new int[] { 1, 3 }, 3, 1); + acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(), + cdsToPeptideMapping); + assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings, + dnaToPeptide)); + + /* + * third case - add dna-to-CDS mapping - CDS is now found! + */ + MapList dnaToCdsMapping = new MapList(new int[] { 5, 6, 9, 15 }, + new int[] + { 1, 9 }, 1, 1); + acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(), + dnaToCdsMapping); + seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings, + dnaToPeptide); + assertSame(seq, cds1.getDatasetSequence()); + } + + /** + * Tests for the method that locates the CDS sequence that has a mapping to + * the given protein. That is, given a transcript-to-peptide mapping, find the + * cds-to-peptide mapping that relates to both, and return the CDS sequence. + * This test is for the case where transcript and CDS are the same length. + */ + @Test + public void testFindCdsForProtein_noUTR() + { + List mappings = new ArrayList<>(); + AlignedCodonFrame acf1 = new AlignedCodonFrame(); + mappings.add(acf1); + + SequenceI dna1 = new Sequence("dna1", "ATGCTATCTTAA"); + dna1.createDatasetSequence(); + + // NB we currently exclude STOP codon from CDS sequences + // the test would need to change if this changes in future + SequenceI cds1 = new Sequence("cds1", "ATGCTATCT"); + cds1.createDatasetSequence(); + + SequenceI pep1 = new Sequence("pep1", "MLS"); + pep1.createDatasetSequence(); + List seqMappings = new ArrayList<>(); + MapList mapList = new MapList( + new int[] + { 1, 9 }, new int[] { 1, 3 }, 3, 1); + Mapping dnaToPeptide = new Mapping(pep1.getDatasetSequence(), mapList); + + // add dna to peptide mapping + seqMappings.add(acf1); + acf1.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), + mapList); + + /* + * first case - transcript lacks CDS features - it appears to be + * the CDS sequence and is returned + */ + SequenceI seq = AlignmentUtils.findCdsForProtein(mappings, dna1, + seqMappings, dnaToPeptide); + assertSame(seq, dna1.getDatasetSequence()); + + /* + * second case - transcript has CDS feature - this means it is + * not returned as a match for CDS (CDS sequences don't have CDS features) + */ + dna1.addSequenceFeature( + new SequenceFeature(SequenceOntologyI.CDS, "cds", 1, 12, null)); + seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings, + dnaToPeptide); + assertNull(seq); + + /* + * third case - CDS-to-peptide mapping exists but no dna-to-CDS + * - search fails + */ + // todo this test fails if the mapping is added to acf1, not acf2 + // need to tidy up use of lists of mappings in AlignedCodonFrame + AlignedCodonFrame acf2 = new AlignedCodonFrame(); + mappings.add(acf2); + MapList cdsToPeptideMapping = new MapList(new int[] + { 1, 9 }, new int[] { 1, 3 }, 3, 1); + acf2.addMap(cds1.getDatasetSequence(), pep1.getDatasetSequence(), + cdsToPeptideMapping); + assertNull(AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings, + dnaToPeptide)); + + /* + * fourth case - add dna-to-CDS mapping - CDS is now found! + */ + MapList dnaToCdsMapping = new MapList(new int[] { 1, 9 }, + new int[] + { 1, 9 }, 1, 1); + acf1.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(), + dnaToCdsMapping); + seq = AlignmentUtils.findCdsForProtein(mappings, dna1, seqMappings, + dnaToPeptide); + assertSame(seq, cds1.getDatasetSequence()); + } }