X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fanalysis%2FAlignmentUtilsTests.java;h=8bdd7403bf34d573b8052608d20dbd85019cc106;hb=6c52cc0b81ae3abdc3c5f6f88a23364a0246351a;hp=abe3f555096647dd3503c03571ff1f648bc3ded4;hpb=b2cedc8371f6624859f107581e6d84e841d4114b;p=jalview.git

diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java
index abe3f55..8bdd740 100644
--- a/test/jalview/analysis/AlignmentUtilsTests.java
+++ b/test/jalview/analysis/AlignmentUtilsTests.java
@@ -48,6 +48,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Iterator;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -571,9 +572,15 @@ public class AlignmentUtilsTests
   @Test(groups = { "Functional" })
   public void testTranslatesAs()
   {
+    // null arguments check
+    assertFalse(AlignmentUtils.translatesAs(null, 0, null));
+    assertFalse(AlignmentUtils.translatesAs(new char[] { 't' }, 0, null));
+    assertFalse(AlignmentUtils.translatesAs(null, 0, new char[] { 'a' }));
+
+    // straight translation
     assertTrue(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(), 0,
             "FPKG".toCharArray()));
-    // with start codon (not in protein)
+    // with extra start codon (not in protein)
     assertTrue(AlignmentUtils.translatesAs("atgtttcccaaaggg".toCharArray(),
             3, "FPKG".toCharArray()));
     // with stop codon1 (not in protein)
@@ -601,7 +608,7 @@ public class AlignmentUtilsTests
     assertTrue(AlignmentUtils.translatesAs(
             "atgtttcccaaagggtga".toCharArray(), 3, "FPKG".toCharArray()));
 
-    // with embedded stop codon
+    // with embedded stop codons
     assertTrue(AlignmentUtils.translatesAs(
             "atgtttTAGcccaaaTAAgggtga".toCharArray(), 3,
             "F*PK*G".toCharArray()));
@@ -609,6 +616,26 @@ public class AlignmentUtilsTests
     // wrong protein
     assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(),
             0, "FPMG".toCharArray()));
+
+    // truncated dna
+    assertFalse(AlignmentUtils.translatesAs("tttcccaaagg".toCharArray(), 0,
+            "FPKG".toCharArray()));
+
+    // truncated protein
+    assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(),
+            0, "FPK".toCharArray()));
+
+    // overlong dna (doesn't end in stop codon)
+    assertFalse(AlignmentUtils.translatesAs(
+            "tttcccaaagggttt".toCharArray(), 0, "FPKG".toCharArray()));
+
+    // dna + stop codon + more (TODO confirm: "tta" is not a stop codon)
+    assertFalse(AlignmentUtils.translatesAs(
+            "tttcccaaagggttaga".toCharArray(), 0, "FPKG".toCharArray()));
+
+    // overlong protein
+    assertFalse(AlignmentUtils.translatesAs("tttcccaaaggg".toCharArray(),
+            0, "FPKGQ".toCharArray()));
   }
 
   /**
@@ -1342,7 +1369,7 @@ public class AlignmentUtilsTests
    * @throws IOException
    */
   @Test(groups = { "Functional" })
-  public void testMapProteinSequenceToCdna_forSubsequence()
+  public void testMapCdnaToProtein_forSubsequence()
           throws IOException
   {
     SequenceI prot = new Sequence("UNIPROT|V12345", "E-I--Q", 10, 12);
@@ -1351,7 +1378,7 @@ public class AlignmentUtilsTests
     SequenceI dna = new Sequence("EMBL|A33333", "GAA--AT-C-CAG", 40, 48);
     dna.createDatasetSequence();
 
-    MapList map = AlignmentUtils.mapProteinSequenceToCdna(prot, dna);
+    MapList map = AlignmentUtils.mapCdnaToProtein(prot, dna);
     assertEquals(10, map.getToLowest());
     assertEquals(12, map.getToHighest());
     assertEquals(40, map.getFromLowest());
@@ -1786,4 +1813,197 @@ public class AlignmentUtilsTests
     assertEquals("--N-G", prot2.getSequenceAsString());
     assertEquals("---XG", prot3.getSequenceAsString());
   }
+
+  /**
+   * Tests for the method that maps the subset of a dna sequence that has CDS
+   * (or subtype) feature - case where the start codon is incomplete.
+   */
+  @Test(groups = "Functional")
+  public void testGetCdsRanges_fivePrimeIncomplete()
+  {
+    SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt");
+    dnaSeq.createDatasetSequence();
+    SequenceI ds = dnaSeq.getDatasetSequence();
+
+    // CDS for dna 5-6 (incomplete codon), 7-9
+    SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null);
+    sf.setPhase("2"); // skip 2 bases to start of next codon
+    ds.addSequenceFeature(sf);
+    // CDS for dna 13-15
+    sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null);
+    ds.addSequenceFeature(sf);
+
+    List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
+
+    /*
+     * check the mapping starts with the first complete codon
+     */
+    assertEquals(6, MappingUtils.getLength(ranges));
+    assertEquals(2, ranges.size());
+    assertEquals(7, ranges.get(0)[0]);
+    assertEquals(9, ranges.get(0)[1]);
+    assertEquals(13, ranges.get(1)[0]);
+    assertEquals(15, ranges.get(1)[1]);
+  }
+
+  /**
+   * Tests for the method that maps the subset of a dna sequence that has CDS
+   * (or subtype) feature.
+   */
+  @Test(groups = "Functional")
+  public void testGetCdsRanges()
+  {
+    SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt");
+    dnaSeq.createDatasetSequence();
+    SequenceI ds = dnaSeq.getDatasetSequence();
+
+    // CDS for dna 4-6
+    SequenceFeature sf = new SequenceFeature("CDS", "", 4, 6, 0f, null);
+    ds.addSequenceFeature(sf);
+    // exon feature should be ignored here
+    sf = new SequenceFeature("exon", "", 7, 9, 0f, null);
+    ds.addSequenceFeature(sf);
+    // CDS for dna 10-12
+    sf = new SequenceFeature("CDS_predicted", "", 10, 12, 0f, null);
+    ds.addSequenceFeature(sf);
+
+    List<int[]> ranges = AlignmentUtils.findCdsPositions(dnaSeq);
+    assertEquals(6, MappingUtils.getLength(ranges));
+    assertEquals(2, ranges.size());
+    assertEquals(4, ranges.get(0)[0]);
+    assertEquals(6, ranges.get(0)[1]);
+    assertEquals(10, ranges.get(1)[0]);
+    assertEquals(12, ranges.get(1)[1]);
+  }
+
+  /**
+   * Test the method that computes a map of codon variants for each protein
+   * position from "sequence_variant" features on dna
+   */
+  @Test(groups = "Functional")
+  public void testBuildDnaVariantsMap()
+  {
+    SequenceI dna = new Sequence("dna", "atgAAATTTGGGCCCtag");
+    MapList map = new MapList(new int[] { 1, 18 }, new int[] { 1, 5 }, 3, 1);
+
+    /*
+     * first with no variants on dna
+     */
+    LinkedHashMap<Integer, String[][]> variantsMap = AlignmentUtils
+            .buildDnaVariantsMap(dna, map);
+    assertTrue(variantsMap.isEmpty());
+
+    // single allele codon 1, on base 1
+    SequenceFeature sf = new SequenceFeature("sequence_variant", "", 1, 1,
+            0f, null);
+    sf.setValue("alleles", "T");
+    dna.addSequenceFeature(sf);
+
+    // two alleles codon 2, on bases 2 and 3
+    sf = new SequenceFeature("sequence_variant", "", 5, 5, 0f, null);
+    sf.setValue("alleles", "T");
+    dna.addSequenceFeature(sf);
+    sf = new SequenceFeature("sequence_variant", "", 6, 6, 0f, null);
+    sf.setValue("alleles", "G");
+    dna.addSequenceFeature(sf);
+
+    // two alleles codon 3, both on base 2
+    sf = new SequenceFeature("sequence_variant", "", 8, 8, 0f, null);
+    sf.setValue("alleles", "C, G");
+    dna.addSequenceFeature(sf);
+
+    // no alleles on codon 4
+    // alleles on codon 5 on all 3 bases
+    sf = new SequenceFeature("sequence_variant", "", 13, 13, 0f, null);
+    sf.setValue("alleles", "C, G"); // (C duplicates given base value)
+    dna.addSequenceFeature(sf);
+    sf = new SequenceFeature("sequence_variant", "", 14, 14, 0f, null);
+    sf.setValue("alleles", "g, a"); // should force to upper-case
+    dna.addSequenceFeature(sf);
+    sf = new SequenceFeature("sequence_variant", "", 15, 15, 0f, null);
+    sf.setValue("alleles", "A, T");
+    dna.addSequenceFeature(sf);
+
+    variantsMap = AlignmentUtils.buildDnaVariantsMap(dna, map);
+    assertEquals(4, variantsMap.size());
+    assertTrue(Arrays.deepEquals(new String[][] { { "A", "T" }, { "T" },
+            { "G" } }, variantsMap.get(1)));
+    assertTrue(Arrays.deepEquals(new String[][] { { "A" }, { "A", "T" },
+            { "A", "G" } }, variantsMap.get(2)));
+    assertTrue(Arrays.deepEquals(new String[][] { { "T" },
+            { "T", "C", "G" }, { "T" } }, variantsMap.get(3)));
+    // duplicated bases are not removed here, handled in computePeptideVariants
+    assertTrue(Arrays.deepEquals(new String[][] { { "C", "C", "G" },
+            { "C", "G", "A" }, { "C", "A", "T" } }, variantsMap.get(5)));
+  }
+
+  /**
+   * Tests for the method that computes all peptide variants given codon
+   * variants
+   */
+  @Test(groups = "Functional")
+  public void testComputePeptideVariants()
+  {
+    String[][] codonVariants = new String[][] { { "A" }, { "G" }, { "T" } };
+
+    /*
+     * AGT codes for S - this is not included in the variants returned
+     */
+    List<String> variants = AlignmentUtils.computePeptideVariants(codonVariants, "S");
+    assertEquals("[]", variants.toString());
+
+    // S is reported if it differs from the current value (A):
+    variants = AlignmentUtils.computePeptideVariants(codonVariants, "A");
+    assertEquals("[S]", variants.toString());
+
+    /*
+     * synonymous variant is not reported
+     */
+    codonVariants = new String[][] { { "A" }, { "G" }, { "C", "T" } };
+    // AGC and AGT both code for S
+    variants = AlignmentUtils.computePeptideVariants(codonVariants, "s");
+    assertEquals("[]", variants.toString());
+
+    /*
+     * equivalent variants are only reported once
+     */
+    codonVariants = new String[][] { { "C" }, { "T" },
+            { "A", "C", "G", "T" } };
+    // CTA CTC CTG CTT all code for L
+    variants = AlignmentUtils.computePeptideVariants(codonVariants, "S");
+    assertEquals("[L]", variants.toString());
+
+    /*
+     * vary bases 1 and 2; variant products are sorted and non-redundant
+     */
+    codonVariants = new String[][] { { "a", "C" }, { "g", "T" }, { "A" } };
+    // aga ata cga cta code for R, I, R, L
+    variants = AlignmentUtils.computePeptideVariants(codonVariants, "S");
+    assertEquals("[I, L, R]", variants.toString());
+
+    /*
+     * vary bases 2 and 3
+     */
+    codonVariants = new String[][] { { "a" }, { "g", "T" }, { "A", "c" } };
+    // aga agc ata atc code for R, S, I, I
+    variants = AlignmentUtils.computePeptideVariants(codonVariants, "S");
+    assertEquals("[I, R]", variants.toString());
+
+    /*
+     * vary bases 1 and 3
+     */
+    codonVariants = new String[][] { { "a", "t" }, { "a" }, { "t", "g" } };
+    // aat aag tat tag code for N, K, Y, STOP - STOP sorted to end
+    variants = AlignmentUtils.computePeptideVariants(codonVariants, "S");
+    assertEquals("[K, N, Y, STOP]", variants.toString());
+
+    /*
+     * vary bases 1, 2 and 3
+     */
+    codonVariants = new String[][] { { "a", "t" }, { "G", "C" },
+            { "t", "g" } };
+    // agt agg act acg tgt tgg tct tcg code for S, R, T, T, C, W, S, S
+    variants = AlignmentUtils.computePeptideVariants(codonVariants, "S");
+    assertEquals("[C, R, T, W]", variants.toString());
+  }
 }