X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fanalysis%2FAlignmentUtilsTests.java;h=d811bef14a6225a48c5a7b58f30c1f0818d26bc8;hb=74b3bb2ce3513c972e472406545490fa31e15c0d;hp=810ef5f60cb94fbff154c4be494e6bc79fe78673;hpb=0ae70dbd95d7eb6932c1ec1252628f58f0989668;p=jalview.git diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 810ef5f..d811bef 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -26,6 +26,7 @@ import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; +import jalview.analysis.AlignmentUtils.DnaVariant; import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; @@ -1733,56 +1734,62 @@ public class AlignmentUtilsTests /* * first with no variants on dna */ - LinkedHashMap> variantsMap = AlignmentUtils + LinkedHashMap[]> variantsMap = AlignmentUtils .buildDnaVariantsMap(dna, map); assertTrue(variantsMap.isEmpty()); /* * single allele codon 1, on base 1 */ - SequenceFeature sf = new SequenceFeature("sequence_variant", "", 1, 1, + SequenceFeature sf1 = new SequenceFeature("sequence_variant", "", 1, 1, 0f, null); - sf.setValue("alleles", "T"); - sf.setValue("ID", "sequence_variant:rs758803211"); - dna.addSequenceFeature(sf); + sf1.setValue("alleles", "T"); + sf1.setValue("ID", "sequence_variant:rs758803211"); + dna.addSequenceFeature(sf1); /* * two alleles codon 2, on bases 2 and 3 (distinct variants) */ - sf = new SequenceFeature("sequence_variant", "", 5, 5, 0f, null); - sf.setValue("alleles", "T"); - sf.setValue("ID", "sequence_variant:rs758803212"); - dna.addSequenceFeature(sf); - sf = new SequenceFeature("sequence_variant", "", 6, 6, 0f, null); - sf.setValue("alleles", "G"); - sf.setValue("ID", "sequence_variant:rs758803213"); - 
dna.addSequenceFeature(sf); + SequenceFeature sf2 = new SequenceFeature("sequence_variant", "", 5, 5, + 0f, null); + sf2.setValue("alleles", "T"); + sf2.setValue("ID", "sequence_variant:rs758803212"); + dna.addSequenceFeature(sf2); + SequenceFeature sf3 = new SequenceFeature("sequence_variant", "", 6, 6, + 0f, null); + sf3.setValue("alleles", "G"); + sf3.setValue("ID", "sequence_variant:rs758803213"); + dna.addSequenceFeature(sf3); /* * two alleles codon 3, both on base 2 (one variant) */ - sf = new SequenceFeature("sequence_variant", "", 8, 8, 0f, null); - sf.setValue("alleles", "C, G"); - sf.setValue("ID", "sequence_variant:rs758803214"); - dna.addSequenceFeature(sf); + SequenceFeature sf4 = new SequenceFeature("sequence_variant", "", 8, 8, + 0f, null); + sf4.setValue("alleles", "C, G"); + sf4.setValue("ID", "sequence_variant:rs758803214"); + dna.addSequenceFeature(sf4); // no alleles on codon 4 /* * alleles on codon 5 on all 3 bases (distinct variants) */ - sf = new SequenceFeature("sequence_variant", "", 13, 13, 0f, null); - sf.setValue("alleles", "C, G"); // (C duplicates given base value) - sf.setValue("ID", "sequence_variant:rs758803215"); - dna.addSequenceFeature(sf); - sf = new SequenceFeature("sequence_variant", "", 14, 14, 0f, null); - sf.setValue("alleles", "g, a"); // should force to upper-case - sf.setValue("ID", "sequence_variant:rs758803216"); - dna.addSequenceFeature(sf); - sf = new SequenceFeature("sequence_variant", "", 15, 15, 0f, null); - sf.setValue("alleles", "A, T"); - sf.setValue("ID", "sequence_variant:rs758803217"); - dna.addSequenceFeature(sf); + SequenceFeature sf5 = new SequenceFeature("sequence_variant", "", 13, + 13, 0f, null); + sf5.setValue("alleles", "C, G"); // (C duplicates given base value) + sf5.setValue("ID", "sequence_variant:rs758803215"); + dna.addSequenceFeature(sf5); + SequenceFeature sf6 = new SequenceFeature("sequence_variant", "", 14, + 14, 0f, null); + sf6.setValue("alleles", "g, a"); // should force to upper-case + 
sf6.setValue("ID", "sequence_variant:rs758803216"); + dna.addSequenceFeature(sf6); + SequenceFeature sf7 = new SequenceFeature("sequence_variant", "", 15, + 15, 0f, null); + sf7.setValue("alleles", "A, T"); + sf7.setValue("ID", "sequence_variant:rs758803217"); + dna.addSequenceFeature(sf7); /* * build map - expect variants on positions 1, 2, 3, 5 @@ -1791,39 +1798,68 @@ public class AlignmentUtilsTests assertEquals(4, variantsMap.size()); /* - * one variant on protein position 1 - */ - assertEquals(1, variantsMap.get(1).size()); - assertTrue(Arrays.deepEquals(new String[][] { { "A", "T" }, { "T" }, - { "G" } }, variantsMap.get(1).get(0))); - - /* - * two variants on protein position 2 - */ - assertEquals(2, variantsMap.get(2).size()); - assertTrue(Arrays.deepEquals(new String[][] { { "A" }, { "A", "T" }, - { "A" } }, variantsMap.get(2).get(0))); - assertTrue(Arrays.deepEquals(new String[][] { { "A" }, { "A" }, - { "A", "G" } }, variantsMap.get(2).get(1))); - - /* - * one variant on protein position 3 - */ - assertEquals(1, variantsMap.get(3).size()); - assertTrue(Arrays.deepEquals(new String[][] { { "T" }, - { "T", "C", "G" }, { "T" } }, variantsMap.get(3).get(0))); + * protein residue 1: variant on codon (ATG) base 1, not on 2 or 3 + */ + List[] pep1Variants = variantsMap.get(1); + assertEquals(3, pep1Variants.length); + assertEquals(1, pep1Variants[0].size()); + assertEquals("A", pep1Variants[0].get(0).base); // codon[1] base + assertSame(sf1, pep1Variants[0].get(0).variant); // codon[1] variant + assertEquals(1, pep1Variants[1].size()); + assertEquals("T", pep1Variants[1].get(0).base); // codon[2] base + assertNull(pep1Variants[1].get(0).variant); // no variant here + assertEquals(1, pep1Variants[2].size()); + assertEquals("G", pep1Variants[2].get(0).base); // codon[3] base + assertNull(pep1Variants[2].get(0).variant); // no variant here + + /* + * protein residue 2: variants on codon (AAA) bases 2 and 3 + */ + List[] pep2Variants = variantsMap.get(2); + 
assertEquals(3, pep2Variants.length); + assertEquals(1, pep2Variants[0].size()); + // codon[1] base recorded while processing variant on codon[2] + assertEquals("A", pep2Variants[0].get(0).base); + assertNull(pep2Variants[0].get(0).variant); // no variant here + // codon[2] base and variant: + assertEquals(1, pep2Variants[1].size()); + assertEquals("A", pep2Variants[1].get(0).base); + assertSame(sf2, pep2Variants[1].get(0).variant); + // codon[3] base was recorded when processing codon[2] variant + // and then the variant for codon[3] added to it + assertEquals(1, pep2Variants[2].size()); + assertEquals("A", pep2Variants[2].get(0).base); + assertSame(sf3, pep2Variants[2].get(0).variant); + + /* + * protein residue 3: variants on codon (TTT) base 2 only + */ + List[] pep3Variants = variantsMap.get(3); + assertEquals(3, pep3Variants.length); + assertEquals(1, pep3Variants[0].size()); + assertEquals("T", pep3Variants[0].get(0).base); // codon[1] base + assertNull(pep3Variants[0].get(0).variant); // no variant here + assertEquals(1, pep3Variants[1].size()); + assertEquals("T", pep3Variants[1].get(0).base); // codon[2] base + assertSame(sf4, pep3Variants[1].get(0).variant); // codon[2] variant + assertEquals(1, pep3Variants[2].size()); + assertEquals("T", pep3Variants[2].get(0).base); // codon[3] base + assertNull(pep3Variants[2].get(0).variant); // no variant here /* * three variants on protein position 5 - * duplicated bases are not removed here, handled in computePeptideVariants - */ - assertEquals(3, variantsMap.get(5).size()); - assertTrue(Arrays.deepEquals(new String[][] { { "C", "C", "G" }, - { "C" }, { "C" } }, variantsMap.get(5).get(0))); - assertTrue(Arrays.deepEquals(new String[][] { { "C" }, - { "C", "G", "A" }, { "C" } }, variantsMap.get(5).get(1))); - assertTrue(Arrays.deepEquals(new String[][] { { "C" }, { "C" }, - { "C", "A", "T" } }, variantsMap.get(5).get(2))); + */ + List[] pep5Variants = variantsMap.get(5); + assertEquals(3, pep5Variants.length); + 
assertEquals(1, pep5Variants[0].size()); + assertEquals("C", pep5Variants[0].get(0).base); // codon[1] base + assertSame(sf5, pep5Variants[0].get(0).variant); // codon[1] variant + assertEquals(1, pep5Variants[1].size()); + assertEquals("C", pep5Variants[1].get(0).base); // codon[2] base + assertSame(sf6, pep5Variants[1].get(0).variant); // codon[2] variant + assertEquals(1, pep5Variants[2].size()); + assertEquals("C", pep5Variants[2].get(0).base); // codon[3] base + assertSame(sf7, pep5Variants[2].get(0).variant); // codon[3] variant } /** @@ -1833,67 +1869,154 @@ public class AlignmentUtilsTests @Test(groups = "Functional") public void testComputePeptideVariants() { - String[][] codonVariants = new String[][] { { "A" }, { "G" }, { "T" } }; - /* - * AGT codes for S - this is not included in the variants returned + * scenario: AAATTTCCC codes for KFP, with variants + * GAA -> E + * CAA -> Q + * AAG synonymous + * AAT -> N + * TTC synonymous + * CAC,CGC -> H,R (as one variant) */ - List variants = AlignmentUtils.computePeptideVariants(codonVariants, "S"); - assertEquals("[]", variants.toString()); - - // S is reported if it differs from the current value (A): - variants = AlignmentUtils.computePeptideVariants(codonVariants, "A"); - assertEquals("[S]", variants.toString()); - - /* - * synonymous variant is not reported - */ - codonVariants = new String[][] { { "A" }, { "G" }, { "C", "T" } }; - // AGC and AGT both code for S - variants = AlignmentUtils.computePeptideVariants(codonVariants, "s"); - assertEquals("[]", variants.toString()); - + SequenceI peptide = new Sequence("pep/10-12", "KFP"); + /* - * equivalent variants are only reported once + * two distinct variants for codon 1 position 1 + * second one has clinical significance */ - codonVariants = new String[][] { { "C" }, { "T" }, - { "A", "C", "G", "T" } }; - // CTA CTC CTG CTT all code for L - variants = AlignmentUtils.computePeptideVariants(codonVariants, "S"); - assertEquals("[L]", variants.toString()); - 
+ SequenceFeature sf1 = new SequenceFeature("sequence_variant", "", 1, 1, + 0f, null); + sf1.setValue("alleles", "A,G"); // GAA -> E + sf1.setValue("ID", "var1.125A>G"); + SequenceFeature sf2 = new SequenceFeature("sequence_variant", "", 1, 1, + 0f, null); + sf2.setValue("alleles", "A,C"); // CAA -> Q + sf2.setValue("ID", "var2"); + sf2.setValue("clinical_significance", "Dodgy"); + SequenceFeature sf3 = new SequenceFeature("sequence_variant", "", 3, 3, + 0f, null); + sf3.setValue("alleles", "A,G"); // synonymous + sf3.setValue("ID", "var3"); + sf3.setValue("clinical_significance", "None"); + SequenceFeature sf4 = new SequenceFeature("sequence_variant", "", 3, 3, + 0f, null); + sf4.setValue("alleles", "A,T"); // AAT -> N + sf4.setValue("ID", "sequence_variant:var4"); // prefix gets stripped off + sf4.setValue("clinical_significance", "Benign"); + SequenceFeature sf5 = new SequenceFeature("sequence_variant", "", 6, 6, + 0f, null); + sf5.setValue("alleles", "T,C"); // synonymous + sf5.setValue("ID", "var5"); + sf5.setValue("clinical_significance", "Bad"); + SequenceFeature sf6 = new SequenceFeature("sequence_variant", "", 8, 8, + 0f, null); + sf6.setValue("alleles", "C,A,G"); // CAC,CGC -> H,R + sf6.setValue("ID", "var6"); + sf6.setValue("clinical_significance", "Good"); + + List codon1Variants = new ArrayList(); + List codon2Variants = new ArrayList(); + List codon3Variants = new ArrayList(); + List codonVariants[] = new ArrayList[3]; + codonVariants[0] = codon1Variants; + codonVariants[1] = codon2Variants; + codonVariants[2] = codon3Variants; + /* - * vary codons 1 and 2; variant products are sorted and non-redundant + * compute variants for protein position 1 */ - codonVariants = new String[][] { { "a", "C" }, { "g", "T" }, { "A" } }; - // aga ata cga cta code for R, I, R, L - variants = AlignmentUtils.computePeptideVariants(codonVariants, "S"); - assertEquals("[I, L, R]", variants.toString()); - + codon1Variants.add(new DnaVariant("A", sf1)); + 
codon1Variants.add(new DnaVariant("A", sf2)); + codon2Variants.add(new DnaVariant("A")); + codon2Variants.add(new DnaVariant("A")); + codon3Variants.add(new DnaVariant("A", sf3)); + codon3Variants.add(new DnaVariant("A", sf4)); + AlignmentUtils.computePeptideVariants(peptide, 1, codonVariants); + /* - * vary codons 2 and 3 + * compute variants for protein position 2 */ - codonVariants = new String[][] { { "a" }, { "g", "T" }, { "A", "c" } }; - // aga agc ata atc code for R, S, I, I - variants = AlignmentUtils.computePeptideVariants(codonVariants, "S"); - assertEquals("[I, R]", variants.toString()); - + codon1Variants.clear(); + codon2Variants.clear(); + codon3Variants.clear(); + codon1Variants.add(new DnaVariant("T")); + codon2Variants.add(new DnaVariant("T")); + codon3Variants.add(new DnaVariant("T", sf5)); + AlignmentUtils.computePeptideVariants(peptide, 2, codonVariants); + /* - * vary codons 1 and 3 + * compute variants for protein position 3 */ - codonVariants = new String[][] { { "a", "t" }, { "a" }, { "t", "g" } }; - // aat aag tat tag code for N, K, Y, STOP - STOP sorted to end - variants = AlignmentUtils.computePeptideVariants(codonVariants, "S"); - assertEquals("[K, N, Y, STOP]", variants.toString()); - + codon1Variants.clear(); + codon2Variants.clear(); + codon3Variants.clear(); + codon1Variants.add(new DnaVariant("C")); + codon2Variants.add(new DnaVariant("C", sf6)); + codon3Variants.add(new DnaVariant("C")); + AlignmentUtils.computePeptideVariants(peptide, 3, codonVariants); + /* - * vary codons 1, 2 and 3 + * verify added sequence features for + * var1 K -> E + * var2 K -> Q + * var4 K -> N + * var6 P -> H + * var6 P -> R */ - codonVariants = new String[][] { { "a", "t" }, { "G", "C" }, - { "t", "g" } }; - // agt agg act acg tgt tgg tct tcg code for S, R, T, T, C, W, S, S - variants = AlignmentUtils.computePeptideVariants(codonVariants, "S"); - assertEquals("[C, R, T, W]", variants.toString()); + SequenceFeature[] sfs = peptide.getSequenceFeatures(); 
+ assertEquals(5, sfs.length); + SequenceFeature sf = sfs[0]; + assertEquals(1, sf.getBegin()); + assertEquals(1, sf.getEnd()); + assertEquals("K->E", sf.getDescription()); + assertEquals("var1.125A>G", sf.getValue("ID")); + assertNull(sf.getValue("clinical_significance")); + assertEquals(1, sf.links.size()); + // link to variation is urlencoded + assertEquals( + "K->E var1.125A>G|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var1.125A%3EG", + sf.links.get(0)); + sf = sfs[1]; + assertEquals(1, sf.getBegin()); + assertEquals(1, sf.getEnd()); + assertEquals("K->Q", sf.getDescription()); + assertEquals("var2", sf.getValue("ID")); + assertEquals("Dodgy", sf.getValue("clinical_significance")); + assertEquals(1, sf.links.size()); + assertEquals( + "K->Q var2|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var2", + sf.links.get(0)); + sf = sfs[2]; + assertEquals(1, sf.getBegin()); + assertEquals(1, sf.getEnd()); + assertEquals("K->N", sf.getDescription()); + assertEquals("var4", sf.getValue("ID")); + assertEquals("Benign", sf.getValue("clinical_significance")); + assertEquals(1, sf.links.size()); + assertEquals( + "K->N var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4", + sf.links.get(0)); + sf = sfs[3]; + assertEquals(3, sf.getBegin()); + assertEquals(3, sf.getEnd()); + assertEquals("P->H", sf.getDescription()); + assertEquals("var6", sf.getValue("ID")); + assertEquals("Good", sf.getValue("clinical_significance")); + assertEquals(1, sf.links.size()); + assertEquals( + "P->H var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6", + sf.links.get(0)); + // var6 generates two distinct protein variant features + sf = sfs[4]; + assertEquals(3, sf.getBegin()); + assertEquals(3, sf.getEnd()); + assertEquals("P->R", sf.getDescription()); + assertEquals("var6", sf.getValue("ID")); + assertEquals("Good", sf.getValue("clinical_significance")); + assertEquals(1, sf.links.size()); + assertEquals( + "P->R 
var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6", + sf.links.get(0)); } /**