X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=inline;f=test%2Fjalview%2Fanalysis%2FAlignmentUtilsTests.java;h=2fc53254ae7b49f62780a896685470659dd56637;hb=aaae043e05d30aa7c96839984e745ad156e95feb;hp=3d3736f4e6d783b3c47c1ac48ab6b025c8c4b318;hpb=6ed535f7ef953468f8827255ec6ebcd5a6e54d8d;p=jalview.git diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 3d3736f..2fc5325 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -26,6 +26,7 @@ import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; +import jalview.analysis.AlignmentUtils.DnaVariant; import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; @@ -50,6 +51,7 @@ import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.TreeMap; import org.testng.annotations.Test; @@ -1732,52 +1734,132 @@ public class AlignmentUtilsTests /* * first with no variants on dna */ - LinkedHashMap variantsMap = AlignmentUtils + LinkedHashMap[]> variantsMap = AlignmentUtils .buildDnaVariantsMap(dna, map); assertTrue(variantsMap.isEmpty()); - // single allele codon 1, on base 1 - SequenceFeature sf = new SequenceFeature("sequence_variant", "", 1, 1, + /* + * single allele codon 1, on base 1 + */ + SequenceFeature sf1 = new SequenceFeature("sequence_variant", "", 1, 1, + 0f, null); + sf1.setValue("alleles", "T"); + sf1.setValue("ID", "sequence_variant:rs758803211"); + dna.addSequenceFeature(sf1); + + /* + * two alleles codon 2, on bases 2 and 3 (distinct variants) + */ + SequenceFeature sf2 = new SequenceFeature("sequence_variant", "", 5, 5, + 0f, null); + sf2.setValue("alleles", "T"); + sf2.setValue("ID", "sequence_variant:rs758803212"); + dna.addSequenceFeature(sf2); + SequenceFeature sf3 = new SequenceFeature("sequence_variant", "", 6, 6, + 0f, null); + sf3.setValue("alleles", "G"); + sf3.setValue("ID", "sequence_variant:rs758803213"); + dna.addSequenceFeature(sf3); + + /* + * two alleles codon 3, both on base 2 (one variant) + */ + SequenceFeature sf4 = new SequenceFeature("sequence_variant", "", 8, 8, 0f, null); - sf.setValue("alleles", "T"); - dna.addSequenceFeature(sf); - - // two alleles codon 2, on bases 2 and 3 - sf = new SequenceFeature("sequence_variant", "", 5, 5, 0f, null); - sf.setValue("alleles", "T"); - dna.addSequenceFeature(sf); - sf = new SequenceFeature("sequence_variant", "", 6, 6, 0f, null); - sf.setValue("alleles", "G"); - dna.addSequenceFeature(sf); - - // two alleles codon 3, both on base 2 - sf = new SequenceFeature("sequence_variant", "", 8, 8, 0f, null); - sf.setValue("alleles", "C, G"); - dna.addSequenceFeature(sf); + sf4.setValue("alleles", "C, G"); + sf4.setValue("ID", "sequence_variant:rs758803214"); + dna.addSequenceFeature(sf4); // no alleles on codon 4 - // alleles on codon 5 on all 3 bases - sf = new SequenceFeature("sequence_variant", "", 13, 13, 0f, null); - sf.setValue("alleles", "C, G"); // (C duplicates given base value) - dna.addSequenceFeature(sf); - sf = new SequenceFeature("sequence_variant", "", 14, 14, 0f, null); - sf.setValue("alleles", "g, a"); // should force to upper-case - dna.addSequenceFeature(sf); - sf = new SequenceFeature("sequence_variant", "", 15, 15, 0f, null); - sf.setValue("alleles", "A, T"); - dna.addSequenceFeature(sf); + /* + * alleles on codon 5 on all 3 bases (distinct variants) + */ + SequenceFeature sf5 = new SequenceFeature("sequence_variant", "", 13, + 13, 0f, null); + sf5.setValue("alleles", "C, G"); // (C duplicates given base value) + sf5.setValue("ID", "sequence_variant:rs758803215"); + dna.addSequenceFeature(sf5); + SequenceFeature sf6 = new SequenceFeature("sequence_variant", "", 14, + 14, 0f, null); + sf6.setValue("alleles", "g, a"); // should force to upper-case + sf6.setValue("ID", "sequence_variant:rs758803216"); + dna.addSequenceFeature(sf6); + SequenceFeature sf7 = new SequenceFeature("sequence_variant", "", 15, + 15, 0f, null); + sf7.setValue("alleles", "A, T"); + sf7.setValue("ID", "sequence_variant:rs758803217"); + dna.addSequenceFeature(sf7); + + /* + * build map - expect variants on positions 1, 2, 3, 5 + */ variantsMap = AlignmentUtils.buildDnaVariantsMap(dna, map); assertEquals(4, variantsMap.size()); - assertTrue(Arrays.deepEquals(new String[][] { { "A", "T" }, { "T" }, - { "G" } }, variantsMap.get(1))); - assertTrue(Arrays.deepEquals(new String[][] { { "A" }, { "A", "T" }, - { "A", "G" } }, variantsMap.get(2))); - assertTrue(Arrays.deepEquals(new String[][] { { "T" }, - { "T", "C", "G" }, { "T" } }, variantsMap.get(3))); - // duplicated bases are not removed here, handled in computePeptideVariants - assertTrue(Arrays.deepEquals(new String[][] { { "C", "C", "G" }, - { "C", "G", "A" }, { "C", "A", "T" } }, variantsMap.get(5))); + + /* + * protein residue 1: variant on codon (ATG) base 1, not on 2 or 3 + */ + List[] pep1Variants = variantsMap.get(1); + assertEquals(3, pep1Variants.length); + assertEquals(1, pep1Variants[0].size()); + assertEquals("A", pep1Variants[0].get(0).base); // codon[1] base + assertSame(sf1, pep1Variants[0].get(0).variant); // codon[1] variant + assertEquals(1, pep1Variants[1].size()); + assertEquals("T", pep1Variants[1].get(0).base); // codon[2] base + assertNull(pep1Variants[1].get(0).variant); // no variant here + assertEquals(1, pep1Variants[2].size()); + assertEquals("G", pep1Variants[2].get(0).base); // codon[3] base + assertNull(pep1Variants[2].get(0).variant); // no variant here + + /* + * protein residue 2: variants on codon (AAA) bases 2 and 3 + */ + List[] pep2Variants = variantsMap.get(2); + assertEquals(3, pep2Variants.length); + assertEquals(1, pep2Variants[0].size()); + // codon[1] base recorded while processing variant on codon[2] + assertEquals("A", pep2Variants[0].get(0).base); + assertNull(pep2Variants[0].get(0).variant); // no variant here + // codon[2] base and variant: + assertEquals(1, pep2Variants[1].size()); + assertEquals("A", pep2Variants[1].get(0).base); + assertSame(sf2, pep2Variants[1].get(0).variant); + // codon[3] base was recorded when processing codon[2] variant + // and then the variant for codon[3] added to it + assertEquals(1, pep2Variants[2].size()); + assertEquals("A", pep2Variants[2].get(0).base); + assertSame(sf3, pep2Variants[2].get(0).variant); + + /* + * protein residue 3: variants on codon (TTT) base 2 only + */ + List[] pep3Variants = variantsMap.get(3); + assertEquals(3, pep3Variants.length); + assertEquals(1, pep3Variants[0].size()); + assertEquals("T", pep3Variants[0].get(0).base); // codon[1] base + assertNull(pep3Variants[0].get(0).variant); // no variant here + assertEquals(1, pep3Variants[1].size()); + assertEquals("T", pep3Variants[1].get(0).base); // codon[2] base + assertSame(sf4, pep3Variants[1].get(0).variant); // codon[2] variant + assertEquals(1, pep3Variants[2].size()); + assertEquals("T", pep3Variants[2].get(0).base); // codon[3] base + assertNull(pep3Variants[2].get(0).variant); // no variant here + + /* + * three variants on protein position 5 + */ + List[] pep5Variants = variantsMap.get(5); + assertEquals(3, pep5Variants.length); + assertEquals(1, pep5Variants[0].size()); + assertEquals("C", pep5Variants[0].get(0).base); // codon[1] base + assertSame(sf5, pep5Variants[0].get(0).variant); // codon[1] variant + assertEquals(1, pep5Variants[1].size()); + assertEquals("C", pep5Variants[1].get(0).base); // codon[2] base + assertSame(sf6, pep5Variants[1].get(0).variant); // codon[2] variant + assertEquals(1, pep5Variants[2].size()); + assertEquals("C", pep5Variants[2].get(0).base); // codon[3] base + assertSame(sf7, pep5Variants[2].get(0).variant); // codon[3] variant } /** @@ -1787,67 +1869,164 @@ public class AlignmentUtilsTests @Test(groups = "Functional") public void testComputePeptideVariants() { - String[][] codonVariants = new String[][] { { "A" }, { "G" }, { "T" } }; - - /* - * AGT codes for S - this is not included in the variants returned - */ - List variants = AlignmentUtils.computePeptideVariants(codonVariants, "S"); - assertEquals("[]", variants.toString()); - - // S is reported if it differs from the current value (A): - variants = AlignmentUtils.computePeptideVariants(codonVariants, "A"); - assertEquals("[S]", variants.toString()); - /* - * synonymous variant is not reported + * scenario: AAATTTCCC codes for KFP, with variants + * GAA -> E + * CAA -> Q + * AAG synonymous + * AAT -> N + * TTC synonymous + * CAC,CGC -> H,R (as one variant) */ - codonVariants = new String[][] { { "A" }, { "G" }, { "C", "T" } }; - // AGC and AGT both code for S - variants = AlignmentUtils.computePeptideVariants(codonVariants, "s"); - assertEquals("[]", variants.toString()); - + SequenceI peptide = new Sequence("pep/10-12", "KFP"); + /* - * equivalent variants are only reported once + * two distinct variants for codon 1 position 1 + * second one has clinical significance */ - codonVariants = new String[][] { { "C" }, { "T" }, - { "A", "C", "G", "T" } }; - // CTA CTC CTG CTT all code for L - variants = AlignmentUtils.computePeptideVariants(codonVariants, "S"); - assertEquals("[L]", variants.toString()); - + SequenceFeature sf1 = new SequenceFeature("sequence_variant", "", 1, 1, + 0f, null); + sf1.setValue("alleles", "A,G"); // GAA -> E + sf1.setValue("ID", "var1.125A>G"); + SequenceFeature sf2 = new SequenceFeature("sequence_variant", "", 1, 1, + 0f, null); + sf2.setValue("alleles", "A,C"); // CAA -> Q + sf2.setValue("ID", "var2"); + sf2.setValue("clinical_significance", "Dodgy"); + SequenceFeature sf3 = new SequenceFeature("sequence_variant", "", 3, 3, + 0f, null); + sf3.setValue("alleles", "A,G"); // synonymous + sf3.setValue("ID", "var3"); + sf3.setValue("clinical_significance", "None"); + SequenceFeature sf4 = new SequenceFeature("sequence_variant", "", 3, 3, + 0f, null); + sf4.setValue("alleles", "A,T"); // AAT -> N + sf4.setValue("ID", "sequence_variant:var4"); // prefix gets stripped off + sf4.setValue("clinical_significance", "Benign"); + SequenceFeature sf5 = new SequenceFeature("sequence_variant", "", 6, 6, + 0f, null); + sf5.setValue("alleles", "T,C"); // synonymous + sf5.setValue("ID", "var5"); + sf5.setValue("clinical_significance", "Bad"); + SequenceFeature sf6 = new SequenceFeature("sequence_variant", "", 8, 8, + 0f, null); + sf6.setValue("alleles", "C,A,G"); // CAC,CGC -> H,R + sf6.setValue("ID", "var6"); + sf6.setValue("clinical_significance", "Good"); + + List codon1Variants = new ArrayList(); + List codon2Variants = new ArrayList(); + List codon3Variants = new ArrayList(); + List codonVariants[] = new ArrayList[3]; + codonVariants[0] = codon1Variants; + codonVariants[1] = codon2Variants; + codonVariants[2] = codon3Variants; + /* - * vary codons 1 and 2; variant products are sorted and non-redundant + * compute variants for protein position 1 */ - codonVariants = new String[][] { { "a", "C" }, { "g", "T" }, { "A" } }; - // aga ata cga cta code for R, I, R, L - variants = AlignmentUtils.computePeptideVariants(codonVariants, "S"); - assertEquals("[I, L, R]", variants.toString()); - + codon1Variants.add(new DnaVariant("A", sf1)); + codon1Variants.add(new DnaVariant("A", sf2)); + codon2Variants.add(new DnaVariant("A")); + codon2Variants.add(new DnaVariant("A")); + codon3Variants.add(new DnaVariant("A", sf3)); + codon3Variants.add(new DnaVariant("A", sf4)); + AlignmentUtils.computePeptideVariants(peptide, 1, codonVariants); + /* - * vary codons 2 and 3 + * compute variants for protein position 2 */ - codonVariants = new String[][] { { "a" }, { "g", "T" }, { "A", "c" } }; - // aga agc ata atc code for R, S, I, I - variants = AlignmentUtils.computePeptideVariants(codonVariants, "S"); - assertEquals("[I, R]", variants.toString()); - + codon1Variants.clear(); + codon2Variants.clear(); + codon3Variants.clear(); + codon1Variants.add(new DnaVariant("T")); + codon2Variants.add(new DnaVariant("T")); + codon3Variants.add(new DnaVariant("T", sf5)); + AlignmentUtils.computePeptideVariants(peptide, 2, codonVariants); + /* - * vary codons 1 and 3 + * compute variants for protein position 3 */ - codonVariants = new String[][] { { "a", "t" }, { "a" }, { "t", "g" } }; - // aat aag tat tag code for N, K, Y, STOP - STOP sorted to end - variants = AlignmentUtils.computePeptideVariants(codonVariants, "S"); - assertEquals("[K, N, Y, STOP]", variants.toString()); - + codon1Variants.clear(); + codon2Variants.clear(); + codon3Variants.clear(); + codon1Variants.add(new DnaVariant("C")); + codon2Variants.add(new DnaVariant("C", sf6)); + codon3Variants.add(new DnaVariant("C")); + AlignmentUtils.computePeptideVariants(peptide, 3, codonVariants); + /* - * vary codons 1, 2 and 3 + * verify added sequence features for + * var1 K -> E + * var2 K -> Q + * var4 K -> N + * var6 P -> H + * var6 P -> R */ - codonVariants = new String[][] { { "a", "t" }, { "G", "C" }, - { "t", "g" } }; - // agt agg act acg tgt tgg tct tcg code for S, R, T, T, C, W, S, S - variants = AlignmentUtils.computePeptideVariants(codonVariants, "S"); - assertEquals("[C, R, T, W]", variants.toString()); + SequenceFeature[] sfs = peptide.getSequenceFeatures(); + assertEquals(5, sfs.length); + SequenceFeature sf = sfs[0]; + assertEquals(1, sf.getBegin()); + assertEquals(1, sf.getEnd()); + assertEquals("p.Lys1Glu", sf.getDescription()); + assertEquals("var1.125A>G", sf.getValue("ID")); + assertNull(sf.getValue("clinical_significance")); + assertEquals("ID=var1.125A>G", sf.getAttributes()); + assertEquals(1, sf.links.size()); + // link to variation is urlencoded + assertEquals( + "p.Lys1Glu var1.125A>G|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var1.125A%3EG", + sf.links.get(0)); + assertEquals("Jalview", sf.getFeatureGroup()); + sf = sfs[1]; + assertEquals(1, sf.getBegin()); + assertEquals(1, sf.getEnd()); + assertEquals("p.Lys1Gln", sf.getDescription()); + assertEquals("var2", sf.getValue("ID")); + assertEquals("Dodgy", sf.getValue("clinical_significance")); + assertEquals("ID=var2;clinical_significance=Dodgy", sf.getAttributes()); + assertEquals(1, sf.links.size()); + assertEquals( + "p.Lys1Gln var2|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var2", + sf.links.get(0)); + assertEquals("Jalview", sf.getFeatureGroup()); + sf = sfs[2]; + assertEquals(1, sf.getBegin()); + assertEquals(1, sf.getEnd()); + assertEquals("p.Lys1Asn", sf.getDescription()); + assertEquals("var4", sf.getValue("ID")); + assertEquals("Benign", sf.getValue("clinical_significance")); + assertEquals("ID=var4;clinical_significance=Benign", sf.getAttributes()); + assertEquals(1, sf.links.size()); + assertEquals( + "p.Lys1Asn var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4", + sf.links.get(0)); + assertEquals("Jalview", sf.getFeatureGroup()); + sf = sfs[3]; + assertEquals(3, sf.getBegin()); + assertEquals(3, sf.getEnd()); + assertEquals("p.Pro3His", sf.getDescription()); + assertEquals("var6", sf.getValue("ID")); + assertEquals("Good", sf.getValue("clinical_significance")); + assertEquals("ID=var6;clinical_significance=Good", sf.getAttributes()); + assertEquals(1, sf.links.size()); + assertEquals( + "p.Pro3His var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6", + sf.links.get(0)); + // var5 generates two distinct protein variant features + assertEquals("Jalview", sf.getFeatureGroup()); + sf = sfs[4]; + assertEquals(3, sf.getBegin()); + assertEquals(3, sf.getEnd()); + assertEquals("p.Pro3Arg", sf.getDescription()); + assertEquals("var6", sf.getValue("ID")); + assertEquals("Good", sf.getValue("clinical_significance")); + assertEquals("ID=var6;clinical_significance=Good", sf.getAttributes()); + assertEquals(1, sf.links.size()); + assertEquals( + "p.Pro3Arg var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6", + sf.links.get(0)); + assertEquals("Jalview", sf.getFeatureGroup()); } /** @@ -1953,7 +2132,65 @@ public class AlignmentUtilsTests */ dna.addCodonFrame(acf); AlignmentUtils.alignAs(cds, dna); - assertEquals("---GGGTTT---", cds.getSequenceAt(0).getSequenceAsString()); + assertEquals("---GGGTTT", cds.getSequenceAt(0).getSequenceAsString()); assertEquals("CCC------AAA", cds.getSequenceAt(1).getSequenceAsString()); } + + @Test(groups = { "Functional" }) + public void testAddMappedPositions() + { + SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g"); + SequenceI seq1 = new Sequence("cds", "AAATTT"); + from.createDatasetSequence(); + seq1.createDatasetSequence(); + Mapping mapping = new Mapping(seq1, new MapList( + new int[] { 3, 6, 9, 10 }, + new int[] { 1, 6 }, 1, 1)); + Map> map = new TreeMap>(); + AlignmentUtils.addMappedPositions(seq1, from, mapping, map); + + /* + * verify map has seq1 residues in columns 3,4,6,7,11,12 + */ + assertEquals(6, map.size()); + assertEquals('A', map.get(3).get(seq1).charValue()); + assertEquals('A', map.get(4).get(seq1).charValue()); + assertEquals('A', map.get(6).get(seq1).charValue()); + assertEquals('T', map.get(7).get(seq1).charValue()); + assertEquals('T', map.get(11).get(seq1).charValue()); + assertEquals('T', map.get(12).get(seq1).charValue()); + + /* + * + */ + } + + /** + * Test case where the mapping 'from' range includes a stop codon which is + * absent in the 'to' range + */ + @Test(groups = { "Functional" }) + public void testAddMappedPositions_withStopCodon() + { + SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g"); + SequenceI seq1 = new Sequence("cds", "AAATTT"); + from.createDatasetSequence(); + seq1.createDatasetSequence(); + Mapping mapping = new Mapping(seq1, new MapList( + new int[] { 3, 6, 9, 10 }, + new int[] { 1, 6 }, 1, 1)); + Map> map = new TreeMap>(); + AlignmentUtils.addMappedPositions(seq1, from, mapping, map); + + /* + * verify map has seq1 residues in columns 3,4,6,7,11,12 + */ + assertEquals(6, map.size()); + assertEquals('A', map.get(3).get(seq1).charValue()); + assertEquals('A', map.get(4).get(seq1).charValue()); + assertEquals('A', map.get(6).get(seq1).charValue()); + assertEquals('T', map.get(7).get(seq1).charValue()); + assertEquals('T', map.get(11).get(seq1).charValue()); + assertEquals('T', map.get(12).get(seq1).charValue()); + } }