2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import static org.testng.Assert.assertEquals;
25 import jalview.util.MapList;
27 import java.util.ArrayList;
28 import java.util.HashMap;
29 import java.util.List;
32 import org.testng.annotations.Test;
34 public class MappedFeaturesTest
36 @Test(groups = "Functional")
37 public void testFindProteinVariants()
41 * dna/10-20 aCGTaGctGAa (upper case marks the mapped positions; codons CGT=R, GGA=G)
42 * mapping: 3:1 from [11-13,15,18-19] to peptide/1-2 RG
44 SequenceI from = new Sequence("dna/10-20", "acgTAGCTGAA");
45 SequenceI to = new Sequence("peptide", "RG");
46 MapList map = new MapList(new int[] { 11, 13, 15, 15, 18, 19 },
49 Mapping mapping = new Mapping(to, map);
53 * C>T at dna11, consequence CGT>TGT=C
54 * T>C at dna13, consequence CGT>CGC synonymous
56 List<SequenceFeature> features = new ArrayList<>();
57 SequenceFeature sf1 = new SequenceFeature("sequence_variant", "C,T", 11,
59 sf1.setValue("alleles", "C,T");
61 SequenceFeature sf2 = new SequenceFeature("sequence_variant", "T,C", 13,
63 sf2.setValue("alleles", "T,C");
67 * missense variant in first codon
69 MappedFeatures mf = new MappedFeatures(mapping, from, 1, 'R', features);
70 String variant = mf.findProteinVariants(sf1);
71 assertEquals(variant, "p.Arg1Cys");
74 * more than one alternative allele
75 * C>G consequence is GGT=G
76 * peptide variants as a comma-separated list
78 sf1.setValue("alleles", "C,T,G");
79 variant = mf.findProteinVariants(sf1);
80 assertEquals(variant, "p.Arg1Cys,p.Arg1Gly");
83 * synonymous variant in first codon
84 * shown in HGVS notation on peptide
86 variant = mf.findProteinVariants(sf2);
87 assertEquals(variant, "c.13T>C(p.=)");
90 * CSQ:HGVSp value is used if present
91 * _and_ the text following a colon contains "p." (that text is what is reported)
93 Map<String, String> csq = new HashMap<>();
94 csq.put("HGVSp", "hello:world");
95 sf2.setValue("CSQ", csq);
96 variant = mf.findProteinVariants(sf2);
97 assertEquals(variant, "c.13T>C(p.=)");
98 csq.put("HGVSp", "p.HelloWorld");
99 variant = mf.findProteinVariants(sf2);
100 assertEquals(variant, "c.13T>C(p.=)");
101 csq.put("HGVSp", "try this:hellop.world");
102 variant = mf.findProteinVariants(sf2);
103 assertEquals(variant, "hellop.world");
106 * missense and indel variants in second codon
107 * - codon is GGA spliced from dna positions 15,18,19
108 * - SNP G>T in second position mutates codon GGA=G to GTA=V
109 * - indel variants are not computed or reported
111 mf = new MappedFeatures(mapping, from, 2, 'G', features);
113 SequenceFeature sf3 = new SequenceFeature("sequence_variant",
114 "G,-,CG,T", 18, 18, null);
115 sf3.setValue("alleles", "G,-,CG,T");
117 variant = mf.findProteinVariants(sf3);
118 assertEquals(variant, "p.Gly2Val");
121 * G>T in first position gives TGA Stop
122 * shown with HGVS notation as 'Ter'
124 SequenceFeature sf4 = new SequenceFeature("sequence_variant", "G,T", 15,
126 sf4.setValue("alleles", "G,-,CG,T");
128 variant = mf.findProteinVariants(sf4);
129 assertEquals(variant, "p.Gly2Ter");
132 * feature must be one of those in MappedFeatures
134 SequenceFeature sf9 = new SequenceFeature("sequence_variant", "G,C", 15,
136 variant = mf.findProteinVariants(sf9);
137 assertEquals(variant, "");