JAL-3187 derived peptide variants tweaks and tests
[jalview.git] / test / jalview / datamodel / MappedFeaturesTest.java
diff --git a/test/jalview/datamodel/MappedFeaturesTest.java b/test/jalview/datamodel/MappedFeaturesTest.java
new file mode 100644 (file)
index 0000000..e4caac3
--- /dev/null
@@ -0,0 +1,113 @@
+package jalview.datamodel;
+
+import static org.testng.Assert.assertEquals;
+
+import jalview.util.MapList;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.testng.annotations.Test;
+
+public class MappedFeaturesTest
+{
+  @Test
+  public void testFindProteinVariants()
+  {
+    /*
+     * checks the peptide variant text that findProteinVariants derives from
+     * dna variants; dna/10-20 aCGTaGctGAa has codons CGT=R, GGA=G (upper case)
+     * mapping: 3:1 from [11-13,15,18-19] to peptide/1-2 RG
+     */
+    SequenceI from = new Sequence("dna/10-20", "ACGTAGCTGAA");
+    SequenceI to = new Sequence("peptide", "RG");
+    MapList map = new MapList(new int[] { 11, 13, 15, 15, 18, 19 },
+            new int[]
+            { 1, 2 }, 3, 1);
+    Mapping mapping = new Mapping(to, map);
+
+    /*
+     * variants in the first codon (dna 11-13), alleles listed reference first
+     * sf1: C>T at dna11, consequence CGT>TGT=C
+     * sf2: T>C at dna13, consequence CGT>CGC synonymous
+     */
+    List<SequenceFeature> features = new ArrayList<>();
+    SequenceFeature sf1 = new SequenceFeature("sequence_variant", "C,T",
+            11, 11, null);
+    sf1.setValue("alleles", "C,T");
+    features.add(sf1);
+    SequenceFeature sf2 = new SequenceFeature("sequence_variant", "T,C", 13,
+            13, null);
+    sf2.setValue("alleles", "T,C");
+    features.add(sf2);
+
+    /*
+     * missense variant in first codon: sf1 C>T should report Arg to Cys
+     */
+    MappedFeatures mf = new MappedFeatures(mapping, from, 1, 'R',
+            features);
+    String variant = mf.findProteinVariants(sf1);
+    assertEquals(variant, "p.Arg1Cys");
+
+    /*
+     * more than one alternative allele: G is added to the alleles of sf1
+     * - C>G consequence is GGT=G, in addition to C>T giving TGT=C
+     * - peptide variants are expected as a comma-separated list
+     */
+    sf1.setValue("alleles", "C,T,G");
+    variant = mf.findProteinVariants(sf1);
+    assertEquals(variant, "p.Arg1Cys,p.Arg1Gly");
+
+    /*
+     * synonymous variant in first codon (sf2: T>C at dna13, CGT>CGC)
+     * expected as the nucleotide change in HGVS notation followed by (p.=)
+     */
+    variant = mf.findProteinVariants(sf2);
+    assertEquals(variant, "c.13T>C(p.=)");
+
+    /*
+     * CSQ:HGVSp value is used if present; only the text after ':' is expected
+     */
+    Map<String, String> csq = new HashMap<>();
+    csq.put("HGVSp", "hello:world");
+    sf2.setValue("CSQ", csq);
+    variant = mf.findProteinVariants(sf2);
+    assertEquals(variant, "world");
+
+    /*
+     * missense and indel variants in second codon
+     * - codon is GGA spliced from dna positions 15,18,19
+     * - SNP G>T at dna18 (second position) mutates GGA=G to GTA=V
+     * - indel alleles (-, CG) are not computed or reported
+     */
+    mf = new MappedFeatures(mapping, from, 2, 'G', features);
+    features.clear(); // NOTE(review): assumes mf shares this list - confirm
+    SequenceFeature sf3 = new SequenceFeature("sequence_variant",
+            "G,-,CG,T", 18, 18, null);
+    sf3.setValue("alleles", "G,-,CG,T");
+    features.add(sf3);
+    variant = mf.findProteinVariants(sf3);
+    assertEquals(variant, "p.Gly2Val");
+
+    /*
+     * G>T at dna15 (first position) gives TGA Stop, shown in HGVS as 'Ter'
+     * NOTE(review): description is G,T but alleles are G,-,CG,T - confirm
+     */
+    SequenceFeature sf4 = new SequenceFeature("sequence_variant", "G,T", 15,
+            15, null);
+    sf4.setValue("alleles", "G,-,CG,T");
+    features.add(sf4);
+    variant = mf.findProteinVariants(sf4);
+    assertEquals(variant, "p.Gly2Ter");
+
+    /*
+     * feature must be one of those in MappedFeatures, else expect empty text
+     */
+    SequenceFeature sf9 = new SequenceFeature("sequence_variant", "G,C", 15,
+            15, null); // deliberately not added to the features list
+    variant = mf.findProteinVariants(sf9);
+    assertEquals(variant, ""); // NOTE(review): sf9 has no alleles either
+  }
+}