/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
package jalview.datamodel;

import static org.testng.Assert.assertEquals;

import jalview.util.MapList;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.testng.annotations.Test;

/**
 * Unit tests for {@link MappedFeatures}.
 */
public class MappedFeaturesTest
{
  /**
   * Exercises {@code MappedFeatures.findProteinVariants} against a small
   * spliced dna-to-peptide mapping, checking the string reported for missense,
   * multi-allele, synonymous, CSQ-annotated, indel and stop-gain variants, and
   * for a feature that is not held by the MappedFeatures object.
   */
  @Test(groups = "Functional")
  public void testFindProteinVariants()
  {
    /*
     * scenario: 
     * dna/10-20 aCGTaGctGAa (codons CGT=R, GGA = G)
     * mapping: 3:1 from [11-13,15,18-19] to peptide/1-2 RG 
     */
    SequenceI from = new Sequence("dna/10-20", "acgTAGCTGAA");
    SequenceI to = new Sequence("peptide", "RG");
    MapList map = new MapList(new int[] { 11, 13, 15, 15, 18, 19 },
            new int[] { 1, 2 }, 3, 1);
    Mapping mapping = new Mapping(to, map);

    /*
     * variants
     * C>T at dna11, consequence CGT>TGT=C
     * T>C at dna13, consequence CGT>CGC synonymous
     */
    List<SequenceFeature> features = new ArrayList<>();
    SequenceFeature sf1 = new SequenceFeature("sequence_variant", "C,T",
            11, 11, null);
    sf1.setValue("alleles", "C,T");
    features.add(sf1);
    SequenceFeature sf2 = new SequenceFeature("sequence_variant", "T,C",
            13, 13, null);
    sf2.setValue("alleles", "T,C");
    features.add(sf2);

    /*
     * missense variant in first codon
     */
    MappedFeatures mf = new MappedFeatures(mapping, from, 1, 'R',
            features);
    String variant = mf.findProteinVariants(sf1);
    assertEquals(variant, "p.Arg1Cys");

    /*
     * more than one alternative allele
     * C>G consequence is GGT=G
     * peptide variants as a comma-separated list
     */
    sf1.setValue("alleles", "C,T,G");
    variant = mf.findProteinVariants(sf1);
    assertEquals(variant, "p.Arg1Cys,p.Arg1Gly");

    /*
     * synonymous variant in first codon
     * shown in HGVS notation on peptide
     */
    variant = mf.findProteinVariants(sf2);
    assertEquals(variant, "c.13T>C(p.=)");

    /*
     * CSQ:HGVSp value is used if present 
     * _and_ it contains "p." following a colon
     */
    Map<String, String> csq = new HashMap<>();
    // colon present but no "p." after it: computed value is still reported
    csq.put("HGVSp", "hello:world");
    sf2.setValue("CSQ", csq);
    variant = mf.findProteinVariants(sf2);
    assertEquals(variant, "c.13T>C(p.=)");
    // "p." present but no colon: computed value is still reported
    csq.put("HGVSp", "p.HelloWorld");
    variant = mf.findProteinVariants(sf2);
    assertEquals(variant, "c.13T>C(p.=)");
    // colon followed by text containing "p.": text after the colon is reported
    csq.put("HGVSp", "try this:hellop.world");
    variant = mf.findProteinVariants(sf2);
    assertEquals(variant, "hellop.world");

    /*
     * missense and indel variants in second codon
     * - codon is GGA spliced from dna positions 15,18,19
     * - SNP G>T in second position mutates GGA>G to GTA>V
     * - indel variants are not computed or reported
     */
    mf = new MappedFeatures(mapping, from, 2, 'G', features);
    // the same list instance is emptied and refilled after being handed to
    // the constructor; statement order is deliberately left unchanged
    features.clear();
    SequenceFeature sf3 = new SequenceFeature("sequence_variant",
            "G,-,CG,T", 18, 18, null);
    sf3.setValue("alleles", "G,-,CG,T");
    features.add(sf3);
    variant = mf.findProteinVariants(sf3);
    assertEquals(variant, "p.Gly2Val");

    /*
     * G>T in first position gives TGA Stop
     * shown with HGVS notation as 'Ter'
     */
    SequenceFeature sf4 = new SequenceFeature("sequence_variant", "G,T",
            15, 15, null);
    // NOTE(review): description is "G,T" but the alleles attribute below is
    // "G,-,CG,T" - confirm whether this difference is intentional
    sf4.setValue("alleles", "G,-,CG,T");
    features.add(sf4);
    variant = mf.findProteinVariants(sf4);
    assertEquals(variant, "p.Gly2Ter");

    /*
     * feature must be one of those in MappedFeatures
     */
    SequenceFeature sf9 = new SequenceFeature("sequence_variant", "G,C",
            15, 15, null);
    variant = mf.findProteinVariants(sf9);
    assertEquals(variant, "");
  }
}