From: gmungoc Date: Fri, 9 Aug 2019 08:01:10 +0000 (+0100) Subject: JAL-3187 removal of variant feature (non-virtual) transfer to protein X-Git-Tag: Release_2_11_1_0~26 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=9b85e1552fa57f02cf6cd312cfbd7efdfd079ea3;p=jalview.git JAL-3187 removal of variant feature (non-virtual) transfer to protein --- diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 8e0335f..55efaa5 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -2389,172 +2389,6 @@ public class AlignmentUtils } /** - * Maps exon features from dna to protein, and computes variants in peptide - * product generated by variants in dna, and adds them as sequence_variant - * features on the protein sequence. Returns the number of variant features - * added. - * - * @param dnaSeq - * @param peptide - * @param dnaToProtein - */ - public static int computeProteinFeatures(SequenceI dnaSeq, - SequenceI peptide, MapList dnaToProtein) - { - while (dnaSeq.getDatasetSequence() != null) - { - dnaSeq = dnaSeq.getDatasetSequence(); - } - while (peptide.getDatasetSequence() != null) - { - peptide = peptide.getDatasetSequence(); - } - - transferFeatures(dnaSeq, peptide, dnaToProtein, SequenceOntologyI.EXON); - - /* - * compute protein variants from dna variants and codon mappings; - * NB - alternatively we could retrieve this using the REST service e.g. - * http://rest.ensembl.org/overlap/translation - * /ENSP00000288602?feature=transcript_variation;content-type=text/xml - * which would be a bit slower but possibly more reliable - */ - - /* - * build a map with codon variations for each potentially varying peptide - */ - LinkedHashMap[]> variants = buildDnaVariantsMap( - dnaSeq, dnaToProtein); - - /* - * scan codon variations, compute peptide variants and add to peptide sequence - */ - int count = 0; - for (Entry[]> variant : variants.entrySet()) - { - int peptidePos = variant.getKey(); - List[] codonVariants = variant.getValue(); - count += computePeptideVariants(peptide, peptidePos, codonVariants); - } - - return count; - } - - /** - * Computes non-synonymous peptide variants from codon variants and adds them as - * sequence_variant features on the protein sequence (one feature per allele - * variant). Selected attributes (variant id, clinical significance) are copied - * over to the new features. - * - * @param peptide - * the protein dataset (ungapped) sequence - * @param peptidePos - * the position to compute peptide variants for - * @param codonVariants - * a list of dna variants per codon position - * @return the number of features added - */ - static int computePeptideVariants(SequenceI peptide, int peptidePos, - List[] codonVariants) - { - String residue = String - .valueOf(peptide.getCharAt(peptidePos - peptide.getStart())); - int count = 0; - String base1 = codonVariants[0].get(0).base; - String base2 = codonVariants[1].get(0).base; - String base3 = codonVariants[2].get(0).base; - - /* - * variants in first codon base - */ - for (DnaVariant var : codonVariants[0]) - { - if (var.variant != null) - { - String alleles = (String) var.variant.getValue(Gff3Helper.ALLELES); - if (alleles != null) - { - for (String base : alleles.split(",")) - { - if (!base1.equalsIgnoreCase(base)) - { - String codon = base.toUpperCase() + base2.toLowerCase() - + base3.toLowerCase(); - String canonical = base1.toUpperCase() + base2.toLowerCase() - + base3.toLowerCase(); - if (addPeptideVariant(peptide, peptidePos, residue, var, - codon, canonical)) - { - count++; - } - } - } - } - } - } - - /* - * variants in second codon base - */ - for (DnaVariant var : codonVariants[1]) - { - if (var.variant != null) - { - String alleles = (String) var.variant.getValue(Gff3Helper.ALLELES); - if (alleles != null) - { - for (String base : alleles.split(",")) - { - if (!base2.equalsIgnoreCase(base)) - { - String codon = base1.toLowerCase() + base.toUpperCase() - + base3.toLowerCase(); - String canonical = base1.toLowerCase() + base2.toUpperCase() - + base3.toLowerCase(); - if (addPeptideVariant(peptide, peptidePos, residue, var, - codon, canonical)) - { - count++; - } - } - } - } - } - } - - /* - * variants in third codon base - */ - for (DnaVariant var : codonVariants[2]) - { - if (var.variant != null) - { - String alleles = (String) var.variant.getValue(Gff3Helper.ALLELES); - if (alleles != null) - { - for (String base : alleles.split(",")) - { - if (!base3.equalsIgnoreCase(base)) - { - String codon = base1.toLowerCase() + base2.toLowerCase() - + base.toUpperCase(); - String canonical = base1.toLowerCase() + base2.toLowerCase() - + base3.toUpperCase(); - if (addPeptideVariant(peptide, peptidePos, residue, var, - codon, canonical)) - { - count++; - } - } - } - } - } - } - - return count; - } - - /** * Helper method that adds a peptide variant feature. ID and * clinical_significance attributes of the dna variant (if present) are copied * to the new feature. @@ -2646,137 +2480,6 @@ public class AlignmentUtils } /** - * Builds a map whose key is position in the protein sequence, and value is a - * list of the base and all variants for each corresponding codon position. - *

- * This depends on dna variants being held as a comma-separated list as - * property "alleles" on variant features. - * - * @param dnaSeq - * @param dnaToProtein - * @return - */ - @SuppressWarnings("unchecked") - static LinkedHashMap[]> buildDnaVariantsMap( - SequenceI dnaSeq, MapList dnaToProtein) - { - /* - * map from peptide position to all variants of the codon which codes for it - * LinkedHashMap ensures we keep the peptide features in sequence order - */ - LinkedHashMap[]> variants = new LinkedHashMap<>(); - - List dnaFeatures = dnaSeq.getFeatures() - .getFeaturesByOntology(SequenceOntologyI.SEQUENCE_VARIANT); - if (dnaFeatures.isEmpty()) - { - return variants; - } - - int dnaStart = dnaSeq.getStart(); - int[] lastCodon = null; - int lastPeptidePostion = 0; - - /* - * build a map of codon variations for peptides - */ - for (SequenceFeature sf : dnaFeatures) - { - int dnaCol = sf.getBegin(); - if (dnaCol != sf.getEnd()) - { - // not handling multi-locus variant features - continue; - } - - /* - * ignore variant if not a SNP - */ - String alls = (String) sf.getValue(Gff3Helper.ALLELES); - if (alls == null) - { - continue; // non-SNP VCF variant perhaps - can't process this - } - - String[] alleles = alls.toUpperCase().split(","); - boolean isSnp = true; - for (String allele : alleles) - { - if (allele.trim().length() > 1) - { - isSnp = false; - } - } - if (!isSnp) - { - continue; - } - - int[] mapsTo = dnaToProtein.locateInTo(dnaCol, dnaCol); - if (mapsTo == null) - { - // feature doesn't lie within coding region - continue; - } - int peptidePosition = mapsTo[0]; - List[] codonVariants = variants.get(peptidePosition); - if (codonVariants == null) - { - codonVariants = new ArrayList[CODON_LENGTH]; - codonVariants[0] = new ArrayList<>(); - codonVariants[1] = new ArrayList<>(); - codonVariants[2] = new ArrayList<>(); - variants.put(peptidePosition, codonVariants); - } - - /* - * get this peptide's codon positions e.g. [3, 4, 5] or [4, 7, 10] - */ - int[] codon = peptidePosition == lastPeptidePostion ? lastCodon - : MappingUtils.flattenRanges(dnaToProtein.locateInFrom( - peptidePosition, peptidePosition)); - lastPeptidePostion = peptidePosition; - lastCodon = codon; - - /* - * save nucleotide (and any variant) for each codon position - */ - for (int codonPos = 0; codonPos < CODON_LENGTH; codonPos++) - { - String nucleotide = String.valueOf( - dnaSeq.getCharAt(codon[codonPos] - dnaStart)).toUpperCase(); - List codonVariant = codonVariants[codonPos]; - if (codon[codonPos] == dnaCol) - { - if (!codonVariant.isEmpty() - && codonVariant.get(0).variant == null) - { - /* - * already recorded base value, add this variant - */ - codonVariant.get(0).variant = sf; - } - else - { - /* - * add variant with base value - */ - codonVariant.add(new DnaVariant(nucleotide, sf)); - } - } - else if (codonVariant.isEmpty()) - { - /* - * record (possibly non-varying) base value - */ - codonVariant.add(new DnaVariant(nucleotide)); - } - } - } - return variants; - } - - /** * Makes an alignment with a copy of the given sequences, adding in any * non-redundant sequences which are mapped to by the cross-referenced * sequences. diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index e6bae9b..40401fb 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -99,7 +99,7 @@ public class CrossRef */ public List findXrefSourcesForSequences(boolean dna) { - List sources = new ArrayList(); + List sources = new ArrayList<>(); for (SequenceI seq : fromSeqs) { if (seq != null) @@ -151,7 +151,7 @@ public class CrossRef * find sequence's direct (dna-to-dna, peptide-to-peptide) xrefs */ DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(fromDna, seq.getDBRefs()); - List foundSeqs = new ArrayList(); + List foundSeqs = new ArrayList<>(); /* * find sequences in the alignment which xref one of these DBRefs @@ -218,7 +218,7 @@ public class CrossRef public Alignment findXrefSequences(String source, boolean fromDna) { - rseqs = new ArrayList(); + rseqs = new ArrayList<>(); AlignedCodonFrame cf = new AlignedCodonFrame(); matcher = new SequenceIdMatcher(dataset.getSequences()); @@ -430,8 +430,8 @@ public class CrossRef if (retrieved != null) { boolean addedXref = false; - List newDsSeqs = new ArrayList(), - doNotAdd = new ArrayList(); + List newDsSeqs = new ArrayList<>(), + doNotAdd = new ArrayList<>(); for (SequenceI retrievedSequence : retrieved) { @@ -921,7 +921,7 @@ public class CrossRef if (fromDna) { - AlignmentUtils.computeProteinFeatures(mapFrom, mapTo, mapping); + // AlignmentUtils.computeProteinFeatures(mapFrom, mapTo, mapping); mappings.addMap(mapFrom, mapTo, mapping); } else diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index dabd3ee..128bc5c 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -1870,415 +1870,6 @@ public class AlignmentUtilsTests } /** - * Test the method that computes a map of codon variants for each protein - * position from "sequence_variant" features on dna - */ - @Test(groups = "Functional") - public void testBuildDnaVariantsMap() - { - SequenceI dna = new Sequence("dna", "atgAAATTTGGGCCCtag"); - MapList map = new MapList(new int[] { 1, 18 }, new int[] { 1, 5 }, 3, 1); - - /* - * first with no variants on dna - */ - LinkedHashMap[]> variantsMap = AlignmentUtils - .buildDnaVariantsMap(dna, map); - assertTrue(variantsMap.isEmpty()); - - /* - * single allele codon 1, on base 1 - */ - SequenceFeature sf1 = new SequenceFeature("sequence_variant", "", 1, 1, - 0f, null); - sf1.setValue("alleles", "T"); - sf1.setValue("ID", "sequence_variant:rs758803211"); - dna.addSequenceFeature(sf1); - - /* - * two alleles codon 2, on bases 2 and 3 (distinct variants) - */ - SequenceFeature sf2 = new SequenceFeature("sequence_variant", "", 5, 5, - 0f, null); - sf2.setValue("alleles", "T"); - sf2.setValue("ID", "sequence_variant:rs758803212"); - dna.addSequenceFeature(sf2); - SequenceFeature sf3 = new SequenceFeature("sequence_variant", "", 6, 6, - 0f, null); - sf3.setValue("alleles", "G"); - sf3.setValue("ID", "sequence_variant:rs758803213"); - dna.addSequenceFeature(sf3); - - /* - * two alleles codon 3, both on base 2 (one variant) - */ - SequenceFeature sf4 = new SequenceFeature("sequence_variant", "", 8, 8, - 0f, null); - sf4.setValue("alleles", "C, G"); - sf4.setValue("ID", "sequence_variant:rs758803214"); - dna.addSequenceFeature(sf4); - - // no alleles on codon 4 - - /* - * alleles on codon 5 on all 3 bases (distinct variants) - */ - SequenceFeature sf5 = new SequenceFeature("sequence_variant", "", 13, - 13, 0f, null); - sf5.setValue("alleles", "C, G"); // (C duplicates given base value) - sf5.setValue("ID", "sequence_variant:rs758803215"); - dna.addSequenceFeature(sf5); - SequenceFeature sf6 = new SequenceFeature("sequence_variant", "", 14, - 14, 0f, null); - sf6.setValue("alleles", "g, a"); // should force to upper-case - sf6.setValue("ID", "sequence_variant:rs758803216"); - dna.addSequenceFeature(sf6); - - SequenceFeature sf7 = new SequenceFeature("sequence_variant", "", 15, - 15, 0f, null); - sf7.setValue("alleles", "A, T"); - sf7.setValue("ID", "sequence_variant:rs758803217"); - dna.addSequenceFeature(sf7); - - /* - * build map - expect variants on positions 1, 2, 3, 5 - */ - variantsMap = AlignmentUtils.buildDnaVariantsMap(dna, map); - assertEquals(4, variantsMap.size()); - - /* - * protein residue 1: variant on codon (ATG) base 1, not on 2 or 3 - */ - List[] pep1Variants = variantsMap.get(1); - assertEquals(3, pep1Variants.length); - assertEquals(1, pep1Variants[0].size()); - assertEquals("A", pep1Variants[0].get(0).base); // codon[1] base - assertSame(sf1, pep1Variants[0].get(0).variant); // codon[1] variant - assertEquals(1, pep1Variants[1].size()); - assertEquals("T", pep1Variants[1].get(0).base); // codon[2] base - assertNull(pep1Variants[1].get(0).variant); // no variant here - assertEquals(1, pep1Variants[2].size()); - assertEquals("G", pep1Variants[2].get(0).base); // codon[3] base - assertNull(pep1Variants[2].get(0).variant); // no variant here - - /* - * protein residue 2: variants on codon (AAA) bases 2 and 3 - */ - List[] pep2Variants = variantsMap.get(2); - assertEquals(3, pep2Variants.length); - assertEquals(1, pep2Variants[0].size()); - // codon[1] base recorded while processing variant on codon[2] - assertEquals("A", pep2Variants[0].get(0).base); - assertNull(pep2Variants[0].get(0).variant); // no variant here - // codon[2] base and variant: - assertEquals(1, pep2Variants[1].size()); - assertEquals("A", pep2Variants[1].get(0).base); - assertSame(sf2, pep2Variants[1].get(0).variant); - // codon[3] base was recorded when processing codon[2] variant - // and then the variant for codon[3] added to it - assertEquals(1, pep2Variants[2].size()); - assertEquals("A", pep2Variants[2].get(0).base); - assertSame(sf3, pep2Variants[2].get(0).variant); - - /* - * protein residue 3: variants on codon (TTT) base 2 only - */ - List[] pep3Variants = variantsMap.get(3); - assertEquals(3, pep3Variants.length); - assertEquals(1, pep3Variants[0].size()); - assertEquals("T", pep3Variants[0].get(0).base); // codon[1] base - assertNull(pep3Variants[0].get(0).variant); // no variant here - assertEquals(1, pep3Variants[1].size()); - assertEquals("T", pep3Variants[1].get(0).base); // codon[2] base - assertSame(sf4, pep3Variants[1].get(0).variant); // codon[2] variant - assertEquals(1, pep3Variants[2].size()); - assertEquals("T", pep3Variants[2].get(0).base); // codon[3] base - assertNull(pep3Variants[2].get(0).variant); // no variant here - - /* - * three variants on protein position 5 - */ - List[] pep5Variants = variantsMap.get(5); - assertEquals(3, pep5Variants.length); - assertEquals(1, pep5Variants[0].size()); - assertEquals("C", pep5Variants[0].get(0).base); // codon[1] base - assertSame(sf5, pep5Variants[0].get(0).variant); // codon[1] variant - assertEquals(1, pep5Variants[1].size()); - assertEquals("C", pep5Variants[1].get(0).base); // codon[2] base - assertSame(sf6, pep5Variants[1].get(0).variant); // codon[2] variant - assertEquals(1, pep5Variants[2].size()); - assertEquals("C", pep5Variants[2].get(0).base); // codon[3] base - assertSame(sf7, pep5Variants[2].get(0).variant); // codon[3] variant - } - - /** - * Tests for the method that computes all peptide variants given codon - * variants - */ - @Test(groups = "Functional") - public void testComputePeptideVariants() - { - /* - * scenario: AAATTTCCC codes for KFP - * variants: - * GAA -> E source: Ensembl - * CAA -> Q source: dbSNP - * TAA -> STOP source: dnSNP - * AAG synonymous source: COSMIC - * AAT -> N source: Ensembl - * ...TTC synonymous source: dbSNP - * ......CAC,CGC -> H,R source: COSMIC - * (one variant with two alleles) - */ - SequenceI peptide = new Sequence("pep/10-12", "KFP"); - - /* - * two distinct variants for codon 1 position 1 - * second one has clinical significance - */ - String ensembl = "Ensembl"; - String dbSnp = "dbSNP"; - String cosmic = "COSMIC"; - - /* - * NB setting "id" (as returned by Ensembl for features in JSON format); - * previously "ID" (as returned for GFF3 format) - */ - SequenceFeature sf1 = new SequenceFeature("sequence_variant", "", 10, - 10, - 0f, ensembl); - sf1.setValue("alleles", "A,G"); // AAA -> GAA -> K/E - sf1.setValue("id", "var1.125A>G"); - - SequenceFeature sf2 = new SequenceFeature("sequence_variant", "", 10, - 10, - 0f, dbSnp); - sf2.setValue("alleles", "A,C"); // AAA -> CAA -> K/Q - sf2.setValue("id", "var2"); - sf2.setValue("clinical_significance", "Dodgy"); - - SequenceFeature sf3 = new SequenceFeature("sequence_variant", "", 11, - 11, - 0f, dbSnp); - sf3.setValue("alleles", "A,T"); // AAA -> TAA -> stop codon - sf3.setValue("id", "var3"); - sf3.setValue("clinical_significance", "Bad"); - - SequenceFeature sf4 = new SequenceFeature("sequence_variant", "", 12, - 12, - 0f, cosmic); - sf4.setValue("alleles", "A,G"); // AAA -> AAG synonymous - sf4.setValue("id", "var4"); - sf4.setValue("clinical_significance", "None"); - - SequenceFeature sf5 = new SequenceFeature("sequence_variant", "", 12, - 12, - 0f, ensembl); - sf5.setValue("alleles", "A,T"); // AAA -> AAT -> K/N - sf5.setValue("id", "sequence_variant:var5"); // prefix gets stripped off - sf5.setValue("clinical_significance", "Benign"); - - SequenceFeature sf6 = new SequenceFeature("sequence_variant", "", 15, - 15, - 0f, dbSnp); - sf6.setValue("alleles", "T,C"); // TTT -> TTC synonymous - sf6.setValue("id", "var6"); - - SequenceFeature sf7 = new SequenceFeature("sequence_variant", "", 17, - 17, - 0f, cosmic); - sf7.setValue("alleles", "C,A,G"); // CCC -> CAC,CGC -> P/H/R - sf7.setValue("id", "var7"); - sf7.setValue("clinical_significance", "Good"); - - List codon1Variants = new ArrayList<>(); - List codon2Variants = new ArrayList<>(); - List codon3Variants = new ArrayList<>(); - - List codonVariants[] = new ArrayList[3]; - codonVariants[0] = codon1Variants; - codonVariants[1] = codon2Variants; - codonVariants[2] = codon3Variants; - - /* - * compute variants for protein position 1 - */ - codon1Variants.add(new DnaVariant("A", sf1)); - codon1Variants.add(new DnaVariant("A", sf2)); - codon1Variants.add(new DnaVariant("A", sf3)); - codon2Variants.add(new DnaVariant("A")); - // codon2Variants.add(new DnaVariant("A")); - codon3Variants.add(new DnaVariant("A", sf4)); - codon3Variants.add(new DnaVariant("A", sf5)); - AlignmentUtils.computePeptideVariants(peptide, 10, codonVariants); - - /* - * compute variants for protein position 2 - */ - codon1Variants.clear(); - codon2Variants.clear(); - codon3Variants.clear(); - codon1Variants.add(new DnaVariant("T")); - codon2Variants.add(new DnaVariant("T")); - codon3Variants.add(new DnaVariant("T", sf6)); - AlignmentUtils.computePeptideVariants(peptide, 11, codonVariants); - - /* - * compute variants for protein position 3 - */ - codon1Variants.clear(); - codon2Variants.clear(); - codon3Variants.clear(); - codon1Variants.add(new DnaVariant("C")); - codon2Variants.add(new DnaVariant("C", sf7)); - codon3Variants.add(new DnaVariant("C")); - AlignmentUtils.computePeptideVariants(peptide, 12, codonVariants); - - /* - * verify added sequence features for - * var1 K -> E Ensembl - * var2 K -> Q dbSNP - * var3 K -> stop - * var4 synonymous - * var5 K -> N Ensembl - * var6 synonymous - * var7 P -> H COSMIC - * var8 P -> R COSMIC - */ - List sfs = peptide.getSequenceFeatures(); - SequenceFeatures.sortFeatures(sfs, true); - assertEquals(8, sfs.size()); - - /* - * features are sorted by start position ascending, but in no - * particular order where start positions match; asserts here - * simply match the data returned (the order is not important) - */ - // AAA -> AAT -> K/N - SequenceFeature sf = sfs.get(0); - assertEquals(10, sf.getBegin()); - assertEquals(10, sf.getEnd()); - assertEquals("nonsynonymous_variant", sf.getType()); - assertEquals("p.Lys10Asn", sf.getDescription()); - assertEquals("var5", sf.getValue("id")); - assertEquals("Benign", sf.getValue("clinical_significance")); - assertEquals("id=var5;clinical_significance=Benign", - sf.getAttributes()); - assertEquals(1, sf.links.size()); - assertEquals( - "p.Lys10Asn var5|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var5", - sf.links.get(0)); - assertEquals(ensembl, sf.getFeatureGroup()); - - // AAA -> CAA -> K/Q - sf = sfs.get(1); - assertEquals(10, sf.getBegin()); - assertEquals(10, sf.getEnd()); - assertEquals("nonsynonymous_variant", sf.getType()); - assertEquals("p.Lys10Gln", sf.getDescription()); - assertEquals("var2", sf.getValue("id")); - assertEquals("Dodgy", sf.getValue("clinical_significance")); - assertEquals("id=var2;clinical_significance=Dodgy", sf.getAttributes()); - assertEquals(1, sf.links.size()); - assertEquals( - "p.Lys10Gln var2|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var2", - sf.links.get(0)); - assertEquals(dbSnp, sf.getFeatureGroup()); - - // AAA -> GAA -> K/E - sf = sfs.get(2); - assertEquals(10, sf.getBegin()); - assertEquals(10, sf.getEnd()); - assertEquals("nonsynonymous_variant", sf.getType()); - assertEquals("p.Lys10Glu", sf.getDescription()); - assertEquals("var1.125A>G", sf.getValue("id")); - assertNull(sf.getValue("clinical_significance")); - assertEquals("id=var1.125A>G", sf.getAttributes()); - assertEquals(1, sf.links.size()); - // link to variation is urlencoded - assertEquals( - "p.Lys10Glu var1.125A>G|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var1.125A%3EG", - sf.links.get(0)); - assertEquals(ensembl, sf.getFeatureGroup()); - - // AAA -> TAA -> stop codon - sf = sfs.get(3); - assertEquals(10, sf.getBegin()); - assertEquals(10, sf.getEnd()); - assertEquals("stop_gained", sf.getType()); - assertEquals("Aaa/Taa", sf.getDescription()); - assertEquals("var3", sf.getValue("id")); - assertEquals("Bad", sf.getValue("clinical_significance")); - assertEquals("id=var3;clinical_significance=Bad", sf.getAttributes()); - assertEquals(1, sf.links.size()); - assertEquals( - "Aaa/Taa var3|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var3", - sf.links.get(0)); - assertEquals(dbSnp, sf.getFeatureGroup()); - - // AAA -> AAG synonymous - sf = sfs.get(4); - assertEquals(10, sf.getBegin()); - assertEquals(10, sf.getEnd()); - assertEquals("synonymous_variant", sf.getType()); - assertEquals("aaA/aaG", sf.getDescription()); - assertEquals("var4", sf.getValue("id")); - assertEquals("None", sf.getValue("clinical_significance")); - assertEquals("id=var4;clinical_significance=None", sf.getAttributes()); - assertEquals(1, sf.links.size()); - assertEquals( - "aaA/aaG var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4", - sf.links.get(0)); - assertEquals(cosmic, sf.getFeatureGroup()); - - // TTT -> TTC synonymous - sf = sfs.get(5); - assertEquals(11, sf.getBegin()); - assertEquals(11, sf.getEnd()); - assertEquals("synonymous_variant", sf.getType()); - assertEquals("ttT/ttC", sf.getDescription()); - assertEquals("var6", sf.getValue("id")); - assertNull(sf.getValue("clinical_significance")); - assertEquals("id=var6", sf.getAttributes()); - assertEquals(1, sf.links.size()); - assertEquals( - "ttT/ttC var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6", - sf.links.get(0)); - assertEquals(dbSnp, sf.getFeatureGroup()); - - // var7 generates two distinct protein variant features (two alleles) - // CCC -> CGC -> P/R - sf = sfs.get(6); - assertEquals(12, sf.getBegin()); - assertEquals(12, sf.getEnd()); - assertEquals("nonsynonymous_variant", sf.getType()); - assertEquals("p.Pro12Arg", sf.getDescription()); - assertEquals("var7", sf.getValue("id")); - assertEquals("Good", sf.getValue("clinical_significance")); - assertEquals("id=var7;clinical_significance=Good", sf.getAttributes()); - assertEquals(1, sf.links.size()); - assertEquals( - "p.Pro12Arg var7|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var7", - sf.links.get(0)); - assertEquals(cosmic, sf.getFeatureGroup()); - - // CCC -> CAC -> P/H - sf = sfs.get(7); - assertEquals(12, sf.getBegin()); - assertEquals(12, sf.getEnd()); - assertEquals("nonsynonymous_variant", sf.getType()); - assertEquals("p.Pro12His", sf.getDescription()); - assertEquals("var7", sf.getValue("id")); - assertEquals("Good", sf.getValue("clinical_significance")); - assertEquals("id=var7;clinical_significance=Good", sf.getAttributes()); - assertEquals(1, sf.links.size()); - assertEquals( - "p.Pro12His var7|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var7", - sf.links.get(0)); - assertEquals(cosmic, sf.getFeatureGroup()); - } - - /** * Tests for the method that maps the subset of a dna sequence that has CDS * (or subtype) feature, with CDS strand = '-' (reverse) */