From: gmungoc Date: Thu, 14 Apr 2016 11:06:29 +0000 (+0100) Subject: JAL-2049 revised computePeptideVariants to transfer id, clinical_sig X-Git-Tag: Release_2_10_0~249^2~24 X-Git-Url: http://source.jalview.org/gitweb/?p=jalview.git;a=commitdiff_plain;h=74b3bb2ce3513c972e472406545490fa31e15c0d JAL-2049 revised computePeptideVariants to transfer id, clinical_sig --- diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 28062c0..fa135f8 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -38,8 +38,9 @@ import jalview.schemes.ResidueProperties; import jalview.util.Comparison; import jalview.util.MapList; import jalview.util.MappingUtils; -import jalview.util.StringUtils; +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -66,6 +67,32 @@ import java.util.TreeMap; public class AlignmentUtils { + private static final String SEQUENCE_VARIANT = "sequence_variant:"; + private static final String ID = "ID"; + private static final String CLINICAL_SIGNIFICANCE = "clinical_significance"; + + /** + * A data model to hold the 'normal' base value at a position, and an optional + * sequence variant feature + */ + static class DnaVariant + { + String base; + + SequenceFeature variant; + + DnaVariant(String nuc) + { + base = nuc; + } + + DnaVariant(String nuc, SequenceFeature var) + { + base = nuc; + variant = var; + } + } + /** * given an existing alignment, create a new alignment including all, or up to * flankSize additional symbols from each sequence's dataset sequence @@ -1744,39 +1771,26 @@ public class AlignmentUtils * /ENSP00000288602?feature=transcript_variation;content-type=text/xml * which would be a bit slower but possibly more reliable */ - LinkedHashMap> variants = buildDnaVariantsMap( + + /* + * build a map with codon variations for each potentially varying peptide + */ + LinkedHashMap[]> variants = buildDnaVariantsMap( dnaSeq, dnaToProtein); /* * scan codon variations, compute peptide variants and add to peptide sequence */ int count = 0; - for (Entry> variant : variants.entrySet()) + for (Entry[]> variant : variants.entrySet()) { int peptidePos = variant.getKey(); - List codonVariants = variant.getValue(); - String residue = String.valueOf(peptide.getCharAt(peptidePos - 1)); // 0-based - for (String[][] codonVariant : codonVariants) - { - List peptideVariants = computePeptideVariants(codonVariant, - residue); - if (!peptideVariants.isEmpty()) - { - String desc = residue - + "->" // include canonical residue in description - + StringUtils - .listToDelimitedString(peptideVariants, ", "); - SequenceFeature sf = new SequenceFeature( - SequenceOntologyI.SEQUENCE_VARIANT, desc, peptidePos, - peptidePos, 0f, null); - peptide.addSequenceFeature(sf); - count++; - } - } + List[] codonVariants = variant.getValue(); + count += computePeptideVariants(peptide, peptidePos, codonVariants); } /* - * ugly sort to get sequence features in start position order + * sort to get sequence features in start position order * - would be better to store in Sequence as a TreeSet or NCList? */ Arrays.sort(peptide.getSequenceFeatures(), @@ -1794,21 +1808,178 @@ public class AlignmentUtils } /** - * Builds a map whose key is position in the protein sequence, and value is an - * array of all variants for the coding codon positions + * Computes non-synonymous peptide variants from codon variants and adds them + * as sequence_variant features on the protein sequence (one feature per + * allele variant). Selected attributes (variant id, clinical significance) + * are copied over to the new features. + * + * @param peptide + * the protein sequence + * @param peptidePos + * the position to compute peptide variants for + * @param codonVariants + * a list of dna variants per codon position + * @return the number of features added + */ + static int computePeptideVariants(SequenceI peptide, int peptidePos, + List[] codonVariants) + { + String residue = String.valueOf(peptide.getCharAt(peptidePos - 1)); + int count = 0; + String base1 = codonVariants[0].get(0).base; + String base2 = codonVariants[1].get(0).base; + String base3 = codonVariants[2].get(0).base; + + /* + * variants in first codon base + */ + for (DnaVariant var : codonVariants[0]) + { + if (var.variant != null) + { + String alleles = (String) var.variant.getValue("alleles"); + if (alleles != null) + { + for (String base : alleles.split(",")) + { + String codon = base + base2 + base3; + if (addPeptideVariant(peptide, peptidePos, residue, var, codon)) + { + count++; + } + } + } + } + } + + /* + * variants in second codon base + */ + for (DnaVariant var : codonVariants[1]) + { + if (var.variant != null) + { + String alleles = (String) var.variant.getValue("alleles"); + if (alleles != null) + { + for (String base : alleles.split(",")) + { + String codon = base1 + base + base3; + if (addPeptideVariant(peptide, peptidePos, residue, var, codon)) + { + count++; + } + } + } + } + } + + /* + * variants in third codon base + */ + for (DnaVariant var : codonVariants[2]) + { + if (var.variant != null) + { + String alleles = (String) var.variant.getValue("alleles"); + if (alleles != null) + { + for (String base : alleles.split(",")) + { + String codon = base1 + base2 + base; + if (addPeptideVariant(peptide, peptidePos, residue, var, codon)) + { + count++; + } + } + } + } + } + + return count; + } + + /** + * Helper method that adds a peptide variant feature, provided the given codon + * translates to a value different to the current residue (is a non-synonymous + * variant). ID and clinical_significance attributes of the dna variant (if + * present) are copied to the new feature. + * + * @param peptide + * @param peptidePos + * @param residue + * @param var + * @param codon + * @return true if a feature was added, else false + */ + static boolean addPeptideVariant(SequenceI peptide, int peptidePos, + String residue, DnaVariant var, String codon) + { + /* + * get peptide translation of codon e.g. GAT -> D + * note that variants which are not single alleles, + * e.g. multibase variants or HGMD_MUTATION etc + * are currently ignored here + */ + String trans = codon.contains("-") ? "-" + : (codon.length() > 3 ? null : ResidueProperties + .codonTranslate(codon)); + if (trans != null && !trans.equals(residue)) + { + String desc = residue + "->" + trans; + // set score to 0f so 'graduated colour' option is offered! + SequenceFeature sf = new SequenceFeature( + SequenceOntologyI.SEQUENCE_VARIANT, desc, peptidePos, + peptidePos, 0f, null); + String id = (String) var.variant.getValue(ID); + if (id != null) + { + if (id.startsWith(SEQUENCE_VARIANT)) + { + id = id.substring(SEQUENCE_VARIANT.length()); + } + sf.setValue(ID, id); + // TODO handle other species variants + StringBuilder link = new StringBuilder(32); + try + { + link.append(desc).append(" ").append(id) + .append("|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=") + .append(URLEncoder.encode(id, "UTF-8")); + sf.addLink(link.toString()); + } catch (UnsupportedEncodingException e) + { + // as if + } + } + String clinSig = (String) var.variant + .getValue(CLINICAL_SIGNIFICANCE); + if (clinSig != null) + { + sf.setValue(CLINICAL_SIGNIFICANCE, clinSig); + } + peptide.addSequenceFeature(sf); + return true; + } + return false; + } + + /** + * Builds a map whose key is position in the protein sequence, and value is a + * list of the base and all variants for each corresponding codon position * * @param dnaSeq * @param dnaToProtein * @return */ - static LinkedHashMap> buildDnaVariantsMap( + static LinkedHashMap[]> buildDnaVariantsMap( SequenceI dnaSeq, MapList dnaToProtein) { /* - * map from peptide position to all variant features of the codon for it - * LinkedHashMap ensures we add the peptide features in sequence order + * map from peptide position to all variants of the codon which codes for it + * LinkedHashMap ensures we keep the peptide features in sequence order */ - LinkedHashMap> variants = new LinkedHashMap>(); + LinkedHashMap[]> variants = new LinkedHashMap[]>(); SequenceOntologyI so = SequenceOntologyFactory.getInstance(); SequenceFeature[] dnaFeatures = dnaSeq.getSequenceFeatures(); @@ -1841,10 +2012,13 @@ public class AlignmentUtils continue; } int peptidePosition = mapsTo[0]; - List codonVariants = variants.get(peptidePosition); + List[] codonVariants = variants.get(peptidePosition); if (codonVariants == null) { - codonVariants = new ArrayList(); + codonVariants = new ArrayList[3]; + codonVariants[0] = new ArrayList(); + codonVariants[1] = new ArrayList(); + codonVariants[2] = new ArrayList(); variants.put(peptidePosition, codonVariants); } @@ -1873,106 +2047,46 @@ public class AlignmentUtils lastCodon = codon; /* - * save nucleotide (and this variant) for each codon position + * save nucleotide (and any variant) for each codon position */ - String[][] codonVariant = new String[3][]; for (int codonPos = 0; codonPos < 3; codonPos++) { String nucleotide = String.valueOf( dnaSeq.getCharAt(codon[codonPos] - dnaStart)) .toUpperCase(); - if (codonVariant[codonPos] == null) + List codonVariant = codonVariants[codonPos]; + if (codon[codonPos] == dnaCol) { - /* - * record current dna base - */ - codonVariant[codonPos] = new String[] { nucleotide }; + if (!codonVariant.isEmpty() + && codonVariant.get(0).variant == null) + { + /* + * already recorded base value, add this variant + */ + codonVariant.get(0).variant = sf; + } + else + { + /* + * add variant with base value + */ + codonVariant.add(new DnaVariant(nucleotide, sf)); + } } - if (codon[codonPos] == dnaCol) + else if (codonVariant.isEmpty()) { /* - * add alleles to dna base (and any previously found alleles) + * record (possibly non-varying) base value */ - String[] known = codonVariant[codonPos]; - String[] dnaVariants = new String[alleles.length + known.length]; - System.arraycopy(known, 0, dnaVariants, 0, known.length); - System.arraycopy(alleles, 0, dnaVariants, known.length, - alleles.length); - codonVariant[codonPos] = dnaVariants; + codonVariant.add(new DnaVariant(nucleotide)); } } - codonVariants.add(codonVariant); } } return variants; } /** - * Returns a sorted, non-redundant list of all peptide translations generated - * by the given dna variants, excluding the current residue value - * - * @param codonVariants - * an array of base values (acgtACGT) for codon positions 1, 2, 3 - * @param residue - * the current residue translation - * @return - */ - static List computePeptideVariants(String[][] codonVariants, - String residue) - { - List result = new ArrayList(); - for (String base1 : codonVariants[0]) - { - for (String base2 : codonVariants[1]) - { - for (String base3 : codonVariants[2]) - { - String codon = base1 + base2 + base3; - /* - * get peptide translation of codon e.g. GAT -> D - * note that variants which are not single alleles, - * e.g. multibase variants or HGMD_MUTATION etc - * are ignored here - */ - String peptide = codon.contains("-") ? "-" - : (codon.length() > 3 ? null : ResidueProperties - .codonTranslate(codon)); - if (peptide != null && !result.contains(peptide) - && !peptide.equalsIgnoreCase(residue)) - { - result.add(peptide); - } - } - } - } - - /* - * sort alphabetically with STOP at the end - */ - Collections.sort(result, new Comparator() - { - - @Override - public int compare(String o1, String o2) - { - if ("STOP".equals(o1)) - { - return 1; - } - else if ("STOP".equals(o2)) - { - return -1; - } - else - { - return o1.compareTo(o2); - } - } - }); - return result; - } - - /** * Makes an alignment with a copy of the given sequences, adding in any * non-redundant sequences which are mapped to by the cross-referenced * sequences. diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 810ef5f..d811bef 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -26,6 +26,7 @@ import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; +import jalview.analysis.AlignmentUtils.DnaVariant; import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; @@ -1733,56 +1734,62 @@ public class AlignmentUtilsTests /* * first with no variants on dna */ - LinkedHashMap> variantsMap = AlignmentUtils + LinkedHashMap[]> variantsMap = AlignmentUtils .buildDnaVariantsMap(dna, map); assertTrue(variantsMap.isEmpty()); /* * single allele codon 1, on base 1 */ - SequenceFeature sf = new SequenceFeature("sequence_variant", "", 1, 1, + SequenceFeature sf1 = new SequenceFeature("sequence_variant", "", 1, 1, 0f, null); - sf.setValue("alleles", "T"); - sf.setValue("ID", "sequence_variant:rs758803211"); - dna.addSequenceFeature(sf); + sf1.setValue("alleles", "T"); + sf1.setValue("ID", "sequence_variant:rs758803211"); + dna.addSequenceFeature(sf1); /* * two alleles codon 2, on bases 2 and 3 (distinct variants) */ - sf = new SequenceFeature("sequence_variant", "", 5, 5, 0f, null); - sf.setValue("alleles", "T"); - sf.setValue("ID", "sequence_variant:rs758803212"); - dna.addSequenceFeature(sf); - sf = new SequenceFeature("sequence_variant", "", 6, 6, 0f, null); - sf.setValue("alleles", "G"); - sf.setValue("ID", "sequence_variant:rs758803213"); - dna.addSequenceFeature(sf); + SequenceFeature sf2 = new SequenceFeature("sequence_variant", "", 5, 5, + 0f, null); + sf2.setValue("alleles", "T"); + sf2.setValue("ID", "sequence_variant:rs758803212"); + dna.addSequenceFeature(sf2); + SequenceFeature sf3 = new SequenceFeature("sequence_variant", "", 6, 6, + 0f, null); + sf3.setValue("alleles", "G"); + sf3.setValue("ID", "sequence_variant:rs758803213"); + dna.addSequenceFeature(sf3); /* * two alleles codon 3, both on base 2 (one variant) */ - sf = new SequenceFeature("sequence_variant", "", 8, 8, 0f, null); - sf.setValue("alleles", "C, G"); - sf.setValue("ID", "sequence_variant:rs758803214"); - dna.addSequenceFeature(sf); + SequenceFeature sf4 = new SequenceFeature("sequence_variant", "", 8, 8, + 0f, null); + sf4.setValue("alleles", "C, G"); + sf4.setValue("ID", "sequence_variant:rs758803214"); + dna.addSequenceFeature(sf4); // no alleles on codon 4 /* * alleles on codon 5 on all 3 bases (distinct variants) */ - sf = new SequenceFeature("sequence_variant", "", 13, 13, 0f, null); - sf.setValue("alleles", "C, G"); // (C duplicates given base value) - sf.setValue("ID", "sequence_variant:rs758803215"); - dna.addSequenceFeature(sf); - sf = new SequenceFeature("sequence_variant", "", 14, 14, 0f, null); - sf.setValue("alleles", "g, a"); // should force to upper-case - sf.setValue("ID", "sequence_variant:rs758803216"); - dna.addSequenceFeature(sf); - sf = new SequenceFeature("sequence_variant", "", 15, 15, 0f, null); - sf.setValue("alleles", "A, T"); - sf.setValue("ID", "sequence_variant:rs758803217"); - dna.addSequenceFeature(sf); + SequenceFeature sf5 = new SequenceFeature("sequence_variant", "", 13, + 13, 0f, null); + sf5.setValue("alleles", "C, G"); // (C duplicates given base value) + sf5.setValue("ID", "sequence_variant:rs758803215"); + dna.addSequenceFeature(sf5); + SequenceFeature sf6 = new SequenceFeature("sequence_variant", "", 14, + 14, 0f, null); + sf6.setValue("alleles", "g, a"); // should force to upper-case + sf6.setValue("ID", "sequence_variant:rs758803216"); + dna.addSequenceFeature(sf6); + SequenceFeature sf7 = new SequenceFeature("sequence_variant", "", 15, + 15, 0f, null); + sf7.setValue("alleles", "A, T"); + sf7.setValue("ID", "sequence_variant:rs758803217"); + dna.addSequenceFeature(sf7); /* * build map - expect variants on positions 1, 2, 3, 5 @@ -1791,39 +1798,68 @@ public class AlignmentUtilsTests assertEquals(4, variantsMap.size()); /* - * one variant on protein position 1 - */ - assertEquals(1, variantsMap.get(1).size()); - assertTrue(Arrays.deepEquals(new String[][] { { "A", "T" }, { "T" }, - { "G" } }, variantsMap.get(1).get(0))); - - /* - * two variants on protein position 2 - */ - assertEquals(2, variantsMap.get(2).size()); - assertTrue(Arrays.deepEquals(new String[][] { { "A" }, { "A", "T" }, - { "A" } }, variantsMap.get(2).get(0))); - assertTrue(Arrays.deepEquals(new String[][] { { "A" }, { "A" }, - { "A", "G" } }, variantsMap.get(2).get(1))); - - /* - * one variant on protein position 3 - */ - assertEquals(1, variantsMap.get(3).size()); - assertTrue(Arrays.deepEquals(new String[][] { { "T" }, - { "T", "C", "G" }, { "T" } }, variantsMap.get(3).get(0))); + * protein residue 1: variant on codon (ATG) base 1, not on 2 or 3 + */ + List[] pep1Variants = variantsMap.get(1); + assertEquals(3, pep1Variants.length); + assertEquals(1, pep1Variants[0].size()); + assertEquals("A", pep1Variants[0].get(0).base); // codon[1] base + assertSame(sf1, pep1Variants[0].get(0).variant); // codon[1] variant + assertEquals(1, pep1Variants[1].size()); + assertEquals("T", pep1Variants[1].get(0).base); // codon[2] base + assertNull(pep1Variants[1].get(0).variant); // no variant here + assertEquals(1, pep1Variants[2].size()); + assertEquals("G", pep1Variants[2].get(0).base); // codon[3] base + assertNull(pep1Variants[2].get(0).variant); // no variant here + + /* + * protein residue 2: variants on codon (AAA) bases 2 and 3 + */ + List[] pep2Variants = variantsMap.get(2); + assertEquals(3, pep2Variants.length); + assertEquals(1, pep2Variants[0].size()); + // codon[1] base recorded while processing variant on codon[2] + assertEquals("A", pep2Variants[0].get(0).base); + assertNull(pep2Variants[0].get(0).variant); // no variant here + // codon[2] base and variant: + assertEquals(1, pep2Variants[1].size()); + assertEquals("A", pep2Variants[1].get(0).base); + assertSame(sf2, pep2Variants[1].get(0).variant); + // codon[3] base was recorded when processing codon[2] variant + // and then the variant for codon[3] added to it + assertEquals(1, pep2Variants[2].size()); + assertEquals("A", pep2Variants[2].get(0).base); + assertSame(sf3, pep2Variants[2].get(0).variant); + + /* + * protein residue 3: variants on codon (TTT) base 2 only + */ + List[] pep3Variants = variantsMap.get(3); + assertEquals(3, pep3Variants.length); + assertEquals(1, pep3Variants[0].size()); + assertEquals("T", pep3Variants[0].get(0).base); // codon[1] base + assertNull(pep3Variants[0].get(0).variant); // no variant here + assertEquals(1, pep3Variants[1].size()); + assertEquals("T", pep3Variants[1].get(0).base); // codon[2] base + assertSame(sf4, pep3Variants[1].get(0).variant); // codon[2] variant + assertEquals(1, pep3Variants[2].size()); + assertEquals("T", pep3Variants[2].get(0).base); // codon[3] base + assertNull(pep3Variants[2].get(0).variant); // no variant here /* * three variants on protein position 5 - * duplicated bases are not removed here, handled in computePeptideVariants - */ - assertEquals(3, variantsMap.get(5).size()); - assertTrue(Arrays.deepEquals(new String[][] { { "C", "C", "G" }, - { "C" }, { "C" } }, variantsMap.get(5).get(0))); - assertTrue(Arrays.deepEquals(new String[][] { { "C" }, - { "C", "G", "A" }, { "C" } }, variantsMap.get(5).get(1))); - assertTrue(Arrays.deepEquals(new String[][] { { "C" }, { "C" }, - { "C", "A", "T" } }, variantsMap.get(5).get(2))); + */ + List[] pep5Variants = variantsMap.get(5); + assertEquals(3, pep5Variants.length); + assertEquals(1, pep5Variants[0].size()); + assertEquals("C", pep5Variants[0].get(0).base); // codon[1] base + assertSame(sf5, pep5Variants[0].get(0).variant); // codon[1] variant + assertEquals(1, pep5Variants[1].size()); + assertEquals("C", pep5Variants[1].get(0).base); // codon[2] base + assertSame(sf6, pep5Variants[1].get(0).variant); // codon[2] variant + assertEquals(1, pep5Variants[2].size()); + assertEquals("C", pep5Variants[2].get(0).base); // codon[3] base + assertSame(sf7, pep5Variants[2].get(0).variant); // codon[3] variant } /** @@ -1833,67 +1869,154 @@ public class AlignmentUtilsTests @Test(groups = "Functional") public void testComputePeptideVariants() { - String[][] codonVariants = new String[][] { { "A" }, { "G" }, { "T" } }; - /* - * AGT codes for S - this is not included in the variants returned + * scenario: AAATTTCCC codes for KFP, with variants + * GAA -> E + * CAA -> Q + * AAG synonymous + * AAT -> N + * TTC synonymous + * CAC,CGC -> H,R (as one variant) */ - List variants = AlignmentUtils.computePeptideVariants(codonVariants, "S"); - assertEquals("[]", variants.toString()); - - // S is reported if it differs from the current value (A): - variants = AlignmentUtils.computePeptideVariants(codonVariants, "A"); - assertEquals("[S]", variants.toString()); - - /* - * synonymous variant is not reported - */ - codonVariants = new String[][] { { "A" }, { "G" }, { "C", "T" } }; - // AGC and AGT both code for S - variants = AlignmentUtils.computePeptideVariants(codonVariants, "s"); - assertEquals("[]", variants.toString()); - + SequenceI peptide = new Sequence("pep/10-12", "KFP"); + /* - * equivalent variants are only reported once + * two distinct variants for codon 1 position 1 + * second one has clinical significance */ - codonVariants = new String[][] { { "C" }, { "T" }, - { "A", "C", "G", "T" } }; - // CTA CTC CTG CTT all code for L - variants = AlignmentUtils.computePeptideVariants(codonVariants, "S"); - assertEquals("[L]", variants.toString()); - + SequenceFeature sf1 = new SequenceFeature("sequence_variant", "", 1, 1, + 0f, null); + sf1.setValue("alleles", "A,G"); // GAA -> E + sf1.setValue("ID", "var1.125A>G"); + SequenceFeature sf2 = new SequenceFeature("sequence_variant", "", 1, 1, + 0f, null); + sf2.setValue("alleles", "A,C"); // CAA -> Q + sf2.setValue("ID", "var2"); + sf2.setValue("clinical_significance", "Dodgy"); + SequenceFeature sf3 = new SequenceFeature("sequence_variant", "", 3, 3, + 0f, null); + sf3.setValue("alleles", "A,G"); // synonymous + sf3.setValue("ID", "var3"); + sf3.setValue("clinical_significance", "None"); + SequenceFeature sf4 = new SequenceFeature("sequence_variant", "", 3, 3, + 0f, null); + sf4.setValue("alleles", "A,T"); // AAT -> N + sf4.setValue("ID", "sequence_variant:var4"); // prefix gets stripped off + sf4.setValue("clinical_significance", "Benign"); + SequenceFeature sf5 = new SequenceFeature("sequence_variant", "", 6, 6, + 0f, null); + sf5.setValue("alleles", "T,C"); // synonymous + sf5.setValue("ID", "var5"); + sf5.setValue("clinical_significance", "Bad"); + SequenceFeature sf6 = new SequenceFeature("sequence_variant", "", 8, 8, + 0f, null); + sf6.setValue("alleles", "C,A,G"); // CAC,CGC -> H,R + sf6.setValue("ID", "var6"); + sf6.setValue("clinical_significance", "Good"); + + List codon1Variants = new ArrayList(); + List codon2Variants = new ArrayList(); + List codon3Variants = new ArrayList(); + List codonVariants[] = new ArrayList[3]; + codonVariants[0] = codon1Variants; + codonVariants[1] = codon2Variants; + codonVariants[2] = codon3Variants; + /* - * vary codons 1 and 2; variant products are sorted and non-redundant + * compute variants for protein position 1 */ - codonVariants = new String[][] { { "a", "C" }, { "g", "T" }, { "A" } }; - // aga ata cga cta code for R, I, R, L - variants = AlignmentUtils.computePeptideVariants(codonVariants, "S"); - assertEquals("[I, L, R]", variants.toString()); - + codon1Variants.add(new DnaVariant("A", sf1)); + codon1Variants.add(new DnaVariant("A", sf2)); + codon2Variants.add(new DnaVariant("A")); + codon2Variants.add(new DnaVariant("A")); + codon3Variants.add(new DnaVariant("A", sf3)); + codon3Variants.add(new DnaVariant("A", sf4)); + AlignmentUtils.computePeptideVariants(peptide, 1, codonVariants); + /* - * vary codons 2 and 3 + * compute variants for protein position 2 */ - codonVariants = new String[][] { { "a" }, { "g", "T" }, { "A", "c" } }; - // aga agc ata atc code for R, S, I, I - variants = AlignmentUtils.computePeptideVariants(codonVariants, "S"); - assertEquals("[I, R]", variants.toString()); - + codon1Variants.clear(); + codon2Variants.clear(); + codon3Variants.clear(); + codon1Variants.add(new DnaVariant("T")); + codon2Variants.add(new DnaVariant("T")); + codon3Variants.add(new DnaVariant("T", sf5)); + AlignmentUtils.computePeptideVariants(peptide, 2, codonVariants); + /* - * vary codons 1 and 3 + * compute variants for protein position 3 */ - codonVariants = new String[][] { { "a", "t" }, { "a" }, { "t", "g" } }; - // aat aag tat tag code for N, K, Y, STOP - STOP sorted to end - variants = AlignmentUtils.computePeptideVariants(codonVariants, "S"); - assertEquals("[K, N, Y, STOP]", variants.toString()); - + codon1Variants.clear(); + codon2Variants.clear(); + codon3Variants.clear(); + codon1Variants.add(new DnaVariant("C")); + codon2Variants.add(new DnaVariant("C", sf6)); + codon3Variants.add(new DnaVariant("C")); + AlignmentUtils.computePeptideVariants(peptide, 3, codonVariants); + /* - * vary codons 1, 2 and 3 + * verify added sequence features for + * var1 K -> E + * var2 K -> Q + * var4 K -> N + * var6 P -> H + * var6 P -> R */ - codonVariants = new String[][] { { "a", "t" }, { "G", "C" }, - { "t", "g" } }; - // agt agg act acg tgt tgg tct tcg code for S, R, T, T, C, W, S, S - variants = AlignmentUtils.computePeptideVariants(codonVariants, "S"); - assertEquals("[C, R, T, W]", variants.toString()); + SequenceFeature[] sfs = peptide.getSequenceFeatures(); + assertEquals(5, sfs.length); + SequenceFeature sf = sfs[0]; + assertEquals(1, sf.getBegin()); + assertEquals(1, sf.getEnd()); + assertEquals("K->E", sf.getDescription()); + assertEquals("var1.125A>G", sf.getValue("ID")); + assertNull(sf.getValue("clinical_significance")); + assertEquals(1, sf.links.size()); + // link to variation is urlencoded + assertEquals( + "K->E var1.125A>G|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var1.125A%3EG", + sf.links.get(0)); + sf = sfs[1]; + assertEquals(1, sf.getBegin()); + assertEquals(1, sf.getEnd()); + assertEquals("K->Q", sf.getDescription()); + assertEquals("var2", sf.getValue("ID")); + assertEquals("Dodgy", sf.getValue("clinical_significance")); + assertEquals(1, sf.links.size()); + assertEquals( + "K->Q var2|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var2", + sf.links.get(0)); + sf = sfs[2]; + assertEquals(1, sf.getBegin()); + assertEquals(1, sf.getEnd()); + assertEquals("K->N", sf.getDescription()); + assertEquals("var4", sf.getValue("ID")); + assertEquals("Benign", sf.getValue("clinical_significance")); + assertEquals(1, sf.links.size()); + assertEquals( + "K->N var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4", + sf.links.get(0)); + sf = sfs[3]; + assertEquals(3, sf.getBegin()); + assertEquals(3, sf.getEnd()); + assertEquals("P->H", sf.getDescription()); + assertEquals("var6", sf.getValue("ID")); + assertEquals("Good", sf.getValue("clinical_significance")); + assertEquals(1, sf.links.size()); + assertEquals( + "P->H var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6", + sf.links.get(0)); + // var5 generates two distinct protein variant features + sf = sfs[4]; + assertEquals(3, sf.getBegin()); + assertEquals(3, sf.getEnd()); + assertEquals("P->R", sf.getDescription()); + assertEquals("var6", sf.getValue("ID")); + assertEquals("Good", sf.getValue("clinical_significance")); + assertEquals(1, sf.links.size()); + assertEquals( + "P->R var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6", + sf.links.get(0)); } /**