From 7ac2996be1e7428d1651793c4a1b7e4091f221b9 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Tue, 12 Apr 2016 10:36:51 +0100 Subject: [PATCH] JAL-2049 separate protein variant per dna variant (combinations tbd) --- src/jalview/analysis/AlignmentUtils.java | 51 +++++++++++-------- test/jalview/analysis/AlignmentUtilsTests.java | 65 ++++++++++++++++++++---- 2 files changed, 84 insertions(+), 32 deletions(-) diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 14e3907..28062c0 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -1744,35 +1744,40 @@ public class AlignmentUtils * /ENSP00000288602?feature=transcript_variation;content-type=text/xml * which would be a bit slower but possibly more reliable */ - LinkedHashMap<Integer, String[][]> variants = buildDnaVariantsMap( + LinkedHashMap<Integer, List<String[][]>> variants = buildDnaVariantsMap( dnaSeq, dnaToProtein); /* * scan codon variations, compute peptide variants and add to peptide sequence */ int count = 0; - for (Entry<Integer, String[][]> variant : variants.entrySet()) + for (Entry<Integer, List<String[][]>> variant : variants.entrySet()) { int peptidePos = variant.getKey(); - String[][] codonVariants = variant.getValue(); + List<String[][]> codonVariants = variant.getValue(); String residue = String.valueOf(peptide.getCharAt(peptidePos - 1)); // 0-based - List peptideVariants = computePeptideVariants(codonVariants, - residue); - if (!peptideVariants.isEmpty()) + for (String[][] codonVariant : codonVariants) { - String desc = residue + "," // include canonical residue in description - + StringUtils.listToDelimitedString(peptideVariants, ", "); - SequenceFeature sf = new SequenceFeature( - SequenceOntologyI.SEQUENCE_VARIANT, desc, peptidePos, - peptidePos, 0f, null); - peptide.addSequenceFeature(sf); - count++; + List peptideVariants = computePeptideVariants(codonVariant, + residue); + if (!peptideVariants.isEmpty()) + { + String desc = residue + + "->" // include canonical residue in description + + StringUtils + 
.listToDelimitedString(peptideVariants, ", "); + SequenceFeature sf = new SequenceFeature( + SequenceOntologyI.SEQUENCE_VARIANT, desc, peptidePos, + peptidePos, 0f, null); + peptide.addSequenceFeature(sf); + count++; + } } } /* * ugly sort to get sequence features in start position order - * - would be better to store in Sequence as a TreeSet instead? + * - would be better to store in Sequence as a TreeSet or NCList? */ Arrays.sort(peptide.getSequenceFeatures(), new Comparator() @@ -1796,14 +1801,14 @@ public class AlignmentUtils * @param dnaToProtein * @return */ - static LinkedHashMap<Integer, String[][]> buildDnaVariantsMap( + static LinkedHashMap<Integer, List<String[][]>> buildDnaVariantsMap( SequenceI dnaSeq, MapList dnaToProtein) { /* * map from peptide position to all variant features of the codon for it * LinkedHashMap ensures we add the peptide features in sequence order */ - LinkedHashMap<Integer, String[][]> variants = new LinkedHashMap<Integer, String[][]>(); + LinkedHashMap<Integer, List<String[][]>> variants = new LinkedHashMap<Integer, List<String[][]>>(); SequenceOntologyI so = SequenceOntologyFactory.getInstance(); SequenceFeature[] dnaFeatures = dnaSeq.getSequenceFeatures(); @@ -1836,10 +1841,10 @@ public class AlignmentUtils continue; } int peptidePosition = mapsTo[0]; - String[][] codonVariants = variants.get(peptidePosition); + List<String[][]> codonVariants = variants.get(peptidePosition); if (codonVariants == null) { - codonVariants = new String[3][]; + codonVariants = new ArrayList<String[][]>(); variants.put(peptidePosition, codonVariants); } @@ -1870,31 +1875,33 @@ public class AlignmentUtils /* * save nucleotide (and this variant) for each codon position */ + String[][] codonVariant = new String[3][]; for (int codonPos = 0; codonPos < 3; codonPos++) { String nucleotide = String.valueOf( dnaSeq.getCharAt(codon[codonPos] - dnaStart)) .toUpperCase(); - if (codonVariants[codonPos] == null) + if (codonVariant[codonPos] == null) { /* * record current dna base */ - codonVariants[codonPos] = new String[] { nucleotide }; + codonVariant[codonPos] = new String[] { nucleotide }; } if (codon[codonPos] == dnaCol) 
{ /* * add alleles to dna base (and any previously found alleles) */ - String[] known = codonVariants[codonPos]; + String[] known = codonVariant[codonPos]; String[] dnaVariants = new String[alleles.length + known.length]; System.arraycopy(known, 0, dnaVariants, 0, known.length); System.arraycopy(alleles, 0, dnaVariants, known.length, alleles.length); - codonVariants[codonPos] = dnaVariants; + codonVariant[codonPos] = dnaVariants; } } + codonVariants.add(codonVariant); } } return variants; diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 7ccbf97..810ef5f 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -1733,52 +1733,97 @@ public class AlignmentUtilsTests /* * first with no variants on dna */ - LinkedHashMap<Integer, String[][]> variantsMap = AlignmentUtils + LinkedHashMap<Integer, List<String[][]>> variantsMap = AlignmentUtils .buildDnaVariantsMap(dna, map); assertTrue(variantsMap.isEmpty()); - // single allele codon 1, on base 1 + /* + * single allele codon 1, on base 1 + */ SequenceFeature sf = new SequenceFeature("sequence_variant", "", 1, 1, 0f, null); sf.setValue("alleles", "T"); + sf.setValue("ID", "sequence_variant:rs758803211"); dna.addSequenceFeature(sf); - // two alleles codon 2, on bases 2 and 3 + /* + * two alleles codon 2, on bases 2 and 3 (distinct variants) + */ sf = new SequenceFeature("sequence_variant", "", 5, 5, 0f, null); sf.setValue("alleles", "T"); + sf.setValue("ID", "sequence_variant:rs758803212"); dna.addSequenceFeature(sf); sf = new SequenceFeature("sequence_variant", "", 6, 6, 0f, null); sf.setValue("alleles", "G"); + sf.setValue("ID", "sequence_variant:rs758803213"); dna.addSequenceFeature(sf); - // two alleles codon 3, both on base 2 + /* + * two alleles codon 3, both on base 2 (one variant) + */ sf = new SequenceFeature("sequence_variant", "", 8, 8, 0f, null); sf.setValue("alleles", "C, G"); + sf.setValue("ID", "sequence_variant:rs758803214"); 
dna.addSequenceFeature(sf); // no alleles on codon 4 - // alleles on codon 5 on all 3 bases + + /* + * alleles on codon 5 on all 3 bases (distinct variants) + */ sf = new SequenceFeature("sequence_variant", "", 13, 13, 0f, null); sf.setValue("alleles", "C, G"); // (C duplicates given base value) + sf.setValue("ID", "sequence_variant:rs758803215"); dna.addSequenceFeature(sf); sf = new SequenceFeature("sequence_variant", "", 14, 14, 0f, null); sf.setValue("alleles", "g, a"); // should force to upper-case + sf.setValue("ID", "sequence_variant:rs758803216"); dna.addSequenceFeature(sf); sf = new SequenceFeature("sequence_variant", "", 15, 15, 0f, null); sf.setValue("alleles", "A, T"); + sf.setValue("ID", "sequence_variant:rs758803217"); dna.addSequenceFeature(sf); + /* + * build map - expect variants on positions 1, 2, 3, 5 + */ variantsMap = AlignmentUtils.buildDnaVariantsMap(dna, map); assertEquals(4, variantsMap.size()); + + /* + * one variant on protein position 1 + */ + assertEquals(1, variantsMap.get(1).size()); assertTrue(Arrays.deepEquals(new String[][] { { "A", "T" }, { "T" }, - { "G" } }, variantsMap.get(1))); + { "G" } }, variantsMap.get(1).get(0))); + + /* + * two variants on protein position 2 + */ + assertEquals(2, variantsMap.get(2).size()); assertTrue(Arrays.deepEquals(new String[][] { { "A" }, { "A", "T" }, - { "A", "G" } }, variantsMap.get(2))); + { "A" } }, variantsMap.get(2).get(0))); + assertTrue(Arrays.deepEquals(new String[][] { { "A" }, { "A" }, + { "A", "G" } }, variantsMap.get(2).get(1))); + + /* + * one variant on protein position 3 + */ + assertEquals(1, variantsMap.get(3).size()); assertTrue(Arrays.deepEquals(new String[][] { { "T" }, - { "T", "C", "G" }, { "T" } }, variantsMap.get(3))); - // duplicated bases are not removed here, handled in computePeptideVariants + { "T", "C", "G" }, { "T" } }, variantsMap.get(3).get(0))); + + /* + * three variants on protein position 5 + * duplicated bases are not removed here, handled in 
computePeptideVariants + */ + assertEquals(3, variantsMap.get(5).size()); assertTrue(Arrays.deepEquals(new String[][] { { "C", "C", "G" }, - { "C", "G", "A" }, { "C", "A", "T" } }, variantsMap.get(5))); + { "C" }, { "C" } }, variantsMap.get(5).get(0))); + assertTrue(Arrays.deepEquals(new String[][] { { "C" }, + { "C", "G", "A" }, { "C" } }, variantsMap.get(5).get(1))); + assertTrue(Arrays.deepEquals(new String[][] { { "C" }, { "C" }, + { "C", "A", "T" } }, variantsMap.get(5).get(2))); } /** -- 1.7.10.2