JAL-2049 create a separate protein variant for each DNA variant (combinations TBD)

author gmungoc <g.m.carstairs@dundee.ac.uk>

Tue, 12 Apr 2016 09:36:51 +0000 (10:36 +0100)

committer gmungoc <g.m.carstairs@dundee.ac.uk>

Tue, 12 Apr 2016 09:36:51 +0000 (10:36 +0100)
author gmungoc <g.m.carstairs@dundee.ac.uk>
Tue, 12 Apr 2016 09:36:51 +0000 (10:36 +0100)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Tue, 12 Apr 2016 09:36:51 +0000 (10:36 +0100)
diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java

index 14e3907..28062c0 100644 (file)
--- a/src/jalview/analysis/AlignmentUtils.java
+++ b/src/jalview/analysis/AlignmentUtils.java
@@ -1744,35 +1744,40 @@ public class AlignmentUtils
       * /ENSP00000288602?feature=transcript_variation;content-type=text/xml
       * which would be a bit slower but possibly more reliable
       */
-    LinkedHashMap<Integer, String[][]> variants = buildDnaVariantsMap(
+    LinkedHashMap<Integer, List<String[][]>> variants = buildDnaVariantsMap(
              dnaSeq, dnaToProtein);
  
      /*
       * scan codon variations, compute peptide variants and add to peptide sequence
       */
      int count = 0;
-    for (Entry<Integer, String[][]> variant : variants.entrySet())
+    for (Entry<Integer, List<String[][]>> variant : variants.entrySet())
      {
        int peptidePos = variant.getKey();
-      String[][] codonVariants = variant.getValue();
+      List<String[][]> codonVariants = variant.getValue();
        String residue = String.valueOf(peptide.getCharAt(peptidePos - 1)); // 0-based
-      List<String> peptideVariants = computePeptideVariants(codonVariants,
-              residue);
-      if (!peptideVariants.isEmpty())
+      for (String[][] codonVariant : codonVariants)
        {
-        String desc = residue + "," // include canonical residue in description
-                + StringUtils.listToDelimitedString(peptideVariants, ", ");
-        SequenceFeature sf = new SequenceFeature(
-                SequenceOntologyI.SEQUENCE_VARIANT, desc, peptidePos,
-                peptidePos, 0f, null);
-        peptide.addSequenceFeature(sf);
-        count++;
+        List<String> peptideVariants = computePeptideVariants(codonVariant,
+                residue);
+        if (!peptideVariants.isEmpty())
+        {
+          String desc = residue
+                  + "->" // include canonical residue in description
+                  + StringUtils
+                          .listToDelimitedString(peptideVariants, ", ");
+          SequenceFeature sf = new SequenceFeature(
+                  SequenceOntologyI.SEQUENCE_VARIANT, desc, peptidePos,
+                  peptidePos, 0f, null);
+          peptide.addSequenceFeature(sf);
+          count++;
+        }
        }
      }
  
      /*
       * ugly sort to get sequence features in start position order
-     * - would be better to store in Sequence as a TreeSet instead?
+     * - would be better to store in Sequence as a TreeSet or NCList?
       */
      Arrays.sort(peptide.getSequenceFeatures(),
              new Comparator<SequenceFeature>()
@@ -1796,14 +1801,14 @@ public class AlignmentUtils
     * @param dnaToProtein
     * @return
     */
-  static LinkedHashMap<Integer, String[][]> buildDnaVariantsMap(
+  static LinkedHashMap<Integer, List<String[][]>> buildDnaVariantsMap(
            SequenceI dnaSeq, MapList dnaToProtein)
    {
      /*
       * map from peptide position to all variant features of the codon for it
       * LinkedHashMap ensures we add the peptide features in sequence order
       */
-    LinkedHashMap<Integer, String[][]> variants = new LinkedHashMap<Integer, String[][]>();
+    LinkedHashMap<Integer, List<String[][]>> variants = new LinkedHashMap<Integer, List<String[][]>>();
      SequenceOntologyI so = SequenceOntologyFactory.getInstance();
  
      SequenceFeature[] dnaFeatures = dnaSeq.getSequenceFeatures();
@@ -1836,10 +1841,10 @@ public class AlignmentUtils
            continue;
          }
          int peptidePosition = mapsTo[0];
-        String[][] codonVariants = variants.get(peptidePosition);
+        List<String[][]> codonVariants = variants.get(peptidePosition);
          if (codonVariants == null)
          {
-          codonVariants = new String[3][];
+          codonVariants = new ArrayList<String[][]>();
            variants.put(peptidePosition, codonVariants);
          }
  
@@ -1870,31 +1875,33 @@ public class AlignmentUtils
          /*
           * save nucleotide (and this variant) for each codon position
           */
+        String[][] codonVariant = new String[3][];
          for (int codonPos = 0; codonPos < 3; codonPos++)
          {
            String nucleotide = String.valueOf(
                    dnaSeq.getCharAt(codon[codonPos] - dnaStart))
                    .toUpperCase();
-          if (codonVariants[codonPos] == null)
+          if (codonVariant[codonPos] == null)
            {
              /*
               * record current dna base
               */
-            codonVariants[codonPos] = new String[] { nucleotide };
+            codonVariant[codonPos] = new String[] { nucleotide };
            }
            if (codon[codonPos] == dnaCol)
            {
              /*
               * add alleles to dna base (and any previously found alleles)
               */
-            String[] known = codonVariants[codonPos];
+            String[] known = codonVariant[codonPos];
              String[] dnaVariants = new String[alleles.length + known.length];
              System.arraycopy(known, 0, dnaVariants, 0, known.length);
              System.arraycopy(alleles, 0, dnaVariants, known.length,
                      alleles.length);
-            codonVariants[codonPos] = dnaVariants;
+            codonVariant[codonPos] = dnaVariants;
            }
          }
+        codonVariants.add(codonVariant);
        }
      }
      return variants;
diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java

index 7ccbf97..810ef5f 100644 (file)
--- a/test/jalview/analysis/AlignmentUtilsTests.java
+++ b/test/jalview/analysis/AlignmentUtilsTests.java
@@ -1733,52 +1733,97 @@ public class AlignmentUtilsTests
      /*
       * first with no variants on dna
       */
-    LinkedHashMap<Integer, String[][]> variantsMap = AlignmentUtils
+    LinkedHashMap<Integer, List<String[][]>> variantsMap = AlignmentUtils
              .buildDnaVariantsMap(dna, map);
      assertTrue(variantsMap.isEmpty());
  
-    // single allele codon 1, on base 1
+    /*
+     * single allele codon 1, on base 1
+     */
      SequenceFeature sf = new SequenceFeature("sequence_variant", "", 1, 1,
              0f, null);
      sf.setValue("alleles", "T");
+    sf.setValue("ID", "sequence_variant:rs758803211");
      dna.addSequenceFeature(sf);
  
-    // two alleles codon 2, on bases 2 and 3
+    /*
+     * two alleles codon 2, on bases 2 and 3 (distinct variants)
+     */
      sf = new SequenceFeature("sequence_variant", "", 5, 5, 0f, null);
      sf.setValue("alleles", "T");
+    sf.setValue("ID", "sequence_variant:rs758803212");
      dna.addSequenceFeature(sf);
      sf = new SequenceFeature("sequence_variant", "", 6, 6, 0f, null);
      sf.setValue("alleles", "G");
+    sf.setValue("ID", "sequence_variant:rs758803213");
      dna.addSequenceFeature(sf);
  
-    // two alleles codon 3, both on base 2
+    /*
+     * two alleles codon 3, both on base 2 (one variant)
+     */
      sf = new SequenceFeature("sequence_variant", "", 8, 8, 0f, null);
      sf.setValue("alleles", "C, G");
+    sf.setValue("ID", "sequence_variant:rs758803214");
      dna.addSequenceFeature(sf);
  
      // no alleles on codon 4
-    // alleles on codon 5 on all 3 bases
+
+    /*
+     * alleles on codon 5 on all 3 bases (distinct variants)
+     */
      sf = new SequenceFeature("sequence_variant", "", 13, 13, 0f, null);
      sf.setValue("alleles", "C, G"); // (C duplicates given base value)
+    sf.setValue("ID", "sequence_variant:rs758803215");
      dna.addSequenceFeature(sf);
      sf = new SequenceFeature("sequence_variant", "", 14, 14, 0f, null);
      sf.setValue("alleles", "g, a"); // should force to upper-case
+    sf.setValue("ID", "sequence_variant:rs758803216");
      dna.addSequenceFeature(sf);
      sf = new SequenceFeature("sequence_variant", "", 15, 15, 0f, null);
      sf.setValue("alleles", "A, T");
+    sf.setValue("ID", "sequence_variant:rs758803217");
      dna.addSequenceFeature(sf);
  
+    /*
+     * build map - expect variants on positions 1, 2, 3, 5
+     */
      variantsMap = AlignmentUtils.buildDnaVariantsMap(dna, map);
      assertEquals(4, variantsMap.size());
+
+    /*
+     * one variant on protein position 1
+     */
+    assertEquals(1, variantsMap.get(1).size());
      assertTrue(Arrays.deepEquals(new String[][] { { "A", "T" }, { "T" },
-        { "G" } }, variantsMap.get(1)));
+        { "G" } }, variantsMap.get(1).get(0)));
+
+    /*
+     * two variants on protein position 2
+     */
+    assertEquals(2, variantsMap.get(2).size());
      assertTrue(Arrays.deepEquals(new String[][] { { "A" }, { "A", "T" },
-        { "A", "G" } }, variantsMap.get(2)));
+        { "A" } }, variantsMap.get(2).get(0)));
+    assertTrue(Arrays.deepEquals(new String[][] { { "A" }, { "A" },
+        { "A", "G" } }, variantsMap.get(2).get(1)));
+
+    /*
+     * one variant on protein position 3
+     */
+    assertEquals(1, variantsMap.get(3).size());
      assertTrue(Arrays.deepEquals(new String[][] { { "T" },
-        { "T", "C", "G" }, { "T" } }, variantsMap.get(3)));
-    // duplicated bases are not removed here, handled in computePeptideVariants
+        { "T", "C", "G" }, { "T" } }, variantsMap.get(3).get(0)));
+
+    /*
+     * three variants on protein position 5
+     * duplicated bases are not removed here, handled in computePeptideVariants
+     */
+    assertEquals(3, variantsMap.get(5).size());
      assertTrue(Arrays.deepEquals(new String[][] { { "C", "C", "G" },
-        { "C", "G", "A" }, { "C", "A", "T" } }, variantsMap.get(5)));
+        { "C" }, { "C" } }, variantsMap.get(5).get(0)));
+    assertTrue(Arrays.deepEquals(new String[][] { { "C" },
+        { "C", "G", "A" }, { "C" } }, variantsMap.get(5).get(1)));
+    assertTrue(Arrays.deepEquals(new String[][] { { "C" }, { "C" },
+        { "C", "A", "T" } }, variantsMap.get(5).get(2)));
    }
  
    /**
author	gmungoc <g.m.carstairs@dundee.ac.uk>
	Tue, 12 Apr 2016 09:36:51 +0000 (10:36 +0100)
committer	gmungoc <g.m.carstairs@dundee.ac.uk>
	Tue, 12 Apr 2016 09:36:51 +0000 (10:36 +0100)
src/jalview/analysis/AlignmentUtils.java		patch | blob | history
test/jalview/analysis/AlignmentUtilsTests.java		patch | blob | history