JAL-1705 compute reverse complement of variants on reverse strand
authorgmungoc <g.m.carstairs@dundee.ac.uk>
Tue, 15 Mar 2016 14:26:45 +0000 (14:26 +0000)
committergmungoc <g.m.carstairs@dundee.ac.uk>
Tue, 15 Mar 2016 14:26:45 +0000 (14:26 +0000)
src/jalview/ext/ensembl/EnsemblSeqProxy.java
test/jalview/ext/ensembl/EnsemblSeqProxyTest.java

index 4af6525..b4c708d 100644 (file)
@@ -1,6 +1,7 @@
 package jalview.ext.ensembl;
 
 import jalview.analysis.AlignmentUtils;
+import jalview.analysis.Dna;
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefEntry;
@@ -33,6 +34,8 @@ import java.util.List;
  */
 public abstract class EnsemblSeqProxy extends EnsemblRestClient
 {
+  private static final String ALLELES = "alleles";
+
   private static final List<String> CROSS_REFERENCES = Arrays
           .asList(new String[] { "CCDS", "Uniprot/SWISSPROT",
               "Uniprot/SPTREMBL" });
@@ -611,9 +614,10 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
    * @param mapping
    *          mapping from the sequence feature's coordinates to the target
    *          sequence
+   * @param forwardStrand
    */
   protected void transferFeature(SequenceFeature sf,
-          SequenceI targetSequence, MapList mapping)
+          SequenceI targetSequence, MapList mapping, boolean forwardStrand)
   {
     int start = sf.getBegin();
     int end = sf.getEnd();
@@ -627,20 +631,77 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
       targetSequence.addSequenceFeature(copy);
 
       /*
-       * for sequence_variant, make an additional feature with consequence
+       * for sequence_variant on reverse strand, have to convert the allele
+       * values to their complements
        */
-      // if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
-      // SequenceOntologyI.SEQUENCE_VARIANT))
-      // {
-      // String consequence = (String) sf.getValue(CONSEQUENCE_TYPE);
-      // if (consequence != null)
-      // {
-      // SequenceFeature sf2 = new SequenceFeature("consequence",
-      // consequence, copy.getBegin(), copy.getEnd(), 0f,
-      // null);
-      // targetSequence.addSequenceFeature(sf2);
-      // }
-      // }
+      if (!forwardStrand
+              && SequenceOntologyFactory.getInstance().isA(sf.getType(),
+                      SequenceOntologyI.SEQUENCE_VARIANT))
+      {
+        reverseComplementAlleles(copy);
+      }
+    }
+  }
+
+  /**
+   * Change the 'alleles' value of a feature by converting to complementary
+   * bases, and also update the feature description to match
+   * 
+   * @param sf
+   */
+  static void reverseComplementAlleles(SequenceFeature sf)
+  {
+    final String alleles = (String) sf.getValue(ALLELES);
+    if (alleles == null)
+    {
+      return;
+    }
+    StringBuilder complement = new StringBuilder(alleles.length());
+    for (String allele : alleles.split(","))
+    {
+      reverseComplementAllele(complement, allele);
+    }
+    String comp = complement.toString();
+    sf.setValue(ALLELES, comp);
+    sf.setDescription(comp);
+
+    /*
+     * replace value of "alleles=" in sf.ATTRIBUTES as well
+     * so 'output as GFF' shows reverse complement alleles
+     */
+    String atts = sf.getAttributes();
+    if (atts != null)
+    {
+      atts = atts.replace(ALLELES + "=" + alleles, ALLELES + "=" + comp);
+      sf.setAttributes(atts);
+    }
+  }
+
+  /**
+   * Makes the 'reverse complement' of the given allele and appends it to the
+   * buffer, after a comma separator if not the first
+   * 
+   * @param complement
+   * @param allele
+   */
+  static void reverseComplementAllele(StringBuilder complement,
+          String allele)
+  {
+    if (complement.length() > 0)
+    {
+      complement.append(",");
+    }
+    if ("HGMD_MUTATION".equalsIgnoreCase(allele))
+    {
+      complement.append(allele);
+    }
+    else
+    {
+      char[] alleles = allele.toCharArray();
+      for (int i = alleles.length - 1; i >= 0; i--)
+      {
+        complement.append(Dna.getComplement(alleles[i]));
+      }
     }
   }
 
@@ -695,25 +756,18 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
     final boolean forwardStrand = mapping.isFromForwardStrand();
 
     /*
-     * sort features by start position (descending if reverse strand) 
-     * before transferring (in forwards order) to the target sequence
+     * sort features by start position (which corresponds to end
+     * position descending if reverse strand) so as to add them in
+     * 'forwards' order to the target sequence
      */
-    Arrays.sort(features, new Comparator<SequenceFeature>()
-    {
-      @Override
-      public int compare(SequenceFeature o1, SequenceFeature o2)
-      {
-        int c = Integer.compare(o1.getBegin(), o2.getBegin());
-        return forwardStrand ? c : -c;
-      }
-    });
+    sortFeatures(features, forwardStrand);
 
     boolean transferred = false;
     for (SequenceFeature sf : features)
     {
       if (retainFeature(sf, parentId))
       {
-        transferFeature(sf, targetSequence, mapping);
+        transferFeature(sf, targetSequence, mapping, forwardStrand);
         transferred = true;
       }
     }
@@ -721,6 +775,33 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
   }
 
   /**
+   * Sort features by start position ascending (if on forward strand), or end
+   * position descending (if on reverse strand)
+   * 
+   * @param features
+   * @param forwardStrand
+   */
+  protected static void sortFeatures(SequenceFeature[] features,
+          final boolean forwardStrand)
+  {
+    Arrays.sort(features, new Comparator<SequenceFeature>()
+    {
+      @Override
+      public int compare(SequenceFeature o1, SequenceFeature o2)
+      {
+        if (forwardStrand)
+        {
+          return Integer.compare(o1.getBegin(), o2.getBegin());
+        }
+        else
+        {
+          return Integer.compare(o2.getEnd(), o1.getEnd());
+        }
+      }
+    });
+  }
+
+  /**
    * Answers true if the feature type is one we want to keep for the sequence.
    * Some features are only retrieved in order to identify the sequence range,
    * and may then be discarded as redundant information (e.g. "CDS" feature for
index f9c2c4b..6df479c 100644 (file)
@@ -3,10 +3,11 @@ package jalview.ext.ensembl;
 import static org.testng.AssertJUnit.assertEquals;
 import static org.testng.AssertJUnit.assertFalse;
 import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
 
-import jalview.analysis.AlignmentUtils;
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
+import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
 import jalview.io.AppletFormatAdapter;
 import jalview.io.FastaFile;
@@ -247,4 +248,67 @@ public class EnsemblSeqProxyTest
     assertFalse(testee.isGeneIdentifier("ENSG000000123456"));
     assertFalse(testee.isGeneIdentifier("ENSG0000001234"));
   }
+
+  /**
+   * Test the method that appends a single allele's reverse complement to a
+   * string buffer
+   */
+  @Test(groups = "Functional")
+  public void testReverseComplementAllele()
+  {
+    StringBuilder sb = new StringBuilder();
+    EnsemblSeqProxy.reverseComplementAllele(sb, "G"); // comp=C
+    EnsemblSeqProxy.reverseComplementAllele(sb, "g"); // comp=c
+    EnsemblSeqProxy.reverseComplementAllele(sb, "C"); // comp=G
+    EnsemblSeqProxy.reverseComplementAllele(sb, "T"); // comp=A
+    EnsemblSeqProxy.reverseComplementAllele(sb, "A"); // comp=T
+    assertEquals("C,c,G,A,T", sb.toString());
+
+    sb = new StringBuilder();
+    EnsemblSeqProxy.reverseComplementAllele(sb, "-GATt"); // revcomp=aATC-
+    EnsemblSeqProxy.reverseComplementAllele(sb, "hgmd_mutation");
+    assertEquals("aATC-,hgmd_mutation", sb.toString());
+  }
+
+  /**
+   * Test the method that computes the reverse complement of the alleles in a
+   * sequence_variant feature
+   */
+  @Test(groups = "Functional")
+  public void testReverseComplementAlleles()
+  {
+    String alleles = "C,G,-TAC,HGMD_MUTATION,gac";
+    SequenceFeature sf = new SequenceFeature("sequence_variant", alleles,
+            1, 2, 0f, null);
+    sf.setValue("alleles", alleles);
+    sf.setAttributes("x=y,z;alleles=" + alleles + ";a=b,c");
+
+    EnsemblSeqProxy.reverseComplementAlleles(sf);
+    String revcomp = "G,C,GTA-,HGMD_MUTATION,gtc";
+    // verify description is updated with reverse complement
+    assertEquals(revcomp, sf.getDescription());
+    // verify alleles attribute is updated with reverse complement
+    assertEquals(revcomp, sf.getValue("alleles"));
+    // verify attributes string is updated with reverse complement
+    assertEquals("x=y,z;alleles=" + revcomp + ";a=b,c", sf.getAttributes());
+  }
+
+  @Test(groups = "Functional")
+  public void testSortFeatures()
+  {
+    SequenceFeature sf1 = new SequenceFeature("", "", 10, 15, 0f, null);
+    SequenceFeature sf2 = new SequenceFeature("", "", 8, 12, 0f, null);
+    SequenceFeature sf3 = new SequenceFeature("", "", 8, 13, 0f, null);
+    SequenceFeature sf4 = new SequenceFeature("", "", 11, 11, 0f, null);
+    SequenceFeature[] sfs = new SequenceFeature[] { sf1, sf2, sf3, sf4 };
+
+    // sort by start position ascending (forward strand)
+    // sf2 and sf3 tie and should not be reordered by sorting
+    EnsemblSeqProxy.sortFeatures(sfs, true);
+    assertArrayEquals(new SequenceFeature[] { sf2, sf3, sf1, sf4 }, sfs);
+
+    // sort by end position descending (reverse strand)
+    EnsemblSeqProxy.sortFeatures(sfs, false);
+    assertArrayEquals(new SequenceFeature[] { sf1, sf3, sf2, sf4 }, sfs);
+  }
 }
\ No newline at end of file