JAL-1705 refactored/utility methods to detect e.g. 'PhenCode_variation'
author gmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 14 Apr 2016 11:04:15 +0000 (12:04 +0100)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Thu, 14 Apr 2016 11:04:15 +0000 (12:04 +0100)
src/jalview/ext/ensembl/EnsemblSeqProxy.java
src/jalview/util/Comparison.java
test/jalview/ext/ensembl/EnsemblSeqProxyTest.java
test/jalview/util/ComparisonTest.java

index 8fb668a..fb81e66 100644 (file)
@@ -14,6 +14,7 @@ import jalview.io.FastaFile;
 import jalview.io.FileParse;
 import jalview.io.gff.SequenceOntologyFactory;
 import jalview.io.gff.SequenceOntologyI;
+import jalview.util.Comparison;
 import jalview.util.DBRefUtils;
 import jalview.util.MapList;
 
@@ -679,16 +680,21 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient
     {
       complement.append(",");
     }
-    if ("HGMD_MUTATION".equalsIgnoreCase(allele))
+
+    /*
+     * some 'alleles' are actually descriptive terms 
+     * e.g. HGMD_MUTATION, PhenCode_variation
+     * - we don't want to 'reverse complement' these
+     */
+    if (!Comparison.isNucleotideSequence(allele, true))
     {
       complement.append(allele);
     }
     else
     {
-      char[] alleles = allele.toCharArray();
-      for (int i = alleles.length - 1; i >= 0; i--)
+      for (int i = allele.length() - 1; i >= 0; i--)
       {
-        complement.append(Dna.getComplement(alleles[i]));
+        complement.append(Dna.getComplement(allele.charAt(i)));
       }
     }
   }
index 8902e2c..5605a53 100644 (file)
@@ -286,7 +286,7 @@ public class Comparison
    * @param letters
    * @return
    */
-  public static final boolean areNucleotide(char[][] letters)
+  static final boolean areNucleotide(char[][] letters)
   {
     int ntCount = 0;
     int aaCount = 0;
@@ -300,16 +300,11 @@ public class Comparison
       // to save a lengthy calculation
       for (char c : seq)
       {
-        if ('a' <= c && c <= 'z')
-        {
-          c -= TO_UPPER_CASE;
-        }
-
-        if (c == 'A' || c == 'G' || c == 'C' || c == 'T' || c == 'U')
+        if (isNucleotide(c))
         {
           ntCount++;
         }
-        else if (!Comparison.isGap(c))
+        else if (!isGap(c))
         {
           aaCount++;
         }
@@ -332,6 +327,59 @@ public class Comparison
   }
 
   /**
+   * Answers true if the character is one of aAcCgGtTuU
+   * 
+   * @param c the character to test; lower case a-z is converted first
+   * @return true if c is A, C, G, T or U (in either case), else false
+   */
+  public static boolean isNucleotide(char c)
+  {
+    if ('a' <= c && c <= 'z')
+    {
+      c -= TO_UPPER_CASE;
+    }
+
+    switch (c)
+    {
+    case 'A':
+    case 'C':
+    case 'G':
+    case 'T':
+    case 'U':
+      return true;
+    }
+    return false;
+  }
+
+  /**
+   * Answers true if every character in the string is one of aAcCgGtTuU, or
+   * (optionally) a gap character (dot, dash, space), else false
+   * 
+   * @param s the string to test; null gives false, an empty string gives true
+   * @param allowGaps if true, characters accepted by isGap are also permitted
+   * @return true if no disallowed character is found in s, else false
+   */
+  public static boolean isNucleotideSequence(String s, boolean allowGaps)
+  {
+    if (s == null)
+    {
+      return false;
+    }
+    for (int i = 0; i < s.length(); i++)
+    {
+      char c = s.charAt(i);
+      if (!isNucleotide(c))
+      {
+        if (!allowGaps || !isGap(c))
+        {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  /**
    * Convenience overload of isNucleotide
    * 
    * @param seqs
index 6df479c..71f0212 100644 (file)
@@ -267,7 +267,8 @@ public class EnsemblSeqProxyTest
     sb = new StringBuilder();
     EnsemblSeqProxy.reverseComplementAllele(sb, "-GATt"); // revcomp=aATC-
     EnsemblSeqProxy.reverseComplementAllele(sb, "hgmd_mutation");
-    assertEquals("aATC-,hgmd_mutation", sb.toString());
+    EnsemblSeqProxy.reverseComplementAllele(sb, "PhenCode_variation");
+    assertEquals("aATC-,hgmd_mutation,PhenCode_variation", sb.toString());
   }
 
   /**
index 0c2c998..9aab66c 100644 (file)
@@ -49,7 +49,7 @@ public class ComparisonTest
    * AGCTU. Test is not case-sensitive and ignores gaps.
    */
   @Test(groups = { "Functional" })
-  public void testIsNucleotide()
+  public void testIsNucleotide_sequences()
   {
     SequenceI seq = new Sequence("eightypercent", "agctuAGCPV");
     assertFalse(Comparison.isNucleotide(new SequenceI[] { seq }));
@@ -130,6 +130,23 @@ public class ComparisonTest
             0.001f);
   }
 
+  @Test(groups = { "Functional" })
+  public void testIsNucleotide()
+  {
+    assertTrue(Comparison.isNucleotide('a')); // each base in both cases
+    assertTrue(Comparison.isNucleotide('A'));
+    assertTrue(Comparison.isNucleotide('c'));
+    assertTrue(Comparison.isNucleotide('C'));
+    assertTrue(Comparison.isNucleotide('g'));
+    assertTrue(Comparison.isNucleotide('G'));
+    assertTrue(Comparison.isNucleotide('t'));
+    assertTrue(Comparison.isNucleotide('T'));
+    assertTrue(Comparison.isNucleotide('u'));
+    assertTrue(Comparison.isNucleotide('U'));
+    assertFalse(Comparison.isNucleotide('-')); // gap character
+    assertFalse(Comparison.isNucleotide('P')); // non-nucleotide letter
+  }
+
   /**
    * Test the percentage identity calculation for two sequences
    */
@@ -158,4 +175,17 @@ public class ComparisonTest
     assertEquals(87.5f, Comparison.PID(seq1, seq2, 0, length, false, true),
             0.001f);
   }
+
+  @Test(groups = { "Functional" })
+  public void testIsNucleotideSequence()
+  {
+    assertFalse(Comparison.isNucleotideSequence(null, true)); // null
+    assertTrue(Comparison.isNucleotideSequence("", true)); // empty string
+    assertTrue(Comparison.isNucleotideSequence("aAgGcCtTuU", true));
+    assertTrue(Comparison.isNucleotideSequence("aAgGcCtTuU", false));
+    assertFalse(Comparison.isNucleotideSequence("xAgGcCtTuU", false));
+    assertFalse(Comparison.isNucleotideSequence("aAgGcCtTuUx", false));
+    assertTrue(Comparison.isNucleotideSequence("a A-g.GcCtTuU", true));
+    assertFalse(Comparison.isNucleotideSequence("a A-g.GcCtTuU", false));
+  }
 }