JAL-4019 Nucleotide Ambiguity Colour Scheme added. Test for nucleotide sequence adapt...

author Ben Soares <b.soares@dundee.ac.uk>

Thu, 10 Nov 2022 15:21:08 +0000 (15:21 +0000)

committer Ben Soares <b.soares@dundee.ac.uk>

Thu, 10 Nov 2022 15:21:08 +0000 (15:21 +0000)
author Ben Soares <b.soares@dundee.ac.uk>
Thu, 10 Nov 2022 15:21:08 +0000 (15:21 +0000)
committer Ben Soares <b.soares@dundee.ac.uk>
Thu, 10 Nov 2022 15:21:08 +0000 (15:21 +0000)
diff --git a/resources/lang/Messages.properties b/resources/lang/Messages.properties

index 3843ddb..fff8ff4 100644 (file)
--- a/resources/lang/Messages.properties
+++ b/resources/lang/Messages.properties
@@ -201,6 +201,7 @@ label.colourScheme_turnpropensity = Turn Propensity
  label.colourScheme_buriedindex = Buried Index
  label.colourScheme_purine/pyrimidine = Purine/Pyrimidine
  label.colourScheme_nucleotide = Nucleotide
+label.colourScheme_nucleotideambiguity = Nucleotide Ambiguity
  label.colourScheme_t-coffeescores = T-Coffee Scores
  label.colourScheme_rnahelices = By RNA Helices
  label.colourScheme_sequenceid = Sequence ID Colour
diff --git a/resources/lang/Messages_es.properties b/resources/lang/Messages_es.properties

index d0bfd65..ec090ae 100644 (file)
--- a/resources/lang/Messages_es.properties
+++ b/resources/lang/Messages_es.properties
@@ -194,6 +194,7 @@ label.colourScheme_turnpropensity = Tendencia de giro
  label.colourScheme_buriedindex = Índice de encubrimiento
  label.colourScheme_purine/pyrimidine = Purina/Pirimidina
  label.colourScheme_nucleotide = Nucleótido
+label.colourScheme_nucleotideambiguity = Ambigüedad de nucleótido
  label.colourScheme_t-coffeescores = Puntuación del T-Coffee
  label.colourScheme_rnahelices = Por hélices de RNA
  label.colourScheme_sequenceid = Color de ID de secuencia
diff --git a/src/jalview/schemes/JalviewColourScheme.java b/src/jalview/schemes/JalviewColourScheme.java

index 965a26b..accdc8a 100644 (file)
--- a/src/jalview/schemes/JalviewColourScheme.java
+++ b/src/jalview/schemes/JalviewColourScheme.java
@@ -44,10 +44,12 @@ public enum JalviewColourScheme
    Turn("Turn Propensity", TurnColourScheme.class),
    Buried("Buried Index", BuriedColourScheme.class),
    Nucleotide("Nucleotide", NucleotideColourScheme.class),
+  NucleotideAmbiguity("Nucleotide Ambiguity",
+          NucleotideAmbiguityColourScheme.class),
    PurinePyrimidine("Purine/Pyrimidine", PurinePyrimidineColourScheme.class),
    RNAHelices("RNA Helices", RNAHelicesColour.class),
    TCoffee("T-Coffee Scores", TCoffeeColourScheme.class),
-  IdColour("Sequence ID", IdColourScheme.class);
+  IdColour("Sequence ID", IdColourScheme.class),;
    // RNAInteraction("RNA Interaction type", RNAInteractionColourScheme.class)
  
    private String name;
diff --git a/src/jalview/schemes/NucleotideAmbiguityColourScheme.java b/src/jalview/schemes/NucleotideAmbiguityColourScheme.java

new file mode 100644 (file)

index 0000000..dd42db3
--- /dev/null
+++ b/src/jalview/schemes/NucleotideAmbiguityColourScheme.java
@@ -0,0 +1,40 @@
+package jalview.schemes;
+
+import jalview.api.AlignViewportI;
+import jalview.datamodel.AnnotatedCollectionI;
+
+public class NucleotideAmbiguityColourScheme extends ResidueColourScheme
+{
+  /**
+   * Creates a new NucleotideAmbiguityColourScheme object.
+   */
+  public NucleotideAmbiguityColourScheme()
+  {
+    super(ResidueProperties.nucleotideIndex,
+            ResidueProperties.nucleotideAmbiguity);
+  }
+
+  @Override
+  public boolean isNucleotideSpecific()
+  {
+    return true;
+  }
+
+  @Override
+  public String getSchemeName()
+  {
+    return JalviewColourScheme.NucleotideAmbiguity.toString();
+  }
+
+  /**
+   * Returns a new instance of this colour scheme with which the given data may
+   * be coloured
+   */
+  @Override
+  public ColourSchemeI getInstance(AlignViewportI view,
+          AnnotatedCollectionI coll)
+  {
+    return new NucleotideAmbiguityColourScheme();
+  }
+
+}
diff --git a/src/jalview/schemes/ResidueProperties.java b/src/jalview/schemes/ResidueProperties.java

index 7ad35c3..73b48a9 100755 (executable)
--- a/src/jalview/schemes/ResidueProperties.java
+++ b/src/jalview/schemes/ResidueProperties.java
@@ -20,10 +20,6 @@
   */
  package jalview.schemes;
  
-import java.util.Locale;
-
-import jalview.analysis.GeneticCodes;
-
  import java.awt.Color;
  import java.util.ArrayList;
  import java.util.Arrays;
@@ -31,9 +27,12 @@ import java.util.Enumeration;
  import java.util.HashMap;
  import java.util.Hashtable;
  import java.util.List;
+import java.util.Locale;
  import java.util.Map;
  import java.util.Vector;
  
+import jalview.analysis.GeneticCodes;
+
  public class ResidueProperties
  {
    // Stores residue codes/names and colours and other things
@@ -119,74 +118,50 @@ public class ResidueProperties
    /**
     * maximum (gap) index for matrices involving nucleotide alphabet
     */
-  public final static int maxNucleotideIndex = 10;
+  // public final static int maxNucleotideIndex = 10;
+  public final static int maxNucleotideIndex;
  
    static
    {
+
+    // Row order = symbol index; keeps the previous A=0, C=1, G=2, T=3 mapping
+    String[][] namesArray = { { "a", "Adenine" }, { "c", "Cytosine" },
+        { "g", "Guanine" }, { "t", "Thymine" },
+        { "u", "Uracil" },
+        { "i", "Inosine" },
+        { "x", "Xanthine" },
+        { "r", "Unknown Purine" },
+        { "y", "Unknown Pyrimidine" },
+        { "n", "Unknown" },
+        { "w", "Weak nucleotide (A or T)" },
+        { "s", "Strong nucleotide (G or C)" },
+        { "m", "Amino (A or C)" },
+        { "k", "Keto (G or T)" },
+        { "b", "Not A (G or C or T)" },
+        { "h", "Not G (A or C or T)" },
+        { "d", "Not C (A or G or T)" },
+        { "v", "Not T (A or G or C)" } };
+    // "gap" index: symbols use 0..length-1, so the gap slot is length,
+    // matching the trailing "Gap" entry of the nucleotide colour arrays
+    maxNucleotideIndex = namesArray.length;
+
      nucleotideIndex = new int[255];
      for (int i = 0; i < 255; i++)
      {
-      nucleotideIndex[i] = 10; // non-nucleotide symbols are all non-gap gaps.
+      nucleotideIndex[i] = maxNucleotideIndex; // non-nucleotide symbols are all
+                                               // non-gap gaps.
      }
  
-    nucleotideIndex['A'] = 0;
-    nucleotideIndex['a'] = 0;
-    nucleotideIndex['C'] = 1;
-    nucleotideIndex['c'] = 1;
-    nucleotideIndex['G'] = 2;
-    nucleotideIndex['g'] = 2;
-    nucleotideIndex['T'] = 3;
-    nucleotideIndex['t'] = 3;
-    nucleotideIndex['U'] = 4;
-    nucleotideIndex['u'] = 4;
-    nucleotideIndex['I'] = 5;
-    nucleotideIndex['i'] = 5;
-    nucleotideIndex['X'] = 6;
-    nucleotideIndex['x'] = 6;
-    nucleotideIndex['R'] = 7;
-    nucleotideIndex['r'] = 7;
-    nucleotideIndex['Y'] = 8;
-    nucleotideIndex['y'] = 8;
-    nucleotideIndex['N'] = 9;
-    nucleotideIndex['n'] = 9;
-
-    nucleotideName.put("A", "Adenine");
-    nucleotideName.put("a", "Adenine");
-    nucleotideName.put("G", "Guanine");
-    nucleotideName.put("g", "Guanine");
-    nucleotideName.put("C", "Cytosine");
-    nucleotideName.put("c", "Cytosine");
-    nucleotideName.put("T", "Thymine");
-    nucleotideName.put("t", "Thymine");
-    nucleotideName.put("U", "Uracil");
-    nucleotideName.put("u", "Uracil");
-    nucleotideName.put("I", "Inosine");
-    nucleotideName.put("i", "Inosine");
-    nucleotideName.put("X", "Xanthine");
-    nucleotideName.put("x", "Xanthine");
-    nucleotideName.put("R", "Unknown Purine");
-    nucleotideName.put("r", "Unknown Purine");
-    nucleotideName.put("Y", "Unknown Pyrimidine");
-    nucleotideName.put("y", "Unknown Pyrimidine");
-    nucleotideName.put("N", "Unknown");
-    nucleotideName.put("n", "Unknown");
-    nucleotideName.put("W", "Weak nucleotide (A or T)");
-    nucleotideName.put("w", "Weak nucleotide (A or T)");
-    nucleotideName.put("S", "Strong nucleotide (G or C)");
-    nucleotideName.put("s", "Strong nucleotide (G or C)");
-    nucleotideName.put("M", "Amino (A or C)");
-    nucleotideName.put("m", "Amino (A or C)");
-    nucleotideName.put("K", "Keto (G or T)");
-    nucleotideName.put("k", "Keto (G or T)");
-    nucleotideName.put("B", "Not A (G or C or T)");
-    nucleotideName.put("b", "Not A (G or C or T)");
-    nucleotideName.put("H", "Not G (A or C or T)");
-    nucleotideName.put("h", "Not G (A or C or T)");
-    nucleotideName.put("D", "Not C (A or G or T)");
-    nucleotideName.put("d", "Not C (A or G or T)");
-    nucleotideName.put("V", "Not T (A or G or C");
-    nucleotideName.put("v", "Not T (A or G or C");
-
+    for (int i = 0; i < namesArray.length; i++)
+    {
+      char c = namesArray[i][0].charAt(0);
+      nucleotideIndex[c] = i;
+      // Character.toUpperCase is Locale insensitive
+      nucleotideIndex[Character.toUpperCase(c)] = i;
+      nucleotideName.put(namesArray[i][0], namesArray[i][1]);
+      nucleotideName.put(namesArray[i][0].toUpperCase(Locale.ROOT),
+              namesArray[i][1]);
+    }
    }
  
    static
@@ -363,6 +338,36 @@ public class ResidueProperties
        Color.white, // R
        Color.white, // Y
        Color.white, // N
+      Color.white, // w
+      Color.white, // s
+      Color.white, // m
+      Color.white, // k
+      Color.white, // b
+      Color.white, // h
+      Color.white, // d
+      Color.white, // v
+      Color.white, // Gap
+  };
+
+  public static final Color[] nucleotideAmbiguity = {
+      Color.decode("#f0fff0"), // a
+      Color.decode("#f0fff0"), // g
+      Color.decode("#f0fff0"), // c
+      Color.decode("#f0fff0"), // t
+      Color.decode("#f0fff0"), // u
+      Color.decode("#ffffff"), // i
+      Color.decode("#ffffff"), // x
+      Color.decode("#CD5C5C"), // r
+      Color.decode("#008000"), // y
+      Color.decode("#2f4f4f"), // n
+      Color.decode("#4682B4"), // w
+      Color.decode("#FF8C00"), // s
+      Color.decode("#9ACD32"), // m
+      Color.decode("#9932CC"), // k
+      Color.decode("#8b4513"), // b
+      Color.decode("#808080"), // h
+      Color.decode("#483D8B"), // d
+      Color.decode("#b8860b"), // v
        Color.white, // Gap
    };
  
diff --git a/src/jalview/util/Comparison.java b/src/jalview/util/Comparison.java

index cd98ee7..2dcbeb5 100644 (file)
--- a/src/jalview/util/Comparison.java
+++ b/src/jalview/util/Comparison.java
@@ -20,11 +20,11 @@
   */
  package jalview.util;
  
-import jalview.datamodel.SequenceI;
-
  import java.util.ArrayList;
  import java.util.List;
  
+import jalview.datamodel.SequenceI;
+
  /**
   * Assorted methods for analysing or comparing sequences.
   */
@@ -32,6 +32,8 @@ public class Comparison
  {
    private static final int EIGHTY_FIVE = 85;
  
+  private static final int NINETY_NINE = 99;
+
    private static final int TO_UPPER_CASE = 'a' - 'A';
  
    public static final char GAP_SPACE = ' ';
@@ -256,7 +258,7 @@ public class Comparison
     */
    public static final boolean isGap(char c)
    {
-    return (c == GAP_DASH || c == GAP_DOT || c == GAP_SPACE) ? true : false;
+    return c == GAP_DASH || c == GAP_DOT || c == GAP_SPACE;
    }
  
    /**
@@ -275,12 +277,13 @@ public class Comparison
      long ntCount = 0;
      long aaCount = 0;
      long nCount = 0;
+    long ntaCount = 0;
  
      int len = seq.getLength();
      for (int i = 0; i < len; i++)
      {
        char c = seq.getCharAt(i);
-      if (isNucleotide(c) || isX(c))
+      if (isNucleotide(c))
        {
          ntCount++;
        }
@@ -291,6 +294,13 @@ public class Comparison
          {
            nCount++;
          }
+        else
+        {
+          if (isNucleotideAmbiguity(c))
+          {
+            ntaCount++;
+          }
+        }
        }
      }
      /*
@@ -304,6 +314,12 @@ public class Comparison
      }
      else
      {
+      // check for very large proportion of nucleotide and all ambiguity codes
+      if ((ntCount + nCount + ntaCount) * 100 >= NINETY_NINE
+              * (ntCount + aaCount))
+      {
+        return ntCount > 0;
+      }
        return false;
      }
    }
@@ -350,11 +366,13 @@ public class Comparison
     */
    public static boolean isNucleotide(char c)
    {
-    if ('a' <= c && c <= 'z')
-    {
-      c -= TO_UPPER_CASE;
-    }
-    switch (c)
+    return isNucleotide(c, false);
+  }
+
+  public static boolean isNucleotide(char c, boolean countAmbiguity)
+  {
+    char C = Character.toUpperCase(c);
+    switch (C)
      {
      case 'A':
      case 'C':
@@ -363,29 +381,45 @@ public class Comparison
      case 'U':
        return true;
      }
+    if (countAmbiguity)
+    {
+      boolean ambiguity = isNucleotideAmbiguity(C);
+      if (ambiguity)
+        return true;
+    }
      return false;
    }
  
-  public static boolean isN(char c)
+  public static boolean isNucleotideAmbiguity(char c)
    {
-    switch (c)
+    switch (Character.toUpperCase(c))
      {
-    case 'N':
-    case 'n':
+    case 'I':
+    case 'X':
+    case 'R':
+    case 'Y':
+    case 'W':
+    case 'S':
+    case 'M':
+    case 'K':
+    case 'B':
+    case 'H':
+    case 'D':
+    case 'V':
        return true;
+    case 'N': // not counting N as nucleotide
      }
      return false;
    }
  
+  public static boolean isN(char c)
+  {
+    return 'n' == Character.toLowerCase(c);
+  }
+
    public static boolean isX(char c)
    {
-    switch (c)
-    {
-    case 'X':
-    case 'x':
-      return true;
-    }
-    return false;
+    return 'x' == Character.toLowerCase(c);
    }
  
    /**
@@ -456,13 +490,7 @@ public class Comparison
    public static boolean isSameResidue(char c1, char c2,
            boolean caseSensitive)
    {
-    if (caseSensitive)
-    {
-      return (c1 == c2);
-    }
-    else
-    {
-      return Character.toUpperCase(c1) == Character.toUpperCase(c2);
-    }
+    return caseSensitive ? c1 == c2
+            : Character.toUpperCase(c1) == Character.toUpperCase(c2);
    }
  }
author	Ben Soares <b.soares@dundee.ac.uk>
	Thu, 10 Nov 2022 15:21:08 +0000 (15:21 +0000)
committer	Ben Soares <b.soares@dundee.ac.uk>
	Thu, 10 Nov 2022 15:21:08 +0000 (15:21 +0000)
resources/lang/Messages.properties		patch \| blob \| history
resources/lang/Messages_es.properties		patch \| blob \| history
src/jalview/schemes/JalviewColourScheme.java		patch \| blob \| history
src/jalview/schemes/NucleotideAmbiguityColourScheme.java	[new file with mode: 0644]	patch \| blob
src/jalview/schemes/ResidueProperties.java		patch \| blob \| history
src/jalview/util/Comparison.java		patch \| blob \| history