JAL-98 ResidueCount 100% test coverage (and bug fixes!)

author gmungoc <g.m.carstairs@dundee.ac.uk>

Mon, 24 Oct 2016 18:11:28 +0000 (19:11 +0100)

committer gmungoc <g.m.carstairs@dundee.ac.uk>

Mon, 24 Oct 2016 18:11:28 +0000 (19:11 +0100)
author gmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 24 Oct 2016 18:11:28 +0000 (19:11 +0100)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 24 Oct 2016 18:11:28 +0000 (19:11 +0100)
diff --git a/src/jalview/analysis/ResidueCount.java b/src/jalview/analysis/ResidueCount.java

index 2c7cb20..cd35206 100644 (file)
--- a/src/jalview/analysis/ResidueCount.java
+++ b/src/jalview/analysis/ResidueCount.java
@@ -19,7 +19,7 @@ public class ResidueCount
    public class SymbolCounts
    {
      /**
-     * the symbols seen (as char values)
+     * the symbols seen (as char values), in no particular order
       */
      public final char[] symbols;
  
@@ -40,7 +40,7 @@ public class ResidueCount
    /*
     * nucleotide symbols to count (including N unknown)
     */
-  private static final String NUCS = "ACGTUN";
+  private static final String NUCS = "ACGNTU";
  
    /*
     * amino acid symbols to count (including X unknown)
@@ -134,10 +134,11 @@ public class ResidueCount
     * @param c
     * @return the new value of the count for the character
     */
-  public int add(char c)
+  public int add(final char c)
    {
+    char u = toUpperCase(c);
      int newValue = 0;
-    int offset = getOffset(c);
+    int offset = getOffset(u);
  
      /*
       * offset 0 is reserved for gap counting, so 0 here means either
@@ -145,13 +146,13 @@ public class ResidueCount
       */
      if (offset == 0)
      {
-      if (Comparison.isGap(c))
+      if (Comparison.isGap(u))
        {
          newValue = addGap();
        }
        else
        {
-        newValue = addOtherCharacter(c);
+        newValue = addOtherCharacter(u);
        }
      }
      else
@@ -209,22 +210,13 @@ public class ResidueCount
    }
  
    /**
+   * Returns this character's offset in the count array
+   * 
     * @param c
     * @return
     */
    int getOffset(char c)
    {
-    /*
-     * ensure upper-case (fails fast if it already is!)
-     */
-    if ('a' <= c && c <= 'z')
-    {
-      c = (char) (c + TOUPPERCASE);
-    }
-
-    /*
-     * locate this character's offset in the count array
-     */
      int offset = 0;
      if ('A' <= c && c <= 'Z')
      {
@@ -234,6 +226,20 @@ public class ResidueCount
    }
  
   /**
+   * @param c the character to convert; only 'a'..'z' are altered
+   * @return c + TOUPPERCASE (intended upper-case form) for 'a'..'z', else c unchanged
+   */
+  protected char toUpperCase(final char c)
+  {
+    char u = c;
+    if ('a' <= c && c <= 'z')
+    {
+      u = (char) (c + TOUPPERCASE);
+    }
+    return u;
+  }
+
+  /**
     * Increment count for some unanticipated character. The first time this
     * called, a SparseCount is instantiated to hold these 'extra' counts.
     * 
@@ -305,7 +311,8 @@ public class ResidueCount
     */
    public void put(char c, int count)
    {
-    int offset = getOffset(c);
+    char u = toUpperCase(c);
+    int offset = getOffset(u);
  
      /*
       * offset 0 is reserved for gap counting, so 0 here means either
@@ -313,13 +320,13 @@ public class ResidueCount
       */
      if (offset == 0)
      {
-      if (Comparison.isGap(c))
+      if (Comparison.isGap(u))
        {
-        addGap();
+        set(0, count);
        }
        else
        {
-        setOtherCharacter(c, count);
+        setOtherCharacter(u, count);
          maxCount = Math.max(maxCount, count);
        }
      }
@@ -365,13 +372,14 @@ public class ResidueCount
     */
    public int getCount(char c)
    {
-    int offset = getOffset(c);
+    char u = toUpperCase(c);
+    int offset = getOffset(u);
      if (offset == 0)
      {
-      if (!Comparison.isGap(c))
+      if (!Comparison.isGap(u))
        {
          // should have called getGapCount()
-        return otherData == null ? 0 : otherData.get(c);
+        return otherData == null ? 0 : otherData.get(u);
        }
      }
      return useIntCounts ? intCounts[offset] : counts[offset];
@@ -449,7 +457,7 @@ public class ResidueCount
    }
  
    /**
-   * Returns the highest count for any symbol in the profile (excluding gap)
+   * Returns the highest count for any symbol(s) in the profile (excluding gap)
     * 
     * @return
     */
@@ -495,12 +503,10 @@ public class ResidueCount
    }
  
    /**
-   * Returns those symbols that have a non-zero count (excluding the gap
-   * symbol), with their counts. The symbols are in no special order. Returns an
-   * array of size 2 whose first element is a char array of symbols, and second
-   * element an int array of corresponding counts.
+   * Returns a data bean holding those symbols that have a non-zero count
+   * (excluding gap), plus all 'other data' symbols even if zero, with counts.
     * 
-   * @return an array [[char1, char2, ...] [char1Count, char2Count, ...] ... ]
+   * @return the symbols and their counts, held as parallel arrays
     */
    public SymbolCounts getSymbolCounts()
    {
@@ -540,13 +546,9 @@ public class ResidueCount
      {
        for (int i = 0; i < otherData.size(); i++)
        {
-        int value = otherData.valueAt(i);
-        if (value > 0)
-        {
-          symbols[j] = (char) otherData.keyAt(i);
-          values[j] = otherData.valueAt(i);
-          j++;
-        }
+        symbols[j] = (char) otherData.keyAt(i);
+        values[j] = otherData.valueAt(i);
+        j++;
        }
      }
  
@@ -566,7 +568,6 @@ public class ResidueCount
     */
    public String getTooltip(int normaliseBy, int percentageDecPl)
    {
-    StringBuilder sb = new StringBuilder(64);
      SymbolCounts symbolCounts = getSymbolCounts();
      char[] ca = symbolCounts.symbols;
      int[] vl = symbolCounts.values;
@@ -579,20 +580,18 @@ public class ResidueCount
      /*
       * traverse in reverse order (highest count first) to build tooltip
       */
-    for (int p = 0, c = ca.length - 1; c >= 0; c--)
+    boolean first = true;
+    StringBuilder sb = new StringBuilder(64);
+    for (int c = ca.length - 1; c >= 0; c--)
      {
        final char residue = ca[c];
-      if (residue != '-')
-      {
-        // TODO combine residues which share a percentage
-        // (see AAFrequency.completeCdnaConsensus)
-        float tval = (vl[c] * 100f) / normaliseBy;
-        sb.append((((p == 0) ? "" : "; "))).append(residue)
-                .append(" ");
-        Format.appendPercentage(sb, tval, percentageDecPl);
-        sb.append("%");
-        p++;
-      }
+      // TODO combine residues which share a percentage
+      // (see AAFrequency.completeCdnaConsensus)
+      float tval = (vl[c] * 100f) / normaliseBy;
+      sb.append(first ? "" : "; ").append(residue).append(" ");
+      Format.appendPercentage(sb, tval, percentageDecPl);
+      sb.append("%");
+      first = false;
      }
      return sb.toString();
    }
diff --git a/test/jalview/analysis/ResidueCountTest.java b/test/jalview/analysis/ResidueCountTest.java

index a26252c..4a71f89 100644 (file)
--- a/test/jalview/analysis/ResidueCountTest.java
+++ b/test/jalview/analysis/ResidueCountTest.java
@@ -71,6 +71,7 @@ public class ResidueCountTest
       * overflow from add
       */
      ResidueCount rc = new ResidueCount(true);
+    rc.addGap();
      rc.put('A', Short.MAX_VALUE - 1);
      assertFalse(rc.isCountingInts());
      rc.add('A');
@@ -78,6 +79,12 @@ public class ResidueCountTest
      rc.add('A');
      assertTrue(rc.isCountingInts());
      assertEquals(rc.getCount('a'), Short.MAX_VALUE + 1);
+    rc.add('A');
+    assertTrue(rc.isCountingInts());
+    assertEquals(rc.getCount('a'), Short.MAX_VALUE + 2);
+    assertEquals(rc.getGapCount(), 1);
+    rc.addGap();
+    assertEquals(rc.getGapCount(), 2);
  
      /*
       * overflow from put
@@ -86,6 +93,17 @@ public class ResidueCountTest
      rc.put('G', Short.MAX_VALUE + 1);
      assertTrue(rc.isCountingInts());
      assertEquals(rc.getCount('g'), Short.MAX_VALUE + 1);
+    rc.put('G', 1);
+    assertTrue(rc.isCountingInts());
+    assertEquals(rc.getCount('g'), 1);
+
+    /*
+     * underflow from put
+     */
+    rc = new ResidueCount(true);
+    rc.put('G', Short.MIN_VALUE - 1);
+    assertTrue(rc.isCountingInts());
+    assertEquals(rc.getCount('g'), Short.MIN_VALUE - 1);
    }
  
    /**
@@ -135,6 +153,9 @@ public class ResidueCountTest
  
      rc.put('J', 4);
      assertTrue(rc.isUsingOtherData());
+    assertEquals(rc.getCount('J'), 4);
+    rc.add('j');
+    assertEquals(rc.getCount('J'), 5);
    }
  
    @Test(groups = "Functional")
@@ -162,7 +183,7 @@ public class ResidueCountTest
    @Test(groups = "Functional")
    public void testGetModalCount()
    {
-    ResidueCount rc = new ResidueCount();
+    ResidueCount rc = new ResidueCount(true);
      rc.add('c');
      rc.add('g');
      rc.add('c');
@@ -176,7 +197,7 @@ public class ResidueCountTest
      assertEquals(rc.getModalCount(), Short.MAX_VALUE + 1);
  
      // modal count is in the 'other data' counts
-    rc = new ResidueCount();
+    rc = new ResidueCount(false);
      rc.add('Q');
      rc.add('{');
      rc.add('{');
@@ -196,7 +217,7 @@ public class ResidueCountTest
    @Test(groups = "Functional")
    public void testGetResiduesForCount()
    {
-    ResidueCount rc = new ResidueCount();
+    ResidueCount rc = new ResidueCount(true);
      rc.add('c');
      rc.add('g');
      rc.add('c');
@@ -207,12 +228,20 @@ public class ResidueCountTest
      assertEquals(rc.getResiduesForCount(-1), "");
  
      // modal count is in the 'short overflow' counts
-    rc = new ResidueCount();
+    rc = new ResidueCount(true);
      rc.add('c');
      rc.put('g', Short.MAX_VALUE);
      rc.add('G');
      assertEquals(rc.getResiduesForCount(Short.MAX_VALUE + 1), "G");
      assertEquals(rc.getResiduesForCount(1), "C");
+
+    // peptide modal count is in the 'short overflow' counts
+    rc = new ResidueCount(false);
+    rc.add('c');
+    rc.put('p', Short.MAX_VALUE);
+    rc.add('P');
+    assertEquals(rc.getResiduesForCount(Short.MAX_VALUE + 1), "P");
+    assertEquals(rc.getResiduesForCount(1), "C");
    
      // modal count is in the 'other data' counts
      rc = new ResidueCount();
@@ -248,27 +277,52 @@ public class ResidueCountTest
    }
  
    @Test(groups = "Functional")
-  public void testGetSymbolCounts()
+  public void testGetSymbolCounts_nucleotide()
    {
-    ResidueCount rc = new ResidueCount();
-    rc.add('q');
+    ResidueCount rc = new ResidueCount(true);
+    rc.add('g');
      rc.add('c');
-    rc.add('Q');
+    rc.add('G');
      rc.add('J'); // 'otherData'
-    rc.add('q');
-    rc.add('x');
+    rc.add('g');
+    rc.add('N');
+    rc.put('[', 0); // 'otherdata'
  
      SymbolCounts sc = rc.getSymbolCounts();
-    Assert.assertArrayEquals(new char[] { 'C', 'Q', 'X', 'J' }, sc.symbols);
-    Assert.assertArrayEquals(new int[] { 1, 3, 1, 1 }, sc.values);
+    Assert.assertArrayEquals(new char[] { 'C', 'G', 'N', 'J', '[' },
+            sc.symbols);
+    Assert.assertArrayEquals(new int[] { 1, 3, 1, 1, 0 }, sc.values);
  
      // now with overflow to int counts
-    rc.put('g', Short.MAX_VALUE);
-    rc.add('g');
+    rc.put('U', Short.MAX_VALUE);
+    rc.add('u');
      sc = rc.getSymbolCounts();
-    Assert.assertArrayEquals(new char[] { 'C', 'G', 'Q', 'X', 'J' },
+    Assert.assertArrayEquals(new char[] { 'C', 'G', 'N', 'U', 'J', '[' },
              sc.symbols);
-    Assert.assertArrayEquals(new int[] { 1, 32768, 3, 1, 1 }, sc.values);
+    Assert.assertArrayEquals(new int[] { 1, 3, 1, 32768, 1, 0 }, sc.values);
+  }
+
+  @Test(groups = "Functional")
+  public void testGetSymbolCounts_peptide()
+  {
+    ResidueCount rc = new ResidueCount(false); // false: peptide (cf. true in the nucleotide test)
+    rc.add('W');
+    rc.add('q');
+    rc.add('W');
+    rc.add('Z'); // 'otherData'
+    rc.add('w'); // lower case is counted as 'W', giving 3 in total
+    rc.add('L');
+
+    SymbolCounts sc = rc.getSymbolCounts();
+    Assert.assertArrayEquals(new char[] { 'L', 'Q', 'W', 'Z' }, sc.symbols);
+    Assert.assertArrayEquals(new int[] { 1, 1, 3, 1 }, sc.values);
+
+    // now with overflow to int counts
+    rc.put('W', Short.MAX_VALUE); // 32767, the largest value a short can hold
+    rc.add('W'); // 32768 no longer fits in a short
+    sc = rc.getSymbolCounts();
+    Assert.assertArrayEquals(new char[] { 'L', 'Q', 'W', 'Z' }, sc.symbols);
+    Assert.assertArrayEquals(new int[] { 1, 1, 32768, 1 }, sc.values);
   }
  
    @Test(groups = "Functional")
@@ -289,4 +343,62 @@ public class ResidueCountTest
      rc.add('g');
      assertEquals(rc.toString(), "[ C:1 G:32768 Q:2 {:1 ]");
    }
+
+  @Test(groups = "Functional")
+  public void testGetTooltip()
+  {
+    ResidueCount rc = new ResidueCount();
+
+    // no counts: the tooltip is the empty string
+    assertEquals(rc.getTooltip(20, 1), "");
+
+    /*
+     * count 7 C, 6 K, 7 Q, 10 P, 9 W, 1 F (total 40)
+     */
+    for (int i = 0; i < 7; i++)
+    {
+      rc.add('c');
+      rc.add('q');
+    }
+    for (int i = 0; i < 10; i++)
+    {
+      rc.add('p');
+    }
+    for (int i = 0; i < 9; i++)
+    {
+      rc.add('W');
+    }
+    for (int i = 0; i < 6; i++)
+    {
+      rc.add('K');
+    }
+    rc.add('F');
+    
+    assertEquals(rc.getTooltip(40, 0), // normaliseBy = 40, 0 decimal places
+            "P 25%; W 22%; C 17%; Q 17%; K 15%; F 2%"); // 22.5% -> 22%, 17.5% -> 17%: not rounded up
+
+    assertEquals(rc.getTooltip(30, 1), // normaliseBy = 30 (not the total of 40), 1 decimal place
+            "P 33.3%; W 30.0%; C 23.3%; Q 23.3%; K 20.0%; F 3.3%");
+  }
+
+  @Test(groups = "Functional")
+  public void testPut()
+  {
+    ResidueCount rc = new ResidueCount();
+    rc.put('q', 3); // lower case is stored under the upper-case symbol
+    assertEquals(rc.getCount('Q'), 3);
+    rc.put(' ', 4); // space, '.' and '-' each set the same gap count
+    assertEquals(rc.getGapCount(), 4);
+    rc.put('.', 5);
+    assertEquals(rc.getGapCount(), 5);
+    rc.put('-', 6);
+    assertEquals(rc.getGapCount(), 6);
+
+    rc.put('?', 5); // neither a letter nor a gap
+    assertEquals(rc.getCount('?'), 5);
+    rc.put('?', 6); // put replaces the previous count rather than adding to it
+    rc.put('!', 7); // a second such symbol is counted independently of '?'
+    assertEquals(rc.getCount('?'), 6);
+    assertEquals(rc.getCount('!'), 7);
+  }
  }
author	gmungoc <g.m.carstairs@dundee.ac.uk>
	Mon, 24 Oct 2016 18:11:28 +0000 (19:11 +0100)
committer	gmungoc <g.m.carstairs@dundee.ac.uk>
	Mon, 24 Oct 2016 18:11:28 +0000 (19:11 +0100)
src/jalview/analysis/ResidueCount.java		patch \| blob \| history
test/jalview/analysis/ResidueCountTest.java		patch \| blob \| history