JAL-3202 discard zero percentages in extracted profile

[jalview.git] / src / jalview / analysis / AAFrequency.java
diff --git a/src/jalview/analysis/AAFrequency.java b/src/jalview/analysis/AAFrequency.java

index e4f2dfa..a1b0325 100755 (executable)
--- a/src/jalview/analysis/AAFrequency.java
+++ b/src/jalview/analysis/AAFrequency.java
@@ -398,7 +398,7 @@ public class AAFrequency
     * contains
     * 
     * <pre>
-   *    [profileType, numberOfValues, nonGapCount, charValue1, percentage1, charValue2, percentage2, ...]
+   *    [profileType, numberOfValues, totalPercentage, charValue1, percentage1, charValue2, percentage2, ...]
     * in descending order of percentage value
     * </pre>
     * 
@@ -411,7 +411,6 @@ public class AAFrequency
     */
    public static int[] extractProfile(ProfileI profile, boolean ignoreGaps)
    {
-    int[] rtnval = new int[64];
      ResidueCount counts = profile.getCounts();
      if (counts == null)
      {
@@ -422,7 +421,6 @@ public class AAFrequency
      char[] symbols = symbolCounts.symbols;
      int[] values = symbolCounts.values;
      QuickSort.sort(values, symbols);
-    int nextArrayPos = 2;
      int totalPercentage = 0;
      final int divisor = ignoreGaps ? profile.getNonGapped()
              : profile.getHeight();
@@ -430,21 +428,44 @@ public class AAFrequency
      /*
       * traverse the arrays in reverse order (highest counts first)
       */
+    int[] result = new int[3 + 2 * symbols.length];
+    int nextArrayPos = 3;
+    int nonZeroCount = 0;
+
      for (int i = symbols.length - 1; i >= 0; i--)
      {
        int theChar = symbols[i];
        int charCount = values[i];
-
-      rtnval[nextArrayPos++] = theChar;
        final int percentage = (charCount * 100) / divisor;
-      rtnval[nextArrayPos++] = percentage;
+      if (percentage == 0)
+      {
+        /*
+         * this count (and any remaining ones) rounds down to 0% - discard
+         */
+        break;
+      }
+      nonZeroCount++;
+      result[nextArrayPos++] = theChar;
+      result[nextArrayPos++] = percentage;
        totalPercentage += percentage;
      }
-    rtnval[0] = symbols.length;
-    rtnval[1] = totalPercentage;
-    int[] result = new int[rtnval.length + 1];
+
+    /*
+     * truncate array if any zero values were discarded
+     */
+    if (nonZeroCount < symbols.length)
+    {
+      int[] tmp = new int[3 + 2 * nonZeroCount];
+      System.arraycopy(result, 0, tmp, 0, tmp.length);
+      result = tmp;
+    }
+
+    /*
+     * fill in 'header' values
+     */
      result[0] = AlignmentAnnotation.SEQUENCE_PROFILE;
-    System.arraycopy(rtnval, 0, result, 1, rtnval.length);
+    result[1] = nonZeroCount;
+    result[2] = totalPercentage;
  
      return result;
    }
@@ -454,7 +475,7 @@ public class AAFrequency
     * contains
     * 
     * <pre>
-   *    [profileType, numberOfValues, totalCount, charValue1, percentage1, charValue2, percentage2, ...]
+   *    [profileType, numberOfValues, totalPercentage, charValue1, percentage1, charValue2, percentage2, ...]
     * in descending order of percentage value, where the character values encode codon triplets
     * </pre>
     * 
@@ -492,9 +513,16 @@ public class AAFrequency
        {
          break; // nothing else of interest here
        }
+      final int percentage = codonCount * 100 / divisor;
+      if (percentage == 0)
+      {
+        /*
+         * this value (and any remaining ones) rounds down to 0% - discard
+         */
+        break;
+      }
        distinctValuesCount++;
        result[j++] = codons[i];
-      final int percentage = codonCount * 100 / divisor;
        result[j++] = percentage;
        totalPercentage += percentage;
      }
@@ -531,7 +559,7 @@ public class AAFrequency
      for (int col = 0; col < cols; col++)
      {
        // todo would prefer a Java bean for consensus data
-      Hashtable<String, int[]> columnHash = new Hashtable<String, int[]>();
+      Hashtable<String, int[]> columnHash = new Hashtable<>();
        // #seqs, #ungapped seqs, counts indexed by (codon encoded + 1)
        int[] codonCounts = new int[66];
        codonCounts[0] = alignment.getSequences().size();