JAL-1007 refactored code and patched calculation bug

[jalview.git] / src / jalview / analysis / StructureFrequency.java
diff --git a/src/jalview/analysis/StructureFrequency.java b/src/jalview/analysis/StructureFrequency.java

index 9bdaa10..67d8b9b 100644 (file)
--- a/src/jalview/analysis/StructureFrequency.java
+++ b/src/jalview/analysis/StructureFrequency.java
@@ -1,20 +1,21 @@
  /*
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.6)
- * Copyright (C) 2010 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle
- * 
+ * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
+ * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle
+ *
   * This file is part of Jalview.
- * 
+ *
   * Jalview is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License 
+ * modify it under the terms of the GNU General Public License
   * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
- * 
- * Jalview is distributed in the hope that it will be useful, but 
- * WITHOUT ANY WARRANTY; without even the implied warranty 
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
   * PURPOSE.  See the GNU General Public License for more details.
- * 
+ *
   * You should have received a copy of the GNU General Public License along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
   */
+
  package jalview.analysis;
  
  import java.util.*;
@@ -26,7 +27,7 @@ import jalview.datamodel.*;
   * returns a new Hashtable[] of size maxSeqLength, if Hashtable not supplied.
   * This class is used extensively in calculating alignment colourschemes that
   * depend on the amount of conservation in each alignment column.
- * 
+ *
   * @author $author$
   * @version $Revision$
   */
@@ -46,12 +47,15 @@ public class StructureFrequency
  
    public static final String PAIRPROFILE = "B";
  
-/**
- * Returns the 3' position of a base pair
- * @param pairs Secondary structure annotation
- * @param indice 5' position of a base pair
- * @return 3' position of a base pair
- */
+  /**
+   * Returns the 3' position of a base pair
+   *
+   * @param pairs
+   *          Secondary structure annotation
+   * @param indice
+   *          5' position of a base pair
+   * @return 3' position of a base pair
+   */
    public static int findPair(SequenceFeature[] pairs, int indice)
    {
      for (int i = 0; i < pairs.length; i++)
@@ -67,7 +71,7 @@ public class StructureFrequency
    /**
     * Method to calculate a 'base pair consensus row', very similar to nucleotide
     * consensus but takes into account a given structure
-   * 
+   *
     * @param sequences
     * @param start
     * @param end
@@ -96,8 +100,14 @@ public class StructureFrequency
        values = new int[255];
        pairs = new int[255][255];
        bpEnd = -1;
-
-      s = struc[i];
+      if (i < struc.length)
+      {
+        s = struc[i];
+      }
+      else
+      {
+        s = '-';
+      }
        if (s == '.' || s == ' ')
        {
          s = '-';
@@ -112,6 +122,9 @@ public class StructureFrequency
        }
        else
        {
+        bpEnd = findPair(rna, i);
+        if (bpEnd>-1)
+        {
          for (j = 0; j < jSize; j++) // foreach row
          {
            if (sequences[j] == null)
@@ -120,11 +133,8 @@ public class StructureFrequency
                      .println("WARNING: Consensus skipping null sequence - possible race condition.");
              continue;
            }
-          seq = sequences[j].getSequence();
-
-          if (seq.length > i)
+          c = sequences[j].getCharAt(i);
            {
-            c = seq[i];
  
              // standard representation for gaps in sequence and structure
              if (c == '.' || c == ' ')
@@ -137,17 +147,17 @@ public class StructureFrequency
                values['-']++;
                continue;
              }
-            bpEnd = findPair(rna, i);
-            cEnd = seq[bpEnd];
+            cEnd = sequences[j].getCharAt(bpEnd);
              if (checkBpType(c, cEnd))
              {
-              values['H']++; // H means it's a helix (structured)
+              values['(']++; // H means it's a helix (structured)
              }
              pairs[c][cEnd]++;
  
-            maxResidue = "H";
+            maxResidue = "(";
            }
          }
+        }
          // nonGap++;
        }
        // UPDATE this for new values
@@ -160,12 +170,12 @@ public class StructureFrequency
          residueHash.put(PAIRPROFILE, pairs);
        }
  
-      count = values['H'];
+      count = values['('];
  
        residueHash.put(MAXCOUNT, new Integer(count));
        residueHash.put(MAXRESIDUE, maxResidue);
  
-      percentage = ((float) count * 100) / (float) jSize;
+      percentage = ((float) count * 100) / jSize;
        residueHash.put(PID_GAPS, new Float(percentage));
  
        // percentage = ((float) count * 100) / (float) nongap;
@@ -176,6 +186,27 @@ public class StructureFrequency
        }
        if (bpEnd > 0)
        {
+        values[')'] = values['('];
+        values['('] = 0;
+
+        residueHash = new Hashtable();
+        maxResidue = ")";
+
+        if (profile)
+        {
+          residueHash.put(PROFILE, new int[][]
+          { values, new int[]
+          { jSize, (jSize - values['-']) } });
+
+          residueHash.put(PAIRPROFILE, pairs);
+        }
+
+        residueHash.put(MAXCOUNT, new Integer(count));
+        residueHash.put(MAXRESIDUE, maxResidue);
+
+        percentage = ((float) count * 100) / jSize;
+        residueHash.put(PID_GAPS, new Float(percentage));
+
          result[bpEnd] = residueHash;
        }
      }
@@ -183,7 +214,7 @@ public class StructureFrequency
  
    /**
     * Method to check if a base-pair is a canonical or a wobble bp
-   * 
+   *
     * @param up
     *          5' base
     * @param down
@@ -255,7 +286,7 @@ public class StructureFrequency
    /**
     * Compute all or part of the annotation row from the given consensus
     * hashtable
-   * 
+   *
     * @param consensus
     *          - pre-allocated annotation row
     * @param hconsensus
@@ -269,17 +300,6 @@ public class StructureFrequency
            boolean ignoreGapsInConsensusCalculation,
            boolean includeAllConsSymbols)
    {
-    completeConsensus(consensus, hconsensus, iStart, width,
-            ignoreGapsInConsensusCalculation, includeAllConsSymbols, null); // new
-    // char[]
-    // { 'A', 'C', 'G', 'T', 'U' });
-  }
-
-  public static void completeConsensus(AlignmentAnnotation consensus,
-          Hashtable[] hconsensus, int iStart, int width,
-          boolean ignoreGapsInConsensusCalculation,
-          boolean includeAllConsSymbols, char[] alphabet)
-  {
      float tval, value;
      if (consensus == null || consensus.annotations == null
              || consensus.annotations.length < width)
@@ -290,7 +310,8 @@ public class StructureFrequency
      }
      for (int i = iStart; i < width; i++)
      {
-      if (i >= hconsensus.length)
+      Hashtable hci;
+      if (i >= hconsensus.length || ((hci=hconsensus[i])==null))
        {
          // happens if sequences calculated over were shorter than alignment
          // width
@@ -298,72 +319,80 @@ public class StructureFrequency
          continue;
        }
        value = 0;
+      Float fv;
        if (ignoreGapsInConsensusCalculation)
        {
-        value = ((Float) hconsensus[i].get(StructureFrequency.PID_NOGAPS))
-                .floatValue();
+        fv =(Float) hci.get(StructureFrequency.PID_NOGAPS);
        }
        else
        {
-        value = ((Float) hconsensus[i].get(StructureFrequency.PID_GAPS))
-                .floatValue();
+        fv = (Float) hci.get(StructureFrequency.PID_GAPS);
        }
-
-      String maxRes = hconsensus[i].get(StructureFrequency.MAXRESIDUE)
+      if (fv==null)
+      {
+        consensus.annotations[i] = null;
+        // data has changed below us .. give up and
+        continue;
+      }
+      value = fv.floatValue();
+      String maxRes = hci.get(StructureFrequency.MAXRESIDUE)
                .toString();
-      String mouseOver = hconsensus[i].get(StructureFrequency.MAXRESIDUE)
+      String mouseOver = hci.get(StructureFrequency.MAXRESIDUE)
                + " ";
        if (maxRes.length() > 1)
        {
          mouseOver = "[" + maxRes + "] ";
          maxRes = "+";
        }
-      int[][] profile = (int[][]) hconsensus[i]
+      int[][] profile = (int[][]) hci
                .get(StructureFrequency.PROFILE);
-      if (profile != null && includeAllConsSymbols) // Just responsible for the
+      int[][] pairs = (int[][]) hci
+              .get(StructureFrequency.PAIRPROFILE);
+
+      if (pairs != null && includeAllConsSymbols) // Just responsible for the
        // tooltip
+      // TODO Update tooltips for Structure row
        {
          mouseOver = "";
-        if (alphabet != null)
+
+        /* TODO It's not sure what is the purpose of the alphabet and wheter it is useful for structure?
+         *
+         * if (alphabet != null) { for (int c = 0; c < alphabet.length; c++) {
+         * tval = ((float) profile[0][alphabet[c]]) 100f / (float)
+         * profile[1][ignoreGapsInConsensusCalculation ? 1 : 0]; mouseOver +=
+         * ((c == 0) ? "" : "; ") + alphabet[c] + " " + ((int) tval) + "%"; } }
+         * else {
+         */
+        Object[] ca = new Object[625];
+        float[] vl = new float[625];
+        int x = 0;
+        for (int c = 65; c < 90; c++)
          {
-          for (int c = 0; c < alphabet.length; c++)
+          for (int d = 65; d < 90; d++)
            {
-            tval = ((float) profile[0][alphabet[c]])
-                    * 100f
-                    / (float) profile[1][ignoreGapsInConsensusCalculation ? 1
-                            : 0];
-            mouseOver += ((c == 0) ? "" : "; ") + alphabet[c] + " "
-                    + ((int) tval) + "%";
+            ca[x] = new int[]
+            { c, d };
+            vl[x] = pairs[c][d];
+            x++;
            }
          }
-        else
+        jalview.util.QuickSort.sort(vl, ca);
+        int p = 0;
+
+        for (int c = 624; c > 0; c--)
          {
-          Object[] ca = new Object[profile[0].length];
-          float[] vl = new float[profile[0].length];
-          for (int c = 0; c < ca.length; c++)
-          {
-            ca[c] = new char[]
-            { (char) c };
-            vl[c] = (float) profile[0][c];
-          }
-          ;
-          jalview.util.QuickSort.sort(vl, ca);
-          for (int p = 0, c = ca.length - 1; profile[0][((char[]) ca[c])[0]] > 0; c--)
+          if (vl[c] > 0)
            {
-            if (((char[]) ca[c])[0] != '-')
-            {
-              tval = ((float) profile[0][((char[]) ca[c])[0]])
-                      * 100f
-                      / (float) profile[1][ignoreGapsInConsensusCalculation ? 1
-                              : 0];
-              mouseOver += ((p == 0) ? "" : "; ") + ((char[]) ca[c])[0]
-                      + " " + ((int) tval) + "%";
-              p++;
+            tval = (vl[c] * 100f / profile[1][ignoreGapsInConsensusCalculation ? 1
+                    : 0]);
+            mouseOver += ((p == 0) ? "" : "; ") + (char) ((int[]) ca[c])[0]
+                    + (char) ((int[]) ca[c])[1] + " " + ((int) tval) + "%";
+            p++;
  
-            }
            }
-
          }
+
+        // }
        }
        else
        {
@@ -376,38 +405,48 @@ public class StructureFrequency
  
    /**
     * get the sorted base-pair profile for the given position of the consensus
-   * 
+   *
     * @param hconsensus
     * @return profile of the given column
     */
    public static int[] extractProfile(Hashtable hconsensus,
-          boolean ignoreGapsInConsensusCalculation, int column)
+          boolean ignoreGapsInConsensusCalculation)
    {
-    // TODO is there a more elegant way to acces the column number?
-    /*
-     * calculate the frequence of the 16 bp variations for this column 'somehow'
-     * transfer this via getProfile and let it correctly draw
-     */
-    int[] rtnval = new int[51]; // 2*(5*5)+1
+    int[] rtnval = new int[74]; // 2*(5*5)+2
      int[][] profile = (int[][]) hconsensus.get(StructureFrequency.PROFILE);
      int[][] pairs = (int[][]) hconsensus
              .get(StructureFrequency.PAIRPROFILE);
  
      if (profile == null)
        return null;
-    
-    rtnval[0] = 1;
-    for (int j = 65; j <= 90; j++)
+
+    // TODO fix the object length, also do it in completeConsensus
+    Object[] ca = new Object[625];
+    float[] vl = new float[625];
+    int x = 0;
+    for (int c = 65; c < 90; c++)
      {
-      for (int k = 65; k <= 90; k++)
+      for (int d = 65; d < 90; d++)
        {
-        if (pairs[j][k] > 0)
-        {
-          rtnval[rtnval[0]++] = j;
-          rtnval[rtnval[0]++] = k;
-          rtnval[rtnval[0]++] = (int) ((float) pairs[j][k] * 100f / (float) profile[1][ignoreGapsInConsensusCalculation ? 1
-                  : 0]);
-        }
+        ca[x] = new int[]
+        { c, d };
+        vl[x] = pairs[c][d];
+        x++;
+      }
+    }
+    jalview.util.QuickSort.sort(vl, ca);
+
+    rtnval[0] = 2;
+    rtnval[1] = 0;
+    for (int c = 624; c > 0; c--)
+    {
+      if (vl[c] > 0)
+      {
+        rtnval[rtnval[0]++] = ((int[]) ca[c])[0];
+        rtnval[rtnval[0]++] = ((int[]) ca[c])[1];
+        rtnval[rtnval[0]] = (int) (vl[c] * 100f / profile[1][ignoreGapsInConsensusCalculation ? 1
+                : 0]);
+        rtnval[1]+=rtnval[rtnval[0]++];
        }
      }