Optimized
author amwaterhouse <Andrew Waterhouse>
Tue, 10 Oct 2006 10:28:31 +0000 (10:28 +0000)
committer amwaterhouse <Andrew Waterhouse>
Tue, 10 Oct 2006 10:28:31 +0000 (10:28 +0000)
src/jalview/analysis/Conservation.java

index f65420c..d1b508b 100755 (executable)
@@ -31,13 +31,13 @@ import java.util.*;
  */\r
 public class Conservation\r
 {\r
-    Vector sequences;\r
+    SequenceI [] sequences;\r
     int start;\r
     int end;\r
     Vector seqNums; // vector of int vectors where first is sequence checksum\r
     int maxLength = 0; //  used by quality calcs\r
     boolean seqNumsChanged = false; // updated after any change via calcSeqNum;\r
-    Vector total = new Vector();\r
+    Hashtable [] total;\r
 \r
     /** Stores calculated quality values */\r
     public Vector quality;\r
@@ -64,28 +64,27 @@ public class Conservation
     public Conservation(String name, Hashtable propHash, int threshold,\r
         Vector sequences, int start, int end)\r
     {\r
+\r
         this.name = name;\r
         this.propHash = propHash;\r
         this.threshold = threshold;\r
-        this.sequences = sequences;\r
         this.start = start;\r
         this.end = end;\r
-        seqNums = new Vector(sequences.size());\r
-        calcSeqNums();\r
-    }\r
 \r
-    /**\r
-     * DOCUMENT ME!\r
-     */\r
-    private void calcSeqNums()\r
-    {\r
-      int i=0, iSize=sequences.size();\r
-        for (i=0; i < iSize; i++)\r
+\r
+        int s, sSize = sequences.size();\r
+        SequenceI[] sarray = new SequenceI[sSize];\r
+        this.sequences = sarray;\r
+\r
+        for (s = 0; s < sSize; s++)\r
         {\r
-            calcSeqNum(i);\r
+          sarray[s] = (SequenceI) sequences.elementAt(s);\r
+          if(sarray[s].getLength()>maxLength)\r
+            maxLength = sarray[s].getLength();\r
         }\r
     }\r
 \r
+\r
     /**\r
      * DOCUMENT ME!\r
      *\r
@@ -96,9 +95,11 @@ public class Conservation
         String sq = null; // for dumb jbuilder not-inited exception warning\r
         int[] sqnum = null;\r
 \r
-        if ((i > -1) && (i < sequences.size()))\r
+        int sSize = sequences.length;\r
+\r
+        if ((i > -1) && (i < sSize))\r
         {\r
-            sq = ((SequenceI) sequences.elementAt(i)).getSequence();\r
+            sq = sequences[i].getSequence();\r
 \r
             if (seqNums.size() <= i)\r
             {\r
@@ -110,7 +111,6 @@ public class Conservation
                 int j;\r
                 int len;\r
                 seqNumsChanged = true;\r
-                sq = ((SequenceI) sequences.elementAt(i)).getSequence();\r
                 len = sq.length();\r
 \r
                 if (maxLength < len)\r
@@ -123,12 +123,14 @@ public class Conservation
 \r
                 for (j = 1; j <= len; j++)\r
                 {\r
-                    sqnum[j] = ((Integer) jalview.schemes.ResidueProperties.aaHash.get(String.valueOf(\r
-                                sq.charAt(j - 1)))).intValue(); // yuk - JBPNote - case taken care of in aaHash\r
+                    sqnum[j] = jalview.schemes.ResidueProperties.aaIndex[sq.charAt(j-1)];\r
                 }\r
 \r
+\r
                 seqNums.setElementAt(sqnum, i);\r
             }\r
+            else\r
+              System.out.println("NEVER THE EXCEPTION");\r
         }\r
         else\r
         {\r
@@ -143,80 +145,54 @@ public class Conservation
      */\r
     public void calculate()\r
     {\r
-      Hashtable resultHash, residueHash, ht;\r
-      int count, thresh, j, jSize = sequences.size();\r
+      Hashtable resultHash, ht;\r
+      int thresh, j, jSize = sequences.length;\r
+      int[] values; // Replaces residueHash\r
       String type, res=null;\r
-      SequenceI sequence;\r
       char c;\r
-      Enumeration enumeration, enumeration2;\r
+      Enumeration enumeration2;\r
+\r
+      total = new Hashtable[maxLength];\r
 \r
       for (int i = start; i <= end; i++)\r
         {\r
-            resultHash = new Hashtable();\r
-            residueHash = new Hashtable();\r
+            values = new int[132];\r
 \r
             for (j = 0; j < jSize; j++)\r
             {\r
-                // JBPNote - have to make sure elements of the sequences vector\r
-                //  are tested like this everywhere...\r
-                    sequence = (Sequence) sequences.elementAt(j);\r
-\r
-                    if (sequence.getLength() > i)\r
-                    {\r
-                        c = sequence.getCharAt(i);\r
-\r
-                        // No need to check if its a '-'\r
-                        if(c == '.' || c==' ')\r
-                          c = '-';\r
-\r
-                        if ('a' <= c && c <= 'z')\r
-                        {\r
-                          // TO UPPERCASE !!!\r
-                          //Faster than toUpperCase\r
-                          c -= ('a' - 'A') ;\r
-                        }\r
+              if (sequences[j].getLength() > i)\r
+              {\r
+                c = sequences[j].getCharAt(i);\r
 \r
-                        res = String.valueOf( c );\r
+                // No need to check if its a '-'\r
+                if (c == '.' || c == ' ')\r
+                  c = '-';\r
 \r
+                if ('a' <= c && c <= 'z')\r
+                {\r
+                  c -= (32);// 32 = 'a' - 'A'\r
+                }\r
 \r
-                        if (residueHash.containsKey(res))\r
-                        {\r
-                            count = ((Integer) residueHash.get(res)).intValue();\r
-                            count++;\r
-                            residueHash.put(res, new Integer(count));\r
-                        }\r
-                        else\r
-                        {\r
-                            residueHash.put(res, new Integer(1));\r
-                        }\r
-                    }\r
-                    else\r
-                    {\r
-                        if (residueHash.containsKey("-"))\r
-                        {\r
-                            count = ((Integer) residueHash.get("-")).intValue();\r
-                            count++;\r
-                            residueHash.put("-", new Integer(count));\r
-                        }\r
-                        else\r
-                        {\r
-                            residueHash.put("-", new Integer(1));\r
-                        }\r
-                    }\r
+                values[c]++;\r
+              }\r
+              else\r
+              {\r
+                values['-']++;\r
+              }\r
             }\r
 \r
             //What is the count threshold to count the residues in residueHash()\r
-            thresh = (threshold * (sequences.size())) / 100;\r
+            thresh = (threshold * (jSize)) / 100;\r
 \r
             //loop over all the found residues\r
-            enumeration = residueHash.keys();\r
-\r
-            while (enumeration.hasMoreElements())\r
+            resultHash = new Hashtable();\r
+            for (int v = '-'; v < 'Z'; v++)\r
             {\r
-                res = (String) enumeration.nextElement();\r
 \r
-                if (((Integer) residueHash.get(res)).intValue() > thresh)\r
+                if (values[v] > thresh)\r
                 {\r
+                  res =  String.valueOf( (char) v);\r
+\r
                     //Now loop over the properties\r
                     enumeration2 = propHash.keys();\r
 \r
@@ -246,7 +222,7 @@ public class Conservation
                 }\r
             }\r
 \r
-            total.addElement(resultHash);\r
+            total[i] = resultHash;\r
         }\r
     }\r
 \r
@@ -262,18 +238,18 @@ public class Conservation
         int nres = 0;\r
         int[] r = new int[2];\r
         char f = '$';\r
-        int i, iSize = sequences.size();\r
+        int i, iSize = sequences.length;\r
         char c;\r
 \r
         for (i = 0; i < iSize; i++)\r
         {\r
-            if (j >= ((Sequence) sequences.elementAt(i)).getLength())\r
+            if (j >= sequences[i].getLength())\r
             {\r
                 count++;\r
                 continue;\r
             }\r
 \r
-            c = ((Sequence) sequences.elementAt(i)).getCharAt(j); // gaps do not have upper/lower case\r
+            c = sequences[i].getCharAt(j); // gaps do not have upper/lower case\r
 \r
             if (jalview.util.Comparison.isGap((c)))\r
             {\r
@@ -323,11 +299,11 @@ public class Conservation
         {\r
             gapcons = countConsNGaps(i);\r
             totGaps = gapcons[1];\r
-            pgaps = ((float) totGaps * 100) / (float) sequences.size();\r
+            pgaps = ((float) totGaps * 100) / (float) sequences.length;\r
 \r
             if (percentageGaps > pgaps)\r
             {\r
-                resultHash = (Hashtable) total.elementAt(i - start);\r
+                resultHash =  total[i - start];\r
 \r
                 //Now find the verdict\r
                 count = 0;\r
@@ -394,7 +370,15 @@ public class Conservation
      */\r
     private void percentIdentity2()\r
     {\r
-        calcSeqNums(); // updates maxLength, too.\r
+      seqNums = new Vector();\r
+     // calcSeqNum(s);\r
+      int i = 0, iSize = sequences.length;\r
+    //Do we need to calculate this again?\r
+      for (i = 0; i < iSize; i++)\r
+      {\r
+       calcSeqNum(i);\r
+      }\r
+\r
 \r
         if ((cons2 == null) || seqNumsChanged)\r
         {\r
@@ -403,7 +387,7 @@ public class Conservation
             // Initialize the array\r
             for (int j = 0; j < 24; j++)\r
             {\r
-                for (int i = 0; i < maxLength; i++)\r
+                for (i = 0; i < maxLength; i++)\r
                 {\r
                     cons2[i][j] = 0;\r
                 }\r
@@ -412,16 +396,16 @@ public class Conservation
             int[] sqnum;\r
             int j = 0;\r
 \r
-            while (j < sequences.size())\r
+            while (j < sequences.length)\r
             {\r
                 sqnum = (int[]) seqNums.elementAt(j);\r
 \r
-                for (int i = 1; i < sqnum.length; i++)\r
+                for (i = 1; i < sqnum.length; i++)\r
                 {\r
                     cons2[i - 1][sqnum[i]]++;\r
                 }\r
 \r
-                for (int i = sqnum.length - 1; i < maxLength; i++)\r
+                for (i = sqnum.length - 1; i < maxLength; i++)\r
                 {\r
                     cons2[i][23]++; // gap count\r
                 }\r
@@ -462,7 +446,7 @@ public class Conservation
         int[][] BLOSUM62 = jalview.schemes.ResidueProperties.getBLOSUM62();\r
 \r
         //Loop over columns // JBPNote Profiling info\r
-        //    long ts = System.currentTimeMillis();\r
+        //long ts = System.currentTimeMillis();\r
         //long te = System.currentTimeMillis();\r
         percentIdentity2();\r
 \r
@@ -470,12 +454,11 @@ public class Conservation
         int[] lengths = new int[size];\r
         double tot, bigtot, sr, tmp;\r
         double [] x, xx;\r
-        int l, j, i, ii, seqNum;\r
+        int l, j, i, ii, i2, k, seqNum;\r
 \r
         for (l = 0; l < size; l++)\r
             lengths[l] = ((int[]) seqNums.elementAt(l)).length - 1;\r
 \r
-\r
         for (j = start; j <= end; j++)\r
         {\r
             bigtot = 0;\r
@@ -487,28 +470,17 @@ public class Conservation
             {\r
                 x[ii] = 0;\r
 \r
-                try\r
-                {\r
-                    for (int i2 = 0; i2 < 24; i2++)\r
-                    {\r
-                        x[ii] += (((double) cons2[j][i2] * BLOSUM62[ii][i2]) +\r
-                        4);\r
-                    }\r
-                }\r
-                catch (Exception e)\r
+                for (i2 = 0; i2 < 24; i2++)\r
                 {\r
-                    System.err.println("Exception during quality calculation.");\r
-                    e.printStackTrace();\r
+                  x[ii] += ( ( (double) cons2[j][i2] * BLOSUM62[ii][i2]) +\r
+                            4);\r
                 }\r
 \r
-                //System.out.println("X " + ii + " " + x[ii]);\r
-                x[ii] /= (size);\r
-\r
-                //System.out.println("X " + ii + " " + x[ii]);\r
+                x[ii] /= size;\r
             }\r
 \r
             // Now calculate D for each position and sum\r
-            for (int k = 0; k < size; k++)\r
+            for (k = 0; k < size; k++)\r
             {\r
                 tot = 0;\r
                 xx = new double[24];\r
@@ -520,15 +492,7 @@ public class Conservation
                 {\r
                     sr = 0;\r
 \r
-                    try\r
-                    {\r
-                        sr = (double) BLOSUM62[i][seqNum] + 4;\r
-                    }\r
-                    catch (Exception e)\r
-                    {\r
-                        System.out.println("Exception in sr: " + e);\r
-                        e.printStackTrace();\r
-                    }\r
+                    sr = (double) BLOSUM62[i][seqNum] + 4;\r
 \r
                     //Calculate X with another loop over residues\r
                     //  System.out.println("Xi " + i + " " + x[i] + " " + sr);\r
@@ -549,7 +513,6 @@ public class Conservation
             //      bigtot  = bigtot * (size-cons2[j][23])/size;\r
             quality.addElement(new Double(bigtot));\r
 \r
-\r
             // Need to normalize by gaps\r
         }\r
 \r