From d67d1f2fcae60a351e318e6885c20420f7f6964e Mon Sep 17 00:00:00 2001 From: amwaterhouse Date: Tue, 10 Oct 2006 10:28:31 +0000 Subject: [PATCH] Optimized --- src/jalview/analysis/Conservation.java | 191 +++++++++++++------------------- 1 file changed, 77 insertions(+), 114 deletions(-) diff --git a/src/jalview/analysis/Conservation.java b/src/jalview/analysis/Conservation.java index f65420c..d1b508b 100755 --- a/src/jalview/analysis/Conservation.java +++ b/src/jalview/analysis/Conservation.java @@ -31,13 +31,13 @@ import java.util.*; */ public class Conservation { - Vector sequences; + SequenceI [] sequences; int start; int end; Vector seqNums; // vector of int vectors where first is sequence checksum int maxLength = 0; // used by quality calcs boolean seqNumsChanged = false; // updated after any change via calcSeqNum; - Vector total = new Vector(); + Hashtable [] total; /** Stores calculated quality values */ public Vector quality; @@ -64,28 +64,27 @@ public class Conservation public Conservation(String name, Hashtable propHash, int threshold, Vector sequences, int start, int end) { + this.name = name; this.propHash = propHash; this.threshold = threshold; - this.sequences = sequences; this.start = start; this.end = end; - seqNums = new Vector(sequences.size()); - calcSeqNums(); - } - /** - * DOCUMENT ME! - */ - private void calcSeqNums() - { - int i=0, iSize=sequences.size(); - for (i=0; i < iSize; i++) + + int s, sSize = sequences.size(); + SequenceI[] sarray = new SequenceI[sSize]; + this.sequences = sarray; + + for (s = 0; s < sSize; s++) { - calcSeqNum(i); + sarray[s] = (SequenceI) sequences.elementAt(s); + if(sarray[s].getLength()>maxLength) + maxLength = sarray[s].getLength(); } } + /** * DOCUMENT ME! * @@ -96,9 +95,11 @@ public class Conservation String sq = null; // for dumb jbuilder not-inited exception warning int[] sqnum = null; - if ((i > -1) && (i < sequences.size())) + int sSize = sequences.length; + + if ((i > -1) && (i < sSize)) { - sq = ((SequenceI) sequences.elementAt(i)).getSequence(); + sq = sequences[i].getSequence(); if (seqNums.size() <= i) { @@ -110,7 +111,6 @@ public class Conservation int j; int len; seqNumsChanged = true; - sq = ((SequenceI) sequences.elementAt(i)).getSequence(); len = sq.length(); if (maxLength < len) @@ -123,12 +123,14 @@ public class Conservation for (j = 1; j <= len; j++) { - sqnum[j] = ((Integer) jalview.schemes.ResidueProperties.aaHash.get(String.valueOf( - sq.charAt(j - 1)))).intValue(); // yuk - JBPNote - case taken care of in aaHash + sqnum[j] = jalview.schemes.ResidueProperties.aaIndex[sq.charAt(j-1)]; } + seqNums.setElementAt(sqnum, i); } + else + System.out.println("NEVER THE EXCEPTION"); } else { @@ -143,80 +145,54 @@ public class Conservation */ public void calculate() { - Hashtable resultHash, residueHash, ht; - int count, thresh, j, jSize = sequences.size(); + Hashtable resultHash, ht; + int thresh, j, jSize = sequences.length; + int[] values; // Replaces residueHash String type, res=null; - SequenceI sequence; char c; - Enumeration enumeration, enumeration2; + Enumeration enumeration2; + + total = new Hashtable[maxLength]; for (int i = start; i <= end; i++) { - resultHash = new Hashtable(); - residueHash = new Hashtable(); + values = new int[132]; for (j = 0; j < jSize; j++) { - // JBPNote - have to make sure elements of the sequences vector - // are tested like this everywhere... - sequence = (Sequence) sequences.elementAt(j); - - if (sequence.getLength() > i) - { - c = sequence.getCharAt(i); - - // No need to check if its a '-' - if(c == '.' || c==' ') - c = '-'; - - if ('a' <= c && c <= 'z') - { - // TO UPPERCASE !!! - //Faster than toUpperCase - c -= ('a' - 'A') ; - } + if (sequences[j].getLength() > i) + { + c = sequences[j].getCharAt(i); - res = String.valueOf( c ); + // No need to check if its a '-' + if (c == '.' || c == ' ') + c = '-'; + if ('a' <= c && c <= 'z') + { + c -= (32);// 32 = 'a' - 'A' + } - if (residueHash.containsKey(res)) - { - count = ((Integer) residueHash.get(res)).intValue(); - count++; - residueHash.put(res, new Integer(count)); - } - else - { - residueHash.put(res, new Integer(1)); - } - } - else - { - if (residueHash.containsKey("-")) - { - count = ((Integer) residueHash.get("-")).intValue(); - count++; - residueHash.put("-", new Integer(count)); - } - else - { - residueHash.put("-", new Integer(1)); - } - } + values[c]++; + } + else + { + values['-']++; + } } //What is the count threshold to count the residues in residueHash() - thresh = (threshold * (sequences.size())) / 100; + thresh = (threshold * (jSize)) / 100; //loop over all the found residues - enumeration = residueHash.keys(); - - while (enumeration.hasMoreElements()) + resultHash = new Hashtable(); + for (int v = '-'; v < 'Z'; v++) { - res = (String) enumeration.nextElement(); - if (((Integer) residueHash.get(res)).intValue() > thresh) + if (values[v] > thresh) { + res = String.valueOf( (char) v); + //Now loop over the properties enumeration2 = propHash.keys(); @@ -246,7 +222,7 @@ public class Conservation } } - total.addElement(resultHash); + total[i] = resultHash; } } @@ -262,18 +238,18 @@ public class Conservation int nres = 0; int[] r = new int[2]; char f = '$'; - int i, iSize = sequences.size(); + int i, iSize = sequences.length; char c; for (i = 0; i < iSize; i++) { - if (j >= ((Sequence) sequences.elementAt(i)).getLength()) + if (j >= sequences[i].getLength()) { count++; continue; } - c = ((Sequence) sequences.elementAt(i)).getCharAt(j); // gaps do not have upper/lower case + c = sequences[i].getCharAt(j); // gaps do not have upper/lower case if (jalview.util.Comparison.isGap((c))) { @@ -323,11 +299,11 @@ public class Conservation { gapcons = countConsNGaps(i); totGaps = gapcons[1]; - pgaps = ((float) totGaps * 100) / (float) sequences.size(); + pgaps = ((float) totGaps * 100) / (float) sequences.length; if (percentageGaps > pgaps) { - resultHash = (Hashtable) total.elementAt(i - start); + resultHash = total[i - start]; //Now find the verdict count = 0; @@ -394,7 +370,15 @@ public class Conservation */ private void percentIdentity2() { - calcSeqNums(); // updates maxLength, too. + seqNums = new Vector(); + // calcSeqNum(s); + int i = 0, iSize = sequences.length; + //Do we need to calculate this again? + for (i = 0; i < iSize; i++) + { + calcSeqNum(i); + } + if ((cons2 == null) || seqNumsChanged) { @@ -403,7 +387,7 @@ public class Conservation // Initialize the array for (int j = 0; j < 24; j++) { - for (int i = 0; i < maxLength; i++) + for (i = 0; i < maxLength; i++) { cons2[i][j] = 0; } @@ -412,16 +396,16 @@ public class Conservation int[] sqnum; int j = 0; - while (j < sequences.size()) + while (j < sequences.length) { sqnum = (int[]) seqNums.elementAt(j); - for (int i = 1; i < sqnum.length; i++) + for (i = 1; i < sqnum.length; i++) { cons2[i - 1][sqnum[i]]++; } - for (int i = sqnum.length - 1; i < maxLength; i++) + for (i = sqnum.length - 1; i < maxLength; i++) { cons2[i][23]++; // gap count } @@ -462,7 +446,7 @@ public class Conservation int[][] BLOSUM62 = jalview.schemes.ResidueProperties.getBLOSUM62(); //Loop over columns // JBPNote Profiling info - // long ts = System.currentTimeMillis(); + //long ts = System.currentTimeMillis(); //long te = System.currentTimeMillis(); percentIdentity2(); @@ -470,12 +454,11 @@ public class Conservation int[] lengths = new int[size]; double tot, bigtot, sr, tmp; double [] x, xx; - int l, j, i, ii, seqNum; + int l, j, i, ii, i2, k, seqNum; for (l = 0; l < size; l++) lengths[l] = ((int[]) seqNums.elementAt(l)).length - 1; - for (j = start; j <= end; j++) { bigtot = 0; @@ -487,28 +470,17 @@ public class Conservation { x[ii] = 0; - try - { - for (int i2 = 0; i2 < 24; i2++) - { - x[ii] += (((double) cons2[j][i2] * BLOSUM62[ii][i2]) + - 4); - } - } - catch (Exception e) + for (i2 = 0; i2 < 24; i2++) { - System.err.println("Exception during quality calculation."); - e.printStackTrace(); + x[ii] += ( ( (double) cons2[j][i2] * BLOSUM62[ii][i2]) + + 4); } - //System.out.println("X " + ii + " " + x[ii]); - x[ii] /= (size); - - //System.out.println("X " + ii + " " + x[ii]); + x[ii] /= size; } // Now calculate D for each position and sum - for (int k = 0; k < size; k++) + for (k = 0; k < size; k++) { tot = 0; xx = new double[24]; @@ -520,15 +492,7 @@ public class Conservation { sr = 0; - try - { - sr = (double) BLOSUM62[i][seqNum] + 4; - } - catch (Exception e) - { - System.out.println("Exception in sr: " + e); - e.printStackTrace(); - } + sr = (double) BLOSUM62[i][seqNum] + 4; //Calculate X with another loop over residues // System.out.println("Xi " + i + " " + x[i] + " " + sr); @@ -549,7 +513,6 @@ public class Conservation // bigtot = bigtot * (size-cons2[j][23])/size; quality.addElement(new Double(bigtot)); - // Need to normalize by gaps } -- 1.7.10.2