From: gmungoc Date: Fri, 23 Sep 2016 15:28:41 +0000 (+0100) Subject: JAL-98 performance tweaks to use of SparseIntArray X-Git-Tag: Release_2_10_1^2~22^2~3^2~21 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=72694ab755c766fcc84a5e329bfd066242435614;p=jalview.git JAL-98 performance tweaks to use of SparseIntArray --- diff --git a/src/jalview/analysis/AAFrequency.java b/src/jalview/analysis/AAFrequency.java index 274a114..cb06882 100755 --- a/src/jalview/analysis/AAFrequency.java +++ b/src/jalview/analysis/AAFrequency.java @@ -109,38 +109,34 @@ public class AAFrequency public static final void calculate(SequenceI[] sequences, int start, int end, Hashtable[] result, boolean profile) { + // long now = System.currentTimeMillis(); Hashtable residueHash; - int maxCount, nongap, i, j, v; - int jSize = sequences.length; - String maxResidue; + int seqCount = sequences.length; char c = '-'; - float percentage; + SparseIntArray profileSizes = new SparseIntArray(); - // int[] values = new int[255]; - - char[] seq; - - for (i = start; i < end; i++) + for (int column = start; column < end; column++) { residueHash = new Hashtable(); - maxCount = 0; - maxResidue = ""; - nongap = 0; - // values = new int[255]; - SparseIntArray values = new SparseIntArray(); - - for (j = 0; j < jSize; j++) + int maxCount = 0; + String maxResidue = ""; + int nongap = 0; + // int [] values = new int[255]; + int guessProfileSize = estimateProfileSize(profileSizes); + SparseIntArray values = new SparseIntArray(guessProfileSize); + + for (int row = 0; row < seqCount; row++) { - if (sequences[j] == null) + if (sequences[row] == null) { System.err .println("WARNING: Consensus skipping null sequence - possible race condition."); continue; } - seq = sequences[j].getSequence(); - if (seq.length > i) + char[] seq = sequences[row].getSequence(); + if (seq.length > column) { - c = seq[i]; + c = seq[column]; if (c == '.' || c == ' ') { @@ -150,7 +146,8 @@ public class AAFrequency if (c == '-') { // values['-']++; - values.put('-', values.get('-') + 1); + // values.put('-', values.get('-') + 1); + values.increment('-'); continue; } else if ('a' <= c && c <= 'z') @@ -160,39 +157,41 @@ public class AAFrequency nongap++; // values[c]++; - values.put(c, values.get(c) + 1); - + // values.put(c, values.get(c) + 1); + values.increment(c); } else { // values['-']++; - values.put('-', values.get('-') + 1); + // values.put('-', values.get('-') + 1); + values.increment('-'); } } - if (jSize == 1) + if (seqCount == 1) { maxResidue = String.valueOf(c); maxCount = 1; } else { - // FIXME iterate over values keys instead - for (v = 'A'; v <= 'Z'; v++) + // iterate over values keys not alphabet + // for (int v = 'A'; v <= 'Z'; v++) + for (int k = 0; k < values.size(); k++) { - // TODO why ignore values[v] == 1? - int count = values.get(v); // values[v]; - if (count < 1 /* 2 */|| count < maxCount) + int v = values.keyAt(k); + int count = values.valueAt(k); // values[v]; + if (count < 1 || count < maxCount) { continue; } if (count > maxCount) { - maxResidue = CHARS[v - 'A']; + maxResidue = String.valueOf((char) v);// CHARS[v - 'A']; } else if (count == maxCount) { - maxResidue += CHARS[v - 'A']; + maxResidue += String.valueOf((char) v); // CHARS[v - 'A']; } maxCount = count; } @@ -203,15 +202,14 @@ public class AAFrequency } if (profile) { - // TODO use a 1-dimensional array with jSize, nongap in [0] and [1] // residueHash.put(PROFILE, new int[][] { values, // new int[] { jSize, nongap } }); - residueHash.put(PROFILE, new Profile(values, jSize, nongap)); + residueHash.put(PROFILE, new Profile(values, seqCount, nongap)); } residueHash.put(MAXCOUNT, new Integer(maxCount)); residueHash.put(MAXRESIDUE, maxResidue); - percentage = ((float) maxCount * 100) / jSize; + float percentage = ((float) maxCount * 100) / seqCount; residueHash.put(PID_GAPS, new Float(percentage)); if (nongap > 0) @@ -221,8 +219,36 @@ public class AAFrequency } residueHash.put(PID_NOGAPS, new Float(percentage)); - result[i] = residueHash; + result[column] = residueHash; + + profileSizes.increment(values.size()); } + // long elapsed = System.currentTimeMillis() - now; + // System.out.println(elapsed); + } + + /** + * Make an estimate of the profile size we are going to compute i.e. how many + * different characters may be present in it. Overestimating has a cost of + * using more memory than necessary. Underestimating has a cost of needing to + * extend the SparseIntArray holding the profile counts. + * + * @param profileSizes + * counts of sizes of profiles so far encountered + * @return + */ + static int estimateProfileSize(SparseIntArray profileSizes) + { + if (profileSizes.size() == 0) + { + return 4; + } + + /* + * could do a statistical heuristic here e.g. 75%ile + * for now just return the largest value + */ + return profileSizes.keyAt(profileSizes.size() - 1); } /** @@ -276,6 +302,7 @@ public class AAFrequency boolean ignoreGapsInConsensusCalculation, boolean includeAllConsSymbols, char[] alphabet, long nseq) { + // long now = System.currentTimeMillis(); if (consensus == null || consensus.annotations == null || consensus.annotations.length < width) { @@ -338,7 +365,7 @@ public class AAFrequency ca[c] = (char) theChar;// c; // ca[c] = new char[] // { (char) c }; - vl[c] = profile.profile.get(theChar);// profile[0][c]; + vl[c] = profile.profile.valueAt(c);// profile[0][c]; } /* @@ -377,6 +404,8 @@ public class AAFrequency consensus.annotations[i] = new Annotation(maxRes, mouseOver.toString(), ' ', value); } + // long elapsed = System.currentTimeMillis() - now; + // System.out.println(-elapsed); } /** @@ -437,7 +466,7 @@ public class AAFrequency { int c = profile.profile.keyAt(i); ca[i] = (char) c; - vl[i] = profile.profile.get(c); + vl[i] = profile.profile.valueAt(i); } QuickSort.sort(vl, ca); int nextArrayPos = 2; @@ -450,7 +479,7 @@ public class AAFrequency // for (int c = ca.length - 1; profile[0][ca[c]] > 0; c--) { int theChar = profile.profile.keyAt(i); - int charCount = profile.profile.get(theChar); + int charCount = profile.profile.valueAt(i); // if (ca[c] != '-') if (theChar != '-') diff --git a/src/jalview/ext/android/SparseIntArray.java b/src/jalview/ext/android/SparseIntArray.java index 0602d50..0e05803 100644 --- a/src/jalview/ext/android/SparseIntArray.java +++ b/src/jalview/ext/android/SparseIntArray.java @@ -346,4 +346,23 @@ public class SparseIntArray implements Cloneable buffer.append('}'); return buffer.toString(); } + + /** + * Method added for Jalview to increment a key's value if present, else add it + * with the value 1 + * + * @param key + */ + public void increment(int key) + { + int i = ContainerHelpers.binarySearch(mKeys, mSize, key); + if (i >= 0) + { + mValues[i]++; + } + else + { + put(key, 1); + } + } }