public static final void calculate(SequenceI[] sequences, int start,
int end, Hashtable[] result, boolean profile)
{
+ // long now = System.currentTimeMillis();
Hashtable residueHash;
- int maxCount, nongap, i, j, v;
- int jSize = sequences.length;
- String maxResidue;
+ int seqCount = sequences.length;
char c = '-';
- float percentage;
+ SparseIntArray profileSizes = new SparseIntArray();
- // int[] values = new int[255];
-
- char[] seq;
-
- for (i = start; i < end; i++)
+ for (int column = start; column < end; column++)
{
residueHash = new Hashtable();
- maxCount = 0;
- maxResidue = "";
- nongap = 0;
- // values = new int[255];
- SparseIntArray values = new SparseIntArray();
-
- for (j = 0; j < jSize; j++)
+ int maxCount = 0;
+ String maxResidue = "";
+ int nongap = 0;
+ // int [] values = new int[255];
+ int guessProfileSize = estimateProfileSize(profileSizes);
+ SparseIntArray values = new SparseIntArray(guessProfileSize);
+
+ for (int row = 0; row < seqCount; row++)
{
- if (sequences[j] == null)
+ if (sequences[row] == null)
{
System.err
.println("WARNING: Consensus skipping null sequence - possible race condition.");
continue;
}
- seq = sequences[j].getSequence();
- if (seq.length > i)
+ char[] seq = sequences[row].getSequence();
+ if (seq.length > column)
{
- c = seq[i];
+ c = seq[column];
if (c == '.' || c == ' ')
{
if (c == '-')
{
// values['-']++;
- values.put('-', values.get('-') + 1);
+ // values.put('-', values.get('-') + 1);
+ values.increment('-');
continue;
}
else if ('a' <= c && c <= 'z')
nongap++;
// values[c]++;
- values.put(c, values.get(c) + 1);
-
+ // values.put(c, values.get(c) + 1);
+ values.increment(c);
}
else
{
// values['-']++;
- values.put('-', values.get('-') + 1);
+ // values.put('-', values.get('-') + 1);
+ values.increment('-');
}
}
- if (jSize == 1)
+ if (seqCount == 1)
{
maxResidue = String.valueOf(c);
maxCount = 1;
}
else
{
- // FIXME iterate over values keys instead
- for (v = 'A'; v <= 'Z'; v++)
+ // iterate over values keys not alphabet
+ // for (int v = 'A'; v <= 'Z'; v++)
+ for (int k = 0; k < values.size(); k++)
{
- // TODO why ignore values[v] == 1?
- int count = values.get(v); // values[v];
- if (count < 1 /* 2 */|| count < maxCount)
+ int v = values.keyAt(k);
+ int count = values.valueAt(k); // values[v];
+ if (count < 1 || count < maxCount)
{
continue;
}
if (count > maxCount)
{
- maxResidue = CHARS[v - 'A'];
+ maxResidue = String.valueOf((char) v);// CHARS[v - 'A'];
}
else if (count == maxCount)
{
- maxResidue += CHARS[v - 'A'];
+ maxResidue += String.valueOf((char) v); // CHARS[v - 'A'];
}
maxCount = count;
}
}
if (profile)
{
- // TODO use a 1-dimensional array with jSize, nongap in [0] and [1]
// residueHash.put(PROFILE, new int[][] { values,
// new int[] { jSize, nongap } });
- residueHash.put(PROFILE, new Profile(values, jSize, nongap));
+ residueHash.put(PROFILE, new Profile(values, seqCount, nongap));
}
residueHash.put(MAXCOUNT, new Integer(maxCount));
residueHash.put(MAXRESIDUE, maxResidue);
- percentage = ((float) maxCount * 100) / jSize;
+ float percentage = ((float) maxCount * 100) / seqCount;
residueHash.put(PID_GAPS, new Float(percentage));
if (nongap > 0)
}
residueHash.put(PID_NOGAPS, new Float(percentage));
- result[i] = residueHash;
+ result[column] = residueHash;
+
+ profileSizes.increment(values.size());
}
+ // long elapsed = System.currentTimeMillis() - now;
+ // System.out.println(elapsed);
+ }
+
+ /**
+ * Make an estimate of the profile size we are going to compute i.e. how many
+ * different characters may be present in it. Overestimating has a cost of
+ * using more memory than necessary. Underestimating has a cost of needing to
+ * extend the SparseIntArray holding the profile counts.
+ *
+ * @param profileSizes
+ * counts of sizes of profiles so far encountered, keyed by profile
+ * size
+ * @return 4 if no profile sizes have been recorded yet, otherwise the key
+ * stored at the last index of profileSizes
+ */
+ static int estimateProfileSize(SparseIntArray profileSizes)
+ {
+ if (profileSizes.size() == 0)
+ {
+ return 4;
+ }
+
+ /*
+ * could do a statistical heuristic here e.g. 75%ile
+ * for now just return the key at the last index, i.e. the largest
+ * profile size seen so far (assumes SparseIntArray holds its keys in
+ * ascending order - TODO confirm)
+ */
+ return profileSizes.keyAt(profileSizes.size() - 1);
}
/**
boolean ignoreGapsInConsensusCalculation,
boolean includeAllConsSymbols, char[] alphabet, long nseq)
{
+ // long now = System.currentTimeMillis();
if (consensus == null || consensus.annotations == null
|| consensus.annotations.length < width)
{
ca[c] = (char) theChar;// c;
// ca[c] = new char[]
// { (char) c };
- vl[c] = profile.profile.get(theChar);// profile[0][c];
+ vl[c] = profile.profile.valueAt(c);// profile[0][c];
}
/*
consensus.annotations[i] = new Annotation(maxRes,
mouseOver.toString(), ' ', value);
}
+ // long elapsed = System.currentTimeMillis() - now;
+ // System.out.println(-elapsed);
}
/**
{
int c = profile.profile.keyAt(i);
ca[i] = (char) c;
- vl[i] = profile.profile.get(c);
+ vl[i] = profile.profile.valueAt(i);
}
QuickSort.sort(vl, ca);
int nextArrayPos = 2;
// for (int c = ca.length - 1; profile[0][ca[c]] > 0; c--)
{
int theChar = profile.profile.keyAt(i);
- int charCount = profile.profile.get(theChar);
+ int charCount = profile.profile.valueAt(i);
// if (ca[c] != '-')
if (theChar != '-')