*/
package jalview.analysis;
+import jalview.analysis.ResidueCount.SymbolCounts;
import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
*/
public class AAFrequency
{
- private static final int TO_UPPER_CASE = 'A' - 'a'; // -32
-
public static final String MAXCOUNT = "C";
public static final String MAXRESIDUE = "R";
}
}
- public static final Hashtable[] calculate(List<SequenceI> list,
+ public static final Profile[] calculate(List<SequenceI> list,
int start, int end)
{
return calculate(list, start, end, false);
}
- public static final Hashtable[] calculate(List<SequenceI> sequences,
+ public static final Profile[] calculate(List<SequenceI> sequences,
int start, int end, boolean profile)
{
SequenceI[] seqs = new SequenceI[sequences.size()];
}
}
- Hashtable[] reply = new Hashtable[width];
+ Profile[] reply = new Profile[width];
if (end >= width)
{
}
}
- public static final void calculate(SequenceI[] sequences, int start,
- int end, Hashtable[] result, boolean profile)
+ /**
+ * Calculate the consensus symbol(s) for each column in the given range.
+ *
+ * @param sequences
+ * @param start
+ * @param end
+ * @param result
+ * array in which to store profile per column
+ * @param saveFullProfile
+ * if true, store all symbol counts
+ */
+ public static final void calculate(final SequenceI[] sequences,
+ int start, int end, Profile[] result, boolean saveFullProfile)
{
- long now = System.currentTimeMillis();
- Hashtable residueHash;
+ // long now = System.currentTimeMillis();
int seqCount = sequences.length;
- char c = '-';
- SparseIntArray profileSizes = new SparseIntArray();
+ boolean nucleotide = false;
+ int nucleotideCount = 0;
+ int peptideCount = 0;
for (int column = start; column < end; column++)
{
- residueHash = new Hashtable();
- int maxCount = 0;
- String maxResidue = "";
- int nongap = 0;
- // int [] values = new int[255];
- int guessProfileSize = estimateProfileSize(profileSizes);
- SparseIntArray values = new SparseIntArray(guessProfileSize);
+ /*
+ * Apply a heuristic to detect nucleotide data (which can
+ * be counted in more compact arrays); here we test for
+ * more than 90% nucleotide; recheck every 10 columns in case
+ * of misleading data e.g. highly conserved Alanine in peptide!
+ * Mistakenly guessing nucleotide has a small performance cost,
+ * as it will result in counting in sparse arrays.
+ * Mistakenly guessing peptide has a small space cost,
+ * as it will use a larger than necessary array to hold counts.
+ */
+ if (nucleotideCount > 100 && column % 10 == 0)
+ {
+ nucleotide = (9 * peptideCount < nucleotideCount);
+ }
+ ResidueCount residueCounts = new ResidueCount(nucleotide);
for (int row = 0; row < seqCount; row++)
{
char[] seq = sequences[row].getSequence();
if (seq.length > column)
{
- c = seq[column];
-
- if (Comparison.isGap(c))
+ char c = seq[column];
+ residueCounts.add(c);
+ if (Comparison.isNucleotide(c))
{
- // values['-']++;
- // values.put('-', values.get('-') + 1);
- values.add('-', 1);
- continue;
+ nucleotideCount++;
}
- else if ('a' <= c && c <= 'z')
+ else if (!Comparison.isGap(c))
{
- c += TO_UPPER_CASE;
+ peptideCount++;
}
-
- nongap++;
- // values[c]++;
- // values.put(c, values.get(c) + 1);
- values.add(c, 1);
}
else
{
/*
- * here we count as a gap if the sequence doesn't
+ * here we count a gap if the sequence doesn't
* reach this column (is that correct?)
*/
- // values['-']++;
- // values.put('-', values.get('-') + 1);
- values.add('-', 1);
+ residueCounts.addGap();
}
}
- if (seqCount == 1)
- {
- maxResidue = String.valueOf(c);
- maxCount = 1;
- }
- else
- {
- // iterate over values keys not alphabet
- // for (int v = 'A'; v <= 'Z'; v++)
- for (int k = 0; k < values.size(); k++)
- {
- int v = values.keyAt(k);
- int count = values.valueAt(k); // values[v];
- if (v == '-' || count < 1 || count < maxCount)
- {
- continue;
- }
-
- if (count > maxCount)
- {
- maxResidue = String.valueOf((char) v);// CHARS[v - 'A'];
- }
- else if (count == maxCount)
- {
- maxResidue += String.valueOf((char) v); // CHARS[v - 'A'];
- }
- maxCount = count;
- }
- }
- if (maxResidue.length() == 0)
- {
- maxResidue = "-";
- }
- if (profile)
- {
- // residueHash.put(PROFILE, new int[][] { values,
- // new int[] { jSize, nongap } });
- residueHash.put(PROFILE, new Profile(values, seqCount, nongap));
- }
- residueHash.put(MAXCOUNT, new Integer(maxCount));
- residueHash.put(MAXRESIDUE, maxResidue);
- float percentage = ((float) maxCount * 100) / seqCount;
- residueHash.put(PID_GAPS, new Float(percentage));
+ int maxCount = residueCounts.getModalCount();
+ String maxResidue = residueCounts.getResiduesForCount(maxCount);
+ int gapCount = residueCounts.getGapCount();
+ Profile profile = new Profile(seqCount, gapCount, maxCount,
+ maxResidue);
- if (nongap > 0)
+ if (saveFullProfile)
{
- // calculate for non-gapped too
- percentage = ((float) maxCount * 100) / nongap;
+ profile.setCounts(residueCounts);
}
- residueHash.put(PID_NOGAPS, new Float(percentage));
-
- result[column] = residueHash;
- profileSizes.add(values.size(), 1);
+ result[column] = profile;
}
- long elapsed = System.currentTimeMillis() - now;
- System.out.println(elapsed);
+ // long elapsed = System.currentTimeMillis() - now;
+ // System.out.println(elapsed);
}
/**
}
/**
- * Compute all or part of the annotation row from the given consensus
- * hashtable
- *
- * @param consensus
- * - pre-allocated annotation row
- * @param hconsensus
- * @param iStart
- * @param width
- * @param ignoreGapsInConsensusCalculation
- * @param includeAllConsSymbols
- * @param nseq
- */
- public static void completeConsensus(AlignmentAnnotation consensus,
- Hashtable[] hconsensus, int iStart, int width,
- boolean ignoreGapsInConsensusCalculation,
- boolean includeAllConsSymbols, long nseq)
- {
- completeConsensus(consensus, hconsensus, iStart, width,
- ignoreGapsInConsensusCalculation, includeAllConsSymbols, null,
- nseq);
- }
-
- /**
* Derive the consensus annotations to be added to the alignment for display.
* This does not recompute the raw data, but may be called on a change in
* display options, such as 'show logo', which may in turn result in a change
*
* @param consensus
* the annotation row to add annotations to
- * @param hconsensus
+ * @param profiles
* the source consensus data
* @param iStart
* start column
* @param width
* end column
- * @param ignoreGapsInConsensusCalculation
- * if true, use the consensus calculated ignoring gaps
- * @param includeAllConsSymbols
+ * @param ignoreGaps
+ * if true, normalise residue percentages ignoring gaps
+ * @param showSequenceLogo
* if true include all consensus symbols, else just show modal
* residue
- * @param alphabet
* @param nseq
* number of sequences
*/
public static void completeConsensus(AlignmentAnnotation consensus,
- Hashtable[] hconsensus, int iStart, int width,
- boolean ignoreGapsInConsensusCalculation,
- boolean includeAllConsSymbols, char[] alphabet, long nseq)
+ Profile[] profiles, int iStart, int width, boolean ignoreGaps,
+ boolean showSequenceLogo, long nseq)
{
- long now = System.currentTimeMillis();
+ // long now = System.currentTimeMillis();
if (consensus == null || consensus.annotations == null
|| consensus.annotations.length < width)
{
- // called with a bad alignment annotation row - wait for it to be
- // initialised properly
+ /*
+ * called with a bad alignment annotation row
+ * wait for it to be initialised properly
+ */
return;
}
- final Format fmt = getPercentageFormat(nseq);
+ final int dp = getPercentageDp(nseq);
for (int i = iStart; i < width; i++)
{
- Hashtable hci;
- if (i >= hconsensus.length || ((hci = hconsensus[i]) == null))
- {
- // happens if sequences calculated over were shorter than alignment
- // width
- consensus.annotations[i] = null;
- continue;
- }
- Float fv = (Float) hci
- .get(ignoreGapsInConsensusCalculation ? PID_NOGAPS : PID_GAPS);
- if (fv == null)
+ Profile profile;
+ if (i >= profiles.length || ((profile = profiles[i]) == null))
{
+ /*
+ * happens if sequences calculated over were
+ * shorter than alignment width
+ */
consensus.annotations[i] = null;
- // data has changed below us .. give up and
continue;
}
- float value = fv.floatValue();
- String maxRes = hci.get(AAFrequency.MAXRESIDUE).toString();
- StringBuilder mouseOver = new StringBuilder(64);
- if (maxRes.length() > 1)
- {
- mouseOver.append("[").append(maxRes).append("] ");
- maxRes = "+";
- }
- else
- {
- mouseOver.append(hci.get(AAFrequency.MAXRESIDUE) + " ");
- }
- // int[][] profile = (int[][]) hci.get(AAFrequency.PROFILE);
- Profile profile = (Profile) hci.get(AAFrequency.PROFILE);
- if (profile != null && includeAllConsSymbols)
- {
- int sequenceCount = profile.height;// profile[1][0];
- int nonGappedCount = profile.nonGapped;// [1][1];
- int normalisedBy = ignoreGapsInConsensusCalculation ? nonGappedCount
- : sequenceCount;
- mouseOver.setLength(0);
- // TODO do this sort once only in calculate()?
- // char[][] ca = new char[profile[0].length][];
- // /int length = profile[0].length;
- int length = profile.profile.size();
- char[] ca = new char[length];
- // float[] vl = new float[length];
- int[] vl = new int[length];
- for (int c = 0; c < ca.length; c++)
- {
- int theChar = profile.profile.keyAt(c);
- ca[c] = (char) theChar;// c;
- // ca[c] = new char[]
- // { (char) c };
- vl[c] = profile.profile.valueAt(c);// profile[0][c];
- }
- /*
- * sort characters into ascending order of their counts
- */
- QuickSort.sort(vl, ca);
+ float value = profile.getPercentageIdentity(ignoreGaps);
- /*
- * traverse in reverse order (highest count first) to build tooltip
- */
- // for (int p = 0, c = ca.length - 1; profile[0][ca[c]] > 0; c--)
- for (int p = 0, c = ca.length - 1; c >= 0; c--)
- {
- final char residue = ca[c];
- if (residue != '-')
- {
- // float tval = profile[0][residue] * 100f / normalisedBy;
- // float tval = profile[0][residue] * 100f / normalisedBy;
- float tval = (vl[c] * 100f) / normalisedBy;
- mouseOver
- .append((((p == 0) ? "" : "; ")))
- .append(residue)
- .append(" ")
- .append(((fmt != null) ? fmt.form(tval) : ((int) tval)))
- .append("%");
- p++;
- }
- }
- }
- else
- {
- mouseOver.append(
- (((fmt != null) ? fmt.form(value) : ((int) value))))
- .append("%");
- }
- consensus.annotations[i] = new Annotation(maxRes,
- mouseOver.toString(), ' ', value);
+ String description = getTooltip(profile, value, showSequenceLogo,
+ ignoreGaps, dp);
+
+ consensus.annotations[i] = new Annotation(profile.getModalResidue(),
+ description, ' ', value);
}
- long elapsed = System.currentTimeMillis() - now;
- System.out.println(-elapsed);
+ // long elapsed = System.currentTimeMillis() - now;
+ // System.out.println(-elapsed);
}
/**
- * Returns a Format designed to show all significant figures for profile
- * percentages. For less than 100 sequences, returns null (the integer
- * percentage value will be displayed). For 100-999 sequences, returns "%3.1f"
+ * Returns a tooltip showing either
+ * <ul>
+ * <li>the full profile (percentages of all residues present), if
+ * showSequenceLogo is true, or</li>
+ * <li>just the modal (most common) residue(s), if showSequenceLogo is false</li>
+ * </ul>
+ * Percentages are as a fraction of all sequence, or only ungapped sequences
+ * if ignoreGaps is true.
*
- * @param nseq
+ * @param profile
+ * @param pid
+ * @param showSequenceLogo
+ * @param ignoreGaps
+ * @param dp
+ * the number of decimal places to format percentages to
* @return
*/
- protected static Format getPercentageFormat(long nseq)
+ static String getTooltip(Profile profile, float pid,
+ boolean showSequenceLogo, boolean ignoreGaps, int dp)
{
- int scale = 0;
- while (nseq >= 10)
+ ResidueCount counts = profile.getCounts();
+
+ String description = null;
+ if (counts != null && showSequenceLogo)
{
- scale++;
- nseq /= 10;
+ int normaliseBy = ignoreGaps ? profile.getNonGapped() : profile
+ .getHeight();
+ description = counts.getTooltip(normaliseBy, dp);
+ }
+ else
+ {
+ StringBuilder sb = new StringBuilder(64);
+ String maxRes = profile.getModalResidue();
+ if (maxRes.length() > 1)
+ {
+ sb.append("[").append(maxRes).append("] ");
+ maxRes = "+";
+ }
+ else
+ {
+ sb.append(maxRes).append(" ");
+ }
+ Format.appendPercentage(sb, pid, dp);
+ sb.append("%");
+ description = sb.toString();
}
- return scale <= 1 ? null : new Format("%3." + (scale - 1) + "f");
+ return description;
}
/**
* in descending order of percentage value
* </pre>
*
- * @param hconsensus
- * the data table from which to extract and sort values
+ * @param profile
+ * the data object from which to extract and sort values
* @param ignoreGaps
* if true, only non-gapped values are included in percentage
* calculations
* @return
*/
- public static int[] extractProfile(Hashtable hconsensus,
+ public static int[] extractProfile(Profile profile,
boolean ignoreGaps)
{
int[] rtnval = new int[64];
- // int[][] profile = (int[][]) hconsensus.get(AAFrequency.PROFILE);
- Profile profile = (Profile) hconsensus.get(AAFrequency.PROFILE);
- if (profile == null)
+ ResidueCount counts = profile.getCounts();
+ if (counts == null)
{
return null;
}
- // int profileLength = profile[0].length;
- int profileLength = profile.profile.size();
- char[] ca = new char[profileLength];
- float[] vl = new float[profileLength];
- // for (int c = 0; c < ca.length; c++)
- // {
- // ca[c] = (char) c;
- // vl[c] = profile[0][c];
- // }
- for (int i = 0; i < profileLength; i++)
- {
- int c = profile.profile.keyAt(i);
- ca[i] = (char) c;
- vl[i] = profile.profile.valueAt(i);
- }
- QuickSort.sort(vl, ca);
+
+ SymbolCounts symbolCounts = counts.getSymbolCounts();
+ char[] symbols = symbolCounts.symbols;
+ int[] values = symbolCounts.values;
+ QuickSort.sort(values, symbols);
int nextArrayPos = 2;
int totalPercentage = 0;
- int distinctValuesCount = 0;
- final int divisor = ignoreGaps ? profile.nonGapped : profile.height;
- // final int divisor = profile[1][ignoreGaps ? 1 : 0];
- int j = profile.profile.size();
- for (int i = 0; i < j; i++)
-// for (int c = ca.length - 1; profile[0][ca[c]] > 0; c--)
+ // FIXME what if all gapped (divisor is zero)?
+ final int divisor = ignoreGaps ? profile.getNonGapped() : profile
+ .getHeight();
+
+ /*
+ * traverse the arrays in reverse order (highest counts first)
+ */
+ for (int i = symbols.length - 1; i >= 0; i--)
{
- int theChar = profile.profile.keyAt(i);
- int charCount = profile.profile.valueAt(i);
-
-// if (ca[c] != '-')
- if (theChar != '-')
- {
-// rtnval[nextArrayPos++] = ca[c];
- rtnval[nextArrayPos++] = theChar;
-// final int percentage = (int) (profile[0][ca[c]] * 100f / divisor);
- final int percentage = (charCount * 100) / divisor;
- rtnval[nextArrayPos++] = percentage;
- totalPercentage += percentage;
- distinctValuesCount++;
- }
+ int theChar = symbols[i];
+ int charCount = values[i];
+
+ rtnval[nextArrayPos++] = theChar;
+ final int percentage = (charCount * 100) / divisor;
+ rtnval[nextArrayPos++] = percentage;
+ totalPercentage += percentage;
}
- rtnval[0] = distinctValuesCount;
+ rtnval[0] = symbols.length;
rtnval[1] = totalPercentage;
int[] result = new int[rtnval.length + 1];
result[0] = AlignmentAnnotation.SEQUENCE_PROFILE;
StringBuilder samePercent = new StringBuilder();
String percent = null;
String lastPercent = null;
- Format fmt = getPercentageFormat(nseqs);
+ int percentDecPl = getPercentageDp(nseqs);
for (int j = codons.length - 1; j >= 0; j--)
{
final int pct = codonCount * 100 / totalCount;
String codon = String
.valueOf(CodingUtils.decodeCodon(codonEncoded));
- percent = fmt == null ? Integer.toString(pct) : fmt.form(pct);
+ StringBuilder sb = new StringBuilder();
+ Format.appendPercentage(sb, pct, percentDecPl);
+ percent = sb.toString();
if (showProfileLogo || codonCount == modalCodonCount)
{
if (percent.equals(lastPercent) && j > 0)
mouseOver.toString(), ' ', pid);
}
}
+
+ /**
+ * Returns the number of decimal places to show for profile percentages. For
+ * less than 100 sequences, returns zero (the integer percentage value will be
+ * displayed). For 100-999 sequences, returns 1, for 1000-9999 returns 2, etc.
+ *
+ * @param nseq
+ * @return
+ */
+ protected static int getPercentageDp(long nseq)
+ {
+ int scale = 0;
+ while (nseq >= 100)
+ {
+ scale++;
+ nseq /= 10;
+ }
+ return scale;
+ }
}
package jalview.analysis;
-import jalview.ext.android.SparseIntArray;
+/**
+ * A data bean to hold the result of computing a profile for a column of an
+ * alignment
+ *
+ * @author gmcarstairs
+ *
+ */
public class Profile
{
/*
- * array of keys (chars) and values (counts)
+ * counts of keys (chars)
*/
- public final SparseIntArray profile;
+ private ResidueCount counts;
/*
* the number of sequences in the profile
*/
- public final int height;
+ private int height;
/*
* the number of non-gapped sequences in the profile
*/
- public final int nonGapped;
+ private int gapped;
- public Profile(SparseIntArray counts, int ht, int nongappedCount)
+ /*
+ * the highest count for any residue in the profile
+ */
+ private int maxCount;
+
+ /*
+ * the residue (e.g. K) or residues (e.g. KQW) with the
+ * highest count in the profile
+ */
+ private String modalResidue;
+
+ /**
+ * Constructor which allows derived data to be stored without having to store
+ * the full profile
+ *
+ * @param seqCount
+ * the number of sequences in the profile
+ * @param gaps
+ * the number of gapped sequences
+ * @param max
+ * the highest count for any residue
+ * @param modalres
+ * the residue (or concatenated residues) with the highest count
+ */
+ public Profile(int seqCount, int gaps, int max, String modalRes)
+ {
+ this.height = seqCount;
+ this.gapped = gaps;
+ this.maxCount = max;
+ this.modalResidue = modalRes;
+ }
+
+ /**
+ * Set the full profile of counts
+ *
+ * @param residueCounts
+ */
+ public void setCounts(ResidueCount residueCounts)
+ {
+ this.counts = residueCounts;
+ }
+
+ /**
+ * Returns the percentage identity of the profile, i.e. the highest proportion
+ * of conserved (equal) symbols. The percentage is as a fraction of all
+ * sequences, or only ungapped sequences if flag ignoreGaps is set true.
+ *
+ * @param ignoreGaps
+ * @return
+ */
+ public float getPercentageIdentity(boolean ignoreGaps)
{
- this.profile = counts;
- this.height = ht;
- this.nonGapped = nongappedCount;
+ if (height == 0)
+ {
+ return 0f;
+ }
+ float pid = 0f;
+ if (ignoreGaps && gapped < height)
+ {
+ pid = (maxCount * 100f) / (height - gapped);
+ }
+ else
+ {
+ pid = (maxCount * 100f) / height;
+ }
+ return pid;
}
- public SparseIntArray getProfile()
+ public ResidueCount getCounts()
+ {
+ return counts;
+ }
+
+ public int getHeight()
+ {
+ return height;
+ }
+
+ public int getGapped()
+ {
+ return gapped;
+ }
+
+ public int getMaxCount()
+ {
+ return maxCount;
+ }
+
+ public String getModalResidue()
+ {
+ return modalResidue;
+ }
+
+ /**
+ * Answers the number of non-gapped sequences in the profile
+ *
+ * @return
+ */
+ public int getNonGapped()
{
- return profile;
+ return height - gapped;
}
}
--- /dev/null
+package jalview.analysis;
+
+import jalview.util.Comparison;
+import jalview.util.Format;
+import jalview.util.QuickSort;
+import jalview.util.SparseCount;
+
+/**
+ * A class to count occurrences of residues in a profile, optimised for speed
+ * and memory footprint.
+ * @author gmcarstairs
+ *
+ */
+public class ResidueCount
+{
+ /**
+ * A data bean to hold the results of counting symbols
+ */
+ public class SymbolCounts
+ {
+ /**
+ * the symbols seen (as char values)
+ */
+ public final char[] symbols;
+
+ /**
+ * the counts for each symbol, in the same order as the symbols
+ */
+ public final int[] values;
+
+ SymbolCounts(char[] s, int[] v)
+ {
+ symbols = s;
+ values = v;
+ }
+ }
+
+ private static final int TOUPPERCASE = 'A' - 'a';
+
+ /*
+ * nucleotide symbols to count (including N unknown)
+ */
+ private static final String NUCS = "ACGTUN";
+
+ /*
+ * amino acid symbols to count (including X unknown)
+ * NB we also include U so as to support counting of RNA bases
+ * in the "don't know" case of nucleotide / peptide
+ */
+ private static final String AAS = "ACDEFGHIKLMNPQRSTUVWXY";
+
+ private static final int GAP_COUNT = 0;
+
+ /*
+ * fast lookup tables holding the index into our count
+ * arrays of each symbol; index 0 is reserved for gap counting
+ */
+ private static int[] NUC_INDEX = new int[26];
+
+ private static int[] AA_INDEX = new int[26];
+ static
+ {
+ for (int i = 0; i < NUCS.length(); i++)
+ {
+ NUC_INDEX[NUCS.charAt(i) - 'A'] = i + 1;
+ }
+ for (int i = 0; i < AAS.length(); i++)
+ {
+ AA_INDEX[AAS.charAt(i) - 'A'] = i + 1;
+ }
+ }
+
+ /*
+ * counts array, just big enough for the nucleotide or peptide
+ * character set (plus gap counts in position 0)
+ */
+ private short[] counts;
+
+ /*
+ * alternative array of int counts for use if any count
+ * exceeds the maximum value of short (32767)
+ */
+ private int[] intCounts;
+
+ /*
+ * flag set if we switch from short to int counts
+ */
+ private boolean useIntCounts;
+
+ /*
+ * general-purpose counter, only for use for characters
+ * that are not in the expected alphabet
+ */
+ private SparseCount otherData;
+
+ /*
+ * keeps track of the maximum count value recorded
+ * (if this class every allows decrements, would need to
+ * calculate this on request instead)
+ */
+ int maxCount;
+
+ /*
+ * if we think we are counting nucleotide, can get by with smaller
+ * array to hold counts
+ */
+ private boolean isNucleotide;
+
+ /**
+ * Default constructor allocates arrays able to count either nucleotide or
+ * peptide bases. Use this constructor if not sure which the data is.
+ */
+ public ResidueCount()
+ {
+ this(false);
+ }
+
+ /**
+ * Constructor that allocates an array just big enough for the anticipated
+ * characters, plus one position to count gaps
+ */
+ public ResidueCount(boolean nucleotide)
+ {
+ isNucleotide = nucleotide;
+ int charsToCount = nucleotide ? NUCS.length() : AAS.length();
+ counts = new short[charsToCount + 1];
+ }
+
+ /**
+ * Increments the count for the given character. The supplied character may be
+ * upper or lower case but counts are for the upper case only. Gap characters
+ * (space, ., -) are all counted together.
+ *
+ * @param c
+ * @return the new value of the count for the character
+ */
+ public int add(char c)
+ {
+ int newValue = 0;
+ int offset = getOffset(c);
+
+ /*
+ * offset 0 is reserved for gap counting, so 0 here means either
+ * an unexpected character, or a gap character passed in error
+ */
+ if (offset == 0)
+ {
+ if (Comparison.isGap(c))
+ {
+ newValue = addGap();
+ }
+ else
+ {
+ newValue = addOtherCharacter(c);
+ }
+ }
+ else
+ {
+ newValue = increment(offset);
+ }
+ return newValue;
+ }
+
+ /**
+ * Increment the count at the specified offset. If this would result in short
+ * overflow, promote to counting int values instead.
+ *
+ * @param offset
+ * @return the new value of the count at this offset
+ */
+ int increment(int offset)
+ {
+ int newValue = 0;
+ if (useIntCounts)
+ {
+ newValue = intCounts[offset];
+ intCounts[offset] = ++newValue;
+ }
+ else
+ {
+ if (counts[offset] == Short.MAX_VALUE)
+ {
+ handleOverflow();
+ newValue = intCounts[offset];
+ intCounts[offset] = ++newValue;
+ }
+ else
+ {
+ newValue = counts[offset];
+ counts[offset] = (short) ++newValue;
+ }
+ }
+ maxCount = Math.max(maxCount, newValue);
+ return newValue;
+ }
+
+ /**
+ * Switch from counting in short to counting in int
+ */
+ synchronized void handleOverflow()
+ {
+ intCounts = new int[counts.length];
+ for (int i = 0; i < counts.length; i++)
+ {
+ intCounts[i] = counts[i];
+ }
+ counts = null;
+ useIntCounts = true;
+ }
+
+ /**
+ * @param c
+ * @return
+ */
+ int getOffset(char c)
+ {
+ /*
+ * ensure upper-case (fails fast if it already is!)
+ */
+ if ('a' <= c && c <= 'z')
+ {
+ c = (char) (c + TOUPPERCASE);
+ }
+
+ /*
+ * locate this character's offset in the count array
+ */
+ int offset = 0;
+ if ('A' <= c && c <= 'Z')
+ {
+ offset = isNucleotide ? NUC_INDEX[c - 'A'] : AA_INDEX[c - 'A'];
+ }
+ return offset;
+ }
+
+ /**
+ * Increment count for some unanticipated character. The first time this
+ * called, a SparseCount is instantiated to hold these 'extra' counts.
+ *
+ * @param c
+ * @return the new value of the count for the character
+ */
+ int addOtherCharacter(char c)
+ {
+ if (otherData == null)
+ {
+ otherData = new SparseCount();
+ }
+ int newValue = otherData.add(c, 1);
+ maxCount = Math.max(maxCount, newValue);
+ return newValue;
+ }
+
+ /**
+ * Set count for some unanticipated character. The first time this called, a
+ * SparseCount is instantiated to hold these 'extra' counts.
+ *
+ * @param c
+ * @param value
+ */
+ void setOtherCharacter(char c, int value)
+ {
+ if (otherData == null)
+ {
+ otherData = new SparseCount();
+ }
+ otherData.put(c, value);
+ }
+
+ /**
+ * Increment count of gap characters
+ *
+ * @return the new count of gaps
+ */
+ public int addGap()
+ {
+ int newValue;
+ if (useIntCounts)
+ {
+ newValue = ++intCounts[GAP_COUNT];
+ }
+ else
+ {
+ newValue = ++counts[GAP_COUNT];
+ }
+ return newValue;
+ }
+
+ /**
+ * Answers true if we are counting ints (only after overflow of short counts)
+ *
+ * @return
+ */
+ boolean isCountingInts()
+ {
+ return useIntCounts;
+ }
+
+ /**
+ * Sets the count for the given character. The supplied character may be upper
+ * or lower case but counts are for the upper case only.
+ *
+ * @param c
+ * @param count
+ */
+ public void put(char c, int count)
+ {
+ int offset = getOffset(c);
+
+ /*
+ * offset 0 is reserved for gap counting, so 0 here means either
+ * an unexpected character, or a gap character passed in error
+ */
+ if (offset == 0)
+ {
+ if (Comparison.isGap(c))
+ {
+ addGap();
+ }
+ else
+ {
+ setOtherCharacter(c, count);
+ maxCount = Math.max(maxCount, count);
+ }
+ }
+ else
+ {
+ set(offset, count);
+ maxCount = Math.max(maxCount, count);
+ }
+ }
+
+ /**
+ * Sets the count at the specified offset. If this would result in short
+ * overflow, promote to counting int values instead.
+ *
+ * @param offset
+ * @param value
+ */
+ void set(int offset, int value)
+ {
+ if (useIntCounts)
+ {
+ intCounts[offset] = value;
+ }
+ else
+ {
+ if (value > Short.MAX_VALUE || value < Short.MIN_VALUE)
+ {
+ handleOverflow();
+ intCounts[offset] = value;
+ }
+ else
+ {
+ counts[offset] = (short) value;
+ }
+ }
+ }
+
+ /**
+ * Returns the count for the given character, or zero if no count held
+ *
+ * @param c
+ * @return
+ */
+ public int getCount(char c)
+ {
+ int offset = getOffset(c);
+ if (offset == 0)
+ {
+ if (!Comparison.isGap(c))
+ {
+ // should have called getGapCount()
+ return otherData == null ? 0 : otherData.get(c);
+ }
+ }
+ return useIntCounts ? intCounts[offset] : counts[offset];
+ }
+
+ public int getGapCount()
+ {
+ return useIntCounts ? intCounts[0] : counts[0];
+ }
+
+ /**
+ * Answers true if this object wraps a counter for unexpected characters
+ *
+ * @return
+ */
+ boolean isUsingOtherData()
+ {
+ return otherData != null;
+ }
+
+ /**
+ * Returns the character (or concatenated characters) for the symbol(s) with
+ * the given count in the profile. Can be used to get the modal residue by
+ * supplying the modal count value. Returns an empty string if symbol has the
+ * given count. The symbols are in alphabetic order of standard peptide or
+ * nucleotide characters, followed by 'other' symbols if any.
+ *
+ * @return
+ */
+ public String getResiduesForCount(int count)
+ {
+ if (count == 0)
+ {
+ return "";
+ }
+
+ /*
+ * find counts for the given value and append the
+ * corresponding symbol
+ */
+ StringBuilder modal = new StringBuilder();
+ if (useIntCounts)
+ {
+ for (int i = 1; i < intCounts.length; i++)
+ {
+ if (intCounts[i] == count)
+ {
+ modal.append(isNucleotide ? NUCS.charAt(i - 1) : AAS
+ .charAt(i - 1));
+ }
+ }
+ }
+ else
+ {
+ for (int i = 1; i < counts.length; i++)
+ {
+ if (counts[i] == count)
+ {
+ modal.append(isNucleotide ? NUCS.charAt(i - 1) : AAS
+ .charAt(i - 1));
+ }
+ }
+ }
+ if (otherData != null)
+ {
+ for (int i = 0; i < otherData.size(); i++)
+ {
+ if (otherData.valueAt(i) == count)
+ {
+ modal.append((char) otherData.keyAt(i));
+ }
+ }
+ }
+ return modal.toString();
+ }
+
+ /**
+ * Returns the highest count for any symbol in the profile (excluding gap)
+ *
+ * @return
+ */
+ public int getModalCount()
+ {
+ return maxCount;
+ }
+
+ /**
+ * Returns the number of distinct symbols with a non-zero count (excluding the
+ * gap symbol)
+ *
+ * @return
+ */
+ public int size() {
+ int size = 0;
+ if (useIntCounts)
+ {
+ for (int i = 1; i < intCounts.length; i++)
+ {
+ if (intCounts[i] > 0)
+ {
+ size++;
+ }
+ }
+ }
+ else
+ {
+ for (int i = 1; i < counts.length; i++)
+ {
+ if (counts[i] > 0)
+ {
+ size++;
+ }
+ }
+ }
+ if (otherData != null)
+ {
+ size += otherData.size();
+ }
+
+ return size;
+ }
+
+ /**
+ * Returns those symbols that have a non-zero count (excluding the gap
+ * symbol), with their counts. The symbols are in no special order. Returns an
+ * array of size 2 whose first element is a char array of symbols, and second
+ * element an int array of corresponding counts.
+ *
+ * @return an array [[char1, char2, ...] [char1Count, char2Count, ...] ... ]
+ */
+ public SymbolCounts getSymbolCounts()
+ {
+ char[] symbols = new char[size()];
+ int[] values = new int[size()];
+ int j = 0;
+
+ if (useIntCounts)
+ {
+ for (int i = 1; i < intCounts.length; i++)
+ {
+ if (intCounts[i] > 0)
+ {
+ char symbol = isNucleotide ? NUCS.charAt(i - 1) : AAS
+ .charAt(i - 1);
+ symbols[j] = symbol;
+ values[j] = intCounts[i];
+ j++;
+ }
+ }
+ }
+ else
+ {
+ for (int i = 1; i < counts.length; i++)
+ {
+ if (counts[i] > 0)
+ {
+ char symbol = isNucleotide ? NUCS.charAt(i - 1) : AAS
+ .charAt(i - 1);
+ symbols[j] = symbol;
+ values[j] = counts[i];
+ j++;
+ }
+ }
+ }
+ if (otherData != null)
+ {
+ for (int i = 0; i < otherData.size(); i++)
+ {
+ int value = otherData.valueAt(i);
+ if (value > 0)
+ {
+ symbols[j] = (char) otherData.keyAt(i);
+ values[j] = otherData.valueAt(i);
+ j++;
+ }
+ }
+ }
+
+ return new SymbolCounts(symbols, values);
+ }
+
+ /**
+ * Returns a tooltip string showing residues in descending order of their
+ * percentage frequency in the profile
+ *
+ * @param normaliseBy
+ * the divisor for residue counts (may or may not include gapped
+ * sequence count)
+ * @param percentageDecPl
+ * the number of decimal places to show in percentages
+ * @return
+ */
+ public String getTooltip(int normaliseBy, int percentageDecPl)
+ {
+ StringBuilder sb = new StringBuilder(64);
+ SymbolCounts symbolCounts = getSymbolCounts();
+ char[] ca = symbolCounts.symbols;
+ int[] vl = symbolCounts.values;
+
+ /*
+ * sort characters into ascending order of their counts
+ */
+ QuickSort.sort(vl, ca);
+
+ /*
+ * traverse in reverse order (highest count first) to build tooltip
+ */
+ for (int p = 0, c = ca.length - 1; c >= 0; c--)
+ {
+ final char residue = ca[c];
+ if (residue != '-')
+ {
+ // TODO combine residues which share a percentage
+ // (see AAFrequency.completeCdnaConsensus)
+ float tval = (vl[c] * 100f) / normaliseBy;
+ sb.append((((p == 0) ? "" : "; "))).append(residue)
+ .append(" ");
+ Format.appendPercentage(sb, tval, percentageDecPl);
+ sb.append("%");
+ p++;
+ }
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Returns a string representation of the symbol counts, for debug purposes.
+ */
+ @Override
+ public String toString()
+ {
+ StringBuilder sb = new StringBuilder();
+ sb.append("[ ");
+ SymbolCounts sc = getSymbolCounts();
+ for (int i = 0; i < sc.symbols.length; i++)
+ {
+ sb.append(sc.symbols[i]).append(":").append(sc.values[i]).append(" ");
+ }
+ sb.append("]");
+ return sb.toString();
+ }
+}
package jalview.api;
import jalview.analysis.Conservation;
+import jalview.analysis.Profile;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentView;
ColumnSelection getColumnSelection();
- Hashtable[] getSequenceConsensusHash();
+ Profile[] getSequenceConsensusHash();
/**
* Get consensus data table for the cDNA complement of this alignment (if any)
*
* @param hconsensus
*/
- void setSequenceConsensusHash(Hashtable[] hconsensus);
+ void setSequenceConsensusHash(Profile[] hconsensus);
/**
* Set the cDNA complement consensus for the viewport
import jalview.analysis.AAFrequency;
import jalview.analysis.Conservation;
+import jalview.analysis.Profile;
import jalview.schemes.ColourSchemeI;
import java.awt.Color;
import java.util.ArrayList;
-import java.util.Hashtable;
import java.util.List;
import java.util.Map;
-import java.util.Vector;
/**
* Collects a set contiguous ranges on a set of sequences
Conservation conserve;
- Vector aaFrequency;
-
boolean displayBoxes = true;
boolean displayText = true;
boolean upd = false;
try
{
- Hashtable cnsns[] = AAFrequency.calculate(sequences, startRes,
+ Profile[] cnsns = AAFrequency.calculate(sequences, startRes,
endRes + 1, showSequenceLogo);
if (consensus != null)
{
c.completeAnnotations(conservation, null, startRes, endRes + 1);
}
- public Hashtable[] consensusData = null;
+ public Profile[] consensusData = null;
- private void _updateConsensusRow(Hashtable[] cnsns, long nseq)
+ private void _updateConsensusRow(Profile[] cnsns, long nseq)
{
if (consensus == null)
{
}
return ~lo; // value not present
}
+
+ // This is Arrays.binarySearch(), but doesn't do any argument validation.
+ static int binarySearch(short[] array, int size, short value)
+ {
+ int lo = 0;
+ int hi = size - 1;
+ while (lo <= hi)
+ {
+ final int mid = (lo + hi) >>> 1;
+ final int midVal = array[mid];
+ if (midVal < value)
+ {
+ lo = mid + 1;
+ }
+ else if (midVal > value)
+ {
+ hi = mid - 1;
+ }
+ else
+ {
+ return mid; // value found
+ }
+ }
+ return ~lo; // value not present
+ }
}
*
* @param key
* @oparam toAdd
+ * @return the new value of the count for the key
*/
- public void add(int key, int toAdd)
+ public int add(int key, int toAdd)
{
+ int newValue = toAdd;
int i = ContainerHelpers.binarySearch(mKeys, mSize, key);
if (i >= 0)
{
mValues[i] += toAdd;
+ newValue = mValues[i];
}
else
{
int n = idealIntArraySize(mSize + 1);
int[] nkeys = new int[n];
int[] nvalues = new int[n];
- // Log.e("SparseIntArray", "grow " + mKeys.length + " to " + n);
System.arraycopy(mKeys, 0, nkeys, 0, mKeys.length);
System.arraycopy(mValues, 0, nvalues, 0, mValues.length);
mKeys = nkeys;
}
if (mSize - i != 0)
{
- // Log.e("SparseIntArray", "move " + (mSize - i));
System.arraycopy(mKeys, i, mKeys, i + 1, mSize - i);
System.arraycopy(mValues, i, mValues, i + 1, mSize - i);
}
mValues[i] = toAdd;
mSize++;
}
+ return newValue;
}
}
--- /dev/null
+package jalview.ext.android;
+
+/*
+ * Copyright (C) 2006 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * SparseShortArrays map shorts to shorts. Unlike a normal array of shorts,
+ * there can be gaps in the indices. It is intended to be more memory efficient
+ * than using a HashMap to map Shorts to Shorts, both because it avoids
+ * auto-boxing keys and values and its data structure doesn't rely on an extra
+ * entry object for each mapping.
+ *
+ * <p>
+ * Note that this container keeps its mappings in an array data structure, using
+ * a binary search to find keys. The implementation is not intended to be
+ * appropriate for data structures that may contain large numbers of items. It
+ * is generally slower than a traditional HashMap, since lookups require a
+ * binary search and adds and removes require inserting and deleting entries in
+ * the array. For containers holding up to hundreds of items, the performance
+ * difference is not significant, less than 50%.
+ * </p>
+ *
+ * <p>
+ * It is possible to iterate over the items in this container using
+ * {@link #keyAt(int)} and {@link #valueAt(int)}. Iterating over the keys using
+ * <code>keyAt(int)</code> with ascending values of the index will return the
+ * keys in ascending order, or the values corresponding to the keys in ascending
+ * order in the case of <code>valueAt(int)<code>.
+ * </p>
+ */
+/**
+ * A copy of SparseShortArray designed to store short values (to minimise space
+ * usage).
+ * <p>
+ * Note that operations append, put, add throw ArithmeticException if the
+ * resulting value overflows the range of a short.
+ */
+public class SparseShortArray implements Cloneable
+{
+ private short[] mKeys;
+
+ private short[] mValues;
+
+ private int mSize;
+
+ /**
+ * Creates a new SparseShortArray containing no mappings.
+ */
+ public SparseShortArray()
+ {
+ this(10);
+ }
+
+ /**
+ * Creates a new SparseShortArray containing no mappings that will not require
+ * any additional memory allocation to store the specified number of mappings.
+ * If you supply an initial capacity of 0, the sparse array will be
+ * initialized with a light-weight representation not requiring any additional
+ * array allocations.
+ */
+ public SparseShortArray(int initialCapacity)
+ {
+ if (initialCapacity == 0)
+ {
+ mKeys = new short[0];
+ mValues = new short[0];
+ }
+ else
+ {
+ initialCapacity = idealShortArraySize(initialCapacity);
+ mKeys = new short[initialCapacity];
+ mValues = new short[initialCapacity];
+ }
+ mSize = 0;
+ }
+
+ @Override
+ public SparseShortArray clone()
+ {
+ SparseShortArray clone = null;
+ try
+ {
+ clone = (SparseShortArray) super.clone();
+ clone.mKeys = mKeys.clone();
+ clone.mValues = mValues.clone();
+ } catch (CloneNotSupportedException cnse)
+ {
+ /* ignore */
+ }
+ return clone;
+ }
+
+ /**
+ * Gets the int mapped from the specified key, or <code>0</code> if no such
+ * mapping has been made.
+ */
+ public int get(int key)
+ {
+ return get(key, 0);
+ }
+
+ /**
+ * Gets the int mapped from the specified key, or the specified value if no
+ * such mapping has been made.
+ *
+ * @throws ArithmeticException
+ * if key is outside the range of a short value
+ */
+ public int get(int key, int valueIfKeyNotFound)
+ {
+ checkOverflow(key);
+ int i = ContainerHelpers.binarySearch(mKeys, mSize, (short) key);
+ if (i < 0)
+ {
+ return valueIfKeyNotFound;
+ }
+ else
+ {
+ return mValues[i];
+ }
+ }
+
+ /**
+ * Removes the mapping from the specified key, if there was any.
+ *
+ * @throws ArithmeticException
+ * if key is outside the range of a short value
+ */
+ public void delete(int key)
+ {
+ checkOverflow(key);
+ int i = ContainerHelpers.binarySearch(mKeys, mSize, (short) key);
+ if (i >= 0)
+ {
+ removeAt(i);
+ }
+ }
+
+ /**
+ * Removes the mapping at the given index.
+ */
+ public void removeAt(int index)
+ {
+ System.arraycopy(mKeys, index + 1, mKeys, index, mSize - (index + 1));
+ System.arraycopy(mValues, index + 1, mValues, index, mSize
+ - (index + 1));
+ mSize--;
+ }
+
+ /**
+ * Adds a mapping from the specified key to the specified value, replacing the
+ * previous mapping from the specified key if there was one.
+ *
+ * @throws ArithmeticException
+ * if either argument is outside the range of a short value
+ */
+ public void put(int key, int value)
+ {
+ checkOverflow(key);
+ checkOverflow(value);
+ int i = ContainerHelpers.binarySearch(mKeys, mSize, (short) key);
+ if (i >= 0)
+ {
+ mValues[i] = (short) value;
+ }
+ else
+ {
+ i = ~i;
+ if (mSize >= mKeys.length)
+ {
+ int n = idealShortArraySize(mSize + 1);
+ short[] nkeys = new short[n];
+ short[] nvalues = new short[n];
+ // Log.e("SparseShortArray", "grow " + mKeys.length + " to " + n);
+ System.arraycopy(mKeys, 0, nkeys, 0, mKeys.length);
+ System.arraycopy(mValues, 0, nvalues, 0, mValues.length);
+ mKeys = nkeys;
+ mValues = nvalues;
+ }
+ if (mSize - i != 0)
+ {
+ // Log.e("SparseShortArray", "move " + (mSize - i));
+ System.arraycopy(mKeys, i, mKeys, i + 1, mSize - i);
+ System.arraycopy(mValues, i, mValues, i + 1, mSize - i);
+ }
+ mKeys[i] = (short) key;
+ mValues[i] = (short) value;
+ mSize++;
+ }
+ }
+
+ /**
+ * Returns the number of key-value mappings that this SparseShortArray
+ * currently stores.
+ */
+ public int size()
+ {
+ return mSize;
+ }
+
+ /**
+ * Given an index in the range <code>0...size()-1</code>, returns the key from
+ * the <code>index</code>th key-value mapping that this SparseShortArray
+ * stores.
+ *
+ * <p>
+ * The keys corresponding to indices in ascending order are guaranteed to be
+ * in ascending order, e.g., <code>keyAt(0)</code> will return the smallest
+ * key and <code>keyAt(size()-1)</code> will return the largest key.
+ * </p>
+ */
+ public short keyAt(int index)
+ {
+ return mKeys[index];
+ }
+
+ /**
+ * Given an index in the range <code>0...size()-1</code>, returns the value
+ * from the <code>index</code>th key-value mapping that this SparseShortArray
+ * stores.
+ *
+ * <p>
+ * The values corresponding to indices in ascending order are guaranteed to be
+ * associated with keys in ascending order, e.g., <code>valueAt(0)</code> will
+ * return the value associated with the smallest key and
+ * <code>valueAt(size()-1)</code> will return the value associated with the
+ * largest key.
+ * </p>
+ */
+ public short valueAt(int index)
+ {
+ return mValues[index];
+ }
+
+ /**
+ * Returns the index for which {@link #keyAt} would return the specified key,
+ * or a negative number if the specified key is not mapped.
+ *
+ * @throws ArithmeticException
+ * if key is outside the range of a short value
+ */
+ public int indexOfKey(int key)
+ {
+ checkOverflow(key);
+ return ContainerHelpers.binarySearch(mKeys, mSize, (short) key);
+ }
+
+ /**
+ * Returns an index for which {@link #valueAt} would return the specified key,
+ * or a negative number if no keys map to the specified value. Beware that
+ * this is a linear search, unlike lookups by key, and that multiple keys can
+ * map to the same value and this will find only one of them.
+ */
+ public int indexOfValue(int value)
+ {
+ for (int i = 0; i < mSize; i++)
+ {
+ if (mValues[i] == value)
+ {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ /**
+ * Removes all key-value mappings from this SparseShortArray.
+ */
+ public void clear()
+ {
+ mSize = 0;
+ }
+
+ /**
+ * Puts a key/value pair into the array, optimizing for the case where the key
+ * is greater than all existing keys in the array.
+ */
+ public void append(int key, int value)
+ {
+ if (mSize != 0 && key <= mKeys[mSize - 1])
+ {
+ put(key, value);
+ return;
+ }
+ int pos = mSize;
+ if (pos >= mKeys.length)
+ {
+ int n = idealShortArraySize(pos + 1);
+ short[] nkeys = new short[n];
+ short[] nvalues = new short[n];
+ // Log.e("SparseShortArray", "grow " + mKeys.length + " to " + n);
+ System.arraycopy(mKeys, 0, nkeys, 0, mKeys.length);
+ System.arraycopy(mValues, 0, nvalues, 0, mValues.length);
+ mKeys = nkeys;
+ mValues = nvalues;
+ }
+ checkOverflow(key);
+ checkOverflow(value);
+ mKeys[pos] = (short) key;
+ mValues[pos] = (short) value;
+ mSize = pos + 1;
+ }
+
+ /**
+ * Throws an exception if the value is outside the range of a short.
+ *
+ * @param value
+ * @throws ArithmeticException
+ */
+ public static void checkOverflow(int value)
+ {
+ if (value > Short.MAX_VALUE || value < Short.MIN_VALUE)
+ {
+ throw new ArithmeticException(String.valueOf(value));
+ }
+ }
+
+ /**
+ * Inlined here by copying from com.android.internal.util.ArrayUtils
+ *
+ * @param i
+ * @return
+ */
+ public static int idealShortArraySize(int need)
+ {
+ return idealByteArraySize(need * 2) / 2;
+ }
+
+ /**
+ * Inlined here by copying from com.android.internal.util.ArrayUtils
+ *
+ * @param i
+ * @return
+ */
+ public static int idealByteArraySize(int need)
+ {
+ for (int i = 4; i < 32; i++)
+ {
+ if (need <= (1 << i) - 12)
+ {
+ return (1 << i) - 12;
+ }
+ }
+
+ return need;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * <p>
+ * This implementation composes a string by iterating over its mappings.
+ */
+ @Override
+ public String toString()
+ {
+ if (size() <= 0)
+ {
+ return "{}";
+ }
+ StringBuilder buffer = new StringBuilder(mSize * 28);
+ buffer.append('{');
+ for (int i = 0; i < mSize; i++)
+ {
+ if (i > 0)
+ {
+ buffer.append(", ");
+ }
+ int key = keyAt(i);
+ buffer.append(key);
+ buffer.append('=');
+ int value = valueAt(i);
+ buffer.append(value);
+ }
+ buffer.append('}');
+ return buffer.toString();
+ }
+
+ /**
+ * Method (copied from put) added for Jalview to efficiently increment a key's
+ * value if present, else add it with the given value. This avoids a double
+ * binary search (once to get the value, again to put the updated value).
+ *
+ * @param key
+ * @oparam toAdd
+ * @return the new value of the count for the key
+ * @throws ArithmeticException
+ * if key, or result of adding toAdd, is outside the range of a
+ * short value
+ */
+ public int add(int key, int toAdd)
+ {
+ int newValue = toAdd;
+ checkOverflow(key);
+ int i = ContainerHelpers.binarySearch(mKeys, mSize, (short) key);
+ if (i >= 0)
+ {
+ checkOverflow(toAdd + mValues[i]);
+ mValues[i] += (short) toAdd;
+ newValue = mValues[i];
+ }
+ else
+ {
+ checkOverflow(toAdd);
+ i = ~i;
+ if (mSize >= mKeys.length)
+ {
+ int n = idealShortArraySize(mSize + 1);
+ short[] nkeys = new short[n];
+ short[] nvalues = new short[n];
+ System.arraycopy(mKeys, 0, nkeys, 0, mKeys.length);
+ System.arraycopy(mValues, 0, nvalues, 0, mValues.length);
+ mKeys = nkeys;
+ mValues = nvalues;
+ }
+ if (mSize - i != 0)
+ {
+ System.arraycopy(mKeys, i, mKeys, i + 1, mSize - i);
+ System.arraycopy(mValues, i, mValues, i + 1, mSize - i);
+ }
+ mKeys[i] = (short) key;
+ mValues[i] = (short) toAdd;
+ mSize++;
+ }
+ return newValue;
+ }
+}
import jalview.analysis.AAFrequency;
import jalview.analysis.CodingUtils;
+import jalview.analysis.Profile;
import jalview.analysis.Rna;
import jalview.analysis.StructureFrequency;
import jalview.api.AlignViewportI;
private ColumnSelection columnSelection;
- private Hashtable[] hconsensus;
+ private Profile[] hconsensus;
private Hashtable[] complementConsensus;
*/
package jalview.schemes;
-import jalview.analysis.AAFrequency;
import jalview.datamodel.AnnotatedCollectionI;
import jalview.datamodel.SequenceCollectionI;
import jalview.datamodel.SequenceI;
+import jalview.util.Comparison;
import java.awt.Color;
import java.util.Map;
Color currentColour;
- if (!jalview.util.Comparison.isGap(res))
+ if (!Comparison.isGap(res))
{
- String max = (String) consensus[j].get(AAFrequency.MAXRESIDUE);
+ String max = consensus[j].getModalResidue();
if (max.indexOf(res) > -1)
{
*/
package jalview.schemes;
+import jalview.analysis.Profile;
import jalview.datamodel.AnnotatedCollectionI;
import jalview.datamodel.SequenceCollectionI;
import jalview.datamodel.SequenceI;
/**
* assign the given consensus profile for the colourscheme
*/
- public void setConsensus(java.util.Hashtable[] h);
+ public void setConsensus(Profile[] hconsensus);
/**
* assign the given conservation to the colourscheme
package jalview.schemes;
import jalview.analysis.Conservation;
-
-import java.util.Hashtable;
+import jalview.analysis.Profile;
/**
* Colourscheme that takes its colours from some other colourscheme
}
@Override
- public void setConsensus(Hashtable[] consensus)
+ public void setConsensus(Profile[] consensus)
{
if (colourScheme != null)
{
*/
package jalview.schemes;
-import jalview.analysis.AAFrequency;
import jalview.datamodel.SequenceGroup;
import jalview.datamodel.SequenceI;
return Color.white;
}
- if ((Integer
- .parseInt(consensus[j].get(AAFrequency.MAXCOUNT).toString()) != -1)
- && consensus[j].contains(String.valueOf(c)))
+ if (consensus[j].getMaxCount() > 0) //!= -1)
+ //&& consensus[j].contains(String.valueOf(c)))
{
- sc = ((Float) consensus[j].get(ignoreGaps)).floatValue();
+ sc = consensus[j].getPercentageIdentity(ignoreGaps);
if (!jalview.util.Comparison.isGap(c))
{
*/
package jalview.schemes;
-import jalview.analysis.AAFrequency;
import jalview.analysis.Conservation;
+import jalview.analysis.Profile;
import jalview.datamodel.AnnotatedCollectionI;
import jalview.datamodel.SequenceCollectionI;
import jalview.datamodel.SequenceI;
import jalview.util.MessageManager;
import java.awt.Color;
-import java.util.Hashtable;
import java.util.Map;
/**
int threshold = 0;
/* Set when threshold colouring to either pid_gaps or pid_nogaps */
- protected String ignoreGaps = AAFrequency.PID_GAPS;
+ protected boolean ignoreGaps = false;
/** Consenus as a hashtable array */
- Hashtable[] consensus;
+ Profile[] consensus;
/** Conservation string as a char array */
char[] conservation;
/**
* Find a colour without an index in a sequence
*/
+ @Override
public Color findColour(char c)
{
return colors == null ? Color.white : colors[symbolIndex[c]];
*
* @return Returns the percentage threshold
*/
+ @Override
public int getThreshold()
{
return threshold;
* @param ct
* DOCUMENT ME!
*/
+ @Override
public void setThreshold(int ct, boolean ignoreGaps)
{
threshold = ct;
- if (ignoreGaps)
- {
- this.ignoreGaps = AAFrequency.PID_NOGAPS;
- }
- else
- {
- this.ignoreGaps = AAFrequency.PID_GAPS;
- }
+ this.ignoreGaps = ignoreGaps;
}
/**
return false;
}
- if ((((Integer) consensus[j].get(AAFrequency.MAXCOUNT)).intValue() != -1)
- && consensus[j].contains(String.valueOf(c)))
+ if (consensus[j].getMaxCount() > 0) // != -1))
{
- if (((Float) consensus[j].get(ignoreGaps)).floatValue() >= threshold)
+ if (consensus[j].getPercentageIdentity(ignoreGaps) >= threshold)
{
return true;
}
return false;
}
+ @Override
public boolean conservationApplied()
{
return conservationColouring;
conservationColouring = conservationApplied;
}
+ @Override
public void setConservationInc(int i)
{
inc = i;
}
+ @Override
public int getConservationInc()
{
return inc;
* @param consensus
* DOCUMENT ME!
*/
- public void setConsensus(Hashtable[] consensus)
+ @Override
+ public void setConsensus(Profile[] consensus)
{
if (consensus == null)
{
this.consensus = consensus;
}
+ @Override
public void setConservation(Conservation cons)
{
if (cons == null)
--- /dev/null
+package jalview.util;
+
+import jalview.ext.android.SparseIntArray;
+import jalview.ext.android.SparseShortArray;
+
+/**
+ * A class to count occurrences of characters with minimal memory footprint.
+ * Sparse arrays of short values are used to hold the counts, with automatic
+ * promotion to arrays of int if any count exceeds the maximum value for a
+ * short.
+ *
+ * @author gmcarstairs
+ *
+ */
+public class SparseCount
+{
+ private static final int DEFAULT_PROFILE_SIZE = 2;
+
+ /*
+ * array of keys (chars) and values (counts)
+ * held either as shorts or (if shorts overflow) as ints
+ */
+ private SparseShortArray shortProfile;
+
+ private SparseIntArray intProfile;
+
+ /*
+ * flag is set true after short overflow occurs
+ */
+ private boolean useInts;
+
+ /**
+ * Constructor which initially creates a new sparse array of short values to
+ * hold counts.
+ *
+ * @param profileSize
+ */
+ public SparseCount(int profileSize)
+ {
+ this.shortProfile = new SparseShortArray(profileSize);
+ }
+
+ /**
+ * Constructor which allocates an initial count array for only two distinct
+ * values (the array will grow if needed)
+ */
+ public SparseCount()
+ {
+ this(DEFAULT_PROFILE_SIZE);
+ }
+
+ /**
+ * Adds the given value for the given key (or sets the initial value), and
+ * returns the new value
+ *
+ * @param key
+ * @param value
+ */
+ public int add(int key, int value)
+ {
+ int newValue = 0;
+ if (useInts)
+ {
+ newValue = intProfile.add(key, value);
+ }
+ else
+ {
+ try {
+ newValue = shortProfile.add(key, value);
+ } catch (ArithmeticException e) {
+ handleOverflow();
+ newValue = intProfile.add(key, value);
+ }
+ }
+ return newValue;
+ }
+
+ /**
+ * Switch from counting shorts to counting ints
+ */
+ synchronized void handleOverflow()
+ {
+ int size = shortProfile.size();
+ intProfile = new SparseIntArray(size);
+ for (int i = 0; i < size; i++)
+ {
+ short key = shortProfile.keyAt(i);
+ short value = shortProfile.valueAt(i);
+ intProfile.put(key, value);
+ }
+ shortProfile = null;
+ useInts = true;
+ }
+
+ /**
+ * Returns the size of the profile (number of distinct items counted)
+ *
+ * @return
+ */
+ public int size()
+ {
+ return useInts ? intProfile.size() : shortProfile.size();
+ }
+
+ /**
+ * Returns the value for the key (zero if no such key)
+ *
+ * @param key
+ * @return
+ */
+ public int get(int key)
+ {
+ return useInts ? intProfile.get(key) : shortProfile.get(key);
+ }
+
+ /**
+ * Sets the value for the given key
+ *
+ * @param key
+ * @param value
+ */
+ public void put(int key, int value)
+ {
+ if (useInts)
+ {
+ intProfile.put(key, value);
+ }
+ else
+ {
+ shortProfile.put(key, value);
+ }
+ }
+
+ public int keyAt(int k)
+ {
+ return useInts ? intProfile.keyAt(k) : shortProfile.keyAt(k);
+ }
+
+ public int valueAt(int k)
+ {
+ return useInts ? intProfile.valueAt(k) : shortProfile.valueAt(k);
+ }
+
+ /**
+ * Answers true if this object wraps arrays of int values, false if using
+ * short values
+ *
+ * @return
+ */
+ boolean isUsingInt()
+ {
+ return useInts;
+ }
+}
import jalview.analysis.AnnotationSorter.SequenceAnnotationOrder;
import jalview.analysis.Conservation;
+import jalview.analysis.Profile;
import jalview.api.AlignCalcManagerI;
import jalview.api.AlignViewportI;
import jalview.api.AlignmentViewPanel;
/**
* results of alignment consensus analysis for visible portion of view
*/
- protected Hashtable[] hconsensus = null;
+ protected Profile[] hconsensus = null;
/**
* results of cDNA complement consensus visible portion of view
}
@Override
- public void setSequenceConsensusHash(Hashtable[] hconsensus)
+ public void setSequenceConsensusHash(Profile[] hconsensus)
{
this.hconsensus = hconsensus;
}
}
@Override
- public Hashtable[] getSequenceConsensusHash()
+ public Profile[] getSequenceConsensusHash()
{
return hconsensus;
}
* @param consensusData
* the computed consensus data
*/
- @Override
protected void deriveConsensus(AlignmentAnnotation consensusAnnotation,
Hashtable[] consensusData)
{
alignViewport.isShowSequenceLogo(), getSequences().length);
}
+ @Override
+ public void updateResultAnnotation(boolean immediate)
+ {
+ AlignmentAnnotation consensus = getConsensusAnnotation();
+ Hashtable[] hconsensus = getViewportConsensus();
+ if (immediate || !calcMan.isWorking(this) && consensus != null
+ && hconsensus != null)
+ {
+ deriveConsensus(consensus, hconsensus);
+ }
+ }
+
}
package jalview.workers;
import jalview.analysis.AAFrequency;
+import jalview.analysis.Profile;
import jalview.api.AlignViewportI;
import jalview.api.AlignmentViewPanel;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.SequenceI;
import jalview.schemes.ColourSchemeI;
-import java.util.Hashtable;
-
public class ConsensusThread extends AlignCalcWorker
{
public ConsensusThread(AlignViewportI alignViewport,
*/
protected void computeConsensus(AlignmentI alignment)
{
- Hashtable[] hconsensus = new Hashtable[alignment.getWidth()];
+ Profile[] hconsensus = new Profile[alignment.getWidth()];
SequenceI[] aseqs = getSequences();
AAFrequency.calculate(aseqs, 0, alignment.getWidth(), hconsensus, true);
/**
* @param hconsensus
*/
- protected void setColourSchemeConsensus(Hashtable[] hconsensus)
+ protected void setColourSchemeConsensus(Profile[] hconsensus)
{
ColourSchemeI globalColourScheme = alignViewport
.getGlobalColourScheme();
public void updateResultAnnotation(boolean immediate)
{
AlignmentAnnotation consensus = getConsensusAnnotation();
- Hashtable[] hconsensus = getViewportConsensus();
+ Profile[] hconsensus = (Profile[]) getViewportConsensus();
if (immediate || !calcMan.isWorking(this) && consensus != null
&& hconsensus != null)
{
*
* @param consensusAnnotation
* the annotation to be populated
- * @param consensusData
+ * @param hconsensus
* the computed consensus data
*/
protected void deriveConsensus(AlignmentAnnotation consensusAnnotation,
- Hashtable[] consensusData)
+ Profile[] hconsensus)
{
long nseq = getSequences().length;
- AAFrequency.completeConsensus(consensusAnnotation, consensusData, 0,
- consensusData.length, alignViewport.isIgnoreGapsConsensus(),
+ AAFrequency.completeConsensus(consensusAnnotation, hconsensus, 0,
+ hconsensus.length, alignViewport.isIgnoreGapsConsensus(),
alignViewport.isShowSequenceLogo(), nseq);
}
*
* @return
*/
- protected Hashtable[] getViewportConsensus()
+ protected Object[] getViewportConsensus()
{
+ // TODO convert ComplementConsensusThread to use Profile
return alignViewport.getSequenceConsensusHash();
}
}
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
-import java.util.Hashtable;
-
import org.testng.annotations.Test;
public class AAFrequencyTest
SequenceI seq3 = new Sequence("Seq3", "C--G");
SequenceI seq4 = new Sequence("Seq4", "CA-t");
SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
- Hashtable[] result = new Hashtable[seq1.getLength()];
+ Profile[] result = new Profile[seq1.getLength()];
AAFrequency.calculate(seqs, 0, seq1.getLength(), result, false);
// col 0 is 100% C
- Hashtable col = result[0];
- assertEquals(100f, (Float) col.get(G), 0.0001f);
- assertEquals(100f, (Float) col.get(N), 0.0001f);
- assertEquals(4, col.get(C));
- assertEquals("C", col.get(R));
- assertNull(col.get(P));
+ Profile col = result[0];
+ assertEquals(100f, col.getPercentageIdentity(false));
+ assertEquals(100f, col.getPercentageIdentity(true));
+ assertEquals(4, col.getMaxCount());
+ assertEquals("C", col.getModalResidue());
+ assertNull(col.getCounts());
// col 1 is 75% A
col = result[1];
- assertEquals(75f, (Float) col.get(G), 0.0001f);
- assertEquals(100f, (Float) col.get(N), 0.0001f);
- assertEquals(3, col.get(C));
- assertEquals("A", col.get(R));
+ assertEquals(75f, col.getPercentageIdentity(false));
+ assertEquals(100f, col.getPercentageIdentity(true));
+ assertEquals(3, col.getMaxCount());
+ assertEquals("A", col.getModalResidue());
// col 2 is 50% G 50% C or 25/25 counting gaps
col = result[2];
- assertEquals(25f, (Float) col.get(G), 0.0001f);
- assertEquals(50f, (Float) col.get(N), 0.0001f);
- assertEquals(1, col.get(C));
- assertEquals("CG", col.get(R));
+ assertEquals(25f, col.getPercentageIdentity(false));
+ assertEquals(50f, col.getPercentageIdentity(true));
+ assertEquals(1, col.getMaxCount());
+ assertEquals("CG", col.getModalResidue());
// col 3 is 75% T 25% G
col = result[3];
- assertEquals(75f, (Float) col.get(G), 0.0001f);
- assertEquals(75f, (Float) col.get(N), 0.0001f);
- assertEquals(3, col.get(C));
- assertEquals("T", col.get(R));
+ assertEquals(75f, col.getPercentageIdentity(false));
+ assertEquals(75f, col.getPercentageIdentity(true));
+ assertEquals(3, col.getMaxCount());
+ assertEquals("T", col.getModalResidue());
}
@Test(groups = { "Functional" })
SequenceI seq3 = new Sequence("Seq3", "C--G");
SequenceI seq4 = new Sequence("Seq4", "CA-t");
SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
- Hashtable[] result = new Hashtable[seq1.getLength()];
+ Profile[] result = new Profile[seq1.getLength()];
AAFrequency.calculate(seqs, 0, seq1.getLength(), result, true);
- int[][] profile = (int[][]) result[0].get(P);
- assertEquals(4, profile[0]['C']);
- assertEquals(4, profile[1][0]); // no of seqs
- assertEquals(4, profile[1][1]); // nongapped in column
-
- profile = (int[][]) result[1].get(P);
- assertEquals(3, profile[0]['A']);
- assertEquals(4, profile[1][0]);
- assertEquals(3, profile[1][1]);
-
- profile = (int[][]) result[2].get(P);
- assertEquals(1, profile[0]['G']);
- assertEquals(1, profile[0]['C']);
- assertEquals(4, profile[1][0]);
- assertEquals(2, profile[1][1]);
-
- profile = (int[][]) result[3].get(P);
- assertEquals(3, profile[0]['T']);
- assertEquals(1, profile[0]['G']);
- assertEquals(4, profile[1][0]);
- assertEquals(4, profile[1][1]);
+ Profile profile = result[0];
+ assertEquals(4, profile.getCounts().getCount('C'));
+ assertEquals(4, profile.getHeight());
+ assertEquals(4, profile.getNonGapped());
+
+ profile = result[1];
+ assertEquals(3, profile.getCounts().getCount('A'));
+ assertEquals(4, profile.getHeight());
+ assertEquals(3, profile.getNonGapped());
+
+ profile = result[2];
+ assertEquals(1, profile.getCounts().getCount('C'));
+ assertEquals(1, profile.getCounts().getCount('G'));
+ assertEquals(4, profile.getHeight());
+ assertEquals(2, profile.getNonGapped());
+
+ profile = result[3];
+ assertEquals(3, profile.getCounts().getCount('T'));
+ assertEquals(1, profile.getCounts().getCount('G'));
+ assertEquals(4, profile.getHeight());
+ assertEquals(4, profile.getNonGapped());
}
@Test(groups = { "Functional" })
SequenceI seq3 = new Sequence("Seq3", "C--G");
SequenceI seq4 = new Sequence("Seq4", "CA-t");
SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
- Hashtable[] result = new Hashtable[seq1.getLength()];
+ Profile[] result = new Profile[seq1.getLength()];
// ensure class loaded and initialized
AAFrequency.calculate(seqs, 0, seq1.getLength(), result, true);
}
System.out.println(System.currentTimeMillis() - start);
}
-
- @Test(groups = { "Functional" })
- public void testGetPercentageFormat()
- {
- assertNull(AAFrequency.getPercentageFormat(0));
- assertNull(AAFrequency.getPercentageFormat(99));
- assertEquals("%3.1f", AAFrequency.getPercentageFormat(100).toString());
- assertEquals("%3.1f", AAFrequency.getPercentageFormat(999).toString());
- assertEquals("%3.2f", AAFrequency.getPercentageFormat(1000).toString());
- assertEquals("%3.2f", AAFrequency.getPercentageFormat(9999).toString());
- }
}
--- /dev/null
+package jalview.analysis;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+
+import jalview.analysis.ResidueCount.SymbolCounts;
+
+import org.junit.Assert;
+import org.testng.annotations.Test;
+
+public class ResidueCountTest
+{
+ /**
+ * Test a mix of add and put for nucleotide counting
+ */
+ @Test(groups = "Functional")
+ public void test_countNucleotide()
+ {
+ ResidueCount rc = new ResidueCount(true);
+ assertEquals(rc.getCount('A'), 0);
+ assertEquals(rc.getGapCount(), 0);
+ // add then add
+ assertEquals(rc.add('A'), 1);
+ assertEquals(rc.add('a'), 2);
+ // put then add
+ rc.put('g', 3);
+ assertEquals(rc.add('G'), 4);
+ // add then put
+ assertEquals(rc.add('c'), 1);
+ rc.put('C', 4);
+ assertEquals(rc.add('N'), 1);
+
+ assertEquals(rc.getCount('a'), 2);
+ assertEquals(rc.getCount('A'), 2);
+ assertEquals(rc.getCount('G'), 4);
+ assertEquals(rc.getCount('c'), 4);
+ assertEquals(rc.getCount('T'), 0); // never seen
+ assertEquals(rc.getCount('N'), 1);
+ assertEquals(rc.getCount('?'), 0);
+ assertEquals(rc.getCount('-'), 0);
+
+ assertFalse(rc.isCountingInts());
+ assertFalse(rc.isUsingOtherData());
+ }
+
+ /**
+ * Test adding to gap count (either using addGap or add)
+ */
+ @Test(groups = "Functional")
+ public void testAddGap()
+ {
+ ResidueCount rc = new ResidueCount(true);
+ rc.addGap();
+ rc.add('-');
+ rc.add('.');
+ rc.add(' ');
+
+ assertEquals(rc.getGapCount(), 4);
+ assertEquals(rc.getCount(' '), 4);
+ assertEquals(rc.getCount('-'), 4);
+ assertEquals(rc.getCount('.'), 4);
+ assertFalse(rc.isUsingOtherData());
+ assertFalse(rc.isCountingInts());
+ }
+
+ @Test(groups = "Functional")
+ public void testOverflow()
+ {
+ /*
+ * overflow from add
+ */
+ ResidueCount rc = new ResidueCount(true);
+ rc.put('A', Short.MAX_VALUE - 1);
+ assertFalse(rc.isCountingInts());
+ rc.add('A');
+ assertFalse(rc.isCountingInts());
+ rc.add('A');
+ assertTrue(rc.isCountingInts());
+ assertEquals(rc.getCount('a'), Short.MAX_VALUE + 1);
+
+ /*
+ * overflow from put
+ */
+ rc = new ResidueCount(true);
+ rc.put('G', Short.MAX_VALUE + 1);
+ assertTrue(rc.isCountingInts());
+ assertEquals(rc.getCount('g'), Short.MAX_VALUE + 1);
+ }
+
+ /**
+ * Test a mix of add and put for peptide counting
+ */
+ @Test(groups = "Functional")
+ public void test_countPeptide()
+ {
+ ResidueCount rc = new ResidueCount(false);
+ rc.put('q', 4);
+ rc.add('Q');
+ rc.add('X');
+ rc.add('x');
+ rc.add('W');
+ rc.put('w', 7);
+ rc.put('m', 12);
+ rc.put('M', 13);
+
+ assertEquals(rc.getCount('q'), 5);
+ assertEquals(rc.getCount('X'), 2);
+ assertEquals(rc.getCount('W'), 7);
+ assertEquals(rc.getCount('m'), 13);
+ assertEquals(rc.getCount('G'), 0);
+ assertEquals(rc.getCount('-'), 0);
+
+ assertFalse(rc.isCountingInts());
+ assertFalse(rc.isUsingOtherData());
+ }
+
+ @Test(groups = "Functional")
+ public void test_unexpectedPeptide()
+ {
+ ResidueCount rc = new ResidueCount(false);
+ // expected characters (upper or lower case):
+ String aas = "ACDEFGHIKLMNPQRSTVWXY";
+ String lower = aas.toLowerCase();
+ for (int i = 0; i < aas.length(); i++)
+ {
+ rc.put(aas.charAt(i), i);
+ rc.add(lower.charAt(i));
+ }
+ for (int i = 0; i < aas.length(); i++)
+ {
+ assertEquals(rc.getCount(aas.charAt(i)), i + 1);
+ }
+ assertFalse(rc.isUsingOtherData());
+
+ rc.put('J', 4);
+ assertTrue(rc.isUsingOtherData());
+ }
+
+ @Test(groups = "Functional")
+ public void test_unexpectedNucleotide()
+ {
+ ResidueCount rc = new ResidueCount(true);
+ // expected characters (upper or lower case):
+ String nucs = "ACGTUN";
+ String lower = nucs.toLowerCase();
+ for (int i = 0; i < nucs.length(); i++)
+ {
+ rc.put(nucs.charAt(i), i);
+ rc.add(lower.charAt(i));
+ }
+ for (int i = 0; i < nucs.length(); i++)
+ {
+ assertEquals(rc.getCount(nucs.charAt(i)), i + 1);
+ }
+ assertFalse(rc.isUsingOtherData());
+
+ rc.add('J');
+ assertTrue(rc.isUsingOtherData());
+ }
+
+ @Test(groups = "Functional")
+ public void testGetModalCount()
+ {
+ ResidueCount rc = new ResidueCount();
+ rc.add('c');
+ rc.add('g');
+ rc.add('c');
+ assertEquals(rc.getModalCount(), 2);
+
+ // modal count is in the 'short overflow' counts
+ rc = new ResidueCount();
+ rc.add('c');
+ rc.put('g', Short.MAX_VALUE);
+ rc.add('G');
+ assertEquals(rc.getModalCount(), Short.MAX_VALUE + 1);
+
+ // modal count is in the 'other data' counts
+ rc = new ResidueCount();
+ rc.add('Q');
+ rc.add('{');
+ rc.add('{');
+ assertEquals(rc.getModalCount(), 2);
+
+ // verify modal count excludes gap
+ rc = new ResidueCount();
+ rc.add('Q');
+ rc.add('P');
+ rc.add('Q');
+ rc.addGap();
+ rc.addGap();
+ rc.addGap();
+ assertEquals(rc.getModalCount(), 2);
+ }
+
+ @Test(groups = "Functional")
+ public void testGetResiduesForCount()
+ {
+ ResidueCount rc = new ResidueCount();
+ rc.add('c');
+ rc.add('g');
+ rc.add('c');
+ assertEquals(rc.getResiduesForCount(2), "C");
+ assertEquals(rc.getResiduesForCount(1), "G");
+ assertEquals(rc.getResiduesForCount(3), "");
+ assertEquals(rc.getResiduesForCount(0), "");
+ assertEquals(rc.getResiduesForCount(-1), "");
+
+ // modal count is in the 'short overflow' counts
+ rc = new ResidueCount();
+ rc.add('c');
+ rc.put('g', Short.MAX_VALUE);
+ rc.add('G');
+ assertEquals(rc.getResiduesForCount(Short.MAX_VALUE + 1), "G");
+ assertEquals(rc.getResiduesForCount(1), "C");
+
+ // modal count is in the 'other data' counts
+ rc = new ResidueCount();
+ rc.add('Q');
+ rc.add('{');
+ rc.add('{');
+ assertEquals(rc.getResiduesForCount(1), "Q");
+ assertEquals(rc.getResiduesForCount(2), "{");
+
+ // residues share modal count
+ rc = new ResidueCount();
+ rc.add('G');
+ rc.add('G');
+ rc.add('c');
+ rc.add('C');
+ rc.add('U');
+ assertEquals(rc.getResiduesForCount(1), "U");
+ assertEquals(rc.getResiduesForCount(2), "CG");
+
+ // expected and unexpected symbols share modal count
+ rc = new ResidueCount();
+ rc.add('G');
+ rc.add('t');
+ rc.add('[');
+ rc.add('[');
+ rc.add('t');
+ rc.add('G');
+ rc.add('c');
+ rc.add('C');
+ rc.add('U');
+ assertEquals(rc.getResiduesForCount(1), "U");
+ assertEquals(rc.getResiduesForCount(2), "CGT[");
+ }
+
+ @Test(groups = "Functional")
+ public void testGetSymbolCounts()
+ {
+ ResidueCount rc = new ResidueCount();
+ rc.add('q');
+ rc.add('c');
+ rc.add('Q');
+ rc.add('J'); // 'otherData'
+ rc.add('q');
+ rc.add('x');
+
+ SymbolCounts sc = rc.getSymbolCounts();
+ Assert.assertArrayEquals(new char[] { 'C', 'Q', 'X', 'J' }, sc.symbols);
+ Assert.assertArrayEquals(new int[] { 1, 3, 1, 1 }, sc.values);
+
+ // now with overflow to int counts
+ rc.put('g', Short.MAX_VALUE);
+ rc.add('g');
+ sc = rc.getSymbolCounts();
+ Assert.assertArrayEquals(new char[] { 'C', 'G', 'Q', 'X', 'J' },
+ sc.symbols);
+ Assert.assertArrayEquals(new int[] { 1, 32768, 3, 1, 1 }, sc.values);
+ }
+
+ @Test(groups = "Functional")
+ public void testToString()
+ {
+ ResidueCount rc = new ResidueCount();
+ rc.add('q');
+ rc.add('c');
+ rc.add('Q');
+ assertEquals(rc.toString(), "[ C:1 Q:2 ]");
+
+ // add 'other data'
+ rc.add('{');
+ assertEquals(rc.toString(), "[ C:1 Q:2 {:1 ]");
+
+ // switch from short to int counting:
+ rc.put('G', Short.MAX_VALUE);
+ rc.add('g');
+ assertEquals(rc.toString(), "[ C:1 G:32768 Q:2 {:1 ]");
+ }
+}
--- /dev/null
+package jalview.ext.android;
+
+import static org.testng.Assert.assertEquals;
+
+import org.testng.annotations.Test;
+
+/*
+ * Tests for SparseIntArray. Unlike SparseShortArray, SparseIntArray does not throw
+ * any exception for overflow.
+ */
+public class SparseIntArrayTest
+{
+ @Test(groups = "Functional")
+ public void testPut()
+ {
+ SparseIntArray counter = new SparseIntArray();
+
+ /*
+ * either key or value may be in the range of int
+ */
+ counter.put(Integer.MAX_VALUE, Integer.MIN_VALUE);
+ counter.put(Integer.MIN_VALUE, Integer.MAX_VALUE);
+ assertEquals(counter.get(Integer.MAX_VALUE), Integer.MIN_VALUE);
+ assertEquals(counter.get(Integer.MIN_VALUE), Integer.MAX_VALUE);
+ }
+
+ @Test(groups = "Functional")
+ public void testAdd()
+ {
+ SparseIntArray counter = new SparseIntArray();
+
+ assertEquals(counter.add('P', 2), 2);
+ assertEquals(counter.add('P', 3), 5);
+ counter.put('Q', 7);
+ assertEquals(counter.add('Q', 4), 11);
+
+ counter.put('x', Integer.MAX_VALUE);
+ counter.add('x', 1);
+
+ counter.put('y', Integer.MIN_VALUE);
+ counter.add('y', -1);
+ }
+}
--- /dev/null
+package jalview.ext.android;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.fail;
+
+import org.testng.annotations.Test;
+
+public class SparseShortArrayTest
+{
+ @Test(groups = "Functional")
+ public void testPut()
+ {
+ SparseShortArray counter = new SparseShortArray();
+
+ /*
+ * either key or value may be in the range of short
+ */
+ counter.put(Short.MAX_VALUE, Short.MIN_VALUE);
+ counter.put(Short.MIN_VALUE, Short.MAX_VALUE);
+
+ // put a too large value
+ try
+ {
+ counter.put(0, Short.MAX_VALUE + 1);
+ fail("Expected exception");
+ } catch (ArithmeticException e)
+ {
+ // expected;
+ }
+
+ // put a too small value
+ try
+ {
+ counter.put(1, Short.MIN_VALUE - 1);
+ fail("Expected exception");
+ } catch (ArithmeticException e)
+ {
+ // expected;
+ }
+
+ // put a too large key
+ try
+ {
+ counter.put(Short.MAX_VALUE + 1, 0);
+ fail("Expected exception");
+ } catch (ArithmeticException e)
+ {
+ // expected;
+ }
+
+ // put a too small key
+ try
+ {
+ counter.put(Short.MIN_VALUE - 1, 2);
+ fail("Expected exception");
+ } catch (ArithmeticException e)
+ {
+ // expected;
+ }
+ }
+
+ @Test(groups = "Functional")
+ public void testAdd()
+ {
+ SparseShortArray counter = new SparseShortArray();
+
+ assertEquals(counter.add('P', 2), 2);
+ assertEquals(counter.add('P', 3), 5);
+ counter.put('Q', 7);
+ assertEquals(counter.add('Q', 4), 11);
+
+ // increment giving overflow
+ counter.put('x', Short.MAX_VALUE);
+ try
+ {
+ counter.add('x', 1);
+ fail("Expected exception");
+ } catch (ArithmeticException e)
+ {
+ // expected;
+ }
+
+ // decrement giving underflow
+ counter.put('y', Short.MIN_VALUE);
+ try
+ {
+ counter.add('y', -1);
+ fail("Expected exception");
+ } catch (ArithmeticException e)
+ {
+ // expected;
+ }
+ }
+}
--- /dev/null
+package jalview.util;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+
+import org.testng.annotations.Test;
+public class SparseCountTest
+{
+ @Test(groups = "Functional")
+ public void testAdd()
+ {
+ SparseCount p = new SparseCount(8);
+ p.add('a', 1);
+ p.add('b', 2);
+ p.add('a', 3);
+ p.add('b', -4);
+ assertEquals(p.size(), 2);
+ assertEquals(p.get('a'), 4);
+ assertEquals(p.get('b'), -2);
+ }
+
+ @Test(groups = "Functional")
+ public void testPut()
+ {
+ SparseCount p = new SparseCount(8);
+ p.put('a', 3);
+ p.add('b', 2);
+ p.put('b', 4);
+ assertEquals(p.size(), 2);
+ assertEquals(p.get('a'), 3);
+ assertEquals(p.get('b'), 4);
+ }
+
+ /**
+ * Test handling overflow of short by switching to counting ints
+ */
+ @Test(groups = "Functional")
+ public void testOverflow()
+ {
+ SparseCount p = new SparseCount(8);
+ p.put('a', Short.MAX_VALUE - 1);
+ p.add('a', 1);
+ assertFalse(p.isUsingInt());
+ p.add('a', 1);
+ assertTrue(p.isUsingInt());
+ }
+
+ /**
+ * Test handling underflow of short by switching to counting ints
+ */
+ @Test(groups = "Functional")
+ public void testUnderflow()
+ {
+ SparseCount p = new SparseCount(8);
+ p.put('a', Short.MIN_VALUE + 1);
+ p.add('a', -1);
+ assertFalse(p.isUsingInt());
+ p.add('a', -1);
+ assertTrue(p.isUsingInt());
+ }
+
+ @Test(groups = "Functional")
+ public void testKeyAt_ValueAt()
+ {
+ SparseCount p = new SparseCount(8);
+ p.put('W', 12);
+ p.put('K', 9);
+ p.put('R', 6);
+ assertEquals(p.size(), 3);
+ assertEquals(p.keyAt(0), 'K');
+ assertEquals(p.valueAt(0), 9);
+ assertEquals(p.keyAt(1), 'R');
+ assertEquals(p.valueAt(1), 6);
+ assertEquals(p.keyAt(2), 'W');
+ assertEquals(p.valueAt(2), 12);
+ }
+
+}