From: gmungoc Date: Mon, 3 Oct 2016 11:45:47 +0000 (+0100) Subject: JAL-98 use Profile to store consensus, ResidueCount for fast compact X-Git-Tag: Release_2_10_1^2~22^2~3^2~15 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=1c757dc1e6ee864277825c1ebd9c6a9fbe0da7b2;p=jalview.git JAL-98 use Profile to store consensus, ResidueCount for fast compact counting --- diff --git a/src/jalview/analysis/AAFrequency.java b/src/jalview/analysis/AAFrequency.java index 49c2340..8b42a01 100755 --- a/src/jalview/analysis/AAFrequency.java +++ b/src/jalview/analysis/AAFrequency.java @@ -20,6 +20,7 @@ */ package jalview.analysis; +import jalview.analysis.ResidueCount.SymbolCounts; import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; @@ -46,8 +47,6 @@ import java.util.List; */ public class AAFrequency { - private static final int TO_UPPER_CASE = 'A' - 'a'; // -32 - public static final String MAXCOUNT = "C"; public static final String MAXRESIDUE = "R"; @@ -73,13 +72,13 @@ public class AAFrequency } } - public static final Hashtable[] calculate(List list, + public static final Profile[] calculate(List list, int start, int end) { return calculate(list, start, end, false); } - public static final Hashtable[] calculate(List sequences, + public static final Profile[] calculate(List sequences, int start, int end, boolean profile) { SequenceI[] seqs = new SequenceI[sequences.size()]; @@ -95,7 +94,7 @@ public class AAFrequency } } - Hashtable[] reply = new Hashtable[width]; + Profile[] reply = new Profile[width]; if (end >= width) { @@ -107,24 +106,43 @@ public class AAFrequency } } - public static final void calculate(SequenceI[] sequences, int start, - int end, Hashtable[] result, boolean profile) + /** + * Calculate the consensus symbol(s) for each column in the given range. + * + * @param sequences + * @param start + * @param end + * @param result + * array in which to store profile per column + * @param saveFullProfile + * if true, store all symbol counts + */ + public static final void calculate(final SequenceI[] sequences, + int start, int end, Profile[] result, boolean saveFullProfile) { - long now = System.currentTimeMillis(); - Hashtable residueHash; + // long now = System.currentTimeMillis(); int seqCount = sequences.length; - char c = '-'; - SparseIntArray profileSizes = new SparseIntArray(); + boolean nucleotide = false; + int nucleotideCount = 0; + int peptideCount = 0; for (int column = start; column < end; column++) { - residueHash = new Hashtable(); - int maxCount = 0; - String maxResidue = ""; - int nongap = 0; - // int [] values = new int[255]; - int guessProfileSize = estimateProfileSize(profileSizes); - SparseIntArray values = new SparseIntArray(guessProfileSize); + /* + * Apply a heuristic to detect nucleotide data (which can + * be counted in more compact arrays); here we test for + * more than 90% nucleotide; recheck every 10 columns in case + * of misleading data e.g. highly conserved Alanine in peptide! + * Mistakenly guessing nucleotide has a small performance cost, + * as it will result in counting in sparse arrays. + * Mistakenly guessing peptide has a small space cost, + * as it will use a larger than necessary array to hold counts. + */ + if (nucleotideCount > 100 && column % 10 == 0) + { + nucleotide = (9 * peptideCount < nucleotideCount); + } + ResidueCount residueCounts = new ResidueCount(nucleotide); for (int row = 0; row < seqCount; row++) { @@ -137,94 +155,42 @@ public class AAFrequency char[] seq = sequences[row].getSequence(); if (seq.length > column) { - c = seq[column]; - - if (Comparison.isGap(c)) + char c = seq[column]; + residueCounts.add(c); + if (Comparison.isNucleotide(c)) { - // values['-']++; - // values.put('-', values.get('-') + 1); - values.add('-', 1); - continue; + nucleotideCount++; } - else if ('a' <= c && c <= 'z') + else if (!Comparison.isGap(c)) { - c += TO_UPPER_CASE; + peptideCount++; } - - nongap++; - // values[c]++; - // values.put(c, values.get(c) + 1); - values.add(c, 1); } else { /* - * here we count as a gap if the sequence doesn't + * here we count a gap if the sequence doesn't * reach this column (is that correct?) */ - // values['-']++; - // values.put('-', values.get('-') + 1); - values.add('-', 1); + residueCounts.addGap(); } } - if (seqCount == 1) - { - maxResidue = String.valueOf(c); - maxCount = 1; - } - else - { - // iterate over values keys not alphabet - // for (int v = 'A'; v <= 'Z'; v++) - for (int k = 0; k < values.size(); k++) - { - int v = values.keyAt(k); - int count = values.valueAt(k); // values[v]; - if (v == '-' || count < 1 || count < maxCount) - { - continue; - } - - if (count > maxCount) - { - maxResidue = String.valueOf((char) v);// CHARS[v - 'A']; - } - else if (count == maxCount) - { - maxResidue += String.valueOf((char) v); // CHARS[v - 'A']; - } - maxCount = count; - } - } - if (maxResidue.length() == 0) - { - maxResidue = "-"; - } - if (profile) - { - // residueHash.put(PROFILE, new int[][] { values, - // new int[] { jSize, nongap } }); - residueHash.put(PROFILE, new Profile(values, seqCount, nongap)); - } - residueHash.put(MAXCOUNT, new Integer(maxCount)); - residueHash.put(MAXRESIDUE, maxResidue); - float percentage = ((float) maxCount * 100) / seqCount; - residueHash.put(PID_GAPS, new Float(percentage)); + int maxCount = residueCounts.getModalCount(); + String maxResidue = residueCounts.getResiduesForCount(maxCount); + int gapCount = residueCounts.getGapCount(); + Profile profile = new Profile(seqCount, gapCount, maxCount, + maxResidue); - if (nongap > 0) + if (saveFullProfile) { - // calculate for non-gapped too - percentage = ((float) maxCount * 100) / nongap; + profile.setCounts(residueCounts); } - residueHash.put(PID_NOGAPS, new Float(percentage)); - - result[column] = residueHash; - profileSizes.add(values.size(), 1); + result[column] = profile; } - long elapsed = System.currentTimeMillis() - now; - System.out.println(elapsed); + // long elapsed = System.currentTimeMillis() - now; + // System.out.println(elapsed); } /** @@ -252,29 +218,6 @@ public class AAFrequency } /** - * Compute all or part of the annotation row from the given consensus - * hashtable - * - * @param consensus - * - pre-allocated annotation row - * @param hconsensus - * @param iStart - * @param width - * @param ignoreGapsInConsensusCalculation - * @param includeAllConsSymbols - * @param nseq - */ - public static void completeConsensus(AlignmentAnnotation consensus, - Hashtable[] hconsensus, int iStart, int width, - boolean ignoreGapsInConsensusCalculation, - boolean includeAllConsSymbols, long nseq) - { - completeConsensus(consensus, hconsensus, iStart, width, - ignoreGapsInConsensusCalculation, includeAllConsSymbols, null, - nseq); - } - - /** * Derive the consensus annotations to be added to the alignment for display. * This does not recompute the raw data, but may be called on a change in * display options, such as 'show logo', which may in turn result in a change @@ -282,149 +225,110 @@ public class AAFrequency * * @param consensus * the annotation row to add annotations to - * @param hconsensus + * @param profiles * the source consensus data * @param iStart * start column * @param width * end column - * @param ignoreGapsInConsensusCalculation - * if true, use the consensus calculated ignoring gaps - * @param includeAllConsSymbols + * @param ignoreGaps + * if true, normalise residue percentages ignoring gaps + * @param showSequenceLogo * if true include all consensus symbols, else just show modal * residue - * @param alphabet * @param nseq * number of sequences */ public static void completeConsensus(AlignmentAnnotation consensus, - Hashtable[] hconsensus, int iStart, int width, - boolean ignoreGapsInConsensusCalculation, - boolean includeAllConsSymbols, char[] alphabet, long nseq) + Profile[] profiles, int iStart, int width, boolean ignoreGaps, + boolean showSequenceLogo, long nseq) { - long now = System.currentTimeMillis(); + // long now = System.currentTimeMillis(); if (consensus == null || consensus.annotations == null || consensus.annotations.length < width) { - // called with a bad alignment annotation row - wait for it to be - // initialised properly + /* + * called with a bad alignment annotation row + * wait for it to be initialised properly + */ return; } - final Format fmt = getPercentageFormat(nseq); + final int dp = getPercentageDp(nseq); for (int i = iStart; i < width; i++) { - Hashtable hci; - if (i >= hconsensus.length || ((hci = hconsensus[i]) == null)) - { - // happens if sequences calculated over were shorter than alignment - // width - consensus.annotations[i] = null; - continue; - } - Float fv = (Float) hci - .get(ignoreGapsInConsensusCalculation ? PID_NOGAPS : PID_GAPS); - if (fv == null) + Profile profile; + if (i >= profiles.length || ((profile = profiles[i]) == null)) { + /* + * happens if sequences calculated over were + * shorter than alignment width + */ consensus.annotations[i] = null; - // data has changed below us .. give up and continue; } - float value = fv.floatValue(); - String maxRes = hci.get(AAFrequency.MAXRESIDUE).toString(); - StringBuilder mouseOver = new StringBuilder(64); - if (maxRes.length() > 1) - { - mouseOver.append("[").append(maxRes).append("] "); - maxRes = "+"; - } - else - { - mouseOver.append(hci.get(AAFrequency.MAXRESIDUE) + " "); - } - // int[][] profile = (int[][]) hci.get(AAFrequency.PROFILE); - Profile profile = (Profile) hci.get(AAFrequency.PROFILE); - if (profile != null && includeAllConsSymbols) - { - int sequenceCount = profile.height;// profile[1][0]; - int nonGappedCount = profile.nonGapped;// [1][1]; - int normalisedBy = ignoreGapsInConsensusCalculation ? nonGappedCount - : sequenceCount; - mouseOver.setLength(0); - // TODO do this sort once only in calculate()? - // char[][] ca = new char[profile[0].length][]; - // /int length = profile[0].length; - int length = profile.profile.size(); - char[] ca = new char[length]; - // float[] vl = new float[length]; - int[] vl = new int[length]; - for (int c = 0; c < ca.length; c++) - { - int theChar = profile.profile.keyAt(c); - ca[c] = (char) theChar;// c; - // ca[c] = new char[] - // { (char) c }; - vl[c] = profile.profile.valueAt(c);// profile[0][c]; - } - /* - * sort characters into ascending order of their counts - */ - QuickSort.sort(vl, ca); + float value = profile.getPercentageIdentity(ignoreGaps); - /* - * traverse in reverse order (highest count first) to build tooltip - */ - // for (int p = 0, c = ca.length - 1; profile[0][ca[c]] > 0; c--) - for (int p = 0, c = ca.length - 1; c >= 0; c--) - { - final char residue = ca[c]; - if (residue != '-') - { - // float tval = profile[0][residue] * 100f / normalisedBy; - // float tval = profile[0][residue] * 100f / normalisedBy; - float tval = (vl[c] * 100f) / normalisedBy; - mouseOver - .append((((p == 0) ? "" : "; "))) - .append(residue) - .append(" ") - .append(((fmt != null) ? fmt.form(tval) : ((int) tval))) - .append("%"); - p++; - } - } - } - else - { - mouseOver.append( - (((fmt != null) ? fmt.form(value) : ((int) value)))) - .append("%"); - } - consensus.annotations[i] = new Annotation(maxRes, - mouseOver.toString(), ' ', value); + String description = getTooltip(profile, value, showSequenceLogo, + ignoreGaps, dp); + + consensus.annotations[i] = new Annotation(profile.getModalResidue(), + description, ' ', value); } - long elapsed = System.currentTimeMillis() - now; - System.out.println(-elapsed); + // long elapsed = System.currentTimeMillis() - now; + // System.out.println(-elapsed); } /** - * Returns a Format designed to show all significant figures for profile - * percentages. For less than 100 sequences, returns null (the integer - * percentage value will be displayed). For 100-999 sequences, returns "%3.1f" + * Returns a tooltip showing either + *
    + *
  • the full profile (percentages of all residues present), if + * showSequenceLogo is true, or
  • + *
  • just the modal (most common) residue(s), if showSequenceLogo is false
  • + *
+ * Percentages are as a fraction of all sequence, or only ungapped sequences + * if ignoreGaps is true. * - * @param nseq + * @param profile + * @param pid + * @param showSequenceLogo + * @param ignoreGaps + * @param dp + * the number of decimal places to format percentages to * @return */ - protected static Format getPercentageFormat(long nseq) + static String getTooltip(Profile profile, float pid, + boolean showSequenceLogo, boolean ignoreGaps, int dp) { - int scale = 0; - while (nseq >= 10) + ResidueCount counts = profile.getCounts(); + + String description = null; + if (counts != null && showSequenceLogo) { - scale++; - nseq /= 10; + int normaliseBy = ignoreGaps ? profile.getNonGapped() : profile + .getHeight(); + description = counts.getTooltip(normaliseBy, dp); + } + else + { + StringBuilder sb = new StringBuilder(64); + String maxRes = profile.getModalResidue(); + if (maxRes.length() > 1) + { + sb.append("[").append(maxRes).append("] "); + maxRes = "+"; + } + else + { + sb.append(maxRes).append(" "); + } + Format.appendPercentage(sb, pid, dp); + sb.append("%"); + description = sb.toString(); } - return scale <= 1 ? null : new Format("%3." + (scale - 1) + "f"); + return description; } /** @@ -436,64 +340,47 @@ public class AAFrequency * in descending order of percentage value * * - * @param hconsensus - * the data table from which to extract and sort values + * @param profile + * the data object from which to extract and sort values * @param ignoreGaps * if true, only non-gapped values are included in percentage * calculations * @return */ - public static int[] extractProfile(Hashtable hconsensus, + public static int[] extractProfile(Profile profile, boolean ignoreGaps) { int[] rtnval = new int[64]; - // int[][] profile = (int[][]) hconsensus.get(AAFrequency.PROFILE); - Profile profile = (Profile) hconsensus.get(AAFrequency.PROFILE); - if (profile == null) + ResidueCount counts = profile.getCounts(); + if (counts == null) { return null; } - // int profileLength = profile[0].length; - int profileLength = profile.profile.size(); - char[] ca = new char[profileLength]; - float[] vl = new float[profileLength]; - // for (int c = 0; c < ca.length; c++) - // { - // ca[c] = (char) c; - // vl[c] = profile[0][c]; - // } - for (int i = 0; i < profileLength; i++) - { - int c = profile.profile.keyAt(i); - ca[i] = (char) c; - vl[i] = profile.profile.valueAt(i); - } - QuickSort.sort(vl, ca); + + SymbolCounts symbolCounts = counts.getSymbolCounts(); + char[] symbols = symbolCounts.symbols; + int[] values = symbolCounts.values; + QuickSort.sort(values, symbols); int nextArrayPos = 2; int totalPercentage = 0; - int distinctValuesCount = 0; - final int divisor = ignoreGaps ? profile.nonGapped : profile.height; - // final int divisor = profile[1][ignoreGaps ? 1 : 0]; - int j = profile.profile.size(); - for (int i = 0; i < j; i++) -// for (int c = ca.length - 1; profile[0][ca[c]] > 0; c--) + // FIXME what if all gapped (divisor is zero)? + final int divisor = ignoreGaps ? profile.getNonGapped() : profile + .getHeight(); + + /* + * traverse the arrays in reverse order (highest counts first) + */ + for (int i = symbols.length - 1; i >= 0; i--) { - int theChar = profile.profile.keyAt(i); - int charCount = profile.profile.valueAt(i); - -// if (ca[c] != '-') - if (theChar != '-') - { -// rtnval[nextArrayPos++] = ca[c]; - rtnval[nextArrayPos++] = theChar; -// final int percentage = (int) (profile[0][ca[c]] * 100f / divisor); - final int percentage = (charCount * 100) / divisor; - rtnval[nextArrayPos++] = percentage; - totalPercentage += percentage; - distinctValuesCount++; - } + int theChar = symbols[i]; + int charCount = values[i]; + + rtnval[nextArrayPos++] = theChar; + final int percentage = (charCount * 100) / divisor; + rtnval[nextArrayPos++] = percentage; + totalPercentage += percentage; } - rtnval[0] = distinctValuesCount; + rtnval[0] = symbols.length; rtnval[1] = totalPercentage; int[] result = new int[rtnval.length + 1]; result[0] = AlignmentAnnotation.SEQUENCE_PROFILE; @@ -702,7 +589,7 @@ public class AAFrequency StringBuilder samePercent = new StringBuilder(); String percent = null; String lastPercent = null; - Format fmt = getPercentageFormat(nseqs); + int percentDecPl = getPercentageDp(nseqs); for (int j = codons.length - 1; j >= 0; j--) { @@ -724,7 +611,9 @@ public class AAFrequency final int pct = codonCount * 100 / totalCount; String codon = String .valueOf(CodingUtils.decodeCodon(codonEncoded)); - percent = fmt == null ? Integer.toString(pct) : fmt.form(pct); + StringBuilder sb = new StringBuilder(); + Format.appendPercentage(sb, pct, percentDecPl); + percent = sb.toString(); if (showProfileLogo || codonCount == modalCodonCount) { if (percent.equals(lastPercent) && j > 0) @@ -750,4 +639,23 @@ public class AAFrequency mouseOver.toString(), ' ', pid); } } + + /** + * Returns the number of decimal places to show for profile percentages. For + * less than 100 sequences, returns zero (the integer percentage value will be + * displayed). For 100-999 sequences, returns 1, for 1000-9999 returns 2, etc. + * + * @param nseq + * @return + */ + protected static int getPercentageDp(long nseq) + { + int scale = 0; + while (nseq >= 100) + { + scale++; + nseq /= 10; + } + return scale; + } } diff --git a/src/jalview/analysis/Profile.java b/src/jalview/analysis/Profile.java index b5857c7..b27da9f 100644 --- a/src/jalview/analysis/Profile.java +++ b/src/jalview/analysis/Profile.java @@ -1,33 +1,130 @@ package jalview.analysis; -import jalview.ext.android.SparseIntArray; +/** + * A data bean to hold the result of computing a profile for a column of an + * alignment + * + * @author gmcarstairs + * + */ public class Profile { /* - * array of keys (chars) and values (counts) + * counts of keys (chars) */ - public final SparseIntArray profile; + private ResidueCount counts; /* * the number of sequences in the profile */ - public final int height; + private int height; /* * the number of non-gapped sequences in the profile */ - public final int nonGapped; + private int gapped; - public Profile(SparseIntArray counts, int ht, int nongappedCount) + /* + * the highest count for any residue in the profile + */ + private int maxCount; + + /* + * the residue (e.g. K) or residues (e.g. KQW) with the + * highest count in the profile + */ + private String modalResidue; + + /** + * Constructor which allows derived data to be stored without having to store + * the full profile + * + * @param seqCount + * the number of sequences in the profile + * @param gaps + * the number of gapped sequences + * @param max + * the highest count for any residue + * @param modalres + * the residue (or concatenated residues) with the highest count + */ + public Profile(int seqCount, int gaps, int max, String modalRes) + { + this.height = seqCount; + this.gapped = gaps; + this.maxCount = max; + this.modalResidue = modalRes; + } + + /** + * Set the full profile of counts + * + * @param residueCounts + */ + public void setCounts(ResidueCount residueCounts) + { + this.counts = residueCounts; + } + + /** + * Returns the percentage identity of the profile, i.e. the highest proportion + * of conserved (equal) symbols. The percentage is as a fraction of all + * sequences, or only ungapped sequences if flag ignoreGaps is set true. + * + * @param ignoreGaps + * @return + */ + public float getPercentageIdentity(boolean ignoreGaps) { - this.profile = counts; - this.height = ht; - this.nonGapped = nongappedCount; + if (height == 0) + { + return 0f; + } + float pid = 0f; + if (ignoreGaps && gapped < height) + { + pid = (maxCount * 100f) / (height - gapped); + } + else + { + pid = (maxCount * 100f) / height; + } + return pid; } - public SparseIntArray getProfile() + public ResidueCount getCounts() + { + return counts; + } + + public int getHeight() + { + return height; + } + + public int getGapped() + { + return gapped; + } + + public int getMaxCount() + { + return maxCount; + } + + public String getModalResidue() + { + return modalResidue; + } + + /** + * Answers the number of non-gapped sequences in the profile + * + * @return + */ + public int getNonGapped() { - return profile; + return height - gapped; } } diff --git a/src/jalview/analysis/ResidueCount.java b/src/jalview/analysis/ResidueCount.java new file mode 100644 index 0000000..2c7cb20 --- /dev/null +++ b/src/jalview/analysis/ResidueCount.java @@ -0,0 +1,616 @@ +package jalview.analysis; + +import jalview.util.Comparison; +import jalview.util.Format; +import jalview.util.QuickSort; +import jalview.util.SparseCount; + +/** + * A class to count occurrences of residues in a profile, optimised for speed + * and memory footprint. + * @author gmcarstairs + * + */ +public class ResidueCount +{ + /** + * A data bean to hold the results of counting symbols + */ + public class SymbolCounts + { + /** + * the symbols seen (as char values) + */ + public final char[] symbols; + + /** + * the counts for each symbol, in the same order as the symbols + */ + public final int[] values; + + SymbolCounts(char[] s, int[] v) + { + symbols = s; + values = v; + } + } + + private static final int TOUPPERCASE = 'A' - 'a'; + + /* + * nucleotide symbols to count (including N unknown) + */ + private static final String NUCS = "ACGTUN"; + + /* + * amino acid symbols to count (including X unknown) + * NB we also include U so as to support counting of RNA bases + * in the "don't know" case of nucleotide / peptide + */ + private static final String AAS = "ACDEFGHIKLMNPQRSTUVWXY"; + + private static final int GAP_COUNT = 0; + + /* + * fast lookup tables holding the index into our count + * arrays of each symbol; index 0 is reserved for gap counting + */ + private static int[] NUC_INDEX = new int[26]; + + private static int[] AA_INDEX = new int[26]; + static + { + for (int i = 0; i < NUCS.length(); i++) + { + NUC_INDEX[NUCS.charAt(i) - 'A'] = i + 1; + } + for (int i = 0; i < AAS.length(); i++) + { + AA_INDEX[AAS.charAt(i) - 'A'] = i + 1; + } + } + + /* + * counts array, just big enough for the nucleotide or peptide + * character set (plus gap counts in position 0) + */ + private short[] counts; + + /* + * alternative array of int counts for use if any count + * exceeds the maximum value of short (32767) + */ + private int[] intCounts; + + /* + * flag set if we switch from short to int counts + */ + private boolean useIntCounts; + + /* + * general-purpose counter, only for use for characters + * that are not in the expected alphabet + */ + private SparseCount otherData; + + /* + * keeps track of the maximum count value recorded + * (if this class every allows decrements, would need to + * calculate this on request instead) + */ + int maxCount; + + /* + * if we think we are counting nucleotide, can get by with smaller + * array to hold counts + */ + private boolean isNucleotide; + + /** + * Default constructor allocates arrays able to count either nucleotide or + * peptide bases. Use this constructor if not sure which the data is. + */ + public ResidueCount() + { + this(false); + } + + /** + * Constructor that allocates an array just big enough for the anticipated + * characters, plus one position to count gaps + */ + public ResidueCount(boolean nucleotide) + { + isNucleotide = nucleotide; + int charsToCount = nucleotide ? NUCS.length() : AAS.length(); + counts = new short[charsToCount + 1]; + } + + /** + * Increments the count for the given character. The supplied character may be + * upper or lower case but counts are for the upper case only. Gap characters + * (space, ., -) are all counted together. + * + * @param c + * @return the new value of the count for the character + */ + public int add(char c) + { + int newValue = 0; + int offset = getOffset(c); + + /* + * offset 0 is reserved for gap counting, so 0 here means either + * an unexpected character, or a gap character passed in error + */ + if (offset == 0) + { + if (Comparison.isGap(c)) + { + newValue = addGap(); + } + else + { + newValue = addOtherCharacter(c); + } + } + else + { + newValue = increment(offset); + } + return newValue; + } + + /** + * Increment the count at the specified offset. If this would result in short + * overflow, promote to counting int values instead. + * + * @param offset + * @return the new value of the count at this offset + */ + int increment(int offset) + { + int newValue = 0; + if (useIntCounts) + { + newValue = intCounts[offset]; + intCounts[offset] = ++newValue; + } + else + { + if (counts[offset] == Short.MAX_VALUE) + { + handleOverflow(); + newValue = intCounts[offset]; + intCounts[offset] = ++newValue; + } + else + { + newValue = counts[offset]; + counts[offset] = (short) ++newValue; + } + } + maxCount = Math.max(maxCount, newValue); + return newValue; + } + + /** + * Switch from counting in short to counting in int + */ + synchronized void handleOverflow() + { + intCounts = new int[counts.length]; + for (int i = 0; i < counts.length; i++) + { + intCounts[i] = counts[i]; + } + counts = null; + useIntCounts = true; + } + + /** + * @param c + * @return + */ + int getOffset(char c) + { + /* + * ensure upper-case (fails fast if it already is!) + */ + if ('a' <= c && c <= 'z') + { + c = (char) (c + TOUPPERCASE); + } + + /* + * locate this character's offset in the count array + */ + int offset = 0; + if ('A' <= c && c <= 'Z') + { + offset = isNucleotide ? NUC_INDEX[c - 'A'] : AA_INDEX[c - 'A']; + } + return offset; + } + + /** + * Increment count for some unanticipated character. The first time this + * called, a SparseCount is instantiated to hold these 'extra' counts. + * + * @param c + * @return the new value of the count for the character + */ + int addOtherCharacter(char c) + { + if (otherData == null) + { + otherData = new SparseCount(); + } + int newValue = otherData.add(c, 1); + maxCount = Math.max(maxCount, newValue); + return newValue; + } + + /** + * Set count for some unanticipated character. The first time this called, a + * SparseCount is instantiated to hold these 'extra' counts. + * + * @param c + * @param value + */ + void setOtherCharacter(char c, int value) + { + if (otherData == null) + { + otherData = new SparseCount(); + } + otherData.put(c, value); + } + + /** + * Increment count of gap characters + * + * @return the new count of gaps + */ + public int addGap() + { + int newValue; + if (useIntCounts) + { + newValue = ++intCounts[GAP_COUNT]; + } + else + { + newValue = ++counts[GAP_COUNT]; + } + return newValue; + } + + /** + * Answers true if we are counting ints (only after overflow of short counts) + * + * @return + */ + boolean isCountingInts() + { + return useIntCounts; + } + + /** + * Sets the count for the given character. The supplied character may be upper + * or lower case but counts are for the upper case only. + * + * @param c + * @param count + */ + public void put(char c, int count) + { + int offset = getOffset(c); + + /* + * offset 0 is reserved for gap counting, so 0 here means either + * an unexpected character, or a gap character passed in error + */ + if (offset == 0) + { + if (Comparison.isGap(c)) + { + addGap(); + } + else + { + setOtherCharacter(c, count); + maxCount = Math.max(maxCount, count); + } + } + else + { + set(offset, count); + maxCount = Math.max(maxCount, count); + } + } + + /** + * Sets the count at the specified offset. If this would result in short + * overflow, promote to counting int values instead. + * + * @param offset + * @param value + */ + void set(int offset, int value) + { + if (useIntCounts) + { + intCounts[offset] = value; + } + else + { + if (value > Short.MAX_VALUE || value < Short.MIN_VALUE) + { + handleOverflow(); + intCounts[offset] = value; + } + else + { + counts[offset] = (short) value; + } + } + } + + /** + * Returns the count for the given character, or zero if no count held + * + * @param c + * @return + */ + public int getCount(char c) + { + int offset = getOffset(c); + if (offset == 0) + { + if (!Comparison.isGap(c)) + { + // should have called getGapCount() + return otherData == null ? 0 : otherData.get(c); + } + } + return useIntCounts ? intCounts[offset] : counts[offset]; + } + + public int getGapCount() + { + return useIntCounts ? intCounts[0] : counts[0]; + } + + /** + * Answers true if this object wraps a counter for unexpected characters + * + * @return + */ + boolean isUsingOtherData() + { + return otherData != null; + } + + /** + * Returns the character (or concatenated characters) for the symbol(s) with + * the given count in the profile. Can be used to get the modal residue by + * supplying the modal count value. Returns an empty string if symbol has the + * given count. The symbols are in alphabetic order of standard peptide or + * nucleotide characters, followed by 'other' symbols if any. + * + * @return + */ + public String getResiduesForCount(int count) + { + if (count == 0) + { + return ""; + } + + /* + * find counts for the given value and append the + * corresponding symbol + */ + StringBuilder modal = new StringBuilder(); + if (useIntCounts) + { + for (int i = 1; i < intCounts.length; i++) + { + if (intCounts[i] == count) + { + modal.append(isNucleotide ? NUCS.charAt(i - 1) : AAS + .charAt(i - 1)); + } + } + } + else + { + for (int i = 1; i < counts.length; i++) + { + if (counts[i] == count) + { + modal.append(isNucleotide ? NUCS.charAt(i - 1) : AAS + .charAt(i - 1)); + } + } + } + if (otherData != null) + { + for (int i = 0; i < otherData.size(); i++) + { + if (otherData.valueAt(i) == count) + { + modal.append((char) otherData.keyAt(i)); + } + } + } + return modal.toString(); + } + + /** + * Returns the highest count for any symbol in the profile (excluding gap) + * + * @return + */ + public int getModalCount() + { + return maxCount; + } + + /** + * Returns the number of distinct symbols with a non-zero count (excluding the + * gap symbol) + * + * @return + */ + public int size() { + int size = 0; + if (useIntCounts) + { + for (int i = 1; i < intCounts.length; i++) + { + if (intCounts[i] > 0) + { + size++; + } + } + } + else + { + for (int i = 1; i < counts.length; i++) + { + if (counts[i] > 0) + { + size++; + } + } + } + if (otherData != null) + { + size += otherData.size(); + } + + return size; + } + + /** + * Returns those symbols that have a non-zero count (excluding the gap + * symbol), with their counts. The symbols are in no special order. Returns an + * array of size 2 whose first element is a char array of symbols, and second + * element an int array of corresponding counts. + * + * @return an array [[char1, char2, ...] [char1Count, char2Count, ...] ... ] + */ + public SymbolCounts getSymbolCounts() + { + char[] symbols = new char[size()]; + int[] values = new int[size()]; + int j = 0; + + if (useIntCounts) + { + for (int i = 1; i < intCounts.length; i++) + { + if (intCounts[i] > 0) + { + char symbol = isNucleotide ? NUCS.charAt(i - 1) : AAS + .charAt(i - 1); + symbols[j] = symbol; + values[j] = intCounts[i]; + j++; + } + } + } + else + { + for (int i = 1; i < counts.length; i++) + { + if (counts[i] > 0) + { + char symbol = isNucleotide ? NUCS.charAt(i - 1) : AAS + .charAt(i - 1); + symbols[j] = symbol; + values[j] = counts[i]; + j++; + } + } + } + if (otherData != null) + { + for (int i = 0; i < otherData.size(); i++) + { + int value = otherData.valueAt(i); + if (value > 0) + { + symbols[j] = (char) otherData.keyAt(i); + values[j] = otherData.valueAt(i); + j++; + } + } + } + + return new SymbolCounts(symbols, values); + } + + /** + * Returns a tooltip string showing residues in descending order of their + * percentage frequency in the profile + * + * @param normaliseBy + * the divisor for residue counts (may or may not include gapped + * sequence count) + * @param percentageDecPl + * the number of decimal places to show in percentages + * @return + */ + public String getTooltip(int normaliseBy, int percentageDecPl) + { + StringBuilder sb = new StringBuilder(64); + SymbolCounts symbolCounts = getSymbolCounts(); + char[] ca = symbolCounts.symbols; + int[] vl = symbolCounts.values; + + /* + * sort characters into ascending order of their counts + */ + QuickSort.sort(vl, ca); + + /* + * traverse in reverse order (highest count first) to build tooltip + */ + for (int p = 0, c = ca.length - 1; c >= 0; c--) + { + final char residue = ca[c]; + if (residue != '-') + { + // TODO combine residues which share a percentage + // (see AAFrequency.completeCdnaConsensus) + float tval = (vl[c] * 100f) / normaliseBy; + sb.append((((p == 0) ? "" : "; "))).append(residue) + .append(" "); + Format.appendPercentage(sb, tval, percentageDecPl); + sb.append("%"); + p++; + } + } + return sb.toString(); + } + + /** + * Returns a string representation of the symbol counts, for debug purposes. + */ + @Override + public String toString() + { + StringBuilder sb = new StringBuilder(); + sb.append("[ "); + SymbolCounts sc = getSymbolCounts(); + for (int i = 0; i < sc.symbols.length; i++) + { + sb.append(sc.symbols[i]).append(":").append(sc.values[i]).append(" "); + } + sb.append("]"); + return sb.toString(); + } +} diff --git a/src/jalview/api/AlignViewportI.java b/src/jalview/api/AlignViewportI.java index 0840520..7985504 100644 --- a/src/jalview/api/AlignViewportI.java +++ b/src/jalview/api/AlignViewportI.java @@ -21,6 +21,7 @@ package jalview.api; import jalview.analysis.Conservation; +import jalview.analysis.Profile; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentView; @@ -81,7 +82,7 @@ public interface AlignViewportI extends ViewStyleI ColumnSelection getColumnSelection(); - Hashtable[] getSequenceConsensusHash(); + Profile[] getSequenceConsensusHash(); /** * Get consensus data table for the cDNA complement of this alignment (if any) @@ -144,7 +145,7 @@ public interface AlignViewportI extends ViewStyleI * * @param hconsensus */ - void setSequenceConsensusHash(Hashtable[] hconsensus); + void setSequenceConsensusHash(Profile[] hconsensus); /** * Set the cDNA complement consensus for the viewport diff --git a/src/jalview/datamodel/SequenceGroup.java b/src/jalview/datamodel/SequenceGroup.java index 9a408e3..98fd8f2 100755 --- a/src/jalview/datamodel/SequenceGroup.java +++ b/src/jalview/datamodel/SequenceGroup.java @@ -22,14 +22,13 @@ package jalview.datamodel; import jalview.analysis.AAFrequency; import jalview.analysis.Conservation; +import jalview.analysis.Profile; import jalview.schemes.ColourSchemeI; import java.awt.Color; import java.util.ArrayList; -import java.util.Hashtable; import java.util.List; import java.util.Map; -import java.util.Vector; /** * Collects a set contiguous ranges on a set of sequences @@ -45,8 +44,6 @@ public class SequenceGroup implements AnnotatedCollectionI Conservation conserve; - Vector aaFrequency; - boolean displayBoxes = true; boolean displayText = true; @@ -531,7 +528,7 @@ public class SequenceGroup implements AnnotatedCollectionI boolean upd = false; try { - Hashtable cnsns[] = AAFrequency.calculate(sequences, startRes, + Profile[] cnsns = AAFrequency.calculate(sequences, startRes, endRes + 1, showSequenceLogo); if (consensus != null) { @@ -603,9 +600,9 @@ public class SequenceGroup implements AnnotatedCollectionI c.completeAnnotations(conservation, null, startRes, endRes + 1); } - public Hashtable[] consensusData = null; + public Profile[] consensusData = null; - private void _updateConsensusRow(Hashtable[] cnsns, long nseq) + private void _updateConsensusRow(Profile[] cnsns, long nseq) { if (consensus == null) { diff --git a/src/jalview/ext/android/ContainerHelpers.java b/src/jalview/ext/android/ContainerHelpers.java index cae77b5..f536819 100644 --- a/src/jalview/ext/android/ContainerHelpers.java +++ b/src/jalview/ext/android/ContainerHelpers.java @@ -74,4 +74,29 @@ class ContainerHelpers } return ~lo; // value not present } + + // This is Arrays.binarySearch(), but doesn't do any argument validation. + static int binarySearch(short[] array, int size, short value) + { + int lo = 0; + int hi = size - 1; + while (lo <= hi) + { + final int mid = (lo + hi) >>> 1; + final int midVal = array[mid]; + if (midVal < value) + { + lo = mid + 1; + } + else if (midVal > value) + { + hi = mid - 1; + } + else + { + return mid; // value found + } + } + return ~lo; // value not present + } } diff --git a/src/jalview/ext/android/SparseIntArray.java b/src/jalview/ext/android/SparseIntArray.java index 4ddf776..e286cb4 100644 --- a/src/jalview/ext/android/SparseIntArray.java +++ b/src/jalview/ext/android/SparseIntArray.java @@ -354,13 +354,16 @@ public class SparseIntArray implements Cloneable * * @param key * @oparam toAdd + * @return the new value of the count for the key */ - public void add(int key, int toAdd) + public int add(int key, int toAdd) { + int newValue = toAdd; int i = ContainerHelpers.binarySearch(mKeys, mSize, key); if (i >= 0) { mValues[i] += toAdd; + newValue = mValues[i]; } else { @@ -370,7 +373,6 @@ public class SparseIntArray implements Cloneable int n = idealIntArraySize(mSize + 1); int[] nkeys = new int[n]; int[] nvalues = new int[n]; - // Log.e("SparseIntArray", "grow " + mKeys.length + " to " + n); System.arraycopy(mKeys, 0, nkeys, 0, mKeys.length); System.arraycopy(mValues, 0, nvalues, 0, mValues.length); mKeys = nkeys; @@ -378,7 +380,6 @@ public class SparseIntArray implements Cloneable } if (mSize - i != 0) { - // Log.e("SparseIntArray", "move " + (mSize - i)); System.arraycopy(mKeys, i, mKeys, i + 1, mSize - i); System.arraycopy(mValues, i, mValues, i + 1, mSize - i); } @@ -386,5 +387,6 @@ public class SparseIntArray implements Cloneable mValues[i] = toAdd; mSize++; } + return newValue; } } diff --git a/src/jalview/ext/android/SparseShortArray.java b/src/jalview/ext/android/SparseShortArray.java new file mode 100644 index 0000000..375d745 --- /dev/null +++ b/src/jalview/ext/android/SparseShortArray.java @@ -0,0 +1,439 @@ +package jalview.ext.android; + +/* + * Copyright (C) 2006 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * SparseShortArrays map shorts to shorts. Unlike a normal array of shorts, + * there can be gaps in the indices. It is intended to be more memory efficient + * than using a HashMap to map Shorts to Shorts, both because it avoids + * auto-boxing keys and values and its data structure doesn't rely on an extra + * entry object for each mapping. + * + *

+ * Note that this container keeps its mappings in an array data structure, using + * a binary search to find keys. The implementation is not intended to be + * appropriate for data structures that may contain large numbers of items. It + * is generally slower than a traditional HashMap, since lookups require a + * binary search and adds and removes require inserting and deleting entries in + * the array. For containers holding up to hundreds of items, the performance + * difference is not significant, less than 50%. + *

+ * + *

+ * It is possible to iterate over the items in this container using + * {@link #keyAt(int)} and {@link #valueAt(int)}. Iterating over the keys using + * keyAt(int) with ascending values of the index will return the + * keys in ascending order, or the values corresponding to the keys in ascending + * order in the case of valueAt(int). + *

+ */ +/** + * A copy of SparseShortArray designed to store short values (to minimise space + * usage). + *

+ * Note that operations append, put, add throw ArithmeticException if the + * resulting value overflows the range of a short. + */ +public class SparseShortArray implements Cloneable +{ + private short[] mKeys; + + private short[] mValues; + + private int mSize; + + /** + * Creates a new SparseShortArray containing no mappings. + */ + public SparseShortArray() + { + this(10); + } + + /** + * Creates a new SparseShortArray containing no mappings that will not require + * any additional memory allocation to store the specified number of mappings. + * If you supply an initial capacity of 0, the sparse array will be + * initialized with a light-weight representation not requiring any additional + * array allocations. + */ + public SparseShortArray(int initialCapacity) + { + if (initialCapacity == 0) + { + mKeys = new short[0]; + mValues = new short[0]; + } + else + { + initialCapacity = idealShortArraySize(initialCapacity); + mKeys = new short[initialCapacity]; + mValues = new short[initialCapacity]; + } + mSize = 0; + } + + @Override + public SparseShortArray clone() + { + SparseShortArray clone = null; + try + { + clone = (SparseShortArray) super.clone(); + clone.mKeys = mKeys.clone(); + clone.mValues = mValues.clone(); + } catch (CloneNotSupportedException cnse) + { + /* ignore */ + } + return clone; + } + + /** + * Gets the int mapped from the specified key, or 0 if no such + * mapping has been made. + */ + public int get(int key) + { + return get(key, 0); + } + + /** + * Gets the int mapped from the specified key, or the specified value if no + * such mapping has been made. + * + * @throws ArithmeticException + * if key is outside the range of a short value + */ + public int get(int key, int valueIfKeyNotFound) + { + checkOverflow(key); + int i = ContainerHelpers.binarySearch(mKeys, mSize, (short) key); + if (i < 0) + { + return valueIfKeyNotFound; + } + else + { + return mValues[i]; + } + } + + /** + * Removes the mapping from the specified key, if there was any. + * + * @throws ArithmeticException + * if key is outside the range of a short value + */ + public void delete(int key) + { + checkOverflow(key); + int i = ContainerHelpers.binarySearch(mKeys, mSize, (short) key); + if (i >= 0) + { + removeAt(i); + } + } + + /** + * Removes the mapping at the given index. + */ + public void removeAt(int index) + { + System.arraycopy(mKeys, index + 1, mKeys, index, mSize - (index + 1)); + System.arraycopy(mValues, index + 1, mValues, index, mSize + - (index + 1)); + mSize--; + } + + /** + * Adds a mapping from the specified key to the specified value, replacing the + * previous mapping from the specified key if there was one. + * + * @throws ArithmeticException + * if either argument is outside the range of a short value + */ + public void put(int key, int value) + { + checkOverflow(key); + checkOverflow(value); + int i = ContainerHelpers.binarySearch(mKeys, mSize, (short) key); + if (i >= 0) + { + mValues[i] = (short) value; + } + else + { + i = ~i; + if (mSize >= mKeys.length) + { + int n = idealShortArraySize(mSize + 1); + short[] nkeys = new short[n]; + short[] nvalues = new short[n]; + // Log.e("SparseShortArray", "grow " + mKeys.length + " to " + n); + System.arraycopy(mKeys, 0, nkeys, 0, mKeys.length); + System.arraycopy(mValues, 0, nvalues, 0, mValues.length); + mKeys = nkeys; + mValues = nvalues; + } + if (mSize - i != 0) + { + // Log.e("SparseShortArray", "move " + (mSize - i)); + System.arraycopy(mKeys, i, mKeys, i + 1, mSize - i); + System.arraycopy(mValues, i, mValues, i + 1, mSize - i); + } + mKeys[i] = (short) key; + mValues[i] = (short) value; + mSize++; + } + } + + /** + * Returns the number of key-value mappings that this SparseShortArray + * currently stores. + */ + public int size() + { + return mSize; + } + + /** + * Given an index in the range 0...size()-1, returns the key from + * the indexth key-value mapping that this SparseShortArray + * stores. + * + *

+ * The keys corresponding to indices in ascending order are guaranteed to be + * in ascending order, e.g., keyAt(0) will return the smallest + * key and keyAt(size()-1) will return the largest key. + *

+ */ + public short keyAt(int index) + { + return mKeys[index]; + } + + /** + * Given an index in the range 0...size()-1, returns the value + * from the indexth key-value mapping that this SparseShortArray + * stores. + * + *

+ * The values corresponding to indices in ascending order are guaranteed to be + * associated with keys in ascending order, e.g., valueAt(0) will + * return the value associated with the smallest key and + * valueAt(size()-1) will return the value associated with the + * largest key. + *

+ */ + public short valueAt(int index) + { + return mValues[index]; + } + + /** + * Returns the index for which {@link #keyAt} would return the specified key, + * or a negative number if the specified key is not mapped. + * + * @throws ArithmeticException + * if key is outside the range of a short value + */ + public int indexOfKey(int key) + { + checkOverflow(key); + return ContainerHelpers.binarySearch(mKeys, mSize, (short) key); + } + + /** + * Returns an index for which {@link #valueAt} would return the specified key, + * or a negative number if no keys map to the specified value. Beware that + * this is a linear search, unlike lookups by key, and that multiple keys can + * map to the same value and this will find only one of them. + */ + public int indexOfValue(int value) + { + for (int i = 0; i < mSize; i++) + { + if (mValues[i] == value) + { + return i; + } + } + return -1; + } + + /** + * Removes all key-value mappings from this SparseShortArray. + */ + public void clear() + { + mSize = 0; + } + + /** + * Puts a key/value pair into the array, optimizing for the case where the key + * is greater than all existing keys in the array. + */ + public void append(int key, int value) + { + if (mSize != 0 && key <= mKeys[mSize - 1]) + { + put(key, value); + return; + } + int pos = mSize; + if (pos >= mKeys.length) + { + int n = idealShortArraySize(pos + 1); + short[] nkeys = new short[n]; + short[] nvalues = new short[n]; + // Log.e("SparseShortArray", "grow " + mKeys.length + " to " + n); + System.arraycopy(mKeys, 0, nkeys, 0, mKeys.length); + System.arraycopy(mValues, 0, nvalues, 0, mValues.length); + mKeys = nkeys; + mValues = nvalues; + } + checkOverflow(key); + checkOverflow(value); + mKeys[pos] = (short) key; + mValues[pos] = (short) value; + mSize = pos + 1; + } + + /** + * Throws an exception if the value is outside the range of a short. + * + * @param value + * @throws ArithmeticException + */ + public static void checkOverflow(int value) + { + if (value > Short.MAX_VALUE || value < Short.MIN_VALUE) + { + throw new ArithmeticException(String.valueOf(value)); + } + } + + /** + * Inlined here by copying from com.android.internal.util.ArrayUtils + * + * @param i + * @return + */ + public static int idealShortArraySize(int need) + { + return idealByteArraySize(need * 2) / 2; + } + + /** + * Inlined here by copying from com.android.internal.util.ArrayUtils + * + * @param i + * @return + */ + public static int idealByteArraySize(int need) + { + for (int i = 4; i < 32; i++) + { + if (need <= (1 << i) - 12) + { + return (1 << i) - 12; + } + } + + return need; + } + + /** + * {@inheritDoc} + * + *

+ * This implementation composes a string by iterating over its mappings. + */ + @Override + public String toString() + { + if (size() <= 0) + { + return "{}"; + } + StringBuilder buffer = new StringBuilder(mSize * 28); + buffer.append('{'); + for (int i = 0; i < mSize; i++) + { + if (i > 0) + { + buffer.append(", "); + } + int key = keyAt(i); + buffer.append(key); + buffer.append('='); + int value = valueAt(i); + buffer.append(value); + } + buffer.append('}'); + return buffer.toString(); + } + + /** + * Method (copied from put) added for Jalview to efficiently increment a key's + * value if present, else add it with the given value. This avoids a double + * binary search (once to get the value, again to put the updated value). + * + * @param key + * @oparam toAdd + * @return the new value of the count for the key + * @throws ArithmeticException + * if key, or result of adding toAdd, is outside the range of a + * short value + */ + public int add(int key, int toAdd) + { + int newValue = toAdd; + checkOverflow(key); + int i = ContainerHelpers.binarySearch(mKeys, mSize, (short) key); + if (i >= 0) + { + checkOverflow(toAdd + mValues[i]); + mValues[i] += (short) toAdd; + newValue = mValues[i]; + } + else + { + checkOverflow(toAdd); + i = ~i; + if (mSize >= mKeys.length) + { + int n = idealShortArraySize(mSize + 1); + short[] nkeys = new short[n]; + short[] nvalues = new short[n]; + System.arraycopy(mKeys, 0, nkeys, 0, mKeys.length); + System.arraycopy(mValues, 0, nvalues, 0, mValues.length); + mKeys = nkeys; + mValues = nvalues; + } + if (mSize - i != 0) + { + System.arraycopy(mKeys, i, mKeys, i + 1, mSize - i); + System.arraycopy(mValues, i, mValues, i + 1, mSize - i); + } + mKeys[i] = (short) key; + mValues[i] = (short) toAdd; + mSize++; + } + return newValue; + } +} diff --git a/src/jalview/renderer/AnnotationRenderer.java b/src/jalview/renderer/AnnotationRenderer.java index e58ba02..e12b88a 100644 --- a/src/jalview/renderer/AnnotationRenderer.java +++ b/src/jalview/renderer/AnnotationRenderer.java @@ -22,6 +22,7 @@ package jalview.renderer; import jalview.analysis.AAFrequency; import jalview.analysis.CodingUtils; +import jalview.analysis.Profile; import jalview.analysis.Rna; import jalview.analysis.StructureFrequency; import jalview.api.AlignViewportI; @@ -73,7 +74,7 @@ public class AnnotationRenderer private ColumnSelection columnSelection; - private Hashtable[] hconsensus; + private Profile[] hconsensus; private Hashtable[] complementConsensus; diff --git a/src/jalview/schemes/Blosum62ColourScheme.java b/src/jalview/schemes/Blosum62ColourScheme.java index 9d09259..90f90f0 100755 --- a/src/jalview/schemes/Blosum62ColourScheme.java +++ b/src/jalview/schemes/Blosum62ColourScheme.java @@ -20,10 +20,10 @@ */ package jalview.schemes; -import jalview.analysis.AAFrequency; import jalview.datamodel.AnnotatedCollectionI; import jalview.datamodel.SequenceCollectionI; import jalview.datamodel.SequenceI; +import jalview.util.Comparison; import java.awt.Color; import java.util.Map; @@ -52,9 +52,9 @@ public class Blosum62ColourScheme extends ResidueColourScheme Color currentColour; - if (!jalview.util.Comparison.isGap(res)) + if (!Comparison.isGap(res)) { - String max = (String) consensus[j].get(AAFrequency.MAXRESIDUE); + String max = consensus[j].getModalResidue(); if (max.indexOf(res) > -1) { diff --git a/src/jalview/schemes/ColourSchemeI.java b/src/jalview/schemes/ColourSchemeI.java index effdf59..165ece9 100755 --- a/src/jalview/schemes/ColourSchemeI.java +++ b/src/jalview/schemes/ColourSchemeI.java @@ -20,6 +20,7 @@ */ package jalview.schemes; +import jalview.analysis.Profile; import jalview.datamodel.AnnotatedCollectionI; import jalview.datamodel.SequenceCollectionI; import jalview.datamodel.SequenceI; @@ -52,7 +53,7 @@ public interface ColourSchemeI /** * assign the given consensus profile for the colourscheme */ - public void setConsensus(java.util.Hashtable[] h); + public void setConsensus(Profile[] hconsensus); /** * assign the given conservation to the colourscheme diff --git a/src/jalview/schemes/FollowerColourScheme.java b/src/jalview/schemes/FollowerColourScheme.java index eac467a..0dcf960 100644 --- a/src/jalview/schemes/FollowerColourScheme.java +++ b/src/jalview/schemes/FollowerColourScheme.java @@ -21,8 +21,7 @@ package jalview.schemes; import jalview.analysis.Conservation; - -import java.util.Hashtable; +import jalview.analysis.Profile; /** * Colourscheme that takes its colours from some other colourscheme @@ -41,7 +40,7 @@ public class FollowerColourScheme extends ResidueColourScheme } @Override - public void setConsensus(Hashtable[] consensus) + public void setConsensus(Profile[] consensus) { if (colourScheme != null) { diff --git a/src/jalview/schemes/PIDColourScheme.java b/src/jalview/schemes/PIDColourScheme.java index 9dd763d..5f63ca9 100755 --- a/src/jalview/schemes/PIDColourScheme.java +++ b/src/jalview/schemes/PIDColourScheme.java @@ -20,7 +20,6 @@ */ package jalview.schemes; -import jalview.analysis.AAFrequency; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; @@ -67,11 +66,10 @@ public class PIDColourScheme extends ResidueColourScheme return Color.white; } - if ((Integer - .parseInt(consensus[j].get(AAFrequency.MAXCOUNT).toString()) != -1) - && consensus[j].contains(String.valueOf(c))) + if (consensus[j].getMaxCount() > 0) //!= -1) + //&& consensus[j].contains(String.valueOf(c))) { - sc = ((Float) consensus[j].get(ignoreGaps)).floatValue(); + sc = consensus[j].getPercentageIdentity(ignoreGaps); if (!jalview.util.Comparison.isGap(c)) { diff --git a/src/jalview/schemes/ResidueColourScheme.java b/src/jalview/schemes/ResidueColourScheme.java index bca98cf..e6b28db 100755 --- a/src/jalview/schemes/ResidueColourScheme.java +++ b/src/jalview/schemes/ResidueColourScheme.java @@ -20,15 +20,14 @@ */ package jalview.schemes; -import jalview.analysis.AAFrequency; import jalview.analysis.Conservation; +import jalview.analysis.Profile; import jalview.datamodel.AnnotatedCollectionI; import jalview.datamodel.SequenceCollectionI; import jalview.datamodel.SequenceI; import jalview.util.MessageManager; import java.awt.Color; -import java.util.Hashtable; import java.util.Map; /** @@ -48,10 +47,10 @@ public class ResidueColourScheme implements ColourSchemeI int threshold = 0; /* Set when threshold colouring to either pid_gaps or pid_nogaps */ - protected String ignoreGaps = AAFrequency.PID_GAPS; + protected boolean ignoreGaps = false; /** Consenus as a hashtable array */ - Hashtable[] consensus; + Profile[] consensus; /** Conservation string as a char array */ char[] conservation; @@ -100,6 +99,7 @@ public class ResidueColourScheme implements ColourSchemeI /** * Find a colour without an index in a sequence */ + @Override public Color findColour(char c) { return colors == null ? Color.white : colors[symbolIndex[c]]; @@ -133,6 +133,7 @@ public class ResidueColourScheme implements ColourSchemeI * * @return Returns the percentage threshold */ + @Override public int getThreshold() { return threshold; @@ -144,17 +145,11 @@ public class ResidueColourScheme implements ColourSchemeI * @param ct * DOCUMENT ME! */ + @Override public void setThreshold(int ct, boolean ignoreGaps) { threshold = ct; - if (ignoreGaps) - { - this.ignoreGaps = AAFrequency.PID_NOGAPS; - } - else - { - this.ignoreGaps = AAFrequency.PID_GAPS; - } + this.ignoreGaps = ignoreGaps; } /** @@ -181,10 +176,9 @@ public class ResidueColourScheme implements ColourSchemeI return false; } - if ((((Integer) consensus[j].get(AAFrequency.MAXCOUNT)).intValue() != -1) - && consensus[j].contains(String.valueOf(c))) + if (consensus[j].getMaxCount() > 0) // != -1)) { - if (((Float) consensus[j].get(ignoreGaps)).floatValue() >= threshold) + if (consensus[j].getPercentageIdentity(ignoreGaps) >= threshold) { return true; } @@ -193,6 +187,7 @@ public class ResidueColourScheme implements ColourSchemeI return false; } + @Override public boolean conservationApplied() { return conservationColouring; @@ -204,11 +199,13 @@ public class ResidueColourScheme implements ColourSchemeI conservationColouring = conservationApplied; } + @Override public void setConservationInc(int i) { inc = i; } + @Override public int getConservationInc() { return inc; @@ -220,7 +217,8 @@ public class ResidueColourScheme implements ColourSchemeI * @param consensus * DOCUMENT ME! */ - public void setConsensus(Hashtable[] consensus) + @Override + public void setConsensus(Profile[] consensus) { if (consensus == null) { @@ -230,6 +228,7 @@ public class ResidueColourScheme implements ColourSchemeI this.consensus = consensus; } + @Override public void setConservation(Conservation cons) { if (cons == null) diff --git a/src/jalview/util/SparseCount.java b/src/jalview/util/SparseCount.java new file mode 100644 index 0000000..e6b45f2 --- /dev/null +++ b/src/jalview/util/SparseCount.java @@ -0,0 +1,154 @@ +package jalview.util; + +import jalview.ext.android.SparseIntArray; +import jalview.ext.android.SparseShortArray; + +/** + * A class to count occurrences of characters with minimal memory footprint. + * Sparse arrays of short values are used to hold the counts, with automatic + * promotion to arrays of int if any count exceeds the maximum value for a + * short. + * + * @author gmcarstairs + * + */ +public class SparseCount +{ + private static final int DEFAULT_PROFILE_SIZE = 2; + + /* + * array of keys (chars) and values (counts) + * held either as shorts or (if shorts overflow) as ints + */ + private SparseShortArray shortProfile; + + private SparseIntArray intProfile; + + /* + * flag is set true after short overflow occurs + */ + private boolean useInts; + + /** + * Constructor which initially creates a new sparse array of short values to + * hold counts. + * + * @param profileSize + */ + public SparseCount(int profileSize) + { + this.shortProfile = new SparseShortArray(profileSize); + } + + /** + * Constructor which allocates an initial count array for only two distinct + * values (the array will grow if needed) + */ + public SparseCount() + { + this(DEFAULT_PROFILE_SIZE); + } + + /** + * Adds the given value for the given key (or sets the initial value), and + * returns the new value + * + * @param key + * @param value + */ + public int add(int key, int value) + { + int newValue = 0; + if (useInts) + { + newValue = intProfile.add(key, value); + } + else + { + try { + newValue = shortProfile.add(key, value); + } catch (ArithmeticException e) { + handleOverflow(); + newValue = intProfile.add(key, value); + } + } + return newValue; + } + + /** + * Switch from counting shorts to counting ints + */ + synchronized void handleOverflow() + { + int size = shortProfile.size(); + intProfile = new SparseIntArray(size); + for (int i = 0; i < size; i++) + { + short key = shortProfile.keyAt(i); + short value = shortProfile.valueAt(i); + intProfile.put(key, value); + } + shortProfile = null; + useInts = true; + } + + /** + * Returns the size of the profile (number of distinct items counted) + * + * @return + */ + public int size() + { + return useInts ? intProfile.size() : shortProfile.size(); + } + + /** + * Returns the value for the key (zero if no such key) + * + * @param key + * @return + */ + public int get(int key) + { + return useInts ? intProfile.get(key) : shortProfile.get(key); + } + + /** + * Sets the value for the given key + * + * @param key + * @param value + */ + public void put(int key, int value) + { + if (useInts) + { + intProfile.put(key, value); + } + else + { + shortProfile.put(key, value); + } + } + + public int keyAt(int k) + { + return useInts ? intProfile.keyAt(k) : shortProfile.keyAt(k); + } + + public int valueAt(int k) + { + return useInts ? intProfile.valueAt(k) : shortProfile.valueAt(k); + } + + /** + * Answers true if this object wraps arrays of int values, false if using + * short values + * + * @return + */ + boolean isUsingInt() + { + return useInts; + } +} diff --git a/src/jalview/viewmodel/AlignmentViewport.java b/src/jalview/viewmodel/AlignmentViewport.java index 80319be..6ecfe99 100644 --- a/src/jalview/viewmodel/AlignmentViewport.java +++ b/src/jalview/viewmodel/AlignmentViewport.java @@ -22,6 +22,7 @@ package jalview.viewmodel; import jalview.analysis.AnnotationSorter.SequenceAnnotationOrder; import jalview.analysis.Conservation; +import jalview.analysis.Profile; import jalview.api.AlignCalcManagerI; import jalview.api.AlignViewportI; import jalview.api.AlignmentViewPanel; @@ -699,7 +700,7 @@ public abstract class AlignmentViewport implements AlignViewportI, /** * results of alignment consensus analysis for visible portion of view */ - protected Hashtable[] hconsensus = null; + protected Profile[] hconsensus = null; /** * results of cDNA complement consensus visible portion of view @@ -733,7 +734,7 @@ public abstract class AlignmentViewport implements AlignViewportI, } @Override - public void setSequenceConsensusHash(Hashtable[] hconsensus) + public void setSequenceConsensusHash(Profile[] hconsensus) { this.hconsensus = hconsensus; } @@ -745,7 +746,7 @@ public abstract class AlignmentViewport implements AlignViewportI, } @Override - public Hashtable[] getSequenceConsensusHash() + public Profile[] getSequenceConsensusHash() { return hconsensus; } diff --git a/src/jalview/workers/ComplementConsensusThread.java b/src/jalview/workers/ComplementConsensusThread.java index 529df6f..431fbec 100644 --- a/src/jalview/workers/ComplementConsensusThread.java +++ b/src/jalview/workers/ComplementConsensusThread.java @@ -96,7 +96,6 @@ public class ComplementConsensusThread extends ConsensusThread * @param consensusData * the computed consensus data */ - @Override protected void deriveConsensus(AlignmentAnnotation consensusAnnotation, Hashtable[] consensusData) { @@ -104,4 +103,16 @@ public class ComplementConsensusThread extends ConsensusThread alignViewport.isShowSequenceLogo(), getSequences().length); } + @Override + public void updateResultAnnotation(boolean immediate) + { + AlignmentAnnotation consensus = getConsensusAnnotation(); + Hashtable[] hconsensus = getViewportConsensus(); + if (immediate || !calcMan.isWorking(this) && consensus != null + && hconsensus != null) + { + deriveConsensus(consensus, hconsensus); + } + } + } diff --git a/src/jalview/workers/ConsensusThread.java b/src/jalview/workers/ConsensusThread.java index 5f0ec84..c88a24a 100644 --- a/src/jalview/workers/ConsensusThread.java +++ b/src/jalview/workers/ConsensusThread.java @@ -21,6 +21,7 @@ package jalview.workers; import jalview.analysis.AAFrequency; +import jalview.analysis.Profile; import jalview.api.AlignViewportI; import jalview.api.AlignmentViewPanel; import jalview.datamodel.AlignmentAnnotation; @@ -29,8 +30,6 @@ import jalview.datamodel.Annotation; import jalview.datamodel.SequenceI; import jalview.schemes.ColourSchemeI; -import java.util.Hashtable; - public class ConsensusThread extends AlignCalcWorker { public ConsensusThread(AlignViewportI alignViewport, @@ -125,7 +124,7 @@ public class ConsensusThread extends AlignCalcWorker */ protected void computeConsensus(AlignmentI alignment) { - Hashtable[] hconsensus = new Hashtable[alignment.getWidth()]; + Profile[] hconsensus = new Profile[alignment.getWidth()]; SequenceI[] aseqs = getSequences(); AAFrequency.calculate(aseqs, 0, alignment.getWidth(), hconsensus, true); @@ -145,7 +144,7 @@ public class ConsensusThread extends AlignCalcWorker /** * @param hconsensus */ - protected void setColourSchemeConsensus(Hashtable[] hconsensus) + protected void setColourSchemeConsensus(Profile[] hconsensus) { ColourSchemeI globalColourScheme = alignViewport .getGlobalColourScheme(); @@ -178,7 +177,7 @@ public class ConsensusThread extends AlignCalcWorker public void updateResultAnnotation(boolean immediate) { AlignmentAnnotation consensus = getConsensusAnnotation(); - Hashtable[] hconsensus = getViewportConsensus(); + Profile[] hconsensus = (Profile[]) getViewportConsensus(); if (immediate || !calcMan.isWorking(this) && consensus != null && hconsensus != null) { @@ -192,15 +191,15 @@ public class ConsensusThread extends AlignCalcWorker * * @param consensusAnnotation * the annotation to be populated - * @param consensusData + * @param hconsensus * the computed consensus data */ protected void deriveConsensus(AlignmentAnnotation consensusAnnotation, - Hashtable[] consensusData) + Profile[] hconsensus) { long nseq = getSequences().length; - AAFrequency.completeConsensus(consensusAnnotation, consensusData, 0, - consensusData.length, alignViewport.isIgnoreGapsConsensus(), + AAFrequency.completeConsensus(consensusAnnotation, hconsensus, 0, + hconsensus.length, alignViewport.isIgnoreGapsConsensus(), alignViewport.isShowSequenceLogo(), nseq); } @@ -209,8 +208,9 @@ public class ConsensusThread extends AlignCalcWorker * * @return */ - protected Hashtable[] getViewportConsensus() + protected Object[] getViewportConsensus() { + // TODO convert ComplementConsensusThread to use Profile return alignViewport.getSequenceConsensusHash(); } } diff --git a/test/jalview/analysis/AAFrequencyTest.java b/test/jalview/analysis/AAFrequencyTest.java index eff6bbf..ecddad1 100644 --- a/test/jalview/analysis/AAFrequencyTest.java +++ b/test/jalview/analysis/AAFrequencyTest.java @@ -26,8 +26,6 @@ import static org.testng.AssertJUnit.assertNull; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; -import java.util.Hashtable; - import org.testng.annotations.Test; public class AAFrequencyTest @@ -50,38 +48,38 @@ public class AAFrequencyTest SequenceI seq3 = new Sequence("Seq3", "C--G"); SequenceI seq4 = new Sequence("Seq4", "CA-t"); SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 }; - Hashtable[] result = new Hashtable[seq1.getLength()]; + Profile[] result = new Profile[seq1.getLength()]; AAFrequency.calculate(seqs, 0, seq1.getLength(), result, false); // col 0 is 100% C - Hashtable col = result[0]; - assertEquals(100f, (Float) col.get(G), 0.0001f); - assertEquals(100f, (Float) col.get(N), 0.0001f); - assertEquals(4, col.get(C)); - assertEquals("C", col.get(R)); - assertNull(col.get(P)); + Profile col = result[0]; + assertEquals(100f, col.getPercentageIdentity(false)); + assertEquals(100f, col.getPercentageIdentity(true)); + assertEquals(4, col.getMaxCount()); + assertEquals("C", col.getModalResidue()); + assertNull(col.getCounts()); // col 1 is 75% A col = result[1]; - assertEquals(75f, (Float) col.get(G), 0.0001f); - assertEquals(100f, (Float) col.get(N), 0.0001f); - assertEquals(3, col.get(C)); - assertEquals("A", col.get(R)); + assertEquals(75f, col.getPercentageIdentity(false)); + assertEquals(100f, col.getPercentageIdentity(true)); + assertEquals(3, col.getMaxCount()); + assertEquals("A", col.getModalResidue()); // col 2 is 50% G 50% C or 25/25 counting gaps col = result[2]; - assertEquals(25f, (Float) col.get(G), 0.0001f); - assertEquals(50f, (Float) col.get(N), 0.0001f); - assertEquals(1, col.get(C)); - assertEquals("CG", col.get(R)); + assertEquals(25f, col.getPercentageIdentity(false)); + assertEquals(50f, col.getPercentageIdentity(true)); + assertEquals(1, col.getMaxCount()); + assertEquals("CG", col.getModalResidue()); // col 3 is 75% T 25% G col = result[3]; - assertEquals(75f, (Float) col.get(G), 0.0001f); - assertEquals(75f, (Float) col.get(N), 0.0001f); - assertEquals(3, col.get(C)); - assertEquals("T", col.get(R)); + assertEquals(75f, col.getPercentageIdentity(false)); + assertEquals(75f, col.getPercentageIdentity(true)); + assertEquals(3, col.getMaxCount()); + assertEquals("T", col.getModalResidue()); } @Test(groups = { "Functional" }) @@ -92,30 +90,30 @@ public class AAFrequencyTest SequenceI seq3 = new Sequence("Seq3", "C--G"); SequenceI seq4 = new Sequence("Seq4", "CA-t"); SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 }; - Hashtable[] result = new Hashtable[seq1.getLength()]; + Profile[] result = new Profile[seq1.getLength()]; AAFrequency.calculate(seqs, 0, seq1.getLength(), result, true); - int[][] profile = (int[][]) result[0].get(P); - assertEquals(4, profile[0]['C']); - assertEquals(4, profile[1][0]); // no of seqs - assertEquals(4, profile[1][1]); // nongapped in column - - profile = (int[][]) result[1].get(P); - assertEquals(3, profile[0]['A']); - assertEquals(4, profile[1][0]); - assertEquals(3, profile[1][1]); - - profile = (int[][]) result[2].get(P); - assertEquals(1, profile[0]['G']); - assertEquals(1, profile[0]['C']); - assertEquals(4, profile[1][0]); - assertEquals(2, profile[1][1]); - - profile = (int[][]) result[3].get(P); - assertEquals(3, profile[0]['T']); - assertEquals(1, profile[0]['G']); - assertEquals(4, profile[1][0]); - assertEquals(4, profile[1][1]); + Profile profile = result[0]; + assertEquals(4, profile.getCounts().getCount('C')); + assertEquals(4, profile.getHeight()); + assertEquals(4, profile.getNonGapped()); + + profile = result[1]; + assertEquals(3, profile.getCounts().getCount('A')); + assertEquals(4, profile.getHeight()); + assertEquals(3, profile.getNonGapped()); + + profile = result[2]; + assertEquals(1, profile.getCounts().getCount('C')); + assertEquals(1, profile.getCounts().getCount('G')); + assertEquals(4, profile.getHeight()); + assertEquals(2, profile.getNonGapped()); + + profile = result[3]; + assertEquals(3, profile.getCounts().getCount('T')); + assertEquals(1, profile.getCounts().getCount('G')); + assertEquals(4, profile.getHeight()); + assertEquals(4, profile.getNonGapped()); } @Test(groups = { "Functional" }) @@ -126,7 +124,7 @@ public class AAFrequencyTest SequenceI seq3 = new Sequence("Seq3", "C--G"); SequenceI seq4 = new Sequence("Seq4", "CA-t"); SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 }; - Hashtable[] result = new Hashtable[seq1.getLength()]; + Profile[] result = new Profile[seq1.getLength()]; // ensure class loaded and initialized AAFrequency.calculate(seqs, 0, seq1.getLength(), result, true); @@ -138,15 +136,4 @@ public class AAFrequencyTest } System.out.println(System.currentTimeMillis() - start); } - - @Test(groups = { "Functional" }) - public void testGetPercentageFormat() - { - assertNull(AAFrequency.getPercentageFormat(0)); - assertNull(AAFrequency.getPercentageFormat(99)); - assertEquals("%3.1f", AAFrequency.getPercentageFormat(100).toString()); - assertEquals("%3.1f", AAFrequency.getPercentageFormat(999).toString()); - assertEquals("%3.2f", AAFrequency.getPercentageFormat(1000).toString()); - assertEquals("%3.2f", AAFrequency.getPercentageFormat(9999).toString()); - } } diff --git a/test/jalview/analysis/ResidueCountTest.java b/test/jalview/analysis/ResidueCountTest.java new file mode 100644 index 0000000..a26252c --- /dev/null +++ b/test/jalview/analysis/ResidueCountTest.java @@ -0,0 +1,292 @@ +package jalview.analysis; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import jalview.analysis.ResidueCount.SymbolCounts; + +import org.junit.Assert; +import org.testng.annotations.Test; + +public class ResidueCountTest +{ + /** + * Test a mix of add and put for nucleotide counting + */ + @Test(groups = "Functional") + public void test_countNucleotide() + { + ResidueCount rc = new ResidueCount(true); + assertEquals(rc.getCount('A'), 0); + assertEquals(rc.getGapCount(), 0); + // add then add + assertEquals(rc.add('A'), 1); + assertEquals(rc.add('a'), 2); + // put then add + rc.put('g', 3); + assertEquals(rc.add('G'), 4); + // add then put + assertEquals(rc.add('c'), 1); + rc.put('C', 4); + assertEquals(rc.add('N'), 1); + + assertEquals(rc.getCount('a'), 2); + assertEquals(rc.getCount('A'), 2); + assertEquals(rc.getCount('G'), 4); + assertEquals(rc.getCount('c'), 4); + assertEquals(rc.getCount('T'), 0); // never seen + assertEquals(rc.getCount('N'), 1); + assertEquals(rc.getCount('?'), 0); + assertEquals(rc.getCount('-'), 0); + + assertFalse(rc.isCountingInts()); + assertFalse(rc.isUsingOtherData()); + } + + /** + * Test adding to gap count (either using addGap or add) + */ + @Test(groups = "Functional") + public void testAddGap() + { + ResidueCount rc = new ResidueCount(true); + rc.addGap(); + rc.add('-'); + rc.add('.'); + rc.add(' '); + + assertEquals(rc.getGapCount(), 4); + assertEquals(rc.getCount(' '), 4); + assertEquals(rc.getCount('-'), 4); + assertEquals(rc.getCount('.'), 4); + assertFalse(rc.isUsingOtherData()); + assertFalse(rc.isCountingInts()); + } + + @Test(groups = "Functional") + public void testOverflow() + { + /* + * overflow from add + */ + ResidueCount rc = new ResidueCount(true); + rc.put('A', Short.MAX_VALUE - 1); + assertFalse(rc.isCountingInts()); + rc.add('A'); + assertFalse(rc.isCountingInts()); + rc.add('A'); + assertTrue(rc.isCountingInts()); + assertEquals(rc.getCount('a'), Short.MAX_VALUE + 1); + + /* + * overflow from put + */ + rc = new ResidueCount(true); + rc.put('G', Short.MAX_VALUE + 1); + assertTrue(rc.isCountingInts()); + assertEquals(rc.getCount('g'), Short.MAX_VALUE + 1); + } + + /** + * Test a mix of add and put for peptide counting + */ + @Test(groups = "Functional") + public void test_countPeptide() + { + ResidueCount rc = new ResidueCount(false); + rc.put('q', 4); + rc.add('Q'); + rc.add('X'); + rc.add('x'); + rc.add('W'); + rc.put('w', 7); + rc.put('m', 12); + rc.put('M', 13); + + assertEquals(rc.getCount('q'), 5); + assertEquals(rc.getCount('X'), 2); + assertEquals(rc.getCount('W'), 7); + assertEquals(rc.getCount('m'), 13); + assertEquals(rc.getCount('G'), 0); + assertEquals(rc.getCount('-'), 0); + + assertFalse(rc.isCountingInts()); + assertFalse(rc.isUsingOtherData()); + } + + @Test(groups = "Functional") + public void test_unexpectedPeptide() + { + ResidueCount rc = new ResidueCount(false); + // expected characters (upper or lower case): + String aas = "ACDEFGHIKLMNPQRSTVWXY"; + String lower = aas.toLowerCase(); + for (int i = 0; i < aas.length(); i++) + { + rc.put(aas.charAt(i), i); + rc.add(lower.charAt(i)); + } + for (int i = 0; i < aas.length(); i++) + { + assertEquals(rc.getCount(aas.charAt(i)), i + 1); + } + assertFalse(rc.isUsingOtherData()); + + rc.put('J', 4); + assertTrue(rc.isUsingOtherData()); + } + + @Test(groups = "Functional") + public void test_unexpectedNucleotide() + { + ResidueCount rc = new ResidueCount(true); + // expected characters (upper or lower case): + String nucs = "ACGTUN"; + String lower = nucs.toLowerCase(); + for (int i = 0; i < nucs.length(); i++) + { + rc.put(nucs.charAt(i), i); + rc.add(lower.charAt(i)); + } + for (int i = 0; i < nucs.length(); i++) + { + assertEquals(rc.getCount(nucs.charAt(i)), i + 1); + } + assertFalse(rc.isUsingOtherData()); + + rc.add('J'); + assertTrue(rc.isUsingOtherData()); + } + + @Test(groups = "Functional") + public void testGetModalCount() + { + ResidueCount rc = new ResidueCount(); + rc.add('c'); + rc.add('g'); + rc.add('c'); + assertEquals(rc.getModalCount(), 2); + + // modal count is in the 'short overflow' counts + rc = new ResidueCount(); + rc.add('c'); + rc.put('g', Short.MAX_VALUE); + rc.add('G'); + assertEquals(rc.getModalCount(), Short.MAX_VALUE + 1); + + // modal count is in the 'other data' counts + rc = new ResidueCount(); + rc.add('Q'); + rc.add('{'); + rc.add('{'); + assertEquals(rc.getModalCount(), 2); + + // verify modal count excludes gap + rc = new ResidueCount(); + rc.add('Q'); + rc.add('P'); + rc.add('Q'); + rc.addGap(); + rc.addGap(); + rc.addGap(); + assertEquals(rc.getModalCount(), 2); + } + + @Test(groups = "Functional") + public void testGetResiduesForCount() + { + ResidueCount rc = new ResidueCount(); + rc.add('c'); + rc.add('g'); + rc.add('c'); + assertEquals(rc.getResiduesForCount(2), "C"); + assertEquals(rc.getResiduesForCount(1), "G"); + assertEquals(rc.getResiduesForCount(3), ""); + assertEquals(rc.getResiduesForCount(0), ""); + assertEquals(rc.getResiduesForCount(-1), ""); + + // modal count is in the 'short overflow' counts + rc = new ResidueCount(); + rc.add('c'); + rc.put('g', Short.MAX_VALUE); + rc.add('G'); + assertEquals(rc.getResiduesForCount(Short.MAX_VALUE + 1), "G"); + assertEquals(rc.getResiduesForCount(1), "C"); + + // modal count is in the 'other data' counts + rc = new ResidueCount(); + rc.add('Q'); + rc.add('{'); + rc.add('{'); + assertEquals(rc.getResiduesForCount(1), "Q"); + assertEquals(rc.getResiduesForCount(2), "{"); + + // residues share modal count + rc = new ResidueCount(); + rc.add('G'); + rc.add('G'); + rc.add('c'); + rc.add('C'); + rc.add('U'); + assertEquals(rc.getResiduesForCount(1), "U"); + assertEquals(rc.getResiduesForCount(2), "CG"); + + // expected and unexpected symbols share modal count + rc = new ResidueCount(); + rc.add('G'); + rc.add('t'); + rc.add('['); + rc.add('['); + rc.add('t'); + rc.add('G'); + rc.add('c'); + rc.add('C'); + rc.add('U'); + assertEquals(rc.getResiduesForCount(1), "U"); + assertEquals(rc.getResiduesForCount(2), "CGT["); + } + + @Test(groups = "Functional") + public void testGetSymbolCounts() + { + ResidueCount rc = new ResidueCount(); + rc.add('q'); + rc.add('c'); + rc.add('Q'); + rc.add('J'); // 'otherData' + rc.add('q'); + rc.add('x'); + + SymbolCounts sc = rc.getSymbolCounts(); + Assert.assertArrayEquals(new char[] { 'C', 'Q', 'X', 'J' }, sc.symbols); + Assert.assertArrayEquals(new int[] { 1, 3, 1, 1 }, sc.values); + + // now with overflow to int counts + rc.put('g', Short.MAX_VALUE); + rc.add('g'); + sc = rc.getSymbolCounts(); + Assert.assertArrayEquals(new char[] { 'C', 'G', 'Q', 'X', 'J' }, + sc.symbols); + Assert.assertArrayEquals(new int[] { 1, 32768, 3, 1, 1 }, sc.values); + } + + @Test(groups = "Functional") + public void testToString() + { + ResidueCount rc = new ResidueCount(); + rc.add('q'); + rc.add('c'); + rc.add('Q'); + assertEquals(rc.toString(), "[ C:1 Q:2 ]"); + + // add 'other data' + rc.add('{'); + assertEquals(rc.toString(), "[ C:1 Q:2 {:1 ]"); + + // switch from short to int counting: + rc.put('G', Short.MAX_VALUE); + rc.add('g'); + assertEquals(rc.toString(), "[ C:1 G:32768 Q:2 {:1 ]"); + } +} diff --git a/test/jalview/ext/android/SparseIntArrayTest.java b/test/jalview/ext/android/SparseIntArrayTest.java new file mode 100644 index 0000000..3919e33 --- /dev/null +++ b/test/jalview/ext/android/SparseIntArrayTest.java @@ -0,0 +1,43 @@ +package jalview.ext.android; + +import static org.testng.Assert.assertEquals; + +import org.testng.annotations.Test; + +/* + * Tests for SparseIntArray. Unlike SparseShortArray, SparseIntArray does not throw + * any exception for overflow. + */ +public class SparseIntArrayTest +{ + @Test(groups = "Functional") + public void testPut() + { + SparseIntArray counter = new SparseIntArray(); + + /* + * either key or value may be in the range of int + */ + counter.put(Integer.MAX_VALUE, Integer.MIN_VALUE); + counter.put(Integer.MIN_VALUE, Integer.MAX_VALUE); + assertEquals(counter.get(Integer.MAX_VALUE), Integer.MIN_VALUE); + assertEquals(counter.get(Integer.MIN_VALUE), Integer.MAX_VALUE); + } + + @Test(groups = "Functional") + public void testAdd() + { + SparseIntArray counter = new SparseIntArray(); + + assertEquals(counter.add('P', 2), 2); + assertEquals(counter.add('P', 3), 5); + counter.put('Q', 7); + assertEquals(counter.add('Q', 4), 11); + + counter.put('x', Integer.MAX_VALUE); + counter.add('x', 1); + + counter.put('y', Integer.MIN_VALUE); + counter.add('y', -1); + } +} diff --git a/test/jalview/ext/android/SparseShortArrayTest.java b/test/jalview/ext/android/SparseShortArrayTest.java new file mode 100644 index 0000000..351f640 --- /dev/null +++ b/test/jalview/ext/android/SparseShortArrayTest.java @@ -0,0 +1,94 @@ +package jalview.ext.android; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.fail; + +import org.testng.annotations.Test; + +public class SparseShortArrayTest +{ + @Test(groups = "Functional") + public void testPut() + { + SparseShortArray counter = new SparseShortArray(); + + /* + * either key or value may be in the range of short + */ + counter.put(Short.MAX_VALUE, Short.MIN_VALUE); + counter.put(Short.MIN_VALUE, Short.MAX_VALUE); + + // put a too large value + try + { + counter.put(0, Short.MAX_VALUE + 1); + fail("Expected exception"); + } catch (ArithmeticException e) + { + // expected; + } + + // put a too small value + try + { + counter.put(1, Short.MIN_VALUE - 1); + fail("Expected exception"); + } catch (ArithmeticException e) + { + // expected; + } + + // put a too large key + try + { + counter.put(Short.MAX_VALUE + 1, 0); + fail("Expected exception"); + } catch (ArithmeticException e) + { + // expected; + } + + // put a too small key + try + { + counter.put(Short.MIN_VALUE - 1, 2); + fail("Expected exception"); + } catch (ArithmeticException e) + { + // expected; + } + } + + @Test(groups = "Functional") + public void testAdd() + { + SparseShortArray counter = new SparseShortArray(); + + assertEquals(counter.add('P', 2), 2); + assertEquals(counter.add('P', 3), 5); + counter.put('Q', 7); + assertEquals(counter.add('Q', 4), 11); + + // increment giving overflow + counter.put('x', Short.MAX_VALUE); + try + { + counter.add('x', 1); + fail("Expected exception"); + } catch (ArithmeticException e) + { + // expected; + } + + // decrement giving underflow + counter.put('y', Short.MIN_VALUE); + try + { + counter.add('y', -1); + fail("Expected exception"); + } catch (ArithmeticException e) + { + // expected; + } + } +} diff --git a/test/jalview/util/SparseCountTest.java b/test/jalview/util/SparseCountTest.java new file mode 100644 index 0000000..c9a07a1 --- /dev/null +++ b/test/jalview/util/SparseCountTest.java @@ -0,0 +1,79 @@ +package jalview.util; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import org.testng.annotations.Test; +public class SparseCountTest +{ + @Test(groups = "Functional") + public void testAdd() + { + SparseCount p = new SparseCount(8); + p.add('a', 1); + p.add('b', 2); + p.add('a', 3); + p.add('b', -4); + assertEquals(p.size(), 2); + assertEquals(p.get('a'), 4); + assertEquals(p.get('b'), -2); + } + + @Test(groups = "Functional") + public void testPut() + { + SparseCount p = new SparseCount(8); + p.put('a', 3); + p.add('b', 2); + p.put('b', 4); + assertEquals(p.size(), 2); + assertEquals(p.get('a'), 3); + assertEquals(p.get('b'), 4); + } + + /** + * Test handling overflow of short by switching to counting ints + */ + @Test(groups = "Functional") + public void testOverflow() + { + SparseCount p = new SparseCount(8); + p.put('a', Short.MAX_VALUE - 1); + p.add('a', 1); + assertFalse(p.isUsingInt()); + p.add('a', 1); + assertTrue(p.isUsingInt()); + } + + /** + * Test handling underflow of short by switching to counting ints + */ + @Test(groups = "Functional") + public void testUnderflow() + { + SparseCount p = new SparseCount(8); + p.put('a', Short.MIN_VALUE + 1); + p.add('a', -1); + assertFalse(p.isUsingInt()); + p.add('a', -1); + assertTrue(p.isUsingInt()); + } + + @Test(groups = "Functional") + public void testKeyAt_ValueAt() + { + SparseCount p = new SparseCount(8); + p.put('W', 12); + p.put('K', 9); + p.put('R', 6); + assertEquals(p.size(), 3); + assertEquals(p.keyAt(0), 'K'); + assertEquals(p.valueAt(0), 9); + assertEquals(p.keyAt(1), 'R'); + assertEquals(p.valueAt(1), 6); + assertEquals(p.keyAt(2), 'W'); + assertEquals(p.valueAt(2), 12); + } + +}