public class SymbolCounts
{
/**
- * the symbols seen (as char values)
+ * the symbols seen (as char values), in no particular order
*/
public final char[] symbols;
/*
* nucleotide symbols to count (including N unknown)
*/
- private static final String NUCS = "ACGTUN";
+ private static final String NUCS = "ACGNTU";
/*
* amino acid symbols to count (including X unknown)
* @param c
* @return the new value of the count for the character
*/
- public int add(char c)
+ public int add(final char c)
{
+ char u = toUpperCase(c);
int newValue = 0;
- int offset = getOffset(c);
+ int offset = getOffset(u);
/*
* offset 0 is reserved for gap counting, so 0 here means either
*/
if (offset == 0)
{
- if (Comparison.isGap(c))
+ if (Comparison.isGap(u))
{
newValue = addGap();
}
else
{
- newValue = addOtherCharacter(c);
+ newValue = addOtherCharacter(u);
}
}
else
}
/**
+ * Returns this character's offset in the count array
+ *
* @param c
* @return
*/
int getOffset(char c)
{
- /*
- * ensure upper-case (fails fast if it already is!)
- */
- if ('a' <= c && c <= 'z')
- {
- c = (char) (c + TOUPPERCASE);
- }
-
- /*
- * locate this character's offset in the count array
- */
int offset = 0;
if ('A' <= c && c <= 'Z')
{
}
/**
+ * Returns the given character, shifted by the TOUPPERCASE constant if it lies
+ * in the range 'a'-'z'; any other character is returned unchanged.
+ * NOTE(review): TOUPPERCASE is defined outside this view - presumably
+ * ('A' - 'a'), so that the result is the upper-case letter; confirm.
+ *
+ * @param c
+ *          the character to convert (not modified; a copy is returned)
+ * @return c + TOUPPERCASE if c is in 'a'-'z', otherwise c itself
+ */
+ protected char toUpperCase(final char c)
+ {
+ char u = c;
+ if ('a' <= c && c <= 'z')
+ {
+ u = (char) (c + TOUPPERCASE);
+ }
+ return u;
+ }
+
+ /**
* Increment count for some unanticipated character. The first time this is
* called, a SparseCount is instantiated to hold these 'extra' counts.
*
*/
public void put(char c, int count)
{
- int offset = getOffset(c);
+ char u = toUpperCase(c);
+ int offset = getOffset(u);
/*
* offset 0 is reserved for gap counting, so 0 here means either
*/
if (offset == 0)
{
- if (Comparison.isGap(c))
+ if (Comparison.isGap(u))
{
- addGap();
+ set(0, count);
}
else
{
- setOtherCharacter(c, count);
+ setOtherCharacter(u, count);
maxCount = Math.max(maxCount, count);
}
}
*/
public int getCount(char c)
{
- int offset = getOffset(c);
+ char u = toUpperCase(c);
+ int offset = getOffset(u);
if (offset == 0)
{
- if (!Comparison.isGap(c))
+ if (!Comparison.isGap(u))
{
// not a gap: look in otherData (0 if none held); for a gap, should have called getGapCount()
- return otherData == null ? 0 : otherData.get(c);
+ return otherData == null ? 0 : otherData.get(u);
}
}
return useIntCounts ? intCounts[offset] : counts[offset]; // a gap character reaches here with offset 0
}
/**
- * Returns the highest count for any symbol in the profile (excluding gap)
+ * Returns the highest count for any symbol(s) in the profile (excluding gap)
*
* @return
*/
}
/**
- * Returns those symbols that have a non-zero count (excluding the gap
- * symbol), with their counts. The symbols are in no special order. Returns an
- * array of size 2 whose first element is a char array of symbols, and second
- * element an int array of corresponding counts.
+ * Returns a data bean holding those symbols that have a non-zero count
+ * (excluding the gap symbol), with their counts.
*
- * @return an array [[char1, char2, ...] [char1Count, char2Count, ...] ... ]
+ * @return
*/
public SymbolCounts getSymbolCounts()
{
{
for (int i = 0; i < otherData.size(); i++)
{
- int value = otherData.valueAt(i);
- if (value > 0)
- {
- symbols[j] = (char) otherData.keyAt(i);
- values[j] = otherData.valueAt(i);
- j++;
- }
+ symbols[j] = (char) otherData.keyAt(i);
+ values[j] = otherData.valueAt(i);
+ j++;
}
}
*/
public String getTooltip(int normaliseBy, int percentageDecPl)
{
- StringBuilder sb = new StringBuilder(64);
SymbolCounts symbolCounts = getSymbolCounts();
char[] ca = symbolCounts.symbols;
int[] vl = symbolCounts.values;
/*
* traverse in reverse order (highest count first) to build tooltip
* NOTE(review): "highest count first" holds only if ca/vl are in ascending
* count order at this point; no ordering step is visible here - confirm
*/
- for (int p = 0, c = ca.length - 1; c >= 0; c--)
+ boolean first = true;
+ StringBuilder sb = new StringBuilder(64);
+ for (int c = ca.length - 1; c >= 0; c--)
{
final char residue = ca[c];
- if (residue != '-')
- {
- // TODO combine residues which share a percentage
- // (see AAFrequency.completeCdnaConsensus)
- float tval = (vl[c] * 100f) / normaliseBy;
- sb.append((((p == 0) ? "" : "; "))).append(residue)
- .append(" ");
- Format.appendPercentage(sb, tval, percentageDecPl);
- sb.append("%");
- p++;
- }
+ // TODO combine residues which share a percentage
+ // (see AAFrequency.completeCdnaConsensus)
+ float tval = (vl[c] * 100f) / normaliseBy; // count as a percentage of normaliseBy (float division)
+ sb.append(first ? "" : "; ").append(residue).append(" "); // "; " separates every entry after the first
+ Format.appendPercentage(sb, tval, percentageDecPl);
+ sb.append("%");
+ first = false;
}
return sb.toString();
}
* overflow from add
*/
ResidueCount rc = new ResidueCount(true);
+ rc.addGap();
rc.put('A', Short.MAX_VALUE - 1);
assertFalse(rc.isCountingInts());
rc.add('A');
rc.add('A');
assertTrue(rc.isCountingInts());
assertEquals(rc.getCount('a'), Short.MAX_VALUE + 1);
+ rc.add('A');
+ assertTrue(rc.isCountingInts());
+ assertEquals(rc.getCount('a'), Short.MAX_VALUE + 2);
+ assertEquals(rc.getGapCount(), 1);
+ rc.addGap();
+ assertEquals(rc.getGapCount(), 2);
/*
* overflow from put
rc.put('G', Short.MAX_VALUE + 1);
assertTrue(rc.isCountingInts());
assertEquals(rc.getCount('g'), Short.MAX_VALUE + 1);
+ rc.put('G', 1);
+ assertTrue(rc.isCountingInts());
+ assertEquals(rc.getCount('g'), 1);
+
+ /*
+ * underflow from put
+ */
+ rc = new ResidueCount(true);
+ rc.put('G', Short.MIN_VALUE - 1);
+ assertTrue(rc.isCountingInts());
+ assertEquals(rc.getCount('g'), Short.MIN_VALUE - 1);
}
/**
rc.put('J', 4);
assertTrue(rc.isUsingOtherData());
+ assertEquals(rc.getCount('J'), 4);
+ rc.add('j');
+ assertEquals(rc.getCount('J'), 5);
}
@Test(groups = "Functional")
@Test(groups = "Functional")
public void testGetModalCount()
{
- ResidueCount rc = new ResidueCount();
+ ResidueCount rc = new ResidueCount(true);
rc.add('c');
rc.add('g');
rc.add('c');
assertEquals(rc.getModalCount(), Short.MAX_VALUE + 1);
// modal count is in the 'other data' counts
- rc = new ResidueCount();
+ rc = new ResidueCount(false);
rc.add('Q');
rc.add('{');
rc.add('{');
@Test(groups = "Functional")
public void testGetResiduesForCount()
{
- ResidueCount rc = new ResidueCount();
+ ResidueCount rc = new ResidueCount(true);
rc.add('c');
rc.add('g');
rc.add('c');
assertEquals(rc.getResiduesForCount(-1), "");
// modal count is in the 'short overflow' counts
- rc = new ResidueCount();
+ rc = new ResidueCount(true);
rc.add('c');
rc.put('g', Short.MAX_VALUE);
rc.add('G');
assertEquals(rc.getResiduesForCount(Short.MAX_VALUE + 1), "G");
assertEquals(rc.getResiduesForCount(1), "C");
+
+ // peptide modal count is in the 'short overflow' counts
+ rc = new ResidueCount(false);
+ rc.add('c');
+ rc.put('p', Short.MAX_VALUE);
+ rc.add('P');
+ assertEquals(rc.getResiduesForCount(Short.MAX_VALUE + 1), "P");
+ assertEquals(rc.getResiduesForCount(1), "C");
// modal count is in the 'other data' counts
rc = new ResidueCount();
}
@Test(groups = "Functional")
- public void testGetSymbolCounts()
+ public void testGetSymbolCounts_nucleotide()
{
- ResidueCount rc = new ResidueCount();
- rc.add('q');
+ ResidueCount rc = new ResidueCount(true);
+ rc.add('g');
rc.add('c');
- rc.add('Q');
+ rc.add('G');
rc.add('J'); // 'otherData'
- rc.add('q');
- rc.add('x');
+ rc.add('g'); // third g/G: expected count for 'G' below is 3
+ rc.add('N');
+ rc.put('[', 0); // 'otherdata' symbol with an explicit zero count; still expected in the result
SymbolCounts sc = rc.getSymbolCounts();
- Assert.assertArrayEquals(new char[] { 'C', 'Q', 'X', 'J' }, sc.symbols);
- Assert.assertArrayEquals(new int[] { 1, 3, 1, 1 }, sc.values);
+ Assert.assertArrayEquals(new char[] { 'C', 'G', 'N', 'J', '[' },
+ sc.symbols);
+ Assert.assertArrayEquals(new int[] { 1, 3, 1, 1, 0 }, sc.values);
// now with overflow to int counts
- rc.put('g', Short.MAX_VALUE);
- rc.add('g');
+ rc.put('U', Short.MAX_VALUE);
+ rc.add('u'); // takes 'U' to Short.MAX_VALUE + 1 = 32768
sc = rc.getSymbolCounts();
- Assert.assertArrayEquals(new char[] { 'C', 'G', 'Q', 'X', 'J' },
+ Assert.assertArrayEquals(new char[] { 'C', 'G', 'N', 'U', 'J', '[' },
sc.symbols);
- Assert.assertArrayEquals(new int[] { 1, 32768, 3, 1, 1 }, sc.values);
+ Assert.assertArrayEquals(new int[] { 1, 3, 1, 32768, 1, 0 }, sc.values);
+ }
+
+ @Test(groups = "Functional")
+ public void testGetSymbolCounts_peptide()
+ {
+ ResidueCount rc = new ResidueCount(false);
+ rc.add('W');
+ rc.add('q'); // lower case: expected to be reported as 'Q'
+ rc.add('W');
+ rc.add('Z'); // 'otherData'
+ rc.add('w'); // third w/W: expected count for 'W' below is 3
+ rc.add('L');
+
+ SymbolCounts sc = rc.getSymbolCounts();
+ Assert.assertArrayEquals(new char[] { 'L', 'Q', 'W', 'Z' }, sc.symbols);
+ Assert.assertArrayEquals(new int[] { 1, 1, 3, 1 }, sc.values);
+
+ // now with overflow to int counts
+ rc.put('W', Short.MAX_VALUE); // replaces the previous count of 3
+ rc.add('W'); // Short.MAX_VALUE + 1 = 32768
+ sc = rc.getSymbolCounts();
+ Assert.assertArrayEquals(new char[] { 'L', 'Q', 'W', 'Z' }, sc.symbols);
+ Assert.assertArrayEquals(new int[] { 1, 1, 32768, 1 }, sc.values);
}
@Test(groups = "Functional")
rc.add('g');
assertEquals(rc.toString(), "[ C:1 G:32768 Q:2 {:1 ]");
}
+
+ @Test(groups = "Functional")
+ public void testGetTooltip()
+ {
+ ResidueCount rc = new ResidueCount();
+
+ // no counts - the tooltip is expected to be the empty string
+ assertEquals(rc.getTooltip(20, 1), "");
+
+ /*
+ * count 7 C, 6 K, 7 Q, 10 P, 9 W, 1 F (total 40)
+ */
+ for (int i = 0; i < 7; i++)
+ {
+ rc.add('c');
+ rc.add('q');
+ }
+ for (int i = 0; i < 10; i++)
+ {
+ rc.add('p');
+ }
+ for (int i = 0; i < 9; i++)
+ {
+ rc.add('W');
+ }
+ for (int i = 0; i < 6; i++)
+ {
+ rc.add('K');
+ }
+ rc.add('F');
+
+ assertEquals(rc.getTooltip(40, 0), // normalise by 40, 0 decimal places (e.g. 7/40 = 17.5% is expected as "17%")
+ "P 25%; W 22%; C 17%; Q 17%; K 15%; F 2%");
+
+ assertEquals(rc.getTooltip(30, 1), // normalise by 30 (not the total of 40), 1 decimal place
+ "P 33.3%; W 30.0%; C 23.3%; Q 23.3%; K 20.0%; F 3.3%");
+ }
+
+ @Test(groups = "Functional")
+ public void testPut()
+ {
+ ResidueCount rc = new ResidueCount();
+ rc.put('q', 3); // lower-case input...
+ assertEquals(rc.getCount('Q'), 3); // ...is expected to be readable via the upper-case symbol
+ rc.put(' ', 4); // space, '.' and '-' are each expected to set (not increment) the gap count
+ assertEquals(rc.getGapCount(), 4);
+ rc.put('.', 5);
+ assertEquals(rc.getGapCount(), 5);
+ rc.put('-', 6);
+ assertEquals(rc.getGapCount(), 6);
+
+ rc.put('?', 5); // non-letter, non-gap symbol
+ assertEquals(rc.getCount('?'), 5);
+ rc.put('?', 6); // a second put is expected to replace the earlier value
+ rc.put('!', 7); // and a different symbol must not disturb it
+ assertEquals(rc.getCount('?'), 6);
+ assertEquals(rc.getCount('!'), 7);
+ }
}