From 189c266962d87c8f748da4889626e1faa691c13e Mon Sep 17 00:00:00 2001 From: gmungoc Date: Mon, 24 Oct 2016 19:11:28 +0100 Subject: [PATCH] JAL-98 ResidueCount 100% test coverage (and bug fixes!) --- src/jalview/analysis/ResidueCount.java | 99 +++++++++--------- test/jalview/analysis/ResidueCountTest.java | 144 ++++++++++++++++++++++++--- 2 files changed, 177 insertions(+), 66 deletions(-) diff --git a/src/jalview/analysis/ResidueCount.java b/src/jalview/analysis/ResidueCount.java index 2c7cb20..cd35206 100644 --- a/src/jalview/analysis/ResidueCount.java +++ b/src/jalview/analysis/ResidueCount.java @@ -19,7 +19,7 @@ public class ResidueCount public class SymbolCounts { /** - * the symbols seen (as char values) + * the symbols seen (as char values), in no particular order */ public final char[] symbols; @@ -40,7 +40,7 @@ public class ResidueCount /* * nucleotide symbols to count (including N unknown) */ - private static final String NUCS = "ACGTUN"; + private static final String NUCS = "ACGNTU"; /* * amino acid symbols to count (including X unknown) @@ -134,10 +134,11 @@ public class ResidueCount * @param c * @return the new value of the count for the character */ - public int add(char c) + public int add(final char c) { + char u = toUpperCase(c); int newValue = 0; - int offset = getOffset(c); + int offset = getOffset(u); /* * offset 0 is reserved for gap counting, so 0 here means either @@ -145,13 +146,13 @@ public class ResidueCount */ if (offset == 0) { - if (Comparison.isGap(c)) + if (Comparison.isGap(u)) { newValue = addGap(); } else { - newValue = addOtherCharacter(c); + newValue = addOtherCharacter(u); } } else @@ -209,22 +210,13 @@ public class ResidueCount } /** + * Returns this character's offset in the count array + * * @param c * @return */ int getOffset(char c) { - /* - * ensure upper-case (fails fast if it already is!) - */ - if ('a' <= c && c <= 'z') - { - c = (char) (c + TOUPPERCASE); - } - - /* - * locate this character's offset in the count array - */ int offset = 0; if ('A' <= c && c <= 'Z') { @@ -234,6 +226,20 @@ public class ResidueCount } /** + * @param c + * @return + */ + protected char toUpperCase(final char c) + { + char u = c; + if ('a' <= c && c <= 'z') + { + u = (char) (c + TOUPPERCASE); + } + return u; + } + + /** * Increment count for some unanticipated character. The first time this * called, a SparseCount is instantiated to hold these 'extra' counts. * @@ -305,7 +311,8 @@ public class ResidueCount */ public void put(char c, int count) { - int offset = getOffset(c); + char u = toUpperCase(c); + int offset = getOffset(u); /* * offset 0 is reserved for gap counting, so 0 here means either @@ -313,13 +320,13 @@ public class ResidueCount */ if (offset == 0) { - if (Comparison.isGap(c)) + if (Comparison.isGap(u)) { - addGap(); + set(0, count); } else { - setOtherCharacter(c, count); + setOtherCharacter(u, count); maxCount = Math.max(maxCount, count); } } @@ -365,13 +372,14 @@ public class ResidueCount */ public int getCount(char c) { - int offset = getOffset(c); + char u = toUpperCase(c); + int offset = getOffset(u); if (offset == 0) { - if (!Comparison.isGap(c)) + if (!Comparison.isGap(u)) { // should have called getGapCount() - return otherData == null ? 0 : otherData.get(c); + return otherData == null ? 0 : otherData.get(u); } } return useIntCounts ? intCounts[offset] : counts[offset]; @@ -449,7 +457,7 @@ public class ResidueCount } /** - * Returns the highest count for any symbol in the profile (excluding gap) + * Returns the highest count for any symbol(s) in the profile (excluding gap) * * @return */ @@ -495,12 +503,10 @@ public class ResidueCount } /** - * Returns those symbols that have a non-zero count (excluding the gap - * symbol), with their counts. The symbols are in no special order. Returns an - * array of size 2 whose first element is a char array of symbols, and second - * element an int array of corresponding counts. + * Returns a data bean holding those symbols that have a non-zero count + * (excluding the gap symbol), with their counts. * - * @return an array [[char1, char2, ...] [char1Count, char2Count, ...] ... ] + * @return */ public SymbolCounts getSymbolCounts() { @@ -540,13 +546,9 @@ public class ResidueCount { for (int i = 0; i < otherData.size(); i++) { - int value = otherData.valueAt(i); - if (value > 0) - { - symbols[j] = (char) otherData.keyAt(i); - values[j] = otherData.valueAt(i); - j++; - } + symbols[j] = (char) otherData.keyAt(i); + values[j] = otherData.valueAt(i); + j++; } } @@ -566,7 +568,6 @@ public class ResidueCount */ public String getTooltip(int normaliseBy, int percentageDecPl) { - StringBuilder sb = new StringBuilder(64); SymbolCounts symbolCounts = getSymbolCounts(); char[] ca = symbolCounts.symbols; int[] vl = symbolCounts.values; @@ -579,20 +580,18 @@ public class ResidueCount /* * traverse in reverse order (highest count first) to build tooltip */ - for (int p = 0, c = ca.length - 1; c >= 0; c--) + boolean first = true; + StringBuilder sb = new StringBuilder(64); + for (int c = ca.length - 1; c >= 0; c--) { final char residue = ca[c]; - if (residue != '-') - { - // TODO combine residues which share a percentage - // (see AAFrequency.completeCdnaConsensus) - float tval = (vl[c] * 100f) / normaliseBy; - sb.append((((p == 0) ? "" : "; "))).append(residue) - .append(" "); - Format.appendPercentage(sb, tval, percentageDecPl); - sb.append("%"); - p++; - } + // TODO combine residues which share a percentage + // (see AAFrequency.completeCdnaConsensus) + float tval = (vl[c] * 100f) / normaliseBy; + sb.append(first ? "" : "; ").append(residue).append(" "); + Format.appendPercentage(sb, tval, percentageDecPl); + sb.append("%"); + first = false; } return sb.toString(); } diff --git a/test/jalview/analysis/ResidueCountTest.java b/test/jalview/analysis/ResidueCountTest.java index a26252c..4a71f89 100644 --- a/test/jalview/analysis/ResidueCountTest.java +++ b/test/jalview/analysis/ResidueCountTest.java @@ -71,6 +71,7 @@ public class ResidueCountTest * overflow from add */ ResidueCount rc = new ResidueCount(true); + rc.addGap(); rc.put('A', Short.MAX_VALUE - 1); assertFalse(rc.isCountingInts()); rc.add('A'); @@ -78,6 +79,12 @@ public class ResidueCountTest rc.add('A'); assertTrue(rc.isCountingInts()); assertEquals(rc.getCount('a'), Short.MAX_VALUE + 1); + rc.add('A'); + assertTrue(rc.isCountingInts()); + assertEquals(rc.getCount('a'), Short.MAX_VALUE + 2); + assertEquals(rc.getGapCount(), 1); + rc.addGap(); + assertEquals(rc.getGapCount(), 2); /* * overflow from put @@ -86,6 +93,17 @@ public class ResidueCountTest rc.put('G', Short.MAX_VALUE + 1); assertTrue(rc.isCountingInts()); assertEquals(rc.getCount('g'), Short.MAX_VALUE + 1); + rc.put('G', 1); + assertTrue(rc.isCountingInts()); + assertEquals(rc.getCount('g'), 1); + + /* + * underflow from put + */ + rc = new ResidueCount(true); + rc.put('G', Short.MIN_VALUE - 1); + assertTrue(rc.isCountingInts()); + assertEquals(rc.getCount('g'), Short.MIN_VALUE - 1); } /** @@ -135,6 +153,9 @@ public class ResidueCountTest rc.put('J', 4); assertTrue(rc.isUsingOtherData()); + assertEquals(rc.getCount('J'), 4); + rc.add('j'); + assertEquals(rc.getCount('J'), 5); } @Test(groups = "Functional") @@ -162,7 +183,7 @@ public class ResidueCountTest @Test(groups = "Functional") public void testGetModalCount() { - ResidueCount rc = new ResidueCount(); + ResidueCount rc = new ResidueCount(true); rc.add('c'); rc.add('g'); rc.add('c'); @@ -176,7 +197,7 @@ public class ResidueCountTest assertEquals(rc.getModalCount(), Short.MAX_VALUE + 1); // modal count is in the 'other data' counts - rc = new ResidueCount(); + rc = new ResidueCount(false); rc.add('Q'); rc.add('{'); rc.add('{'); @@ -196,7 +217,7 @@ public class ResidueCountTest @Test(groups = "Functional") public void testGetResiduesForCount() { - ResidueCount rc = new ResidueCount(); + ResidueCount rc = new ResidueCount(true); rc.add('c'); rc.add('g'); rc.add('c'); @@ -207,12 +228,20 @@ public class ResidueCountTest assertEquals(rc.getResiduesForCount(-1), ""); // modal count is in the 'short overflow' counts - rc = new ResidueCount(); + rc = new ResidueCount(true); rc.add('c'); rc.put('g', Short.MAX_VALUE); rc.add('G'); assertEquals(rc.getResiduesForCount(Short.MAX_VALUE + 1), "G"); assertEquals(rc.getResiduesForCount(1), "C"); + + // peptide modal count is in the 'short overflow' counts + rc = new ResidueCount(false); + rc.add('c'); + rc.put('p', Short.MAX_VALUE); + rc.add('P'); + assertEquals(rc.getResiduesForCount(Short.MAX_VALUE + 1), "P"); + assertEquals(rc.getResiduesForCount(1), "C"); // modal count is in the 'other data' counts rc = new ResidueCount(); @@ -248,27 +277,52 @@ public class ResidueCountTest } @Test(groups = "Functional") - public void testGetSymbolCounts() + public void testGetSymbolCounts_nucleotide() { - ResidueCount rc = new ResidueCount(); - rc.add('q'); + ResidueCount rc = new ResidueCount(true); + rc.add('g'); rc.add('c'); - rc.add('Q'); + rc.add('G'); rc.add('J'); // 'otherData' - rc.add('q'); - rc.add('x'); + rc.add('g'); + rc.add('N'); + rc.put('[', 0); // 'otherdata' SymbolCounts sc = rc.getSymbolCounts(); - Assert.assertArrayEquals(new char[] { 'C', 'Q', 'X', 'J' }, sc.symbols); - Assert.assertArrayEquals(new int[] { 1, 3, 1, 1 }, sc.values); + Assert.assertArrayEquals(new char[] { 'C', 'G', 'N', 'J', '[' }, + sc.symbols); + Assert.assertArrayEquals(new int[] { 1, 3, 1, 1, 0 }, sc.values); // now with overflow to int counts - rc.put('g', Short.MAX_VALUE); - rc.add('g'); + rc.put('U', Short.MAX_VALUE); + rc.add('u'); sc = rc.getSymbolCounts(); - Assert.assertArrayEquals(new char[] { 'C', 'G', 'Q', 'X', 'J' }, + Assert.assertArrayEquals(new char[] { 'C', 'G', 'N', 'U', 'J', '[' }, sc.symbols); - Assert.assertArrayEquals(new int[] { 1, 32768, 3, 1, 1 }, sc.values); + Assert.assertArrayEquals(new int[] { 1, 3, 1, 32768, 1, 0 }, sc.values); + } + + @Test(groups = "Functional") + public void testGetSymbolCounts_peptide() + { + ResidueCount rc = new ResidueCount(false); + rc.add('W'); + rc.add('q'); + rc.add('W'); + rc.add('Z'); // 'otherData' + rc.add('w'); + rc.add('L'); + + SymbolCounts sc = rc.getSymbolCounts(); + Assert.assertArrayEquals(new char[] { 'L', 'Q', 'W', 'Z' }, sc.symbols); + Assert.assertArrayEquals(new int[] { 1, 1, 3, 1 }, sc.values); + + // now with overflow to int counts + rc.put('W', Short.MAX_VALUE); + rc.add('W'); + sc = rc.getSymbolCounts(); + Assert.assertArrayEquals(new char[] { 'L', 'Q', 'W', 'Z' }, sc.symbols); + Assert.assertArrayEquals(new int[] { 1, 1, 32768, 1 }, sc.values); } @Test(groups = "Functional") @@ -289,4 +343,62 @@ public class ResidueCountTest rc.add('g'); assertEquals(rc.toString(), "[ C:1 G:32768 Q:2 {:1 ]"); } + + @Test(groups = "Functional") + public void testGetTooltip() + { + ResidueCount rc = new ResidueCount(); + + // no counts! + assertEquals(rc.getTooltip(20, 1), ""); + + /* + * count 7 C, 6 K, 7 Q, 10 P, 9 W, 1 F (total 40) + */ + for (int i = 0; i < 7; i++) + { + rc.add('c'); + rc.add('q'); + } + for (int i = 0; i < 10; i++) + { + rc.add('p'); + } + for (int i = 0; i < 9; i++) + { + rc.add('W'); + } + for (int i = 0; i < 6; i++) + { + rc.add('K'); + } + rc.add('F'); + + assertEquals(rc.getTooltip(40, 0), + "P 25%; W 22%; C 17%; Q 17%; K 15%; F 2%"); + + assertEquals(rc.getTooltip(30, 1), + "P 33.3%; W 30.0%; C 23.3%; Q 23.3%; K 20.0%; F 3.3%"); + } + + @Test(groups = "Functional") + public void testPut() + { + ResidueCount rc = new ResidueCount(); + rc.put('q', 3); + assertEquals(rc.getCount('Q'), 3); + rc.put(' ', 4); + assertEquals(rc.getGapCount(), 4); + rc.put('.', 5); + assertEquals(rc.getGapCount(), 5); + rc.put('-', 6); + assertEquals(rc.getGapCount(), 6); + + rc.put('?', 5); + assertEquals(rc.getCount('?'), 5); + rc.put('?', 6); + rc.put('!', 7); + assertEquals(rc.getCount('?'), 6); + assertEquals(rc.getCount('!'), 7); + } } -- 1.7.10.2