* contains
*
* <pre>
- * [profileType, numberOfValues, nonGapCount, charValue1, percentage1, charValue2, percentage2, ...]
+ * [profileType, numberOfValues, totalPercent, charValue1, percentage1, charValue2, percentage2, ...]
* in descending order of percentage value
* </pre>
*
*/
public static int[] extractProfile(ProfileI profile, boolean ignoreGaps)
{
- int[] rtnval = new int[64];
ResidueCount counts = profile.getCounts();
if (counts == null)
{
char[] symbols = symbolCounts.symbols;
int[] values = symbolCounts.values;
QuickSort.sort(values, symbols);
- int nextArrayPos = 2;
int totalPercentage = 0;
final int divisor = ignoreGaps ? profile.getNonGapped()
: profile.getHeight();
/*
* traverse the arrays in reverse order (highest counts first)
*/
+ int[] result = new int[3 + 2 * symbols.length];
+ int nextArrayPos = 3;
+ int nonZeroCount = 0;
+
for (int i = symbols.length - 1; i >= 0; i--)
{
int theChar = symbols[i];
int charCount = values[i];
-
- rtnval[nextArrayPos++] = theChar;
final int percentage = (charCount * 100) / divisor;
- rtnval[nextArrayPos++] = percentage;
+ if (percentage == 0)
+ {
+ /*
+ * this count (and any remaining) round down to 0% - discard
+ */
+ break;
+ }
+ nonZeroCount++;
+ result[nextArrayPos++] = theChar;
+ result[nextArrayPos++] = percentage;
totalPercentage += percentage;
}
- rtnval[0] = symbols.length;
- rtnval[1] = totalPercentage;
- int[] result = new int[rtnval.length + 1];
+
+ /*
+ * truncate array if any zero values were discarded
+ */
+ if (nonZeroCount < symbols.length)
+ {
+ int[] tmp = new int[3 + 2 * nonZeroCount];
+ System.arraycopy(result, 0, tmp, 0, tmp.length);
+ result = tmp;
+ }
+
+ /*
+ * fill in 'header' values
+ */
result[0] = AlignmentAnnotation.SEQUENCE_PROFILE;
- System.arraycopy(rtnval, 0, result, 1, rtnval.length);
+ result[1] = nonZeroCount;
+ result[2] = totalPercentage;
return result;
}
* contains
*
* <pre>
- * [profileType, numberOfValues, totalCount, charValue1, percentage1, charValue2, percentage2, ...]
+ * [profileType, numberOfValues, totalPercentage, charValue1, percentage1, charValue2, percentage2, ...]
* in descending order of percentage value, where the character values encode codon triplets
* </pre>
*
{
break; // nothing else of interest here
}
+ final int percentage = codonCount * 100 / divisor;
+ if (percentage == 0)
+ {
+ /*
+ * this (and any remaining) values rounded down to 0 - discard
+ */
+ break;
+ }
distinctValuesCount++;
result[j++] = codons[i];
- final int percentage = codonCount * 100 / divisor;
result[j++] = percentage;
totalPercentage += percentage;
}
for (int col = 0; col < cols; col++)
{
// todo would prefer a Java bean for consensus data
- Hashtable<String, int[]> columnHash = new Hashtable<String, int[]>();
+ Hashtable<String, int[]> columnHash = new Hashtable<>();
// #seqs, #ungapped seqs, counts indexed by (codon encoded + 1)
int[] codonCounts = new int[66];
codonCounts[0] = alignment.getSequences().size();
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.Annotation;
+import jalview.datamodel.Profile;
import jalview.datamodel.ProfileI;
import jalview.datamodel.ProfilesI;
+import jalview.datamodel.ResidueCount;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
import jalview.gui.JvOptionPane;
+import java.util.Hashtable;
+
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
assertEquals("T 75%", ann.description);
assertEquals("T", ann.displayCharacter);
}
+
+ /**
+ * Tests extractProfile ignoring gaps, including discard of a non-zero count that rounds down to 0% (JAL-3202)
+ */
+ @Test(groups = { "Functional" })
+ public void testExtractProfile()
+ {
+ /*
+ * 200 sequences of which 30 gapped (170 ungapped)
+ * max count 70 for modal residue 'G'
+ */
+ ProfileI profile = new Profile(200, 30, 70, "G");
+ ResidueCount counts = new ResidueCount();
+ counts.put('G', 70);
+ counts.put('R', 60);
+ counts.put('L', 38);
+ counts.put('H', 2);
+ profile.setCounts(counts);
+
+ /*
+ * [profileType, noOfValues, totalPercent, char1, percentage1, ...]
+ * G: 70/170 = 41.2% -> 41
+ * R: 60/170 = 35.3% -> 35
+ * L: 38/170 = 22.4% -> 22
+ * H: 2/170 = 1.2% -> 1
+ * total of the (rounded down) percentages = 99
+ */
+ int[] extracted = AAFrequency.extractProfile(profile, true);
+ int[] expected = new int[] { 0, 4, 99, 'G', 41, 'R', 35, 'L', 22, 'H',
+ 1 };
+ org.testng.Assert.assertEquals(extracted, expected);
+
+ /*
+ * add some counts of 1 (1/170 = 0.6%); these round down to 0% and should be discarded
+ */
+ counts.put('G', 68); // 68/170 = 40% exactly (percentages now total 98)
+ counts.put('Q', 1);
+ counts.put('K', 1);
+ extracted = AAFrequency.extractProfile(profile, true);
+ expected = new int[] { 0, 4, 98, 'G', 40, 'R', 35, 'L', 22, 'H', 1 };
+ org.testng.Assert.assertEquals(extracted, expected);
+
+ }
+
+ /**
+ * Tests for the profile calculation where gaps are included i.e. the
+ * denominator is the total number of sequences in the column
+ */
+ @Test(groups = { "Functional" })
+ public void testExtractProfile_countGaps()
+ {
+ /*
+ * 200 sequences of which 30 gapped (170 ungapped)
+ * max count 70 for modal residue 'G'
+ */
+ ProfileI profile = new Profile(200, 30, 70, "G");
+ ResidueCount counts = new ResidueCount();
+ counts.put('G', 70);
+ counts.put('R', 60);
+ counts.put('L', 38);
+ counts.put('H', 2);
+ profile.setCounts(counts);
+
+ /*
+ * [profileType, noOfValues, totalPercent, char1, percentage1, ...]
+ * G: 70/200 = 35%
+ * R: 60/200 = 30%
+ * L: 38/200 = 19%
+ * H: 2/200 = 1%
+ * total of the percentages = 85 (gapped sequences account for the rest)
+ */
+ int[] extracted = AAFrequency.extractProfile(profile, false);
+ int[] expected = new int[] { AlignmentAnnotation.SEQUENCE_PROFILE, 4,
+ 85, 'G', 35, 'R', 30, 'L', 19, 'H',
+ 1 };
+ org.testng.Assert.assertEquals(extracted, expected);
+
+ /*
+ * add some counts of 1 (1/200 = 0.5%); these round down to 0% and should be discarded
+ */
+ counts.put('G', 68); // 68/200 = 34%
+ counts.put('Q', 1);
+ counts.put('K', 1);
+ extracted = AAFrequency.extractProfile(profile, false);
+ expected = new int[] { AlignmentAnnotation.SEQUENCE_PROFILE, 4, 84, 'G',
+ 34, 'R', 30, 'L', 19, 'H', 1 };
+ org.testng.Assert.assertEquals(extracted, expected);
+
+ }
+
+ @Test(groups = { "Functional" })
+ public void testExtractCdnaProfile()
+ {
+ /*
+ * 120 sequences of which 10 gapped (110 ungapped)
+ * highest codon count is 70 (codon2)
+ */
+ Hashtable profile = new Hashtable();
+
+ /*
+ * cdna profile is {seqCount, ungappedCount, codonCount1, ...codonCount64}
+ * where the count for a codon is held at index (2 + its encoded value)
+ * see CodingUtils.encodeCodon()
+ */
+ int[] codonCounts = new int[66];
+ char[] codon1 = new char[] { 'G', 'C', 'A' };
+ char[] codon2 = new char[] { 'c', 'C', 'A' };
+ char[] codon3 = new char[] { 't', 'g', 'A' };
+ char[] codon4 = new char[] { 'G', 'C', 't' };
+ int encoded1 = CodingUtils.encodeCodon(codon1);
+ int encoded2 = CodingUtils.encodeCodon(codon2);
+ int encoded3 = CodingUtils.encodeCodon(codon3);
+ int encoded4 = CodingUtils.encodeCodon(codon4);
+ codonCounts[2 + encoded1] = 30;
+ codonCounts[2 + encoded2] = 70;
+ codonCounts[2 + encoded3] = 9;
+ codonCounts[2 + encoded4] = 1;
+ codonCounts[0] = 120;
+ codonCounts[1] = 110;
+ profile.put(AAFrequency.PROFILE, codonCounts);
+
+ /*
+ * [profileType, noOfValues, totalPercent, codon1, percentage1, ...]
+ * codon1: 30/110 = 27.3% -> 27%
+ * codon2: 70/110 = 63.6% -> 63%
+ * codon3: 9/110 = 8.2% -> 8%
+ * codon4: 1/110 = 0.9% -> 0% should be discarded
+ * total of the (rounded down) percentages = 98
+ */
+ int[] extracted = AAFrequency.extractCdnaProfile(profile, true);
+ int[] expected = new int[] { AlignmentAnnotation.CDNA_PROFILE, 3, 98,
+ encoded2, 63, encoded1, 27, encoded3, 8 };
+ org.testng.Assert.assertEquals(extracted, expected);
+ }
+
+ @Test(groups = { "Functional" })
+ public void testExtractCdnaProfile_countGaps()
+ {
+ /*
+ * 120 sequences of which 10 gapped (110 ungapped)
+ * highest codon count is 70 (codon2)
+ */
+ Hashtable profile = new Hashtable();
+
+ /*
+ * cdna profile is {seqCount, ungappedCount, codonCount1, ...codonCount64}
+ * where the count for a codon is held at index (2 + its encoded value)
+ * see CodingUtils.encodeCodon()
+ */
+ int[] codonCounts = new int[66];
+ char[] codon1 = new char[] { 'G', 'C', 'A' };
+ char[] codon2 = new char[] { 'c', 'C', 'A' };
+ char[] codon3 = new char[] { 't', 'g', 'A' };
+ char[] codon4 = new char[] { 'G', 'C', 't' };
+ int encoded1 = CodingUtils.encodeCodon(codon1);
+ int encoded2 = CodingUtils.encodeCodon(codon2);
+ int encoded3 = CodingUtils.encodeCodon(codon3);
+ int encoded4 = CodingUtils.encodeCodon(codon4);
+ codonCounts[2 + encoded1] = 30;
+ codonCounts[2 + encoded2] = 70;
+ codonCounts[2 + encoded3] = 9;
+ codonCounts[2 + encoded4] = 1;
+ codonCounts[0] = 120;
+ codonCounts[1] = 110;
+ profile.put(AAFrequency.PROFILE, codonCounts);
+
+ /*
+ * [profileType, noOfValues, totalPercent, codon1, percentage1, ...]
+ * codon1: 30/120 = 25%
+ * codon2: 70/120 = 58.3% -> 58%
+ * codon3: 9/120 = 7.5% -> 7%
+ * codon4: 1/120 = 0.8% -> 0% should be discarded
+ * total of the (rounded down) percentages = 90
+ */
+ int[] extracted = AAFrequency.extractCdnaProfile(profile, false);
+ int[] expected = new int[] { AlignmentAnnotation.CDNA_PROFILE, 3, 90,
+ encoded2, 58, encoded1, 25, encoded3, 7 };
+ org.testng.Assert.assertEquals(extracted, expected);
+ }
}