X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FAAFrequency.java;h=61f3b7f3b391024ce2b5355975b143871ad60f3f;hb=5ef61dcb1cc4a94ac6f88e90fc186521ce7224c1;hp=f8e83796892ef269f6880a87a00199c7b0be4666;hpb=3d0101179759ef157b088ea135423cd909512d9f;p=jalview.git diff --git a/src/jalview/analysis/AAFrequency.java b/src/jalview/analysis/AAFrequency.java index f8e8379..61f3b7f 100755 --- a/src/jalview/analysis/AAFrequency.java +++ b/src/jalview/analysis/AAFrequency.java @@ -151,10 +151,9 @@ public class AAFrequency "WARNING: Consensus skipping null sequence - possible race condition."); continue; } - char[] seq = sequences[row].getSequence(); - if (seq.length > column) + if (sequences[row].getLength() > column) { - char c = seq[column]; + char c = sequences[row].getCharAt(column); residueCounts.add(c); if (Comparison.isNucleotide(c)) { @@ -399,7 +398,7 @@ public class AAFrequency * contains * *
-   *    [profileType, numberOfValues, nonGapCount, charValue1, percentage1, charValue2, percentage2, ...]
+   *    [profileType, numberOfValues, totalPercent, charValue1, percentage1, charValue2, percentage2, ...]
    * in descending order of percentage value
    * 
* @@ -412,7 +411,6 @@ public class AAFrequency */ public static int[] extractProfile(ProfileI profile, boolean ignoreGaps) { - int[] rtnval = new int[64]; ResidueCount counts = profile.getCounts(); if (counts == null) { @@ -423,7 +421,6 @@ public class AAFrequency char[] symbols = symbolCounts.symbols; int[] values = symbolCounts.values; QuickSort.sort(values, symbols); - int nextArrayPos = 2; int totalPercentage = 0; final int divisor = ignoreGaps ? profile.getNonGapped() : profile.getHeight(); @@ -431,21 +428,44 @@ public class AAFrequency /* * traverse the arrays in reverse order (highest counts first) */ + int[] result = new int[3 + 2 * symbols.length]; + int nextArrayPos = 3; + int nonZeroCount = 0; + for (int i = symbols.length - 1; i >= 0; i--) { int theChar = symbols[i]; int charCount = values[i]; - - rtnval[nextArrayPos++] = theChar; final int percentage = (charCount * 100) / divisor; - rtnval[nextArrayPos++] = percentage; + if (percentage == 0) + { + /* + * this count (and any remaining) round down to 0% - discard + */ + break; + } + nonZeroCount++; + result[nextArrayPos++] = theChar; + result[nextArrayPos++] = percentage; totalPercentage += percentage; } - rtnval[0] = symbols.length; - rtnval[1] = totalPercentage; - int[] result = new int[rtnval.length + 1]; + + /* + * truncate array if any zero values were discarded + */ + if (nonZeroCount < symbols.length) + { + int[] tmp = new int[3 + 2 * nonZeroCount]; + System.arraycopy(result, 0, tmp, 0, tmp.length); + result = tmp; + } + + /* + * fill in 'header' values + */ result[0] = AlignmentAnnotation.SEQUENCE_PROFILE; - System.arraycopy(rtnval, 0, result, 1, rtnval.length); + result[1] = nonZeroCount; + result[2] = totalPercentage; return result; } @@ -455,14 +475,15 @@ public class AAFrequency * contains * *
-   *    [profileType, numberOfValues, totalCount, charValue1, percentage1, charValue2, percentage2, ...]
+   *    [profileType, numberOfValues, totalPercentage, charValue1, percentage1, charValue2, percentage2, ...]
    * in descending order of percentage value, where the character values encode codon triplets
    * 
* * @param hashtable * @return */ - public static int[] extractCdnaProfile(Hashtable hashtable, + public static int[] extractCdnaProfile( + Hashtable hashtable, boolean ignoreGaps) { // this holds #seqs, #ungapped, and then codon count, indexed by encoded @@ -493,9 +514,16 @@ public class AAFrequency { break; // nothing else of interest here } + final int percentage = codonCount * 100 / divisor; + if (percentage == 0) + { + /* + * this (and any remaining) values rounded down to 0 - discard + */ + break; + } distinctValuesCount++; result[j++] = codons[i]; - final int percentage = codonCount * 100 / divisor; result[j++] = percentage; totalPercentage += percentage; } @@ -519,7 +547,7 @@ public class AAFrequency * the consensus data stores to be populated (one per column) */ public static void calculateCdna(AlignmentI alignment, - Hashtable[] hconsensus) + Hashtable[] hconsensus) { final char gapCharacter = alignment.getGapCharacter(); List mappings = alignment.getCodonFrames(); @@ -532,7 +560,7 @@ public class AAFrequency for (int col = 0; col < cols; col++) { // todo would prefer a Java bean for consensus data - Hashtable columnHash = new Hashtable(); + Hashtable columnHash = new Hashtable<>(); // #seqs, #ungapped seqs, counts indexed by (codon encoded + 1) int[] codonCounts = new int[66]; codonCounts[0] = alignment.getSequences().size(); @@ -577,7 +605,8 @@ public class AAFrequency */ public static void completeCdnaConsensus( AlignmentAnnotation consensusAnnotation, - Hashtable[] consensusData, boolean showProfileLogo, int nseqs) + Hashtable[] consensusData, boolean showProfileLogo, + int nseqs) { if (consensusAnnotation == null || consensusAnnotation.annotations == null @@ -592,7 +621,7 @@ public class AAFrequency consensusAnnotation.scaleColLabel = true; for (int col = 0; col < consensusData.length; col++) { - Hashtable hci = consensusData[col]; + Hashtable hci = consensusData[col]; if (hci == null) { // gapped protein column?