X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FAAFrequency.java;h=892b1b1ff8cb4efc79cdb7f3bac0ba3a6542bb7e;hb=d01453133dd3b6247e4d0a12a6dd6886d6053a6c;hp=10ae253b519705857efbdb7e54217d2602285339;hpb=dad3f91c2f9a38ce8c64a688b6f1ba4f539af9fc;p=jalview.git diff --git a/src/jalview/analysis/AAFrequency.java b/src/jalview/analysis/AAFrequency.java index 10ae253..892b1b1 100755 --- a/src/jalview/analysis/AAFrequency.java +++ b/src/jalview/analysis/AAFrequency.java @@ -193,16 +193,14 @@ public class AAFrequency * The alignment column on which the first profile is based. * @param end * The alignment column on which the last profile is based. - * @param saveFullProfile - * Flag for saving the counts for each profile * @param removeBelowBackground - * Flag for removing any characters with a match emission probability - * less than its background frequency + * if true, symbols with a match emission probability less than + * background frequency are ignored * @return */ public static ProfilesI calculateHMMProfiles(final HiddenMarkovModel hmm, - int width, int start, int end, boolean saveFullProfile, - boolean removeBelowBackground, boolean infoLetterHeight) + int width, int start, int end, boolean removeBelowBackground, + boolean infoLetterHeight) { ProfileI[] result = new ProfileI[width]; char[] symbols = hmm.getSymbols().toCharArray(); @@ -221,11 +219,7 @@ public class AAFrequency int gapCount = counts.getGapCount(); ProfileI profile = new Profile(symbolCount, gapCount, maxCount, maxResidue); - - if (saveFullProfile) - { - profile.setCounts(counts); - } + profile.setCounts(counts); result[column] = profile; } @@ -344,20 +338,16 @@ public class AAFrequency * @param endCol * end column (exclusive) * @param ignoreGaps - * if true, normalise residue percentages + * if true, normalise residue percentages * @param showSequenceLogo * if true include all information symbols, else just show modal * residue - * @param nseq - * number of sequences */ public static float completeInformation(AlignmentAnnotation information, - ProfilesI profiles, int startCol, int endCol, long nseq, - Float currentMax) + ProfilesI profiles, int startCol, int endCol) { // long now = System.currentTimeMillis(); - if (information == null || information.annotations == null - || information.annotations.length < endCol) + if (information == null || information.annotations == null) { /* * called with a bad alignment annotation row @@ -366,54 +356,48 @@ public class AAFrequency return 0; } - Float max = 0f; + float max = 0f; + SequenceI hmmSeq = information.sequenceRef; - for (int i = startCol; i < endCol; i++) + int seqLength = hmmSeq.getLength(); + if (information.annotations.length < seqLength) { - ProfileI profile = profiles.get(i); - if (profile == null) + return 0; + } + + HiddenMarkovModel hmm = hmmSeq.getHMM(); + + for (int column = startCol; column < endCol; column++) + { + if (column >= seqLength) { - /* - * happens if sequences calculated over were - * shorter than alignment width - */ - information.annotations[i] = null; - return 0; + // hmm consensus sequence is shorter than the alignment + break; } - SequenceI hmmSeq = information.sequenceRef; - - HiddenMarkovModel hmm = hmmSeq.getHMM(); - - float value = hmm.getInformationContent(i); - - if (value > max) + float value = hmm.getInformationContent(column); + boolean isNaN = Float.isNaN(value); + if (!isNaN) { - max = value; + max = Math.max(max, value); } - String description = value + " bits"; - information.annotations[i] = new Annotation( - Character.toString(Character - .toUpperCase(hmm.getConsensusAtAlignColumn(i))), + String description = isNaN ? null + : String.format("%.4f bits", value); + information.annotations[column] = new Annotation( + Character.toString( + Character.toUpperCase(hmmSeq.getCharAt(column))), description, ' ', value); } - if (max > currentMax) - { - information.graphMax = max; - return max; - } - else - { - information.graphMax = currentMax; - return currentMax; - } + + information.graphMax = max; + return max; } /** - * Derive the gap count annotation row. + * Derive the occupancy count annotation * - * @param gaprow + * @param occupancy * the annotation row to add annotations to * @param profiles * the source consensus data @@ -422,11 +406,11 @@ public class AAFrequency * @param endCol * end column (exclusive) */ - public static void completeGapAnnot(AlignmentAnnotation gaprow, + public static void completeGapAnnot(AlignmentAnnotation occupancy, ProfilesI profiles, int startCol, int endCol, long nseq) { - if (gaprow == null || gaprow.annotations == null - || gaprow.annotations.length < endCol) + if (occupancy == null || occupancy.annotations == null + || occupancy.annotations.length < endCol) { /* * called with a bad alignment annotation row @@ -435,8 +419,8 @@ public class AAFrequency return; } // always set ranges again - gaprow.graphMax = nseq; - gaprow.graphMin = 0; + occupancy.graphMax = nseq; + occupancy.graphMin = 0; double scale = 0.8 / nseq; for (int i = startCol; i < endCol; i++) { @@ -447,7 +431,7 @@ public class AAFrequency * happens if sequences calculated over were * shorter than alignment width */ - gaprow.annotations[i] = null; + occupancy.annotations[i] = null; return; } @@ -455,7 +439,8 @@ public class AAFrequency String description = "" + gapped; - gaprow.annotations[i] = new Annotation("", description, '\0', gapped, + occupancy.annotations[i] = new Annotation("", description, '\0', + gapped, jalview.util.ColorUtils.bleachColour(Color.DARK_GRAY, (float) scale * gapped)); } @@ -520,7 +505,7 @@ public class AAFrequency * contains * *
-   *    [profileType, numberOfValues, nonGapCount, charValue1, percentage1, charValue2, percentage2, ...]
+   *    [profileType, numberOfValues, totalPercent, charValue1, percentage1, charValue2, percentage2, ...]
    * in descending order of percentage value
    * 
* @@ -533,7 +518,6 @@ public class AAFrequency */ public static int[] extractProfile(ProfileI profile, boolean ignoreGaps) { - int[] rtnval = new int[64]; ResidueCount counts = profile.getCounts(); if (counts == null) { @@ -544,7 +528,6 @@ public class AAFrequency char[] symbols = symbolCounts.symbols; int[] values = symbolCounts.values; QuickSort.sort(values, symbols); - int nextArrayPos = 2; int totalPercentage = 0; final int divisor = ignoreGaps ? profile.getNonGapped() : profile.getHeight(); @@ -552,21 +535,44 @@ public class AAFrequency /* * traverse the arrays in reverse order (highest counts first) */ + int[] result = new int[3 + 2 * symbols.length]; + int nextArrayPos = 3; + int nonZeroCount = 0; + for (int i = symbols.length - 1; i >= 0; i--) { int theChar = symbols[i]; int charCount = values[i]; - - rtnval[nextArrayPos++] = theChar; final int percentage = (charCount * 100) / divisor; - rtnval[nextArrayPos++] = percentage; + if (percentage == 0) + { + /* + * this count (and any remaining) round down to 0% - discard + */ + break; + } + nonZeroCount++; + result[nextArrayPos++] = theChar; + result[nextArrayPos++] = percentage; totalPercentage += percentage; } - rtnval[0] = symbols.length; - rtnval[1] = totalPercentage; - int[] result = new int[rtnval.length + 1]; + + /* + * truncate array if any zero values were discarded + */ + if (nonZeroCount < symbols.length) + { + int[] tmp = new int[3 + 2 * nonZeroCount]; + System.arraycopy(result, 0, tmp, 0, tmp.length); + result = tmp; + } + + /* + * fill in 'header' values + */ result[0] = AlignmentAnnotation.SEQUENCE_PROFILE; - System.arraycopy(rtnval, 0, result, 1, rtnval.length); + result[1] = nonZeroCount; + result[2] = totalPercentage; return result; } @@ -577,15 +583,15 @@ public class AAFrequency * contains * *
-   *    [profileType, numberOfValues, totalCount, charValue1, percentage1, charValue2, percentage2, ...]
+   *    [profileType, numberOfValues, totalPercentage, charValue1, percentage1, charValue2, percentage2, ...]
    * in descending order of percentage value, where the character values encode codon triplets
    * 
* * @param hashtable * @return */ - public static int[] extractCdnaProfile(Hashtable hashtable, - boolean ignoreGaps) + public static int[] extractCdnaProfile( + Hashtable hashtable, boolean ignoreGaps) { // this holds #seqs, #ungapped, and then codon count, indexed by encoded // codon triplet @@ -615,9 +621,16 @@ public class AAFrequency { break; // nothing else of interest here } + final int percentage = codonCount * 100 / divisor; + if (percentage == 0) + { + /* + * this (and any remaining) values rounded down to 0 - discard + */ + break; + } distinctValuesCount++; result[j++] = codons[i]; - final int percentage = codonCount * 100 / divisor; result[j++] = percentage; totalPercentage += percentage; } @@ -641,7 +654,7 @@ public class AAFrequency * the consensus data stores to be populated (one per column) */ public static void calculateCdna(AlignmentI alignment, - Hashtable[] hconsensus) + Hashtable[] hconsensus) { final char gapCharacter = alignment.getGapCharacter(); List mappings = alignment.getCodonFrames(); @@ -654,7 +667,7 @@ public class AAFrequency for (int col = 0; col < cols; col++) { // todo would prefer a Java bean for consensus data - Hashtable columnHash = new Hashtable<>(); + Hashtable columnHash = new Hashtable<>(); // #seqs, #ungapped seqs, counts indexed by (codon encoded + 1) int[] codonCounts = new int[66]; codonCounts[0] = alignment.getSequences().size(); @@ -674,6 +687,7 @@ public class AAFrequency { codonCounts[codonEncoded + 2]++; ungappedCount++; + break; } } } @@ -699,7 +713,8 @@ public class AAFrequency */ public static void completeCdnaConsensus( AlignmentAnnotation consensusAnnotation, - Hashtable[] consensusData, boolean showProfileLogo, int nseqs) + Hashtable[] consensusData, + boolean showProfileLogo, int nseqs) { if (consensusAnnotation == null || consensusAnnotation.annotations == null @@ -714,7 +729,7 @@ public class AAFrequency consensusAnnotation.scaleColLabel = true; for (int col = 0; col < consensusData.length; col++) { - Hashtable hci = consensusData[col]; + Hashtable hci = consensusData[col]; if (hci == null) { // gapped protein column?