X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FAAFrequency.java;h=892b1b1ff8cb4efc79cdb7f3bac0ba3a6542bb7e;hb=d01453133dd3b6247e4d0a12a6dd6886d6053a6c;hp=10ae253b519705857efbdb7e54217d2602285339;hpb=dad3f91c2f9a38ce8c64a688b6f1ba4f539af9fc;p=jalview.git diff --git a/src/jalview/analysis/AAFrequency.java b/src/jalview/analysis/AAFrequency.java index 10ae253..892b1b1 100755 --- a/src/jalview/analysis/AAFrequency.java +++ b/src/jalview/analysis/AAFrequency.java @@ -193,16 +193,14 @@ public class AAFrequency * The alignment column on which the first profile is based. * @param end * The alignment column on which the last profile is based. - * @param saveFullProfile - * Flag for saving the counts for each profile * @param removeBelowBackground - * Flag for removing any characters with a match emission probability - * less than its background frequency + * if true, symbols with a match emission probability less than + * background frequency are ignored * @return */ public static ProfilesI calculateHMMProfiles(final HiddenMarkovModel hmm, - int width, int start, int end, boolean saveFullProfile, - boolean removeBelowBackground, boolean infoLetterHeight) + int width, int start, int end, boolean removeBelowBackground, + boolean infoLetterHeight) { ProfileI[] result = new ProfileI[width]; char[] symbols = hmm.getSymbols().toCharArray(); @@ -221,11 +219,7 @@ public class AAFrequency int gapCount = counts.getGapCount(); ProfileI profile = new Profile(symbolCount, gapCount, maxCount, maxResidue); - - if (saveFullProfile) - { - profile.setCounts(counts); - } + profile.setCounts(counts); result[column] = profile; } @@ -344,20 +338,16 @@ public class AAFrequency * @param endCol * end column (exclusive) * @param ignoreGaps - * if true, normalise residue percentages + * if true, normalise residue percentages * @param showSequenceLogo * if true include all information symbols, else just show modal * residue - * @param nseq - * number of sequences */ public static float completeInformation(AlignmentAnnotation information, - ProfilesI profiles, int startCol, int endCol, long nseq, - Float currentMax) + ProfilesI profiles, int startCol, int endCol) { // long now = System.currentTimeMillis(); - if (information == null || information.annotations == null - || information.annotations.length < endCol) + if (information == null || information.annotations == null) { /* * called with a bad alignment annotation row @@ -366,54 +356,48 @@ public class AAFrequency return 0; } - Float max = 0f; + float max = 0f; + SequenceI hmmSeq = information.sequenceRef; - for (int i = startCol; i < endCol; i++) + int seqLength = hmmSeq.getLength(); + if (information.annotations.length < seqLength) { - ProfileI profile = profiles.get(i); - if (profile == null) + return 0; + } + + HiddenMarkovModel hmm = hmmSeq.getHMM(); + + for (int column = startCol; column < endCol; column++) + { + if (column >= seqLength) { - /* - * happens if sequences calculated over were - * shorter than alignment width - */ - information.annotations[i] = null; - return 0; + // hmm consensus sequence is shorter than the alignment + break; } - SequenceI hmmSeq = information.sequenceRef; - - HiddenMarkovModel hmm = hmmSeq.getHMM(); - - float value = hmm.getInformationContent(i); - - if (value > max) + float value = hmm.getInformationContent(column); + boolean isNaN = Float.isNaN(value); + if (!isNaN) { - max = value; + max = Math.max(max, value); } - String description = value + " bits"; - information.annotations[i] = new Annotation( - Character.toString(Character - .toUpperCase(hmm.getConsensusAtAlignColumn(i))), + String description = isNaN ? null + : String.format("%.4f bits", value); + information.annotations[column] = new Annotation( + Character.toString( + Character.toUpperCase(hmmSeq.getCharAt(column))), description, ' ', value); } - if (max > currentMax) - { - information.graphMax = max; - return max; - } - else - { - information.graphMax = currentMax; - return currentMax; - } + + information.graphMax = max; + return max; } /** - * Derive the gap count annotation row. + * Derive the occupancy count annotation * - * @param gaprow + * @param occupancy * the annotation row to add annotations to * @param profiles * the source consensus data @@ -422,11 +406,11 @@ public class AAFrequency * @param endCol * end column (exclusive) */ - public static void completeGapAnnot(AlignmentAnnotation gaprow, + public static void completeGapAnnot(AlignmentAnnotation occupancy, ProfilesI profiles, int startCol, int endCol, long nseq) { - if (gaprow == null || gaprow.annotations == null - || gaprow.annotations.length < endCol) + if (occupancy == null || occupancy.annotations == null + || occupancy.annotations.length < endCol) { /* * called with a bad alignment annotation row @@ -435,8 +419,8 @@ public class AAFrequency return; } // always set ranges again - gaprow.graphMax = nseq; - gaprow.graphMin = 0; + occupancy.graphMax = nseq; + occupancy.graphMin = 0; double scale = 0.8 / nseq; for (int i = startCol; i < endCol; i++) { @@ -447,7 +431,7 @@ public class AAFrequency * happens if sequences calculated over were * shorter than alignment width */ - gaprow.annotations[i] = null; + occupancy.annotations[i] = null; return; } @@ -455,7 +439,8 @@ public class AAFrequency String description = "" + gapped; - gaprow.annotations[i] = new Annotation("", description, '\0', gapped, + occupancy.annotations[i] = new Annotation("", description, '\0', + gapped, jalview.util.ColorUtils.bleachColour(Color.DARK_GRAY, (float) scale * gapped)); } @@ -520,7 +505,7 @@ public class AAFrequency * contains * *
- * [profileType, numberOfValues, nonGapCount, charValue1, percentage1, charValue2, percentage2, ...] + * [profileType, numberOfValues, totalPercent, charValue1, percentage1, charValue2, percentage2, ...] * in descending order of percentage value ** @@ -533,7 +518,6 @@ public class AAFrequency */ public static int[] extractProfile(ProfileI profile, boolean ignoreGaps) { - int[] rtnval = new int[64]; ResidueCount counts = profile.getCounts(); if (counts == null) { @@ -544,7 +528,6 @@ public class AAFrequency char[] symbols = symbolCounts.symbols; int[] values = symbolCounts.values; QuickSort.sort(values, symbols); - int nextArrayPos = 2; int totalPercentage = 0; final int divisor = ignoreGaps ? profile.getNonGapped() : profile.getHeight(); @@ -552,21 +535,44 @@ public class AAFrequency /* * traverse the arrays in reverse order (highest counts first) */ + int[] result = new int[3 + 2 * symbols.length]; + int nextArrayPos = 3; + int nonZeroCount = 0; + for (int i = symbols.length - 1; i >= 0; i--) { int theChar = symbols[i]; int charCount = values[i]; - - rtnval[nextArrayPos++] = theChar; final int percentage = (charCount * 100) / divisor; - rtnval[nextArrayPos++] = percentage; + if (percentage == 0) + { + /* + * this count (and any remaining) round down to 0% - discard + */ + break; + } + nonZeroCount++; + result[nextArrayPos++] = theChar; + result[nextArrayPos++] = percentage; totalPercentage += percentage; } - rtnval[0] = symbols.length; - rtnval[1] = totalPercentage; - int[] result = new int[rtnval.length + 1]; + + /* + * truncate array if any zero values were discarded + */ + if (nonZeroCount < symbols.length) + { + int[] tmp = new int[3 + 2 * nonZeroCount]; + System.arraycopy(result, 0, tmp, 0, tmp.length); + result = tmp; + } + + /* + * fill in 'header' values + */ result[0] = AlignmentAnnotation.SEQUENCE_PROFILE; - System.arraycopy(rtnval, 0, result, 1, rtnval.length); + result[1] = nonZeroCount; + result[2] = totalPercentage; return result; } @@ -577,15 +583,15 @@ public class AAFrequency * contains * *
- * [profileType, numberOfValues, totalCount, charValue1, percentage1, charValue2, percentage2, ...] + * [profileType, numberOfValues, totalPercentage, charValue1, percentage1, charValue2, percentage2, ...] * in descending order of percentage value, where the character values encode codon triplets ** * @param hashtable * @return */ - public static int[] extractCdnaProfile(Hashtable hashtable, - boolean ignoreGaps) + public static int[] extractCdnaProfile( + Hashtable