X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FAAFrequency.java;h=85a9bb03904dedfef151ba5f743448c46b446419;hb=bd6ce8f5f9fc8e5bc8a6188d15987ce0ffd2c1ee;hp=b806355bf949de713b78068b9aee7ef70da075c9;hpb=136c0793b90b72b928c4d77dc109dd5c644e00d3;p=jalview.git diff --git a/src/jalview/analysis/AAFrequency.java b/src/jalview/analysis/AAFrequency.java index b806355..85a9bb0 100755 --- a/src/jalview/analysis/AAFrequency.java +++ b/src/jalview/analysis/AAFrequency.java @@ -24,6 +24,7 @@ import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.Annotation; +import jalview.datamodel.HiddenMarkovModel; import jalview.datamodel.Profile; import jalview.datamodel.ProfileI; import jalview.datamodel.Profiles; @@ -32,6 +33,7 @@ import jalview.datamodel.ResidueCount; import jalview.datamodel.ResidueCount.SymbolCounts; import jalview.datamodel.SequenceI; import jalview.ext.android.SparseIntArray; +import jalview.schemes.ResidueProperties; import jalview.util.Comparison; import jalview.util.Format; import jalview.util.MappingUtils; @@ -55,6 +57,12 @@ public class AAFrequency { public static final String PROFILE = "P"; + private static final String AMINO = "amino"; + + private static final String DNA = "DNA"; + + private static final String RNA = "RNA"; + /* * Quick look-up of String value of char 'A' to 'Z' */ @@ -101,6 +109,8 @@ public class AAFrequency } } + + /** * Calculate the consensus symbol(s) for each column in the given range. * @@ -147,14 +157,13 @@ public class AAFrequency { if (sequences[row] == null) { - System.err - .println("WARNING: Consensus skipping null sequence - possible race condition."); + System.err.println( + "WARNING: Consensus skipping null sequence - possible race condition."); continue; } - char[] seq = sequences[row].getSequence(); - if (seq.length > column) + if (sequences[row].getLength() > column) { - char c = seq[column]; + char c = sequences[row].getCharAt(column); residueCounts.add(c); if (Comparison.isNucleotide(c)) { @@ -193,6 +202,57 @@ public class AAFrequency } /** + * Returns the full set of profiles for a hidden Markov model. The underlying + * data is the raw probabilities of a residue being emitted at each node, + * however the profiles returned by this function contain the percentage + * chance of a residue emission. + * + * @param hmm + * @param width + * The width of the Profile array (Profiles) to be returned. + * @param start + * The alignment column on which the first profile is based. + * @param end + * The alignment column on which the last profile is based. + * @param saveFullProfile + * Flag for saving the counts for each profile + * @param removeBelowBackground + * Flag for removing any characters with a match emission probability + * less than its background frequency + * @return + */ + public static ProfilesI calculateHMMProfiles(final HiddenMarkovModel hmm, + int width, int start, int end, boolean saveFullProfile, + boolean removeBelowBackground, boolean infoLetterHeight) + { + ProfileI[] result = new ProfileI[width]; + int symbolCount = hmm.getNumberOfSymbols(); + for (int column = start; column < end; column++) + { + ResidueCount counts = new ResidueCount(); + for (char symbol : hmm.getSymbols()) + { + int value = getAnalogueCount(hmm, column, symbol, + removeBelowBackground, infoLetterHeight); + counts.put(symbol, value); + } + int maxCount = counts.getModalCount(); + String maxResidue = counts.getResiduesForCount(maxCount); + int gapCount = counts.getGapCount(); + ProfileI profile = new Profile(symbolCount, gapCount, maxCount, + maxResidue); + + if (saveFullProfile) + { + profile.setCounts(counts); + } + + result[column] = profile; + } + return new Profiles(result); + } + + /** * Make an estimate of the profile size we are going to compute i.e. how many * different characters may be present in it. Overestimating has a cost of * using more memory than necessary. Underestimating has a cost of needing to @@ -290,6 +350,89 @@ public class AAFrequency } /** + * Derive the information annotations to be added to the alignment for + * display. This does not recompute the raw data, but may be called on a + * change in display options, such as 'ignore below background frequency', + * which may in turn result in a change in the derived values. + * + * @param information + * the annotation row to add annotations to + * @param profiles + * the source information data + * @param startCol + * start column (inclusive) + * @param endCol + * end column (exclusive) + * @param ignoreGaps + * if true, normalise residue percentages + * @param showSequenceLogo + * if true include all information symbols, else just show modal + * residue + * @param nseq + * number of sequences + */ + public static float completeInformation(AlignmentAnnotation information, + ProfilesI profiles, int startCol, int endCol, long nseq, + Float currentMax) + { + // long now = System.currentTimeMillis(); + if (information == null || information.annotations == null + || information.annotations.length < endCol) + { + /* + * called with a bad alignment annotation row + * wait for it to be initialised properly + */ + return 0; + } + + Float max = 0f; + + for (int i = startCol; i < endCol; i++) + { + ProfileI profile = profiles.get(i); + if (profile == null) + { + /* + * happens if sequences calculated over were + * shorter than alignment width + */ + information.annotations[i] = null; + return 0; + } + + HiddenMarkovModel hmm; + + SequenceI hmmSeq = information.sequenceRef; + + hmm = hmmSeq.getHMM(); + + Float value = getInformationContent(i, hmm); + + if (value > max) + { + max = value; + } + + String description = value + " bits"; + information.annotations[i] = new Annotation( + Character.toString(Character + .toUpperCase(hmm.getConsensusAtAlignColumn(i))), + description, ' ', value); + } + if (max > currentMax) + { + information.graphMax = max; + return max; + } + else + { + information.graphMax = currentMax; + return currentMax; + } + } + + /** * Derive the gap count annotation row. * * @param gaprow @@ -316,7 +459,7 @@ public class AAFrequency // always set ranges again gaprow.graphMax = nseq; gaprow.graphMin = 0; - double scale = 0.8/nseq; + double scale = 0.8 / nseq; for (int i = startCol; i < endCol; i++) { ProfileI profile = profiles.get(i); @@ -334,9 +477,9 @@ public class AAFrequency String description = "" + gapped; - gaprow.annotations[i] = new Annotation("", description, - '\0', gapped, jalview.util.ColorUtils.bleachColour( - Color.DARK_GRAY, (float) scale * gapped)); + gaprow.annotations[i] = new Annotation("", description, '\0', gapped, + jalview.util.ColorUtils.bleachColour(Color.DARK_GRAY, + (float) scale * gapped)); } } @@ -345,7 +488,8 @@ public class AAFrequency *