From 56e8c00dcdc6aa8596830451e4419e64628c77f8 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Tue, 27 Feb 2018 15:48:51 +0000 Subject: [PATCH] JAL-2717 JAL-2668 fixes to HMMER colour scheme display names, enabled state, test coverage --- resources/lang/Messages.properties | 2 + src/jalview/datamodel/HiddenMarkovModel.java | 8 + src/jalview/datamodel/ResidueCount.java | 30 +++- src/jalview/gui/ColourMenuHelper.java | 3 +- src/jalview/hmmer/HMMBuildThread.java | 1 + .../schemes/HMMERAlignmentColourScheme.java | 153 +++++++++----------- src/jalview/schemes/HMMERColourScheme.java | 149 ++++++++++--------- test/jalview/datamodel/ResidueCountTest.java | 21 +++ .../schemes/HMMERAlignmentColourSchemeTest.java | 75 ++++++++++ test/jalview/schemes/HMMERColourSchemeTest.java | 73 +++++++--- 10 files changed, 338 insertions(+), 177 deletions(-) create mode 100644 test/jalview/schemes/HMMERAlignmentColourSchemeTest.java diff --git a/resources/lang/Messages.properties b/resources/lang/Messages.properties index d44c48a..06c8b6b 100644 --- a/resources/lang/Messages.properties +++ b/resources/lang/Messages.properties @@ -204,6 +204,8 @@ label.colourScheme_purine/pyrimidine = Purine/Pyrimidine label.colourScheme_nucleotide = Nucleotide label.colourScheme_t-coffee_scores = T-Coffee Scores label.colourScheme_rna_helices = By RNA Helices +label.colourScheme_hmmer-uniprot = HMMER profile v global background +label.colourScheme_hmmer-alignment = HMMER profile v alignment background label.blc = BLC label.fasta = Fasta label.msf = MSF diff --git a/src/jalview/datamodel/HiddenMarkovModel.java b/src/jalview/datamodel/HiddenMarkovModel.java index 581f481..eb68d09 100644 --- a/src/jalview/datamodel/HiddenMarkovModel.java +++ b/src/jalview/datamodel/HiddenMarkovModel.java @@ -485,6 +485,14 @@ public class HiddenMarkovModel nodeLookup.put(column, node); } + /** + * Updates the mapping of nodes of the HMM to non-gapped positions of the + * sequence. Nodes 1, 2, 3... 
are mapped to the columns occupied by the first, + * second, third... residues of the sequence. The 'begin' node (node 0) of the + * HMM is not mapped. + * + * @param sequence + */ public void updateMapping(char[] sequence) { int nodeNo = 1; diff --git a/src/jalview/datamodel/ResidueCount.java b/src/jalview/datamodel/ResidueCount.java index 612969f..aaf6988 100644 --- a/src/jalview/datamodel/ResidueCount.java +++ b/src/jalview/datamodel/ResidueCount.java @@ -25,6 +25,8 @@ import jalview.util.Format; import jalview.util.QuickSort; import jalview.util.SparseCount; +import java.util.List; + /** * A class to count occurrences of residues in a profile, optimised for speed * and memory footprint. @@ -148,6 +150,24 @@ public class ResidueCount } /** + * A constructor that counts frequency of all symbols (including gaps) in the + * sequences (not case-sensitive) + * + * @param sequences + */ + public ResidueCount(List sequences) + { + this(); + for (SequenceI seq : sequences) + { + for (int i = 0; i < seq.getLength(); i++) + { + add(seq.getCharAt(i)); + } + } + } + + /** * Increments the count for the given character. The supplied character may be * upper or lower case but counts are for the upper case only. Gap characters * (space, ., -) are all counted together. 
@@ -641,15 +661,17 @@ public class ResidueCount return sb.toString(); } + /** + * Answers the total count for all symbols (excluding gaps) + * + * @return + */ public int getTotalCount() { int total = 0; for (char symbol : this.getSymbolCounts().symbols) { - if (!Comparison.isGap(symbol)) - { - total += getCount(symbol); - } + total += getCount(symbol); } return total; } diff --git a/src/jalview/gui/ColourMenuHelper.java b/src/jalview/gui/ColourMenuHelper.java index 8f0b88c..9479ea6 100644 --- a/src/jalview/gui/ColourMenuHelper.java +++ b/src/jalview/gui/ColourMenuHelper.java @@ -121,8 +121,7 @@ public class ColourMenuHelper */ final String name = scheme.getSchemeName(); String label = MessageManager.getStringOrReturn( - "label.colourScheme_" + name.toLowerCase().replace(" ", "_"), - name); + "label.colourScheme_", name.toLowerCase().replace(" ", "_")); final JRadioButtonMenuItem radioItem = new JRadioButtonMenuItem( label); radioItem.setName(name); diff --git a/src/jalview/hmmer/HMMBuildThread.java b/src/jalview/hmmer/HMMBuildThread.java index c52fb86..1842393 100644 --- a/src/jalview/hmmer/HMMBuildThread.java +++ b/src/jalview/hmmer/HMMBuildThread.java @@ -125,6 +125,7 @@ public class HMMBuildThread extends HmmerCommand implements Runnable { if (af != null) { + af.buildColourMenu(); // enable HMMER colour schemes af.setProgressBar("", msgID); } } diff --git a/src/jalview/schemes/HMMERAlignmentColourScheme.java b/src/jalview/schemes/HMMERAlignmentColourScheme.java index 59c0317..8e7aef7 100644 --- a/src/jalview/schemes/HMMERAlignmentColourScheme.java +++ b/src/jalview/schemes/HMMERAlignmentColourScheme.java @@ -10,55 +10,82 @@ import jalview.util.Comparison; import java.awt.Color; import java.util.HashMap; -import java.util.List; import java.util.Map; +/** + * A colour scheme based on a selected Hidden Markov Model. The colour is + * + * where information content is the log ratio + * + *
+ *   log(profile match emission probability / residue background probability)
+ * 
+ * + * using the alignment's background frequencies for residues. + * + * @author tzvanaalten + * + */ public class HMMERAlignmentColourScheme extends ResidueColourScheme { - Map backgroundFrequencies = new HashMap<>(); + /* + * the ratio, for each symbol, of its frequency to total symbol count + */ + Map frequency = new HashMap<>(); - Double maxLLR; + float logTotalCount; HiddenMarkovModel hmm; - boolean peptideSpecific; - - boolean nucleotideSpecific; - - public HMMERAlignmentColourScheme(HiddenMarkovModel markov) + /** + * Constructor given a Hidden Markov Model + * + * @param sg + * + * @param markov + */ + public HMMERAlignmentColourScheme(AnnotatedCollectionI sg, + HiddenMarkovModel markov) { hmm = markov; + countFrequencies(sg); } + /** + * Default constructor (required by ColourSchemes.loadColourSchemes) + */ public HMMERAlignmentColourScheme() { - } @Override public Color findColour(char symbol, int position, SequenceI seq, String consensusResidue, float pid) { - if (hmm == null) - { - return Color.white; - } return findColour(symbol, position); } /** - * Returns the colour at a particular symbol at a column in the alignment. + * Returns the colour at a particular symbol at a column in the alignment: + *
    + *
  • white for a gap
  • + *
  • red for an insertion
  • + *
  • orange for negative information content
  • + *
  • white to blue for increasing information content
  • + *
* * @param symbol - * @param position - * @return Red for an insertion, white for a gap, orange for a negative - * information content, white to blue for increasing information - * content. + * @param column + * @return */ - private Color findColour(char symbol, int position) + private Color findColour(char symbol, int column) { - - if (Comparison.isGap(symbol)) + if (hmm == null || Comparison.isGap(symbol)) { return Color.white; } @@ -66,9 +93,8 @@ public class HMMERAlignmentColourScheme extends ResidueColourScheme { symbol = Character.toUpperCase(symbol); } - Double prob; - prob = hmm.getMatchEmissionProbability(position, symbol); - Double freq = backgroundFrequencies.get(symbol); + double prob = hmm.getMatchEmissionProbability(column, symbol); + Double freq = frequency.get(symbol); if (freq == null) { return Color.white; @@ -77,18 +103,16 @@ public class HMMERAlignmentColourScheme extends ResidueColourScheme { return new Color(230, 0, 0); } - Double value = Math.log(prob / freq); + double value = Math.log(prob / freq.doubleValue()); Color colour = null; if (value > 0) { - - colour = ColorUtils.getGraduatedColour(value.floatValue(), 0, - Color.WHITE, maxLLR.floatValue(), Color.blue); + colour = ColorUtils.getGraduatedColour((float) value, 0, + Color.WHITE, logTotalCount, Color.blue); } else if (value < 0) { return Color.ORANGE; - } return colour; } @@ -97,55 +121,29 @@ public class HMMERAlignmentColourScheme extends ResidueColourScheme public void alignmentChanged(AnnotatedCollectionI collection, Map hiddenReps) { - List seqs = collection.getSequences(); - for (SequenceI seq : seqs) - { - if (seq.isHMMConsensusSequence()) - { - hmm = seq.getHMM(); - break; - } - } - - count(collection); - + /* + * ? 
no need to do anything if alignment is adjusted + * since findColour() handles everything + */ } @Override public ColourSchemeI getInstance(AnnotatedCollectionI sg, Map hiddenRepSequences) { - HiddenMarkovModel markov = null; - List seqs = sg.getSequences(); - for (SequenceI seq : seqs) - { - if (seq.getHMM() != null) - { - markov = seq.getHMM(); - break; - } - } - - count(sg); - - HMMERAlignmentColourScheme colour = new HMMERAlignmentColourScheme( - markov); - return colour; - + return new HMMERAlignmentColourScheme(sg, hmm); } @Override public boolean isApplicableTo(AnnotatedCollectionI ac) { - return true; - + return !ac.getHMMConsensusSequences().isEmpty(); } @Override public String getSchemeName() { - - return JalviewColourScheme.HMMERA.name(); + return JalviewColourScheme.HMMERA.toString(); } @Override @@ -154,29 +152,22 @@ public class HMMERAlignmentColourScheme extends ResidueColourScheme return false; } - public void count(AnnotatedCollectionI sg) + /** + * Counts and stores the relatively frequency of every residue in the + * alignment + * + * @param sg + */ + public void countFrequencies(AnnotatedCollectionI sg) { - ResidueCount counts = new ResidueCount(); - for (SequenceI seq : sg.getSequences()) - { - for (int i = 0; i < seq.getLength(); i++) - { - if (!Comparison.isGap(seq.getCharAt(i))) - { - counts.add(seq.getCharAt(i)); - } - } - } - - int total = counts.getTotalCount(); + ResidueCount counts = new ResidueCount(sg.getSequences()); + int total = counts.getTotalCount(); // excludes gaps for (char symbol : counts.getSymbolCounts().symbols) { - double count = Double.valueOf(counts.getCount(symbol)) - / Double.valueOf(total); - backgroundFrequencies.put(symbol, count); + double freq = counts.getCount(symbol) / (double) total; + frequency.put(symbol, freq); } - maxLLR = Math.log(total); + logTotalCount = (float) Math.log(total); } } - diff --git a/src/jalview/schemes/HMMERColourScheme.java b/src/jalview/schemes/HMMERColourScheme.java index 
09e3293..2ba0898 100644 --- a/src/jalview/schemes/HMMERColourScheme.java +++ b/src/jalview/schemes/HMMERColourScheme.java @@ -11,51 +11,78 @@ import java.awt.Color; import java.util.List; import java.util.Map; +/** + * A colour scheme based on a selected Hidden Markov Model. The colour is + *
    + *
  • white for a gap
  • + *
  • red for an insertion
  • + *
  • orange for negative information content
  • + *
  • white to blue for increasing information content
  • + *
+ * where information content is the log ratio + * + *
+ *   log(profile match emission probability / residue background probability)
+ * 
+ * + * using global ('Uniprot') background frequencies for residues. + * + * @author tzvanaalten + * + */ public class HMMERColourScheme extends ResidueColourScheme { + /* + * The highest possible log ratio is when match emission probability in + * the HMM model is 1, and background (for W) is 0.0109 giving + * log(1/0.0109) = log(91.743) = 4.519 + */ + private static final float MAX_LOG_RATIO = 4.519f; - AnnotatedCollectionI alignment; + private static final Color REDDISH = new Color(230, 0, 0); HiddenMarkovModel hmm; - - boolean peptideSpecific; - - boolean nucleotideSpecific; + /** + * Constructor given a Hidden Markov Model + * + * @param markov + */ public HMMERColourScheme(HiddenMarkovModel markov) { hmm = markov; } + + /** + * Default constructor (required by ColourSchemes.loadColourSchemes) + */ public HMMERColourScheme() { - } - @Override public Color findColour(char symbol, int position, SequenceI seq, String consensusResidue, float pid) { - if (hmm ==null) - { - return Color.white; - } return findColour(symbol, position); } /** - * Returns the colour at a particular symbol at a column in the alignment. + * Returns the colour at a particular symbol at a column in the alignment: + *
    + *
  • white for a gap
  • + *
  • red for an insertion
  • + *
  • orange for negative information content
  • + *
  • white to blue for increasing information content
  • + *
* * @param symbol - * @param position - * @return Red for an insertion, white for a gap, orange for a negative - * information content, white to blue for increasing information - * content. + * @param column + * @return */ - private Color findColour(char symbol, int position) + private Color findColour(char symbol, int column) { - - if (Comparison.isGap(symbol)) + if (hmm == null || Comparison.isGap(symbol)) { return Color.white; } @@ -63,99 +90,81 @@ public class HMMERColourScheme extends ResidueColourScheme { symbol = Character.toUpperCase(symbol); } - Double prob; - prob = hmm.getMatchEmissionProbability(position, symbol); - double freq = 0; + + double prob = hmm.getMatchEmissionProbability(column, symbol); + Float freq = 0f; String alpha = hmm.getAlphabetType(); - if (!ResidueProperties.backgroundFrequencies.get(alpha).containsKey(symbol)) + if (!ResidueProperties.backgroundFrequencies.get(alpha) + .containsKey(symbol)) { - return Color.white; + return Color.WHITE; } else { freq = ResidueProperties.backgroundFrequencies.get(alpha).get(symbol); } - if (prob == 0) + if (prob == 0D) { - return new Color(230, 0, 0); + return REDDISH; } - Double value = Math.log(prob / freq); + double value = Math.log(prob / freq.floatValue()); Color colour = null; if (value > 0) { - - colour = ColorUtils.getGraduatedColour(value.floatValue(), 0, - Color.WHITE, 4.52f, Color.blue); + colour = ColorUtils.getGraduatedColour((float) value, 0, + Color.WHITE, MAX_LOG_RATIO, Color.blue); } else if (value < 0) { return Color.ORANGE; - } return colour; - } - - - - - @Override public void alignmentChanged(AnnotatedCollectionI collection, Map hiddenReps) { - List seqs = collection.getSequences(); - for (SequenceI seq : seqs) - { - if (seq.isHMMConsensusSequence()) - { - hmm = seq.getHMM(); - break; - } - } - + /* + * ? 
no need to do anything if alignment is adjusted + * since findColour() handles everything + */ } - - + /** + * Answers a new colour scheme instance based on the HMM of the first sequence + * in sg that has an HMM + */ @Override public ColourSchemeI getInstance(AnnotatedCollectionI sg, Map hiddenRepSequences) { - HiddenMarkovModel markov = null; - List seqs = sg.getSequences(); - for (SequenceI seq : seqs) - { - if (seq.isHMMConsensusSequence()) - { - markov = seq.getHMM(); - break; - } - } - HMMERColourScheme colour = new HMMERColourScheme(markov); - return colour; - + HiddenMarkovModel model = null; + List seqs = sg.getHMMConsensusSequences(); + if (!seqs.isEmpty()) + { + model = seqs.get(0).getHMM(); + } + HMMERColourScheme colour = new HMMERColourScheme(model); + return colour; } @Override - public boolean isApplicableTo(AnnotatedCollectionI ac) + public String getSchemeName() { - return true; - + return JalviewColourScheme.HMMERU.toString(); } @Override - public String getSchemeName() + public boolean isSimple() { - - return JalviewColourScheme.HMMERU.name(); + return false; } @Override - public boolean isSimple() + public boolean isApplicableTo(AnnotatedCollectionI ac) { - return false; + return !ac.getHMMConsensusSequences().isEmpty(); } } diff --git a/test/jalview/datamodel/ResidueCountTest.java b/test/jalview/datamodel/ResidueCountTest.java index 4eb6dbf..462104f 100644 --- a/test/jalview/datamodel/ResidueCountTest.java +++ b/test/jalview/datamodel/ResidueCountTest.java @@ -27,6 +27,8 @@ import static org.testng.Assert.assertTrue; import jalview.datamodel.ResidueCount.SymbolCounts; import jalview.gui.JvOptionPane; +import java.util.Arrays; + import org.junit.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -439,4 +441,23 @@ public class ResidueCountTest assertEquals(rc.getCount('?'), 6); assertEquals(rc.getCount('!'), 7); } + + @Test(groups = "Functional") + public void testConstructor_forSequences() + { + SequenceI 
seq1 = new Sequence("seq1", "abcde--. FCD"); + SequenceI seq2 = new Sequence("seq2", "ab.kKqBd-."); + ResidueCount rc = new ResidueCount(Arrays.asList(seq1, seq2)); + + assertEquals(rc.getGapCount(), 7); + assertEquals(rc.getTotalCount(), 15); // excludes gaps + assertEquals(rc.getCount('a'), 2); + assertEquals(rc.getCount('A'), 2); + assertEquals(rc.getCount('B'), 3); + assertEquals(rc.getCount('c'), 2); + assertEquals(rc.getCount('D'), 3); + assertEquals(rc.getCount('f'), 1); + assertEquals(rc.getCount('K'), 2); + assertEquals(rc.getCount('Q'), 1); + } } diff --git a/test/jalview/schemes/HMMERAlignmentColourSchemeTest.java b/test/jalview/schemes/HMMERAlignmentColourSchemeTest.java new file mode 100644 index 0000000..0907c9a --- /dev/null +++ b/test/jalview/schemes/HMMERAlignmentColourSchemeTest.java @@ -0,0 +1,75 @@ +package jalview.schemes; + +import static org.testng.Assert.assertEquals; + +import jalview.datamodel.Alignment; +import jalview.datamodel.AnnotatedCollectionI; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceI; +import jalview.io.DataSourceType; +import jalview.io.HMMFile; + +import java.awt.Color; +import java.io.IOException; +import java.net.MalformedURLException; + +import org.testng.annotations.Test; + +public class HMMERAlignmentColourSchemeTest { + + @Test(groups = "Functional") + public void testFindColour() throws MalformedURLException, IOException + { + HMMFile file = new HMMFile("test/jalview/io/test_PKinase_hmm.txt", + DataSourceType.FILE); + + /* + * alignment with 20 residues and background frequencies: + * A/a, S 3/20 = 0.15 + * M, K 4/20 = 0.2 + * V 2/20 = 0.1 + * Q, R, L 1/20 = 0.05 + * log(totalCount) = log(20) = 2.996 + */ + SequenceI seq1 = new Sequence("seq1", "AAMMMKKKVV"); + SequenceI seq2 = new Sequence("seq2", "aAM-QKRSSSL"); + AnnotatedCollectionI ac = new Alignment(new SequenceI[] { seq1, seq2 }); + ColourSchemeI scheme = new HMMERAlignmentColourScheme(ac, + file.getHMM()); + + /* + * 'A' in column 1, 
node 2, match emission 2.77204 + * e-2.77204 = 0.0625 + * background frequency is 0.15 + * ratio is < 1, log is negative, colour is Orange + */ + Color actual = scheme.findColour('A', 1, null, null, 0); + assertEquals(actual, Color.ORANGE); + + // gap is white + actual = scheme.findColour('-', 2, null, null, 0); + assertEquals(actual, Color.WHITE); + actual = scheme.findColour(' ', 2, null, null, 0); + assertEquals(actual, Color.WHITE); + actual = scheme.findColour('.', 2, null, null, 0); + assertEquals(actual, Color.WHITE); + + /* + * 'L' in column 3, node 4, match emission 1.98342 + * e-1.98342 = 0.1376 + * background frequency is 0.05 + * ratio is 2.752, log is 1.012 + * colour is graduated 1.012/2.996 or 86/255 of the way from + * white(255, 255, 255) to blue(0, 0, 255) + */ + actual = scheme.findColour('L', 3, null, null, 0); + assertEquals(actual, new Color(169, 169, 255)); + + /* + * invalid symbol is White + */ + actual = scheme.findColour('X', 2, null, null, 0); + assertEquals(actual, Color.WHITE); + } + +} diff --git a/test/jalview/schemes/HMMERColourSchemeTest.java b/test/jalview/schemes/HMMERColourSchemeTest.java index f411708..9d8093d 100644 --- a/test/jalview/schemes/HMMERColourSchemeTest.java +++ b/test/jalview/schemes/HMMERColourSchemeTest.java @@ -3,7 +3,6 @@ package jalview.schemes; import static org.testng.Assert.assertEquals; import jalview.io.DataSourceType; -import jalview.io.FileParse; import jalview.io.HMMFile; import java.awt.Color; @@ -13,40 +12,74 @@ import java.net.MalformedURLException; import org.testng.annotations.Test; public class HMMERColourSchemeTest { - HMMERColourScheme scheme = new HMMERColourScheme(); - @Test + @Test(groups = "Functional") public void testFindColour() throws MalformedURLException, IOException { + HMMFile file = new HMMFile("test/jalview/io/test_PKinase_hmm.txt", + DataSourceType.FILE); - HMMFile file = new HMMFile(new FileParse( - "test/jalview/io/test_PKinase_hmm.txt", DataSourceType.FILE)); - - scheme.hmm = 
file.getHMM(); - Color expected; + ColourSchemeI scheme = new HMMERColourScheme(file.getHMM()); + /* + * 'A' in column 1, node 2, match emission 2.77204 + * e-2.77204 = 0.0625 + * background frequency is 0.0826 + * ratio is 0.757, log is negative, colour is Orange + */ Color actual = scheme.findColour('A', 1, null, null, 0); assertEquals(actual, Color.ORANGE); - actual = scheme.findColour('Y', 3, null, null, 0); + // gap is white + actual = scheme.findColour('-', 2, null, null, 0); + assertEquals(actual, Color.WHITE); + actual = scheme.findColour(' ', 2, null, null, 0); + assertEquals(actual, Color.WHITE); + actual = scheme.findColour('.', 2, null, null, 0); + assertEquals(actual, Color.WHITE); + + /* + * 'Y' in column 4, node 5, match emission 4.41426 + * e-4.41426 = 0.0121 + * background frequency is 0.0292 + * ratio is 0.414, log is negative, colour is Orange + */ + actual = scheme.findColour('Y', 4, null, null, 0); assertEquals(actual, Color.ORANGE); + /* + * 'M' in column 109, no matching node, colour is reddish + */ actual = scheme.findColour('M', 109, null, null, 0); - expected = new Color(230, 0, 0); - assertEquals(actual, expected); - + assertEquals(actual, new Color(230, 0, 0)); + + /* + * 'I' in column 6, node 7, match emission 1.33015 + * e-1.33015 = 0.2644 + * background frequency is 0.0593 + * ratio is 4.459, log is 1.495 + * colour is graduated 1.495/4.52 or 84/255 of the way from + * white(255, 255, 255) to blue(0, 0, 255) + */ actual = scheme.findColour('I', 6, null, null, 0); - expected = new Color(171, 171, 255); - assertEquals(actual, expected); - + assertEquals(actual, new Color(171, 171, 255)); + + /* + * 'V' in column 14, node 15, match emission 0.44769 + * e-0.44769 = 0.6391 + * background frequency is 0.0686 + * ratio is 9.316, log is 2.232 + * colour is graduated 2.232/4.52 or 126/255 of the way from + * white(255, 255, 255) to blue(0, 0, 255) + */ actual = scheme.findColour('V', 14, null, null, 0); - expected = new Color(129, 129, 255); - 
assertEquals(actual, expected); + assertEquals(actual, new Color(129, 129, 255)); - actual = scheme.findColour('X', 100000000, null, null, 0); + /* + * invalid symbol is White + */ + actual = scheme.findColour('X', 2, null, null, 0); assertEquals(actual, Color.WHITE); - - } } -- 1.7.10.2