From 7d950017bb9262f2eff563192071b5ed9ccc76b4 Mon Sep 17 00:00:00 2001 From: gmungoc Date: Thu, 22 Feb 2018 13:43:32 +0000 Subject: [PATCH] JAL-2629 tidy unit tests, constants etc --- src/jalview/analysis/AAFrequency.java | 184 ++++++++------------- src/jalview/datamodel/AlignmentAnnotation.java | 2 +- src/jalview/datamodel/HiddenMarkovModel.java | 48 +++++- src/jalview/datamodel/Sequence.java | 3 +- src/jalview/datamodel/SequenceGroup.java | 27 +-- src/jalview/gui/AnnotationLabels.java | 9 +- src/jalview/io/HMMFile.java | 10 +- src/jalview/renderer/AnnotationRenderer.java | 5 +- src/jalview/viewmodel/AlignmentViewport.java | 2 +- src/jalview/workers/InformationThread.java | 3 +- test/jalview/analysis/AAFrequencyTest.java | 49 ++++-- test/jalview/datamodel/HiddenMarkovModelTest.java | 180 ++++++++++++-------- 12 files changed, 285 insertions(+), 237 deletions(-) diff --git a/src/jalview/analysis/AAFrequency.java b/src/jalview/analysis/AAFrequency.java index 85a9bb0..f77517c 100755 --- a/src/jalview/analysis/AAFrequency.java +++ b/src/jalview/analysis/AAFrequency.java @@ -50,31 +50,12 @@ import java.util.List; * This class is used extensively in calculating alignment colourschemes that * depend on the amount of conservation in each alignment column. * - * @author $author$ - * @version $Revision$ */ public class AAFrequency { - public static final String PROFILE = "P"; - - private static final String AMINO = "amino"; - - private static final String DNA = "DNA"; - - private static final String RNA = "RNA"; + private static final double LOG2 = Math.log(2); - /* - * Quick look-up of String value of char 'A' to 'Z' - */ - private static final String[] CHARS = new String['Z' - 'A' + 1]; - - static - { - for (char c = 'A'; c <= 'Z'; c++) - { - CHARS[c - 'A'] = String.valueOf(c); - } - } + public static final String PROFILE = "P"; public static final ProfilesI calculate(List list, int start, int end) @@ -109,8 +90,6 @@ public class AAFrequency } } - - /** * Calculate the consensus symbol(s) for each column in the given range. * @@ -400,14 +379,12 @@ public class AAFrequency information.annotations[i] = null; return 0; } - - HiddenMarkovModel hmm; SequenceI hmmSeq = information.sequenceRef; - hmm = hmmSeq.getHMM(); + HiddenMarkovModel hmm = hmmSeq.getHMM(); - Float value = getInformationContent(i, hmm); + float value = hmm.getInformationContent(i); if (value > max) { @@ -865,121 +842,104 @@ public class AAFrequency } /** - * Returns the information content at a specified column. + * Returns the sorted HMM profile for the given column of the alignment. The + * returned array contains * - * @param column - * Index of the column, starting from 0. - * @return - */ - public static float getInformationContent(int column, - HiddenMarkovModel hmm) - { - float informationContent = 0f; - - for (char symbol : hmm.getSymbols()) - { - float freq = 0f; - freq = ResidueProperties.backgroundFrequencies - .get(hmm.getAlphabetType()).get(symbol); - Double hmmProb = hmm.getMatchEmissionProbability(column, symbol); - float prob = hmmProb.floatValue(); - informationContent += prob * (Math.log(prob / freq) / Math.log(2)); - - } - - return informationContent; - } - - /** - * Produces a HMM profile for a column in an alignment + *
+   *    [profileType=0, numberOfValues, 100, charValue1, percentage1, charValue2, percentage2, ...]
+   * in descending order of percentage value
+   * 
* - * @param aa - * Alignment annotation for which the profile is being calculated. + * @param hmm * @param column - * Column in the alignment the profile is being made for. * @param removeBelowBackground - * Boolean indicating whether to ignore residues with probabilities - * less than their background frequencies. + * if true, ignores residues with probability less than their + * background frequency + * @param infoHeight + * if true, uses the log ratio 'information' measure to scale the + * value * @return */ public static int[] extractHMMProfile(HiddenMarkovModel hmm, int column, boolean removeBelowBackground, boolean infoHeight) { - - if (hmm != null) + if (hmm == null) { - int size = hmm.getNumberOfSymbols(); - char symbols[] = new char[size]; - int values[] = new int[size]; - List charList = hmm.getSymbols(); - Integer totalCount = 0; + return null; + } + int size = hmm.getNumberOfSymbols(); + char symbols[] = new char[size]; + int values[] = new int[size]; + List charList = hmm.getSymbols(); + int totalCount = 0; - for (int i = 0; i < size; i++) - { - char symbol = charList.get(i); - symbols[i] = symbol; - int value = getAnalogueCount(hmm, column, symbol, - removeBelowBackground, infoHeight); - values[i] = value; - totalCount += value; - } + for (int i = 0; i < size; i++) + { + char symbol = charList.get(i); + symbols[i] = symbol; + int value = getAnalogueCount(hmm, column, symbol, + removeBelowBackground, infoHeight); + values[i] = value; + totalCount += value; + } - QuickSort.sort(values, symbols); + /* + * sort symbols by increasing emission probability + */ + QuickSort.sort(values, symbols); - int[] profile = new int[3 + size * 2]; + int[] profile = new int[3 + size * 2]; - profile[0] = AlignmentAnnotation.SEQUENCE_PROFILE; - profile[1] = size; - profile[2] = 100; + profile[0] = AlignmentAnnotation.SEQUENCE_PROFILE; + profile[1] = size; + profile[2] = 100; - if (totalCount != 0) + /* + * order symbol/count profile by decreasing emission probability + */ + if (totalCount != 0) + { + int arrayPos = 3; + for (int k = size - 1; k >= 0; k--) { - int arrayPos = 3; - for (int k = size - 1; k >= 0; k--) + Float percentage; + int value = values[k]; + if (removeBelowBackground) { - Float percentage; - Integer value = values[k]; - if (removeBelowBackground) - { - percentage = (value.floatValue() / totalCount.floatValue()) - * 100; - } - else - { - percentage = value.floatValue() / 100f; - } - int intPercent = Math.round(percentage); - profile[arrayPos] = symbols[k]; - profile[arrayPos + 1] = intPercent; - arrayPos += 2; + percentage = ((float) value) / totalCount * 100f; } + else + { + percentage = value / 100f; + } + int intPercent = Math.round(percentage); + profile[arrayPos] = symbols[k]; + profile[arrayPos + 1] = intPercent; + arrayPos += 2; } - return profile; } - return null; + return profile; } /** * Converts the emission probability of a residue at a column in the alignment - * to a 'count' to allow for processing by the annotation renderer. + * to a 'count', suitable for rendering as an annotation value * * @param hmm * @param column - * @param removeBelowBackground - * When true, this method returns 0 for any symbols with a match - * emission probability less than the background frequency. * @param symbol + * @param removeBelowBackground + * if true, returns 0 for any symbol with a match emission + * probability less than the background frequency + * @infoHeight if true, uses the log ratio 'information content' to scale the + * value * @return */ static int getAnalogueCount(HiddenMarkovModel hmm, int column, char symbol, boolean removeBelowBackground, boolean infoHeight) { - Double value; - - value = hmm.getMatchEmissionProbability(column, symbol); - double freq; - - freq = ResidueProperties.backgroundFrequencies + double value = hmm.getMatchEmissionProbability(column, symbol); + double freq = ResidueProperties.backgroundFrequencies .get(hmm.getAlphabetType()).get(symbol); if (value < freq && removeBelowBackground) { @@ -988,10 +948,10 @@ public class AAFrequency if (infoHeight) { - value = value * (Math.log(value / freq) / Math.log(2)); + value = value * (Math.log(value / freq) / LOG2); } - value = value * 10000; - return Math.round(value.floatValue()); + value = value * 10000d; + return Math.round((float) value); } } diff --git a/src/jalview/datamodel/AlignmentAnnotation.java b/src/jalview/datamodel/AlignmentAnnotation.java index d6bfd93..268751e 100755 --- a/src/jalview/datamodel/AlignmentAnnotation.java +++ b/src/jalview/datamodel/AlignmentAnnotation.java @@ -1185,7 +1185,7 @@ public class AlignmentAnnotation /** * machine readable ID string indicating what generated this annotation */ - protected String calcId = ""; + private String calcId = ""; /** * properties associated with the calcId diff --git a/src/jalview/datamodel/HiddenMarkovModel.java b/src/jalview/datamodel/HiddenMarkovModel.java index 49c29d1..1b12945 100644 --- a/src/jalview/datamodel/HiddenMarkovModel.java +++ b/src/jalview/datamodel/HiddenMarkovModel.java @@ -1,5 +1,7 @@ package jalview.datamodel; +import jalview.schemes.ResidueProperties; + import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -15,7 +17,7 @@ import java.util.Map; */ public class HiddenMarkovModel { - + private static final double LOG2 = Math.log(2); // Stores file properties. Do not directly access this field as it contains // only string value - use the getter methods. For example, to find the length @@ -112,9 +114,11 @@ public class HiddenMarkovModel String fileHeader; + /** + * Constructor + */ public HiddenMarkovModel() { - } public HiddenMarkovModel(HiddenMarkovModel hmm) @@ -130,7 +134,36 @@ public class HiddenMarkovModel } /** - * Gets the file header of the .hmm file this model came from. + * Returns the information content at a specified column, calculated as the + * sum (over possible symbols) of the log ratio + * + *
+   *  log(emission probability / background probability) / log(2)
+   * 
+ * + * @param column + * column position (base 0) + * @return + */ + public float getInformationContent(int column) + { + float informationContent = 0f; + + for (char symbol : getSymbols()) + { + float freq = ResidueProperties.backgroundFrequencies + .get(getAlphabetType()).get(symbol); + float prob = (float) getMatchEmissionProbability(column, symbol); + informationContent += prob * Math.log(prob / freq); + } + + informationContent = informationContent / (float) LOG2; + + return informationContent; + } + + /** + * Gets the file header of the .hmm file this model came from * * @return */ @@ -376,11 +409,11 @@ public class HiddenMarkovModel * @return * */ - public Double getMatchEmissionProbability(int alignColumn, char symbol) + public double getMatchEmissionProbability(int alignColumn, char symbol) { int symbolIndex; int nodeIndex; - Double probability; + double probability; if (!symbolIndexLookup.containsKey(symbol)) { return 0d; @@ -396,7 +429,6 @@ public class HiddenMarkovModel { return 0d; } - } /** @@ -411,11 +443,11 @@ public class HiddenMarkovModel * @return * */ - public Double getInsertEmissionProbability(int alignColumn, char symbol) + public double getInsertEmissionProbability(int alignColumn, char symbol) { int symbolIndex; int nodeIndex; - Double probability; + double probability; if (!symbolIndexLookup.containsKey(symbol)) { return 0d; diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java index 6a5e4d0..4d51c0c 100755 --- a/src/jalview/datamodel/Sequence.java +++ b/src/jalview/datamodel/Sequence.java @@ -1783,7 +1783,8 @@ public class Sequence extends ASequence implements SequenceI { for (AlignmentAnnotation ann : annotation) { - if (ann.calcId != null && ann.calcId.equals(calcId) + String id = ann.getCalcId(); + if (id != null && id.equals(calcId) && ann.label != null && ann.label.equals(label)) { result.add(ann); diff --git a/src/jalview/datamodel/SequenceGroup.java b/src/jalview/datamodel/SequenceGroup.java index c2c4541..d1e7b83 100755 --- a/src/jalview/datamodel/SequenceGroup.java +++ b/src/jalview/datamodel/SequenceGroup.java @@ -26,6 +26,7 @@ import jalview.renderer.ResidueShader; import jalview.renderer.ResidueShaderI; import jalview.schemes.ColourSchemeI; import jalview.util.MessageManager; +import jalview.workers.InformationThread; import java.awt.Color; import java.beans.PropertyChangeListener; @@ -738,13 +739,9 @@ public class SequenceGroup implements AnnotatedCollectionI information.annotations = null; information.annotations = new Annotation[aWidth]; // should be alignment // width - information.calcId = "HMM"; + information.setCalcId(InformationThread.HMM_CALC_ID); AAFrequency.completeInformation(information, cnsns, startRes, - endRes + 1, nseq, 0f); // TODO: - // setting - // container - // for - // ignoreGapsInInformationCalculation); + endRes + 1, nseq, 0f); } /** @@ -1201,20 +1198,13 @@ public class SequenceGroup implements AnnotatedCollectionI } /** + * Answers the Hidden Markov Model annotation for this group (creating it if + * necessary) * - * @return information content annotation. + * @return */ public AlignmentAnnotation getInformation() { - // TODO get or calculate and get information annotation row for this group - int aWidth = this.getWidth(); - // pointer - // possibility - // here. - if (aWidth < 0) - { - return null; - } if (information == null) { information = new AlignmentAnnotation("", "", new Annotation[1], 0f, @@ -1223,8 +1213,9 @@ public class SequenceGroup implements AnnotatedCollectionI information.autoCalculated = false; information.groupRef = this; information.label = getName(); - information.description = "Information content, measured in bits"; - information.calcId = "HMM"; + information.description = MessageManager + .getString("label.information_description"); + information.setCalcId(InformationThread.HMM_CALC_ID); } return information; } diff --git a/src/jalview/gui/AnnotationLabels.java b/src/jalview/gui/AnnotationLabels.java index 067acd7..3112fe1 100755 --- a/src/jalview/gui/AnnotationLabels.java +++ b/src/jalview/gui/AnnotationLabels.java @@ -33,6 +33,7 @@ import jalview.io.FormatAdapter; import jalview.util.Comparison; import jalview.util.MessageManager; import jalview.util.Platform; +import jalview.workers.InformationThread; import java.awt.Color; import java.awt.Cursor; @@ -444,7 +445,8 @@ public class AnnotationLabels extends JPanel { final String label = aa[selectedRow].label; if (!(aa[selectedRow].autoCalculated) - && !("HMM".equals(aa[selectedRow].getCalcId()))) + && !(InformationThread.HMM_CALC_ID + .equals(aa[selectedRow].getCalcId()))) { if (aa[selectedRow].graph == AlignmentAnnotation.NO_GRAPH) { @@ -625,9 +627,8 @@ public class AnnotationLabels extends JPanel consclipbrd.addActionListener(this); pop.add(consclipbrd); } - else if ("HMM".equals(aa[selectedRow].getCalcId())) // TODO create labels - // in message resource - // for these + else if (InformationThread.HMM_CALC_ID + .equals(aa[selectedRow].getCalcId())) { pop.addSeparator(); final AlignmentAnnotation aaa = aa[selectedRow]; diff --git a/src/jalview/io/HMMFile.java b/src/jalview/io/HMMFile.java index f966f3f..95c6f32 100644 --- a/src/jalview/io/HMMFile.java +++ b/src/jalview/io/HMMFile.java @@ -461,14 +461,14 @@ public class HMMFile extends AlignFile */ String getModelAsString() { - StringBuffer output = new StringBuffer(); + StringBuilder output = new StringBuilder(); String symbolLine = "HMM"; List charSymbols = hmm.getSymbols(); List strSymbols; strSymbols = charListToStringList(charSymbols); symbolLine += addData(11, 9, strSymbols); output.append(symbolLine); - output.append(NL + TRANSITIONTYPELINE); + output.append(NL).append(TRANSITIONTYPELINE); int length = hmm.getLength(); @@ -505,7 +505,7 @@ public class HMMFile extends AlignFile } - output.append(NL + matchLine); + output.append(NL).append(matchLine); String insertLine = EMPTY; List strInserts; @@ -515,7 +515,7 @@ public class HMMFile extends AlignFile strInserts = doubleListToStringList(doubleInserts); insertLine += addData(17, 9, strInserts); - output.append(NL + insertLine); + output.append(NL).append(insertLine); String transitionLine = EMPTY; List strTransitions; @@ -525,7 +525,7 @@ public class HMMFile extends AlignFile strTransitions = doubleListToStringList(doubleTransitions); transitionLine += addData(17, 9, strTransitions); - output.append(NL + transitionLine); + output.append(NL).append(transitionLine); } return output.toString(); } diff --git a/src/jalview/renderer/AnnotationRenderer.java b/src/jalview/renderer/AnnotationRenderer.java index ed67aa6..b3aea2c 100644 --- a/src/jalview/renderer/AnnotationRenderer.java +++ b/src/jalview/renderer/AnnotationRenderer.java @@ -36,6 +36,7 @@ import jalview.schemes.NucleotideColourScheme; import jalview.schemes.ResidueProperties; import jalview.schemes.ZappoColourScheme; import jalview.util.Platform; +import jalview.workers.InformationThread; import java.awt.BasicStroke; import java.awt.Color; @@ -375,7 +376,7 @@ public class AnnotationRenderer // properties/rendering attributes as a global 'alignment group' which holds // all vis settings for the alignment as a whole rather than a subset // - if ("HMM".equals(aa.getCalcId())) + if (InformationThread.HMM_CALC_ID.equals(aa.getCalcId())) { HiddenMarkovModel hmm = aa.sequenceRef.getHMM(); return AAFrequency.extractHMMProfile(hmm, column, @@ -526,7 +527,7 @@ public class AnnotationRenderer renderProfile = av_renderProfile; normaliseProfile = av_normaliseProfile; } - else if ("HMM".equals(row.getCalcId())) + else if (InformationThread.HMM_CALC_ID.equals(row.getCalcId())) { renderHistogram = av_renderInformationHistogram; renderProfile = av_renderHMMProfile; diff --git a/src/jalview/viewmodel/AlignmentViewport.java b/src/jalview/viewmodel/AlignmentViewport.java index 2651b79..04af232 100644 --- a/src/jalview/viewmodel/AlignmentViewport.java +++ b/src/jalview/viewmodel/AlignmentViewport.java @@ -2178,7 +2178,7 @@ public abstract class AlignmentViewport info.hasText = true; info.autoCalculated = false; info.sequenceRef = seq; - info.setCalcId("HMM"); + info.setCalcId(InformationThread.HMM_CALC_ID); this.information.add(info); hinformation.add(new Profiles(new ProfileI[1])); alignment.addAnnotation(info); diff --git a/src/jalview/workers/InformationThread.java b/src/jalview/workers/InformationThread.java index 6b1bd76..5385359 100644 --- a/src/jalview/workers/InformationThread.java +++ b/src/jalview/workers/InformationThread.java @@ -15,8 +15,9 @@ import java.util.List; public class InformationThread extends AlignCalcWorker { + public static final String HMM_CALC_ID = "HMM"; - Float max = 0f; + private float max = 0f; /** * Constructor for information thread. diff --git a/test/jalview/analysis/AAFrequencyTest.java b/test/jalview/analysis/AAFrequencyTest.java index 05a72e4..fc4063a 100644 --- a/test/jalview/analysis/AAFrequencyTest.java +++ b/test/jalview/analysis/AAFrequencyTest.java @@ -45,7 +45,6 @@ import org.testng.annotations.Test; public class AAFrequencyTest { - HiddenMarkovModel hmm; @BeforeClass(alwaysRun = true) @@ -55,6 +54,17 @@ public class AAFrequencyTest JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION); } + @BeforeClass(alwaysRun = true) + public void setUp() throws IOException, MalformedURLException + { + /* + * load a dna (ACGT) HMM file to a HiddenMarkovModel + */ + HMMFile hmmFile = new HMMFile(new FileParse( + "test/jalview/io/test_MADE1_hmm.txt", DataSourceType.FILE)); + hmm = hmmFile.getHMM(); + } + @Test(groups = { "Functional" }) public void testCalculate_noProfile() { @@ -244,15 +254,10 @@ public class AAFrequencyTest assertEquals("T", ann.displayCharacter); } - - @Test(groups = { "Functional" }, priority = 1) + @Test(groups = { "Functional" }) public void testExtractHMMProfile() throws MalformedURLException, IOException { - - HMMFile hmmFile = new HMMFile(new FileParse( - "test/jalview/io/test_MADE1_hmm.txt", DataSourceType.FILE)); - hmm = hmmFile.getHMM(); int[] expected = { 0, 4, 100, 'T', 71, 'C', 12, 'G', 9, 'A', 9 }; int[] actual = AAFrequency.extractHMMProfile(hmm, 17, false, false); for (int i = 0; i < actual.length; i++) @@ -266,7 +271,7 @@ public class AAFrequencyTest assertEquals(actual[i], expected[i]); } } - + int[] expected2 = { 0, 4, 100, 'A', 85, 'C', 0, 'G', 0, 'T', 0 }; int[] actual2 = AAFrequency.extractHMMProfile(hmm, 2, true, false); for (int i = 0; i < actual2.length; i++) @@ -284,19 +289,39 @@ public class AAFrequencyTest assertNull(AAFrequency.extractHMMProfile(null, 98978867, true, false)); } - @Test(groups = { "Functional" }, priority = 2) + @Test(groups = { "Functional" }) public void testGetAnalogueCount() { - int count; - count = AAFrequency.getAnalogueCount(hmm, 0, 'T', false, false); + /* + * 'T' in column 0 has emission probability 0.7859, scales to 7859 + */ + int count = AAFrequency.getAnalogueCount(hmm, 0, 'T', false, false); assertEquals(7859, count); + + /* + * same with 'use info height': value is multiplied by log ratio + * log(value / background) / log(2) = log(0.7859/0.25)/0.693 + * = log(3.1)/0.693 = 1.145/0.693 = 1.66 + * so value becomes 1.2987 and scales up to 12987 + */ + count = AAFrequency.getAnalogueCount(hmm, 0, 'T', false, true); + assertEquals(12987, count); + + /* + * 'G' in column 20 has emission probability 0.75457, scales to 7546 + */ count = AAFrequency.getAnalogueCount(hmm, 20, 'G', false, false); assertEquals(7546, count); + + /* + * 'G' in column 1077 has emission probability 0.0533, here + * ignored (set to 0) since below background of 0.25 + */ count = AAFrequency.getAnalogueCount(hmm, 1077, 'G', true, false); assertEquals(0, count); } - @Test(groups = { "Functional" }, priority = 3) + @Test(groups = { "Functional" }) public void testCompleteInformation() { ProfileI prof1 = new Profile(1, 0, 100, "A"); diff --git a/test/jalview/datamodel/HiddenMarkovModelTest.java b/test/jalview/datamodel/HiddenMarkovModelTest.java index 069978d..b283809 100644 --- a/test/jalview/datamodel/HiddenMarkovModelTest.java +++ b/test/jalview/datamodel/HiddenMarkovModelTest.java @@ -5,108 +5,117 @@ import static org.testng.Assert.assertEquals; import jalview.io.DataSourceType; import jalview.io.FileParse; import jalview.io.HMMFile; +import jalview.schemes.ResidueProperties; import java.io.IOException; import java.net.MalformedURLException; +import java.util.Map; +import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; public class HiddenMarkovModelTest { HiddenMarkovModel hmm; - @Test(priority = 0) - public void testGetMatchEmissionProbabilities() - throws MalformedURLException, IOException + @BeforeClass(alwaysRun = true) + public void setUp() throws MalformedURLException, IOException { + /* + * load hmm model of a Kinase domain to a HiddenMarkovModel + */ HMMFile file = new HMMFile(new FileParse( "test/jalview/io/test_PKinase_hmm.txt", DataSourceType.FILE)); hmm = file.getHMM(); + } - double[] actual = new double[4]; - actual[0] = hmm.getMatchEmissionProbability(0, 'R'); - actual[1] = hmm.getMatchEmissionProbability(19, 'W'); - actual[2] = hmm.getMatchEmissionProbability(160, 'G'); - actual[3] = hmm.getMatchEmissionProbability(475, 'A'); - - double[] expected = new double[4]; - expected[0] = 0.02537400637; - expected[1] = 0.00588228492; - expected[2] = 0; - expected[3] = 0.04995163708; - - for (int i = 0; i < 4; i++) - { - assertEquals(actual[i], expected[i], 0.001d); - } + @Test(groups = "Functional") + public void testGetMatchEmissionProbabilities() + throws MalformedURLException, IOException + { + /* + * raw value in file is 3.67403 + * saved as probability e^-X = 0.05259 + */ + double mep = hmm.getMatchEmissionProbability(0, 'R'); + assertEquals(mep, 0.02537400637, 0.0001d); + assertEquals(mep, Math.pow(Math.E, -3.67403), 0.0001d); + + mep = hmm.getMatchEmissionProbability(19, 'W'); + assertEquals(mep, 0.00588228492, 0.0001d); + assertEquals(mep, Math.pow(Math.E, -5.13581), 0.0001d); + + // column 160 is a gapped region of the model + mep = hmm.getMatchEmissionProbability(160, 'G'); + assertEquals(mep, 0D, 0.0001d); + + mep = hmm.getMatchEmissionProbability(475, 'A'); + assertEquals(mep, 0.04995163708, 0.0001d); + assertEquals(mep, Math.pow(Math.E, -2.99670), 0.0001d); } - @Test(priority = 1) + @Test(groups = "Functional") public void testGetInsertEmissionProbabilities() { - double[] actual = new double[4]; - actual[0] = hmm.getInsertEmissionProbability(2, 'A'); - actual[1] = hmm.getInsertEmissionProbability(5, 'T'); - actual[2] = hmm.getInsertEmissionProbability(161, 'K'); - actual[3] = hmm.getInsertEmissionProbability(472, 'L'); - - double[] expected = new double[4]; - expected[0] = 0.06841384927; - expected[1] = 0.06233763141; - expected[2] = 0; - expected[3] = 0.06764038926; - - for (int i = 0; i < 4; i++) - { - assertEquals(actual[i], expected[i], 0.001d); - } + double iep = hmm.getInsertEmissionProbability(2, 'A'); + assertEquals(iep, Math.pow(Math.E, -2.68618), 0.0001d); + + iep = hmm.getInsertEmissionProbability(5, 'T'); + assertEquals(iep, Math.pow(Math.E, -2.77519), 0.0001d); + + // column 161 is gapped in the hmm + iep = hmm.getInsertEmissionProbability(161, 'K'); + assertEquals(iep, 0D, 0.0001d); + + iep = hmm.getInsertEmissionProbability(472, 'L'); + assertEquals(iep, Math.pow(Math.E, -2.69355), 0.0001d); } - @Test(priority = 1) + @Test(groups = "Functional") public void testGetStateTransitionProbabilities() { - double[] actual = new double[4]; - actual[0] = hmm.getStateTransitionProbability(475, hmm.MATCHTODELETE); - actual[1] = hmm.getStateTransitionProbability(8, hmm.MATCHTOINSERT); - actual[2] = hmm.getStateTransitionProbability(80, hmm.INSERTTOINSERT); - actual[3] = hmm.getStateTransitionProbability(475, hmm.DELETETOMATCH); - - double[] expected = new double[4]; - expected[0] = Double.NEGATIVE_INFINITY; - expected[1] = 0.00662894243; - expected[2] = 0.46183388908; - expected[3] = 1; - - for (int i = 0; i < 4; i++) - { - assertEquals(actual[i], expected[i], 0.001d); - } + // * in model file treated as negative infinity + double stp = hmm.getStateTransitionProbability(475, + HiddenMarkovModel.MATCHTODELETE); + assertEquals(stp, Double.NEGATIVE_INFINITY); + + // file value is 5.01631, saved as e^-5.01631 + stp = hmm.getStateTransitionProbability(8, + HiddenMarkovModel.MATCHTOINSERT); + assertEquals(stp, Math.pow(Math.E, -5.01631), 0.0001D); + + stp = hmm.getStateTransitionProbability(36, + HiddenMarkovModel.MATCHTODELETE); + assertEquals(stp, Math.pow(Math.E, -5.73865), 0.0001D); + + stp = hmm.getStateTransitionProbability(22, + HiddenMarkovModel.INSERTTOMATCH); + assertEquals(stp, Math.pow(Math.E, -0.61958), 0.0001D); + + stp = hmm.getStateTransitionProbability(80, + HiddenMarkovModel.INSERTTOINSERT); + assertEquals(stp, Math.pow(Math.E, -0.77255), 0.0001D); + + stp = hmm.getStateTransitionProbability(475, + HiddenMarkovModel.DELETETOMATCH); + assertEquals(stp, 1D, 0.0001D); + + stp = hmm.getStateTransitionProbability(218, + HiddenMarkovModel.DELETETODELETE); + assertEquals(stp, Math.pow(Math.E, -0.95510), 0.0001D); } - @Test(priority = 1) + @Test(groups = "Functional") public void testGetConsensusAtAlignColumn() { - char[] cons = new char[4]; - cons[0] = hmm.getConsensusAtAlignColumn(10); - cons[1] = hmm.getConsensusAtAlignColumn(50); + assertEquals(hmm.getConsensusAtAlignColumn(10), 's'); + assertEquals(hmm.getConsensusAtAlignColumn(50), 'k'); hmm.setConsensusResidueStatus(false); - cons[2] = hmm.getConsensusAtAlignColumn(100); - cons[3] = hmm.getConsensusAtAlignColumn(400); - - char[] expected = new char[4]; - expected[0] = 's'; - expected[1] = 'k'; - expected[2] = 'l'; - expected[3] = 'k'; - - for (int i = 0; i < 4; i++) - { - assertEquals(cons[i], expected[i]); - } - + assertEquals(hmm.getConsensusAtAlignColumn(100), 'l'); + assertEquals(hmm.getConsensusAtAlignColumn(400), 'k'); } - @Test(priority = 1) + @Test(groups = "Functional") public void testGetConsensusSequence() { SequenceI seq = hmm.getConsensusSequence(); @@ -115,4 +124,31 @@ public class HiddenMarkovModelTest { subStr = seq.getSequenceAsString().substring(150, 161); assertEquals(subStr, "-DLLK------"); } + + /** + * A rather pointless test that reproduces the code implemented and asserts + * the result is the same... + */ + @Test(groups = "Functional") + public void testGetInformationContent() + { + /* + * information measure is sum over all symbols of + * emissionProb * log(emissionProb / background) / log(2) + */ + Map uniprotFreqs = ResidueProperties.backgroundFrequencies + .get("amino"); + int col = 4; + float expected = 0f; + for (char aa : hmm.getSymbols()) + { + double mep = hmm.getMatchEmissionProbability(col, aa); + float background = uniprotFreqs.get(aa); + expected += mep * Math.log(mep / background); + } + expected /= Math.log(2D); + + float actual = hmm.getInformationContent(col); + assertEquals(actual, expected, 0.0001d); + } } -- 1.7.10.2