387a915c785789b3d591df0b170229de9697c798
[jalview.git] / test / jalview / io / HMMFileTest.java
1 package jalview.io;
2
3 import static org.testng.Assert.assertEquals;
4 import static org.testng.Assert.assertFalse;
5 import static org.testng.Assert.assertNull;
6 import static org.testng.Assert.assertTrue;
7 import static org.testng.Assert.fail;
8
9 import jalview.datamodel.HMMNode;
10 import jalview.datamodel.HiddenMarkovModel;
11
12 import java.io.BufferedReader;
13 import java.io.File;
14 import java.io.FileNotFoundException;
15 import java.io.FileReader;
16 import java.io.IOException;
17 import java.io.PrintWriter;
18 import java.util.ArrayList;
19 import java.util.Scanner;
20
21 import org.testng.annotations.BeforeClass;
22 import org.testng.annotations.Test;
23
24 import junit.extensions.PA;
25
26 public class HMMFileTest {
27
  /** Fixture loaded in setUp() from test/jalview/io/test_fn3_hmm.txt. */
  HMMFile fn3;

  /** Fixture loaded in setUp() from test/jalview/io/test_PKinase_hmm.txt. */
  HMMFile pKinase;

  /** Fixture loaded in setUp() from test/jalview/io/test_MADE1_hmm.txt. */
  HMMFile made1;
33
34   @BeforeClass(alwaysRun = true)
35   public void setUp() throws IOException
36   {
37     fn3 = new HMMFile("test/jalview/io/test_fn3_hmm.txt",
38             DataSourceType.FILE);
39
40     pKinase = new HMMFile("test/jalview/io/test_PKinase_hmm.txt",
41             DataSourceType.FILE);
42
43     made1 = new HMMFile("test/jalview/io/test_MADE1_hmm.txt",
44             DataSourceType.FILE);
45   }
46
47   @Test(groups = "Functional")
48   public void testParse() throws IOException
49   {
50     HiddenMarkovModel hmm = pKinase.getHMM();
51     assertEquals(hmm.getName(), "Pkinase");
52     assertEquals(hmm.getProperty(HMMFile.ACCESSION_NUMBER), "PF00069.17");
53     assertEquals(hmm.getProperty(HMMFile.DESCRIPTION),
54             "Protein kinase domain");
55     assertEquals(hmm.getLength().intValue(), 260);
56     assertNull(hmm.getProperty(HMMFile.MAX_LENGTH));
57     assertEquals(hmm.getAlphabetType(), "amino");
58     assertFalse(hmm.getBooleanProperty(HMMFile.REFERENCE_ANNOTATION));
59     assertFalse(hmm.getBooleanProperty(HMMFile.MASKED_VALUE));
60     assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_RESIDUE));
61     assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_STRUCTURE));
62     assertTrue(hmm.getBooleanProperty(HMMFile.MAP));
63     assertEquals(hmm.getProperty(HMMFile.DATE), "Thu Jun 16 11:44:06 2011");
64     assertNull(hmm.getProperty(HMMFile.COMMAND_LOG));
65     assertEquals(hmm.getProperty(HMMFile.NUMBER_OF_SEQUENCES), "54");
66     assertEquals(hmm.getProperty(HMMFile.EFF_NUMBER_OF_SEQUENCES),
67             "3.358521");
68     assertEquals(hmm.getProperty(HMMFile.CHECK_SUM), "3106786190");
69     assertEquals(hmm.getProperty(HMMFile.GATHERING_THRESHOLD),
70             "70.30 70.30");
71     assertEquals(hmm.getProperty(HMMFile.TRUSTED_CUTOFF), "70.30 70.30");
72     assertEquals(hmm.getProperty(HMMFile.NOISE_CUTOFF), "70.20 70.20");
73   
74     assertEquals(hmm.getSymbols(), "ACDEFGHIKLMNPQRSTVWY");
75   
76     assertEquals(hmm.getMatchEmissionProbability(0, 'Y'), 0.16102, 0.001d);
77     assertEquals(hmm.getMatchEmissionProbability(11, 'P'), 0.0130, 0.001d);
78     assertEquals(hmm.getMatchEmissionProbability(24, 'I'), 0.02583, 0.001d);
79     assertEquals(hmm.getMatchEmissionProbability(83, 'C'), 0.008549,
80             0.001d);
81     assertEquals(hmm.getMatchEmissionProbability(332, 'E'), 0.07998,
82             0.001d);
83     assertEquals(hmm.getMatchEmissionProbability(381, 'D'), 0.014465,
84             0.001d);
85     assertEquals(hmm.getMatchEmissionProbability(475, 'Y'), 0.02213,
86             0.001d);
87   
88     assertEquals(hmm.getInsertEmissionProbability(1, 'C'), 0.012, 0.001d);
89     assertEquals(hmm.getInsertEmissionProbability(14, 'H'), 0.02411,
90             0.001d);
91     assertEquals(hmm.getInsertEmissionProbability(23, 'L'), 0.06764,
92             0.001d);
93     assertEquals(hmm.getInsertEmissionProbability(90, 'D'), 0.0623, 0.001d);
94     assertEquals(hmm.getInsertEmissionProbability(374, 'T'), 0.0623,
95             0.001d);
96     assertEquals(hmm.getInsertEmissionProbability(470, 'P'), 0.0647,
97             0.001d);
98   
99     assertEquals(hmm.getStateTransitionProbability(2, 6), 0.3848, 0.001d);
100     assertEquals(hmm.getStateTransitionProbability(38, 3), 0.5382, 0.001d);
101     assertEquals(hmm.getStateTransitionProbability(305, 3), 0.2916, 0.001d);
102     assertEquals(hmm.getStateTransitionProbability(380, 0), 0.99, 0.001d);
103     assertEquals(hmm.getStateTransitionProbability(453, 1), 0.0066, 0.001d);
104   
105     assertEquals(hmm.getNodeAlignmentColumn(3).intValue(), 2);
106     assertEquals(hmm.getReferenceAnnotation(7), '-');
107     assertEquals(hmm.getConsensusResidue(23), 't');
108     assertEquals(hmm.getMaskedValue(30), '-');
109     assertEquals(hmm.getConsensusStructure(56), 'S');
110   
111     assertEquals(hmm.getNodeAlignmentColumn(78).intValue(), 135);
112     assertEquals(hmm.getReferenceAnnotation(93), '-');
113     assertEquals(hmm.getConsensusResidue(145), 'a');
114     assertEquals(hmm.getMaskedValue(183), '-');
115     assertEquals(hmm.getConsensusStructure(240), 'H');
116   }
117   
118   @Test(groups = "Functional")
119   public void testParseHeaderLines_amino() throws IOException
120   {
121     FileReader fr = new FileReader(
122             new File("test/jalview/io/test_fn3_hmm.txt"));
123     BufferedReader br = new BufferedReader(fr);
124     HiddenMarkovModel hmm = new HiddenMarkovModel();
125     HMMFile testee = new HMMFile();
126     PA.setValue(testee, "hmm", hmm);
127     testee.parseHeaderLines(br);
128     br.close();
129     fr.close();
130   
131     assertEquals(hmm.getName(), "fn3");
132     assertEquals(hmm.getProperty(HMMFile.ACCESSION_NUMBER), "PF00041.13");
133     assertEquals(hmm.getProperty(HMMFile.DESCRIPTION),
134             "Fibronectin type III domain");
135     assertEquals(hmm.getProperty(HMMFile.LENGTH), "86");
136     assertNull(hmm.getProperty(HMMFile.MAX_LENGTH));
137     assertEquals(hmm.getAlphabetType(), "amino");
138     assertFalse(hmm.getBooleanProperty(HMMFile.REFERENCE_ANNOTATION));
139     assertFalse(hmm.getBooleanProperty(HMMFile.MASKED_VALUE));
140     assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_RESIDUE));
141     assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_STRUCTURE));
142
143     assertTrue(hmm.getBooleanProperty(HMMFile.MAP));
144     assertEquals(hmm.getProperty(HMMFile.DATE), "Fri Jun 20 08:22:31 2014");
145     assertNull(hmm.getProperty(HMMFile.COMMAND_LOG));
146     assertEquals(hmm.getProperty(HMMFile.NUMBER_OF_SEQUENCES), "106");
147     assertEquals(hmm.getProperty(HMMFile.EFF_NUMBER_OF_SEQUENCES),
148             "11.415833");
149     assertEquals(hmm.getProperty(HMMFile.CHECK_SUM), "3564431818");
150     assertEquals(hmm.getProperty(HMMFile.GATHERING_THRESHOLD), "8.00 7.20");
151     assertEquals(hmm.getProperty(HMMFile.TRUSTED_CUTOFF), "8.00 7.20");
152     assertEquals(hmm.getProperty(HMMFile.NOISE_CUTOFF), "7.90 7.90");
153     assertEquals(hmm.getViterbi(), "-9.7737  0.71847");
154     assertEquals(hmm.getMSV(), "-9.4043  0.71847");
155     assertEquals(hmm.getForward(), "-3.8341  0.71847");
156   }
157   
158   @Test(groups = "Functional")
159   public void testParseHeaderLines_dna() throws IOException
160   {
161     FileReader fr = new FileReader(
162             new File("test/jalview/io/test_MADE1_hmm.txt"));
163     BufferedReader br = new BufferedReader(fr);
164     HiddenMarkovModel hmm = new HiddenMarkovModel();
165     HMMFile testee = new HMMFile();
166     PA.setValue(testee, "hmm", hmm);
167     testee.parseHeaderLines(br);
168     br.close();
169     fr.close();
170   
171     assertEquals(hmm.getName(), "MADE1");
172     assertEquals(hmm.getProperty(HMMFile.ACCESSION_NUMBER),
173             "DF0000629.2");
174     assertEquals(hmm.getProperty(HMMFile.DESCRIPTION),
175             "MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon");
176     assertEquals(hmm.getProperty(HMMFile.LENGTH), "80");
177     assertEquals(hmm.getProperty(HMMFile.MAX_LENGTH), "426");
178     assertEquals(hmm.getAlphabetType(), "DNA");
179     assertTrue(hmm.getBooleanProperty(HMMFile.REFERENCE_ANNOTATION));
180     assertFalse(hmm.getBooleanProperty(HMMFile.MASKED_VALUE));
181     assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_RESIDUE));
182     assertFalse(hmm.getBooleanProperty(HMMFile.CONSENSUS_STRUCTURE));
183     assertTrue(hmm.getBooleanProperty(HMMFile.MAP));
184     assertEquals(hmm.getProperty(HMMFile.DATE), "Tue Feb 19 20:33:41 2013");
185     assertNull(hmm.getProperty(HMMFile.COMMAND_LOG));
186     assertEquals(hmm.getProperty(HMMFile.NUMBER_OF_SEQUENCES), "1997");
187     assertEquals(hmm.getProperty(HMMFile.EFF_NUMBER_OF_SEQUENCES), "3.911818");
188     assertEquals(hmm.getProperty(HMMFile.CHECK_SUM), "3015610723");
189     assertEquals(hmm.getProperty(HMMFile.GATHERING_THRESHOLD),
190             "2.324 4.234");
191     assertEquals(hmm.getProperty(HMMFile.TRUSTED_CUTOFF), "2.343 1.212");
192     assertEquals(hmm.getProperty(HMMFile.NOISE_CUTOFF), "2.354 5.456");
193     assertEquals(hmm.getViterbi(), "-9.3632  0.71858");
194     assertEquals(hmm.getMSV(), "-8.5786  0.71858");
195     assertEquals(hmm.getForward(), "-3.4823  0.71858");
196   }
197   
198   @Test(groups = "Functional")
199   public void testFillList() throws IOException
200   {
201     Scanner scanner1 = new Scanner("1.3 2.4 5.3 3.9 9.8 4.7 4.3 2.3 6.9");
202     ArrayList<Double> filledArray = new ArrayList<>();
203   
204     filledArray.add(0.27253);
205     filledArray.add(0.0907);
206     filledArray.add(0.00499);
207     filledArray.add(0.02024);
208     filledArray.add(0.00005);
209     filledArray.add(0.00909);
210     filledArray.add(0.01357);
211     filledArray.add(0.10026);
212     filledArray.add(0.001);
213   
214     double[] testList = HMMFile.parseDoubles(scanner1, 9);
215
216     for (int i = 0; i < 9; i++)
217     {
218       assertEquals(testList[i], filledArray.get(i), 0.001d);
219     }
220
221     filledArray.clear();
222     scanner1.close();
223   
224     Scanner scanner2 = new Scanner(
225             "1.346 5.554 35.345 5.64 1.4");
226     filledArray.add(0.2603);
227     filledArray.add(0.00387);
228     filledArray.add(0d);
229     filledArray.add(0.00355);
230     filledArray.add(0.2466);
231   
232     testList = HMMFile.parseDoubles(scanner2, 5);
233
234     for (int i = 0; i < 5; i++)
235     {
236       assertEquals(testList[i], filledArray.get(i), 0.001d);
237     }
238   }
239   
240   @Test(groups = "Functional")
241   public void testParseModel() throws IOException
242   {
243     FileReader fr = new FileReader(
244             new File("test/jalview/io/test_MADE1_hmm.txt"));
245     BufferedReader br = new BufferedReader(fr);
246     HiddenMarkovModel testHMM = new HiddenMarkovModel();
247     for (int i = 0; i < 24; i++)
248     {
249       br.readLine();
250     }
251
252     made1.parseModel(br);
253     testHMM = made1.getHMM();
254
255     br.close();
256     fr.close();
257   
258     assertEquals(testHMM.getMatchEmissionProbability(1, 'C'), 0.09267,
259             0.001d);
260     assertEquals(testHMM.getMatchEmissionProbability(25, 'G'), 0.07327,
261             0.001d);
262     assertEquals(testHMM.getMatchEmissionProbability(1092, 'C'), 0.04184,
263             0.001d);
264     assertEquals(testHMM.getMatchEmissionProbability(1107, 'G'), 0.07,
265             0.001d);
266   
267     assertEquals(testHMM.getInsertEmissionProbability(0, 'G'), 0.25,
268             0.001d);
269     assertEquals(testHMM.getInsertEmissionProbability(247, 'T'), 0.2776,
270             0.001d);
271     assertEquals(testHMM.getInsertEmissionProbability(1096, 'T'), 0.25,
272             0.001d);
273     assertEquals(testHMM.getInsertEmissionProbability(1111, 'T'), 0.25,
274             0.001d);
275
276     assertEquals(testHMM.getStateTransitionProbability(1, 0), 0.9634,
277             0.001d);
278     assertEquals(testHMM.getStateTransitionProbability(5, 1), 0.0203,
279             0.001d);
280     assertEquals(testHMM.getStateTransitionProbability(14, 3), 0.2515,
281             0.001d);
282     assertEquals(testHMM.getStateTransitionProbability(65, 4), 0.78808,
283             0.001d);
284     assertEquals(testHMM.getStateTransitionProbability(1080, 2), 0.01845,
285             0.001d);
286     assertEquals(testHMM.getStateTransitionProbability(1111, 6),
287             Double.NEGATIVE_INFINITY);
288   }
289   
  /**
   * Test that if no mapping of nodes to aligned columns is provided by the HMM
   * file, we construct one.
   * <p>
   * Not yet implemented: the body is a placeholder that fails unconditionally,
   * so this test reports a failure until it is written.
   * 
   * @throws IOException
   */
  @Test(groups = "Functional")
  public void testParseModel_noMap() throws IOException
  {
    fail("test to be written");
  }
301
302   @Test(groups = "Functional")
303   public void testParseAnnotations()
304   {
305     HMMFile testFile = new HMMFile();
306     HiddenMarkovModel hmm = new HiddenMarkovModel();
307     PA.setValue(testFile, "hmm", hmm);
308     hmm.addNode(new HMMNode());
309   
310     hmm.setProperty(HMMFile.CONSENSUS_RESIDUE, "yes");
311     hmm.setProperty(HMMFile.MAP, "yes");
312     hmm.setProperty(HMMFile.REFERENCE_ANNOTATION, "yes");
313     hmm.setProperty(HMMFile.CONSENSUS_STRUCTURE, "yes");
314     hmm.setProperty(HMMFile.MASKED_VALUE, "yes");
315     Scanner scanner = new Scanner("1345 t t t t");
316     HMMNode node = new HMMNode();
317     hmm.addNode(node);
318     testFile.parseAnnotations(scanner, node);
319   
320     hmm.setProperty(HMMFile.CONSENSUS_RESIDUE, "yes");
321     hmm.setProperty(HMMFile.MAP, "no");
322     hmm.setProperty(HMMFile.REFERENCE_ANNOTATION, "yes");
323     hmm.setProperty(HMMFile.CONSENSUS_STRUCTURE, "no");
324     hmm.setProperty(HMMFile.MASKED_VALUE, "no");
325     Scanner scanner2 = new Scanner("- y x - -");
326     node = new HMMNode();
327     hmm.addNode(node);
328     testFile.parseAnnotations(scanner2, node);
329   
330     assertEquals(hmm.getNodeAlignmentColumn(1).intValue(), 1344);
331     assertEquals(hmm.getConsensusResidue(1), 't');
332     assertEquals(hmm.getReferenceAnnotation(1), 't');
333     assertEquals(hmm.getMaskedValue(1), 't');
334     assertEquals(hmm.getConsensusStructure(1), 't');
335   
336     scanner.close();
337   }
338   
339   /**
340    * tests to see if file produced by the output matches the file from the input
341    * 
342    * @throws IOException
343    */
344   @Test(groups = "Functional")
345   public void testPrint() throws IOException
346   {
347     PrintWriter writer = new PrintWriter(
348             "test/jalview/io/test_export_hmm.txt");
349     String output = pKinase.print();
350     writer.print(output);
351     writer.close();
352     HMMFile pKinaseClone = new HMMFile(
353             new FileParse("test/jalview/io/test_export_hmm.txt",
354                     DataSourceType.FILE));
355     HiddenMarkovModel pKinaseHMM = new HiddenMarkovModel();
356     HiddenMarkovModel pKinaseCloneHMM = new HiddenMarkovModel();
357     pKinaseHMM = pKinase.getHMM();
358     pKinaseCloneHMM = pKinaseClone.getHMM();
359   
360     for (int i = 0; i < pKinaseHMM.getLength(); i++)
361     {
362       double[] list1;
363       double[] list2;
364   
365       list1 = pKinaseHMM.getNode(i).getMatchEmissions();
366       list2 = pKinaseCloneHMM.getNode(i).getMatchEmissions();
367   
368       assertEquals(list1, list2);
369   
370       list1 = pKinaseHMM.getNode(i).getInsertEmissions();
371       list2 = pKinaseCloneHMM.getNode(i).getInsertEmissions();
372   
373       assertEquals(list1, list2);
374   
375       list1 = pKinaseHMM.getNode(i).getStateTransitions();
376       list2 = pKinaseCloneHMM.getNode(i).getStateTransitions();
377   
378       assertEquals(list1, list2);
379   
380       if (i > 0)
381       {
382         int alignColumn1;
383         int alignColumn2;
384   
385         alignColumn1 = pKinaseHMM.getNodeAlignmentColumn(i);
386         alignColumn2 = pKinaseCloneHMM.getNodeAlignmentColumn(i);
387   
388         assertEquals(alignColumn1, alignColumn2);
389   
390         char annotation1;
391         char annotation2;
392   
393         annotation1 = pKinaseHMM.getReferenceAnnotation(i);
394         annotation2 = pKinaseCloneHMM.getReferenceAnnotation(i);
395   
396         assertEquals(annotation1, annotation2);
397   
398         annotation1 = pKinaseHMM.getConsensusResidue(i);
399         annotation2 = pKinaseCloneHMM.getConsensusResidue(i);
400   
401         assertEquals(annotation1, annotation2);
402       }
403     }
404   }
405   
406   @Test(groups = "Functional")
407   public void testAppendProperties() throws FileNotFoundException
408   {
409     StringBuilder sb = new StringBuilder();
410     fn3.appendProperties(sb);
411
412     Scanner testScanner = new Scanner(sb.toString());
413   
414     String[] expected = new String[] { "HMMER3/f [3.1b1 | May 2013]",
415         "NAME  fn3", "ACC   PF00041.13",
416         "DESC  Fibronectin type III domain", "LENG  86", "ALPH  amino",
417         "RF    no", "MM    no", "CONS  yes", "CS    yes", "MAP   yes",
418         "DATE  Fri Jun 20 08:22:31 2014", "NSEQ  106", "EFFN  11.415833",
419         "CKSUM 3564431818", "GA    8.00 7.20", "TC    8.00 7.20",
420         "NC    7.90 7.90", "STATS LOCAL MSV       -9.4043  0.71847",
421         "STATS LOCAL VITERBI   -9.7737  0.71847",
422         "STATS LOCAL FORWARD   -3.8341  0.71847" };
423   
424     for (String value : expected)
425     {
426       assertEquals(testScanner.nextLine(), value);
427     }
428   
429     testScanner.close();
430   }
431   
432   @Test(groups = "Functional")
433   public void testAppendModelAsString() throws FileNotFoundException
434   {
435     StringBuilder sb = new StringBuilder();
436     fn3.appendModelAsString(sb);
437     String string = sb.toString();
438
439     assertEquals(findValue(2, 2, 2, string), "4.42225");
440     assertEquals(findValue(12, 14, 1, string), "2.79307");
441     assertEquals(findValue(6, 24, 3, string), "0.48576");
442     assertEquals(findValue(19, 33, 2, string), "4.58477");
443     assertEquals(findValue(20, 64, 2, string), "3.61505");
444     assertEquals(findValue(3, 72, 3, string), "6.81068");
445     assertEquals(findValue(10, 80, 2, string), "2.69355");
446     assertEquals(findValue(16, 65, 1, string), "2.81003");
447     assertEquals(findValue(14, 3, 1, string), "2.69012");
448     assertEquals(findValue(11, 32, 1, string), "4.34805");
449   }
450   
451   /**
452    * A helper method to find a token in the model string
453    * 
454    * @param symbolIndex
455    *          index of symbol being searched. First symbol has index 1.
456    * @param nodeIndex
457    *          index of node being searched. Begin node has index 0. First node
458    *          has index 1.
459    * @param line
460    *          index of line being searched in node. First line has index 1.
461    * @param model
462    *          string model being searched
463    * @return value at specified position
464    */
465   private String findValue(int symbolIndex, int nodeIndex, int line,
466           String model)
467   {
468     String value = "";
469     Scanner scanner = new Scanner(model);
470     scanner.nextLine();
471     scanner.nextLine();
472   
473     for (int lineIndex = 0; lineIndex < line - 1; lineIndex++)
474     {
475       scanner.nextLine();
476     }
477     for (int node = 0; node < nodeIndex; node++)
478     {
479       scanner.nextLine();
480       scanner.nextLine();
481       scanner.nextLine();
482     }
483   
484     for (int symbol = 0; symbol < symbolIndex; symbol++)
485     {
486       value = scanner.next();
487       if ("COMPO".equals(value))
488       {
489         scanner.next();
490       }
491       else if (value.length() < 7)
492       {
493         scanner.next();
494       }
495     }
496     scanner.close();
497     return value;
498   }
499 }
500