Add test of parser of Jpred output
[jabaws.git] / testsrc / compbio / data / sequence / SequenceUtilTester.java
1 /*\r
2  * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services\r
3  * (JABAWS) @version: 1.0 This library is free software; you can redistribute it\r
4  * and/or modify it under the terms of the Apache License version 2 as published\r
5  * by the Apache Software Foundation This library is distributed in the hope\r
6  * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied\r
7  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
8  * Apache License for more details. A copy of the license is in\r
9  * apache_license.txt. It is also available here:\r
10  * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or\r
11  * derived work distributed in source code form must include this copyright and\r
12  * license notice.\r
13  */\r
14 package compbio.data.sequence;\r
15 \r
16 import static org.testng.AssertJUnit.assertEquals;\r
17 import static org.testng.AssertJUnit.assertFalse;\r
18 import static org.testng.AssertJUnit.assertNotNull;\r
19 import static org.testng.AssertJUnit.assertTrue;\r
20 import static org.testng.AssertJUnit.fail;\r
21 \r
22 import java.io.File;\r
23 import java.io.FileInputStream;\r
24 import java.io.FileNotFoundException;\r
25 import java.io.FileOutputStream;\r
26 import java.io.IOException;\r
27 import java.io.InputStream;\r
28 import java.io.PrintWriter;\r
29 import java.util.HashMap;\r
30 import java.util.HashSet;\r
31 import java.util.List;\r
32 import java.util.Map;\r
33 import java.util.Set;\r
34 \r
35 import org.testng.annotations.Test;\r
36 \r
37 import compbio.metadata.AllTestSuit;\r
38 import compbio.runner.disorder.Disembl;\r
39 \r
40 public class SequenceUtilTester {\r
41 \r
42         @Test()\r
43         public void isNonAmbNucleotideSequence() {\r
44                 String dnaseq = "atgatTGACGCTGCTGatgtcgtgagtgga";\r
45                 assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dnaseq));\r
46                 String dirtyDnaseq = "atgAGTggt\taGGTgc\ncgcACTgc gACtcgcGAt cgA ";\r
47                 assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dirtyDnaseq));\r
48                 String nonDna = "atgfctgatgcatgcatgatgctga";\r
49                 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
50 \r
51                 nonDna = "atgc1tgatgcatgcatgatgctga";\r
52                 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
53 \r
54                 nonDna = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
55                 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
56                 // String ambDna = "AGTCRYMKSWHBVDN"; // see IUPAC Nucleotide Code\r
57                 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
58         }\r
59 \r
60         @Test()\r
61         public void CleanSequence() {\r
62                 String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
63                 assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),SequenceUtil.cleanSequence(dirtySeq));\r
64         }\r
65 \r
66         @Test()\r
67         public void DeepCleanSequence() {\r
68                 String dirtySeq = "a!t?g.A;GTggt\ta12GGTgc\ncgc23AC\rTgc gAC<>.,?!|\\|/t@cg-c¬GA=_+(0){]}[:£$&^*\"t cgA ";\r
69                 assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),SequenceUtil.deepCleanSequence(dirtySeq));\r
70         }\r
71 \r
72         @Test()\r
73         public void isProteinSequence() {\r
74                 String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
75                 assertFalse(SequenceUtil.isProteinSequence(dirtySeq));\r
76                 String notaSeq = "atgc1tgatgcatgcatgatgctga";\r
77                 assertFalse(SequenceUtil.isProteinSequence(notaSeq));\r
78                 String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
79                 assertTrue(SequenceUtil.isProteinSequence(AAseq));\r
80                 AAseq += "XU";\r
81                 assertFalse(SequenceUtil.isProteinSequence(AAseq));\r
82 \r
83         }\r
84 \r
85         @Test()\r
86         public void CleanProteinSequence() {\r
87                 String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
88                 assertFalse(SequenceUtil.isProteinSequence(dirtySeq));\r
89                 // This will still be NON protein sequence despite having only correct\r
90                 // letters because the letters match perfectly the nucleotide sequence!\r
91                 assertFalse(SequenceUtil.isProteinSequence(SequenceUtil.cleanProteinSequence(dirtySeq)));\r
92 \r
93                 String notaSeq = "atgc1tgatgcatgcatgatgmctga";\r
94                 assertFalse(SequenceUtil.isProteinSequence(notaSeq));\r
95                 assertTrue(SequenceUtil.isProteinSequence(SequenceUtil.cleanProteinSequence(notaSeq)));\r
96 \r
97                 String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
98                 assertTrue(SequenceUtil.isProteinSequence(AAseq));\r
99                 assertTrue(SequenceUtil.isProteinSequence(SequenceUtil.cleanProteinSequence(AAseq)));\r
100                 AAseq += "XU";\r
101 \r
102                 assertFalse(SequenceUtil.isProteinSequence(AAseq));\r
103                 assertTrue(SequenceUtil.isProteinSequence(SequenceUtil.cleanProteinSequence(AAseq)));\r
104         }\r
105 \r
106         @Test()\r
107         public void ReadWriteFasta() {\r
108                 try {\r
109                         FileInputStream fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "TO1381.fasta");\r
110                         assertNotNull(fio);\r
111                         List<FastaSequence> fseqs = SequenceUtil.readFasta(fio);\r
112                         assertNotNull(fseqs);\r
113                         assertEquals(3, fseqs.size());\r
114                         assertEquals(3, fseqs.size());\r
115                         fio.close();\r
116                         FileOutputStream fou = new FileOutputStream(AllTestSuit.TEST_DATA_PATH + "TO1381.fasta.written");\r
117                         SequenceUtil.writeFasta(fou, fseqs);\r
118                         fou.close();\r
119                         FileOutputStream fou20 = new FileOutputStream(AllTestSuit.TEST_DATA_PATH + "TO1381.fasta20.written");\r
120                         SequenceUtil.writeFasta(fou20, fseqs, 21);\r
121                         fou20.close();\r
122                 } catch (FileNotFoundException e) {\r
123                         e.printStackTrace();\r
124                         fail(e.getLocalizedMessage());\r
125                 } catch (IOException e) {\r
126                         e.printStackTrace();\r
127                         fail(e.getLocalizedMessage());\r
128                 }\r
129         }\r
130 \r
131         // This method tests the loading of horizontally formatted Jronn output file\r
132         @Test\r
133         public void LoadJronnFile() {\r
134 \r
135                 FileInputStream fio;\r
136                 try {\r
137                         fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "jronn.out");\r
138                         Map<String, Score> aseqs = SequenceUtil.readJRonn(fio);\r
139                         assertNotNull(aseqs);\r
140                         assertEquals(aseqs.size(), 3);\r
141                         Score aseq = aseqs.get("Foobar");\r
142                         assertNotNull(aseq);\r
143                         assertNotNull(aseq.getScores());\r
144                         assertEquals(aseq.getScores().size(), aseq.getScores().size());\r
145                         fio.close();\r
146                 } catch (FileNotFoundException e) {\r
147                         e.printStackTrace();\r
148                         fail(e.getLocalizedMessage());\r
149                 } catch (IOException e) {\r
150                         e.printStackTrace();\r
151                         fail(e.getLocalizedMessage());\r
152                 } catch (UnknownFileFormatException e) {\r
153                         e.printStackTrace();\r
154                         fail(e.getLocalizedMessage());\r
155                 }\r
156         }\r
157 \r
158         enum Trial {\r
159                 one, two, three\r
160         };\r
161 \r
162         /**\r
163          * This test tests the loading of horizontally formatted Jronn output file\r
164          * \r
165          * First seq\r
166          * \r
167          * M 0.86010 0.88512 0.37094\r
168          * \r
169          * T 0.79983 0.85864 0.44331\r
170          * \r
171          */\r
172         @SuppressWarnings("unchecked")\r
173         @Test\r
174         public void ReadDisemblResults() {\r
175                 Map<String, Map<String,Set<Range>>> _ranges=new HashMap<String, Map<String,Set<Range>>>();\r
176                 Map<String, Set<Range>> ranges=new HashMap<String,Set<Range>>();\r
177                 Map<String,Map<String, Float>>  _values=new HashMap<String, Map<String,Float>>();\r
178                 Map<String, Float> values = new HashMap<String, Float>();\r
179                 Set<Range> rset;\r
180                 rset = new HashSet<Range>();\r
181                 for (String[] se:new String[][] { { "34","41"},{"50","58"},{"83","91"},{"118","127"},{" 160","169"},{" 191","220"},{" 243","252"},{" 287","343"},{" 350","391"},{" 429","485"},{" 497","506"},{"539","547"}}) {\r
182                         rset.add(new Range(se));\r
183                 }\r
184                 ranges.put(DisemblResult.COILS.toString(), rset);\r
185                 values.put(DisemblResult.COILS.toString(), Float.valueOf(0.86010f));\r
186                 rset = new HashSet<Range>();\r
187                 for (String[] se:new String[][] { { "355","368"}}) {\r
188                         rset.add(new Range(se));\r
189                 }\r
190                 ranges.put(DisemblResult.REM465.toString(), rset);\r
191                 values.put(DisemblResult.REM465.toString(), Float.valueOf(0.88512f));\r
192                 rset = new HashSet<Range>();\r
193                 for (String[] se:new String[][] { { "190","204"}}) {\r
194                         rset.add(new Range(se));\r
195                 }\r
196                 ranges.put(DisemblResult.HOTLOOPS.toString(), rset);\r
197                 values.put(DisemblResult.HOTLOOPS.toString(), Float.valueOf(0.37094f));\r
198                 _ranges.put("Foobar_dundeefriends", ranges);\r
199                 _values.put("Foobar_dundeefriends", values);\r
200                 FileInputStream fio;\r
201                 try {\r
202                         fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "disembl.out");\r
203                         Map<String, Set<Score>> aseqs = SequenceUtil.readDisembl(fio);\r
204                         assertNotNull(aseqs);\r
205                         assertEquals(aseqs.size(), 3);\r
206                         ScoreManager sman = ScoreManager.newInstance(aseqs);\r
207 \r
208                         for (String fs : aseqs.keySet()) {\r
209                                 assertTrue(" Foobar_dundeefriends Foobar dundeefriends ".contains(fs));\r
210                                 Set<Score> scores = aseqs.get(fs);\r
211                                 assertEquals(scores.size(), 3);\r
212                                 for (Score sc:scores) {\r
213                                         if (_ranges.containsKey(fs)) {\r
214                                                 assertEquals("Checking range for Method "+sc.getMethod(),_ranges.get(fs).get(sc.getMethod()), sc.getRanges());\r
215                                                 assertEquals("Checking first value for Method "+sc.getMethod(), _values.get(fs).get(sc.getMethod()), sc.getScores().get(0));\r
216                                         }\r
217                                 }\r
218                         }\r
219                         fio.close();\r
220                 } catch (FileNotFoundException e) {\r
221                         e.printStackTrace();\r
222                         fail(e.getLocalizedMessage());\r
223                 } catch (IOException e) {\r
224                         e.printStackTrace();\r
225                         fail(e.getLocalizedMessage());\r
226                 } catch (UnknownFileFormatException e) {\r
227                         e.printStackTrace();\r
228                         fail(e.getLocalizedMessage());\r
229                 }\r
230         }\r
231 \r
232         /**\r
233          * This method tests the loading of horizontally formatted Jronn output file\r
234          * \r
235          * First sequence:\r
236          * \r
237          * >Foobar_dundeefriends\r
238          * \r
239          * # GlobDoms 2-358, 373-568\r
240          * \r
241          * # Disorder 1-5, 206-218, 243-250, 288-300, 313-324, 359-372, 475-481\r
242          * \r
243          * # RESIDUE DYDX RAW SMOOTHED\r
244          * \r
245          * M 0.0044 -0.2259 -0.2259\r
246          * \r
247          * T -0.1308 -0.2170 -0.2170\r
248          * \r
249          * ............\r
250          * \r
251          * > Second sequence\r
252          */\r
253         @SuppressWarnings("unchecked")\r
254         @Test\r
255         public void ReadGlobPlotResults() {\r
256 \r
257                 FileInputStream fio;\r
258                 try {\r
259                         fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "globplot.out");\r
260                         HashMap<String, Set<Score>> aseqs = SequenceUtil.readGlobPlot(fio);\r
261                         assertNotNull(aseqs);\r
262                         assertEquals(aseqs.size(), 3);\r
263 \r
264                         String fsdf = null;\r
265                         Set<Score> scores = null;\r
266                         for (String fs : aseqs.keySet()) {\r
267                                 if ("Foobar_dundeefriends".contains(fs)) {\r
268                                         fsdf = fs;\r
269                                         scores = aseqs.get(fs);\r
270                                 }\r
271                                 assertEquals(scores.size(), 5);\r
272                         }\r
273 \r
274                         ScoreManager sm = ScoreManager.newInstanceSingleSequence(scores);\r
275                         sm.writeOut(new PrintWriter(System.out, true));\r
276 \r
277                         for (Score score : scores) {\r
278                                 if (score.getMethod().equals(GlobProtResult.Disorder.toString())) {\r
279                                         assertEquals(score.getRanges().size(), 7);\r
280                                         assertTrue(score.getScores().isEmpty());\r
281                                 }\r
282                                 if (GlobProtResult.valueOf(score.getMethod()) == GlobProtResult.Dydx) {\r
283                                         assertFalse(score.getScores().isEmpty());\r
284                                         assertTrue(score.getRanges().isEmpty());\r
285                                 }\r
286                         }\r
287                         fio.close();\r
288                 } catch (FileNotFoundException e) {\r
289                         e.printStackTrace();\r
290                         fail(e.getLocalizedMessage());\r
291                 } catch (IOException e) {\r
292                         e.printStackTrace();\r
293                         fail(e.getLocalizedMessage());\r
294                 } catch (UnknownFileFormatException e) {\r
295                         e.printStackTrace();\r
296                         fail(e.getLocalizedMessage());\r
297                 }\r
298         }\r
299 \r
300         @Test\r
301         public void ReadIUPredForShortAndLongDisorder() {\r
302                 try {\r
303                         Map<String, Score> scores = SequenceUtil.readIUPred(new File(AllTestSuit.TEST_DATA_PATH, "out.long"));\r
304                         ScoreManager man = ScoreManager.newInstanceSingleScore(scores);\r
305                         assertNotNull(scores);\r
306                         assertEquals(3, scores.size());\r
307 \r
308                         Score score = scores.get("Foobar_dundeefriends");\r
309                         assertNotNull(score);\r
310                         assertEquals(0, score.getRanges().size());\r
311                         assertEquals(568, score.getScores().size());\r
312                         assertEquals("Long", score.getMethod());\r
313 \r
314                         score = scores.get("Foobar");\r
315                         assertNotNull(score);\r
316                         assertEquals(0, score.getRanges().size());\r
317                         assertEquals(481, score.getScores().size());\r
318                         assertEquals("Long", score.getMethod());\r
319 \r
320                         score = scores.get("dundeefriends");\r
321                         assertNotNull(score);\r
322                         assertEquals(0, score.getRanges().size());\r
323                         assertEquals(513, score.getScores().size());\r
324                         assertEquals("Long", score.getMethod());\r
325                 } catch (IOException e) {\r
326                         e.printStackTrace();\r
327                         fail(e.getLocalizedMessage());\r
328                 } catch (UnknownFileFormatException e) {\r
329                         e.printStackTrace();\r
330                         fail(e.getLocalizedMessage());\r
331                 }\r
332         }\r
333 \r
334         @Test\r
335         public void ReadIUPredForGlobDomain() {\r
336                 try {\r
337                         Map<String, Score> scores = SequenceUtil.readIUPred(new File(AllTestSuit.TEST_DATA_PATH, "output.glob"));\r
338                         assertNotNull(scores);\r
339                         assertEquals(2, scores.size());\r
340                         ScoreManager man = ScoreManager.newInstanceSingleScore(scores);\r
341                         assertEquals(2, man.getNumberOfSeq());\r
342                         Score score = scores.get("P53_HUMA");\r
343                         assertNotNull(score);\r
344                         assertEquals(2, score.getRanges().size());\r
345                         assertEquals(0, score.getScores().size());\r
346                         assertEquals("Glob", score.getMethod());\r
347                         score = scores.get("Foobar_dundeefriends");\r
348                         assertEquals(0, score.getRanges().size());\r
349                 } catch (IOException e) {\r
350                         e.printStackTrace();\r
351                         fail(e.getLocalizedMessage());\r
352                 } catch (UnknownFileFormatException e) {\r
353                         e.printStackTrace();\r
354                         fail(e.getLocalizedMessage());\r
355                 }\r
356         }\r
357 \r
358         @Test\r
359         public void ReadAAConResults() {\r
360                 try {\r
361                         InputStream inStream = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "aacon_results.txt");\r
362                         HashSet<Score> result = SequenceUtil.readAAConResults(inStream);\r
363                         inStream.close();\r
364                         assertNotNull(result);\r
365                         assertEquals(result.size(), 18);\r
366 \r
367                         inStream = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "aacon_result_single.out");\r
368                         result = SequenceUtil.readAAConResults(inStream);\r
369                         inStream.close();\r
370                         assertNotNull(result);\r
371                         assertEquals(result.size(), 1);\r
372                         assertEquals(result.iterator().next().getScores().size(), 568);\r
373                 } catch (IOException e) {\r
374                         e.printStackTrace();\r
375                         fail(e.getMessage());\r
376                 }\r
377         }\r
378         @Test\r
379         public void ReadJpredResults() {\r
380                 try {\r
381                         InputStream inStream = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "Jpred.test1.out");\r
382                         List<FastaSequence> result = SequenceUtil.readJpredFile(inStream);\r
383                         inStream.close();\r
384                         assertNotNull(result);\r
385                         assertEquals(result.size(), 19);\r
386                 } catch (IOException e) {\r
387                         e.printStackTrace();\r
388                         fail(e.getMessage());\r
389                 }\r
390         }\r
391 }\r