remove jabaws 3 development code
[jabaws.git] / testsrc / compbio / data / sequence / SequenceUtilTester.java
1 /*\r
2  * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services\r
3  * (JABAWS) @version: 1.0 This library is free software; you can redistribute it\r
4  * and/or modify it under the terms of the Apache License version 2 as published\r
5  * by the Apache Software Foundation This library is distributed in the hope\r
6  * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied\r
7  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
8  * Apache License for more details. A copy of the license is in\r
9  * apache_license.txt. It is also available here:\r
10  * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or\r
11  * derived work distributed in source code form must include this copyright and\r
12  * license notice.\r
13  */\r
14 package compbio.data.sequence;\r
15 \r
16 import static org.testng.AssertJUnit.assertEquals;\r
17 import static org.testng.AssertJUnit.assertFalse;\r
18 import static org.testng.AssertJUnit.assertNotNull;\r
19 import static org.testng.AssertJUnit.assertTrue;\r
20 import static org.testng.AssertJUnit.fail;\r
21 \r
22 import java.io.File;\r
23 import java.io.FileInputStream;\r
24 import java.io.FileNotFoundException;\r
25 import java.io.FileOutputStream;\r
26 import java.io.IOException;\r
27 import java.io.InputStream;\r
28 import java.io.PrintWriter;\r
29 import java.util.HashMap;\r
30 import java.util.HashSet;\r
31 import java.util.List;\r
32 import java.util.Map;\r
33 import java.util.Set;\r
34 \r
35 import org.testng.annotations.Test;\r
36 \r
37 import compbio.metadata.AllTestSuit;\r
38 \r
39 public class SequenceUtilTester {\r
40 \r
41         @Test()\r
42         public void testisNonAmbNucleotideSequence() {\r
43                 String dnaseq = "atgatTGACGCTGCTGatgtcgtgagtgga";\r
44                 assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dnaseq));\r
45                 String dirtyDnaseq = "atgAGTggt\taGGTgc\ncgcACTgc gACtcgcGAt cgA ";\r
46                 assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dirtyDnaseq));\r
47                 String nonDna = "atgfctgatgcatgcatgatgctga";\r
48                 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
49 \r
50                 nonDna = "atgc1tgatgcatgcatgatgctga";\r
51                 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
52 \r
53                 nonDna = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
54                 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
55                 // String ambDna = "AGTCRYMKSWHBVDN"; // see IUPAC Nucleotide Code\r
56                 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
57 \r
58         }\r
59 \r
60         @Test()\r
61         public void testCleanSequence() {\r
62                 String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
63                 assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),\r
64                                 SequenceUtil.cleanSequence(dirtySeq));\r
65         }\r
66 \r
67         @Test()\r
68         public void testDeepCleanSequence() {\r
69                 String dirtySeq = "a!t?g.A;GTggt\ta12GGTgc\ncgc23AC\rTgc gAC<>.,?!|\\|/t@cg-c¬GA=_+(0){]}[:£$&^*\"t cgA ";\r
70                 assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),\r
71                                 SequenceUtil.deepCleanSequence(dirtySeq));\r
72         }\r
73 \r
74         @Test()\r
75         public void testisProteinSequence() {\r
76                 String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
77                 assertFalse(SequenceUtil.isProteinSequence(dirtySeq));\r
78                 String notaSeq = "atgc1tgatgcatgcatgatgctga";\r
79                 assertFalse(SequenceUtil.isProteinSequence(notaSeq));\r
80                 String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
81                 assertTrue(SequenceUtil.isProteinSequence(AAseq));\r
82                 AAseq += "XU";\r
83                 assertFalse(SequenceUtil.isProteinSequence(AAseq));\r
84 \r
85         }\r
86 \r
87         @Test()\r
88         public void testCleanProteinSequence() {\r
89                 String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
90                 assertFalse(SequenceUtil.isProteinSequence(dirtySeq));\r
91                 // This will still be NON protein sequence despite having only correct\r
92                 // letters because the letters match perfectly the nucleotide sequence!\r
93                 assertFalse(SequenceUtil.isProteinSequence(SequenceUtil\r
94                                 .cleanProteinSequence(dirtySeq)));\r
95 \r
96                 String notaSeq = "atgc1tgatgcatgcatgatgmctga";\r
97                 assertFalse(SequenceUtil.isProteinSequence(notaSeq));\r
98                 assertTrue(SequenceUtil.isProteinSequence(SequenceUtil\r
99                                 .cleanProteinSequence(notaSeq)));\r
100 \r
101                 String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
102                 assertTrue(SequenceUtil.isProteinSequence(AAseq));\r
103                 assertTrue(SequenceUtil.isProteinSequence(SequenceUtil\r
104                                 .cleanProteinSequence(AAseq)));\r
105                 AAseq += "XU";\r
106 \r
107                 assertFalse(SequenceUtil.isProteinSequence(AAseq));\r
108                 assertTrue(SequenceUtil.isProteinSequence(SequenceUtil\r
109                                 .cleanProteinSequence(AAseq)));\r
110         }\r
111 \r
112         @Test()\r
113         public void testReadWriteFasta() {\r
114 \r
115                 try {\r
116                         FileInputStream fio = new FileInputStream(\r
117                                         AllTestSuit.TEST_DATA_PATH + "TO1381.fasta");\r
118                         assertNotNull(fio);\r
119                         List<FastaSequence> fseqs = SequenceUtil.readFasta(fio);\r
120                         assertNotNull(fseqs);\r
121                         assertEquals(3, fseqs.size());\r
122                         assertEquals(3, fseqs.size());\r
123                         fio.close();\r
124                         FileOutputStream fou = new FileOutputStream(\r
125                                         AllTestSuit.TEST_DATA_PATH + "TO1381.fasta.written");\r
126                         SequenceUtil.writeFasta(fou, fseqs);\r
127                         fou.close();\r
128                         FileOutputStream fou20 = new FileOutputStream(\r
129                                         AllTestSuit.TEST_DATA_PATH + "TO1381.fasta20.written");\r
130                         SequenceUtil.writeFasta(fou20, fseqs, 21);\r
131                         fou20.close();\r
132 \r
133                 } catch (FileNotFoundException e) {\r
134                         e.printStackTrace();\r
135                         fail(e.getLocalizedMessage());\r
136                 } catch (IOException e) {\r
137                         e.printStackTrace();\r
138                         fail(e.getLocalizedMessage());\r
139                 }\r
140         }\r
141 \r
142         /**\r
143          * This test tests the loading of horizontally formatted Jronn output file\r
144          */\r
145         @Test\r
146         public void loadJronnFile() {\r
147 \r
148                 FileInputStream fio;\r
149                 try {\r
150                         fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "jronn.out");\r
151                         Map<String, Score> aseqs = SequenceUtil.readJRonn(fio);\r
152                         assertNotNull(aseqs);\r
153                         assertEquals(aseqs.size(), 3);\r
154                         Score aseq = aseqs.get("Foobar");\r
155                         assertNotNull(aseq);\r
156                         assertNotNull(aseq.getScores());\r
157                         // System.out.println(aseq);\r
158                         assertEquals(aseq.getScores().size(), aseq.getScores().size());\r
159                         fio.close();\r
160                 } catch (FileNotFoundException e) {\r
161                         e.printStackTrace();\r
162                         fail(e.getLocalizedMessage());\r
163                 } catch (IOException e) {\r
164                         e.printStackTrace();\r
165                         fail(e.getLocalizedMessage());\r
166                 } catch (UnknownFileFormatException e) {\r
167                         e.printStackTrace();\r
168                         fail(e.getLocalizedMessage());\r
169                 }\r
170 \r
171         }\r
172 \r
/**
 * Three-constant enumeration declared alongside the tests. None of the
 * test methods visible in this file refer to it.
 */
enum Trial {
    one,
    two,
    three
}
176 \r
177         /**\r
178          * This test tests the loading of horizontally formatted Jronn output file\r
179          * \r
180          * First seq\r
181          * \r
182          * M 0.86010 0.88512 0.37094\r
183          * \r
184          * T 0.79983 0.85864 0.44331\r
185          * \r
186          */\r
187         @SuppressWarnings("unchecked")\r
188         @Test\r
189         public void testReadDisemblResults() {\r
190 \r
191                 FileInputStream fio;\r
192                 try {\r
193                         fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH\r
194                                         + "disembl.out");\r
195                         Map<String, Set<Score>> aseqs = SequenceUtil.readDisembl(fio);\r
196                         assertNotNull(aseqs);\r
197                         assertEquals(aseqs.size(), 3);\r
198                         ScoreManager sman = ScoreManager.newInstance(aseqs);\r
199 \r
200                         for (String fs : aseqs.keySet()) {\r
201                                 assertTrue(" Foobar_dundeefriends Foobar dundeefriends "\r
202                                                 .contains(fs));\r
203                                 Set<Score> scores = aseqs.get(fs);\r
204                                 assertEquals(scores.size(), 3);\r
205                         }\r
206                         fio.close();\r
207                 } catch (FileNotFoundException e) {\r
208                         e.printStackTrace();\r
209                         fail(e.getLocalizedMessage());\r
210                 } catch (IOException e) {\r
211                         e.printStackTrace();\r
212                         fail(e.getLocalizedMessage());\r
213                 } catch (UnknownFileFormatException e) {\r
214                         e.printStackTrace();\r
215                         fail(e.getLocalizedMessage());\r
216                 }\r
217         }\r
218 \r
219         /**\r
220          * This test tests the loading of horizontally formatted Jronn output file\r
221          * \r
222          * First sequence:\r
223          * \r
224          * >Foobar_dundeefriends\r
225          * \r
226          * # GlobDoms 2-358, 373-568\r
227          * \r
228          * # Disorder 1-5, 206-218, 243-250, 288-300, 313-324, 359-372, 475-481\r
229          * \r
230          * # RESIDUE DYDX RAW SMOOTHED\r
231          * \r
232          * M 0.0044 -0.2259 -0.2259\r
233          * \r
234          * T -0.1308 -0.2170 -0.2170\r
235          * \r
236          * ............\r
237          * \r
238          * > Second sequence\r
239          */\r
240         @SuppressWarnings("unchecked")\r
241         @Test\r
242         public void testReadGlobPlotResults() {\r
243 \r
244                 FileInputStream fio;\r
245                 try {\r
246                         fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH\r
247                                         + "globplot.out");\r
248                         HashMap<String, Set<Score>> aseqs = SequenceUtil.readGlobPlot(fio);\r
249                         assertNotNull(aseqs);\r
250                         assertEquals(aseqs.size(), 3);\r
251 \r
252                         String fsdf = null;\r
253                         Set<Score> scores = null;\r
254                         for (String fs : aseqs.keySet()) {\r
255                                 if ("Foobar_dundeefriends".contains(fs)) {\r
256                                         fsdf = fs;\r
257                                         scores = aseqs.get(fs);\r
258                                 }\r
259                                 assertEquals(scores.size(), 5);\r
260                         }\r
261 \r
262                         ScoreManager sm = ScoreManager.newInstanceSingleSequence(scores);\r
263                         sm.writeOut(new PrintWriter(System.out, true));\r
264 \r
265                         for (Score score : scores) {\r
266 \r
267                                 if (score.getMethod()\r
268                                                 .equals(GlobProtResult.Disorder.toString())) {\r
269                                         assertEquals(score.getRanges().size(), 7);\r
270                                         assertTrue(score.getScores().isEmpty());\r
271                                 }\r
272                                 if (GlobProtResult.valueOf(score.getMethod()) == GlobProtResult.Dydx) {\r
273                                         assertFalse(score.getScores().isEmpty());\r
274                                         assertTrue(score.getRanges().isEmpty());\r
275                                 }\r
276                         }\r
277                         fio.close();\r
278                 } catch (FileNotFoundException e) {\r
279                         e.printStackTrace();\r
280                         fail(e.getLocalizedMessage());\r
281                 } catch (IOException e) {\r
282                         e.printStackTrace();\r
283                         fail(e.getLocalizedMessage());\r
284                 } catch (UnknownFileFormatException e) {\r
285                         e.printStackTrace();\r
286                         fail(e.getLocalizedMessage());\r
287                 }\r
288         }\r
289 \r
290         @Test\r
291         public void testReadIUPredForShortAndLongDisorder() {\r
292                 try {\r
293                         Map<String, Score> scores = SequenceUtil.readIUPred(new File(\r
294                                         AllTestSuit.TEST_DATA_PATH, "out.long"));\r
295                         ScoreManager man = ScoreManager.newInstanceSingleScore(scores);\r
296                         // man.writeOut(new PrintWriter(System.out, true));\r
297                         assertNotNull(scores);\r
298                         assertEquals(3, scores.size());\r
299 \r
300                         Score score = scores.get("Foobar_dundeefriends");\r
301                         assertNotNull(score);\r
302                         assertEquals(0, score.getRanges().size());\r
303                         assertEquals(568, score.getScores().size());\r
304                         assertEquals("Long", score.getMethod());\r
305                         \r
306                         score = scores.get("Foobar");\r
307                         assertNotNull(score);\r
308                         assertEquals(0, score.getRanges().size());\r
309                         assertEquals(481, score.getScores().size());\r
310                         assertEquals("Long", score.getMethod());\r
311                         \r
312                         score = scores.get("dundeefriends");\r
313                         assertNotNull(score);\r
314                         assertEquals(0, score.getRanges().size());\r
315                         assertEquals(513, score.getScores().size());\r
316                         assertEquals("Long", score.getMethod());\r
317                         \r
318                 } catch (IOException e) {\r
319                         e.printStackTrace();\r
320                         fail(e.getLocalizedMessage());\r
321                 } catch (UnknownFileFormatException e) {\r
322                         e.printStackTrace();\r
323                         fail(e.getLocalizedMessage());\r
324                 }\r
325         }\r
326 \r
327         @Test\r
328         public void testReadIUPredForGlobDomain() {\r
329                 try {\r
330                         Map<String, Score> scores = SequenceUtil.readIUPred(new File(\r
331                                         AllTestSuit.TEST_DATA_PATH, "output.glob"));\r
332                         assertNotNull(scores);\r
333                         assertEquals(2, scores.size());\r
334                         ScoreManager man = ScoreManager.newInstanceSingleScore(scores);\r
335                         // man.writeOut(new PrintWriter(System.out, true));\r
336                         assertEquals(2, man.getNumberOfSeq());\r
337                         Score score = scores.get("P53_HUMA");\r
338                         assertNotNull(score);\r
339                         assertEquals(2, score.getRanges().size());\r
340                         assertEquals(0, score.getScores().size());\r
341                         assertEquals("Glob", score.getMethod());\r
342 \r
343                         score = scores.get("Foobar_dundeefriends");\r
344                         assertEquals(0, score.getRanges().size());\r
345                 } catch (IOException e) {\r
346                         e.printStackTrace();\r
347                         fail(e.getLocalizedMessage());\r
348                 } catch (UnknownFileFormatException e) {\r
349                         e.printStackTrace();\r
350                         fail(e.getLocalizedMessage());\r
351                 }\r
352         }\r
353         @Test\r
354         public void testReadAAConResults() {\r
355                 try {\r
356                         InputStream inStream = new FileInputStream(\r
357                                         AllTestSuit.TEST_DATA_PATH + "aacon_results.txt");\r
358                         HashSet<Score> result = SequenceUtil.readAAConResults(inStream);\r
359                         inStream.close();\r
360                         assertNotNull(result);\r
361                         assertEquals(result.size(), 18);\r
362 \r
363                         inStream = new FileInputStream(AllTestSuit.TEST_DATA_PATH\r
364                                         + "aacon_result_single.out");\r
365                         result = SequenceUtil.readAAConResults(inStream);\r
366                         inStream.close();\r
367                         assertNotNull(result);\r
368                         assertEquals(result.size(), 1);\r
369                         assertEquals(result.iterator().next().getScores().size(), 568);\r
370                 } catch (IOException e) {\r
371                         e.printStackTrace();\r
372                         fail(e.getMessage());\r
373                 }\r
374         }\r
375 }\r