More work to disorder prediction client & services.
[jabaws.git] / testsrc / compbio / data / sequence / SequenceUtilTester.java
1 /*\r
2  * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services\r
3  * (JABAWS) @version: 1.0 This library is free software; you can redistribute it\r
4  * and/or modify it under the terms of the Apache License version 2 as published\r
5  * by the Apache Software Foundation This library is distributed in the hope\r
6  * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied\r
7  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
8  * Apache License for more details. A copy of the license is in\r
9  * apache_license.txt. It is also available here:\r
10  * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or\r
11  * derived work distributed in source code form must include this copyright and\r
12  * license notice.\r
13  */\r
14 package compbio.data.sequence;\r
15 \r
16 import static org.testng.AssertJUnit.assertEquals;\r
17 import static org.testng.AssertJUnit.assertFalse;\r
18 import static org.testng.AssertJUnit.assertNotNull;\r
19 import static org.testng.AssertJUnit.assertTrue;\r
20 import static org.testng.AssertJUnit.fail;\r
21 \r
22 import java.io.FileInputStream;\r
23 import java.io.FileNotFoundException;\r
24 import java.io.FileOutputStream;\r
25 import java.io.IOException;\r
26 import java.io.InputStream;\r
27 import java.io.PrintWriter;\r
28 import java.util.HashMap;\r
29 import java.util.HashSet;\r
30 import java.util.List;\r
31 import java.util.Map;\r
32 import java.util.Set;\r
33 \r
34 import org.testng.annotations.Test;\r
35 \r
36 import compbio.metadata.AllTestSuit;\r
37 \r
38 public class SequenceUtilTester {\r
39 \r
40         @Test()\r
41         public void testisNonAmbNucleotideSequence() {\r
42                 String dnaseq = "atgatTGACGCTGCTGatgtcgtgagtgga";\r
43                 assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dnaseq));\r
44                 String dirtyDnaseq = "atgAGTggt\taGGTgc\ncgcACTgc gACtcgcGAt cgA ";\r
45                 assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dirtyDnaseq));\r
46                 String nonDna = "atgfctgatgcatgcatgatgctga";\r
47                 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
48 \r
49                 nonDna = "atgc1tgatgcatgcatgatgctga";\r
50                 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
51 \r
52                 nonDna = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
53                 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
54                 // String ambDna = "AGTCRYMKSWHBVDN"; // see IUPAC Nucleotide Code\r
55                 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
56 \r
57         }\r
58 \r
59         @Test()\r
60         public void testCleanSequence() {\r
61                 String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
62                 assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),\r
63                                 SequenceUtil.cleanSequence(dirtySeq));\r
64         }\r
65 \r
66         @Test()\r
67         public void testDeepCleanSequence() {\r
68                 String dirtySeq = "a!t?g.A;GTggt\ta12GGTgc\ncgc23AC\rTgc gAC<>.,?!|\\|/t@cg-c¬GA=_+(0){]}[:£$&^*\"t cgA ";\r
69                 assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),\r
70                                 SequenceUtil.deepCleanSequence(dirtySeq));\r
71         }\r
72 \r
73         @Test()\r
74         public void testisProteinSequence() {\r
75                 String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
76                 assertFalse(SequenceUtil.isProteinSequence(dirtySeq));\r
77                 String notaSeq = "atgc1tgatgcatgcatgatgctga";\r
78                 assertFalse(SequenceUtil.isProteinSequence(notaSeq));\r
79                 String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
80                 assertTrue(SequenceUtil.isProteinSequence(AAseq));\r
81                 AAseq += "XU";\r
82                 assertFalse(SequenceUtil.isProteinSequence(AAseq));\r
83 \r
84         }\r
85 \r
86         @Test()\r
87         public void testCleanProteinSequence() {\r
88                 String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
89                 assertFalse(SequenceUtil.isProteinSequence(dirtySeq));\r
90                 // This will still be NON protein sequence despite having only correct\r
91                 // letters because the letters match perfectly the nucleotide sequence!\r
92                 assertFalse(SequenceUtil.isProteinSequence(SequenceUtil\r
93                                 .cleanProteinSequence(dirtySeq)));\r
94 \r
95                 String notaSeq = "atgc1tgatgcatgcatgatgmctga";\r
96                 assertFalse(SequenceUtil.isProteinSequence(notaSeq));\r
97                 assertTrue(SequenceUtil.isProteinSequence(SequenceUtil\r
98                                 .cleanProteinSequence(notaSeq)));\r
99 \r
100                 String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
101                 assertTrue(SequenceUtil.isProteinSequence(AAseq));\r
102                 assertTrue(SequenceUtil.isProteinSequence(SequenceUtil\r
103                                 .cleanProteinSequence(AAseq)));\r
104                 AAseq += "XU";\r
105 \r
106                 assertFalse(SequenceUtil.isProteinSequence(AAseq));\r
107                 assertTrue(SequenceUtil.isProteinSequence(SequenceUtil\r
108                                 .cleanProteinSequence(AAseq)));\r
109         }\r
110 \r
111         @Test()\r
112         public void testReadWriteFasta() {\r
113 \r
114                 try {\r
115                         FileInputStream fio = new FileInputStream(\r
116                                         AllTestSuit.TEST_DATA_PATH + "TO1381.fasta");\r
117                         assertNotNull(fio);\r
118                         List<FastaSequence> fseqs = SequenceUtil.readFasta(fio);\r
119                         assertNotNull(fseqs);\r
120                         assertEquals(3, fseqs.size());\r
121                         assertEquals(3, fseqs.size());\r
122                         fio.close();\r
123                         FileOutputStream fou = new FileOutputStream(\r
124                                         AllTestSuit.TEST_DATA_PATH + "TO1381.fasta.written");\r
125                         SequenceUtil.writeFasta(fou, fseqs);\r
126                         fou.close();\r
127                         FileOutputStream fou20 = new FileOutputStream(\r
128                                         AllTestSuit.TEST_DATA_PATH + "TO1381.fasta20.written");\r
129                         SequenceUtil.writeFasta(fou20, fseqs, 21);\r
130                         fou20.close();\r
131 \r
132                 } catch (FileNotFoundException e) {\r
133                         e.printStackTrace();\r
134                         fail(e.getLocalizedMessage());\r
135                 } catch (IOException e) {\r
136                         e.printStackTrace();\r
137                         fail(e.getLocalizedMessage());\r
138                 }\r
139         }\r
140 \r
141         /**\r
142          * This test tests the loading of horizontally formatted Jronn output file\r
143          */\r
144         @Test\r
145         public void loadJronnFile() {\r
146 \r
147                 FileInputStream fio;\r
148                 try {\r
149                         fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "jronn.out");\r
150                         Map<String, Score> aseqs = SequenceUtil.readJRonn(fio);\r
151                         assertNotNull(aseqs);\r
152                         assertEquals(aseqs.size(), 3);\r
153                         Score aseq = aseqs.get("Foobar");\r
154                         assertNotNull(aseq);\r
155                         assertNotNull(aseq.getScores());\r
156                         // System.out.println(aseq);\r
157                         assertEquals(aseq.getScores().size(), aseq.getScores().size());\r
158                         fio.close();\r
159                 } catch (FileNotFoundException e) {\r
160                         e.printStackTrace();\r
161                         fail(e.getLocalizedMessage());\r
162                 } catch (IOException e) {\r
163                         e.printStackTrace();\r
164                         fail(e.getLocalizedMessage());\r
165                 } catch (UnknownFileFormatException e) {\r
166                         e.printStackTrace();\r
167                         fail(e.getLocalizedMessage());\r
168                 }\r
169 \r
170         }\r
171 \r
172         enum Trial {\r
173                 one, two, three\r
174         };\r
175 \r
176         /**\r
177          * This test tests the loading of horizontally formatted Jronn output file\r
178          * \r
179          * First seq\r
180          * \r
181          * M 0.86010 0.88512 0.37094\r
182          * \r
183          * T 0.79983 0.85864 0.44331\r
184          * \r
185          */\r
186         @SuppressWarnings("unchecked")\r
187         @Test\r
188         public void testReadDisemblResults() {\r
189 \r
190                 FileInputStream fio;\r
191                 try {\r
192                         fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH\r
193                                         + "disembl.out");\r
194                         Map<String, Set<Score>> aseqs = SequenceUtil.readDisembl(fio);\r
195                         assertNotNull(aseqs);\r
196                         assertEquals(aseqs.size(), 3);\r
197                         ScoreManager sman = ScoreManager.newInstance(aseqs);\r
198 \r
199                         for (String fs : aseqs.keySet()) {\r
200                                 assertTrue(" Foobar_dundeefriends Foobar dundeefriends "\r
201                                                 .contains(fs));\r
202                                 Set<Score> scores = aseqs.get(fs);\r
203                                 assertEquals(scores.size(), 3);\r
204                         }\r
205                         fio.close();\r
206                 } catch (FileNotFoundException e) {\r
207                         e.printStackTrace();\r
208                         fail(e.getLocalizedMessage());\r
209                 } catch (IOException e) {\r
210                         e.printStackTrace();\r
211                         fail(e.getLocalizedMessage());\r
212                 } catch (UnknownFileFormatException e) {\r
213                         e.printStackTrace();\r
214                         fail(e.getLocalizedMessage());\r
215                 }\r
216         }\r
217 \r
218         /**\r
219          * This test tests the loading of horizontally formatted Jronn output file\r
220          * \r
221          * First sequence:\r
222          * \r
223          * >Foobar_dundeefriends\r
224          * \r
225          * # GlobDoms 2-358, 373-568\r
226          * \r
227          * # Disorder 1-5, 206-218, 243-250, 288-300, 313-324, 359-372, 475-481\r
228          * \r
229          * # RESIDUE DYDX RAW SMOOTHED\r
230          * \r
231          * M 0.0044 -0.2259 -0.2259\r
232          * \r
233          * T -0.1308 -0.2170 -0.2170\r
234          * \r
235          * ............\r
236          * \r
237          * > Second sequence\r
238          */\r
239         @SuppressWarnings("unchecked")\r
240         @Test\r
241         public void testReadGlobPlotResults() {\r
242 \r
243                 FileInputStream fio;\r
244                 try {\r
245                         fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH\r
246                                         + "globplot.out");\r
247                         HashMap<String, Set<Score>> aseqs = SequenceUtil.readGlobPlot(fio);\r
248                         assertNotNull(aseqs);\r
249                         assertEquals(aseqs.size(), 3);\r
250 \r
251                         String fsdf = null;\r
252                         Set<Score> scores = null;\r
253                         for (String fs : aseqs.keySet()) {\r
254                                 if ("Foobar_dundeefriends".contains(fs)) {\r
255                                         fsdf = fs;\r
256                                         scores = aseqs.get(fs);\r
257                                 }\r
258                                 assertEquals(scores.size(), 5);\r
259                         }\r
260 \r
261                         ScoreManager sm = ScoreManager.newInstanceSingleSequence(scores);\r
262                         sm.writeOut(new PrintWriter(System.out, true));\r
263 \r
264                         for (Score score : scores) {\r
265 \r
266                                 if (score.getMethod()\r
267                                                 .equals(GlobProtResult.Disorder.toString())) {\r
268                                         assertEquals(score.getRanges().size(), 7);\r
269                                         assertTrue(score.getScores().isEmpty());\r
270                                 }\r
271                                 if (GlobProtResult.valueOf(score.getMethod()) == GlobProtResult.Dydx) {\r
272                                         assertFalse(score.getScores().isEmpty());\r
273                                         assertTrue(score.getRanges().isEmpty());\r
274                                 }\r
275                         }\r
276                         fio.close();\r
277                 } catch (FileNotFoundException e) {\r
278                         e.printStackTrace();\r
279                         fail(e.getLocalizedMessage());\r
280                 } catch (IOException e) {\r
281                         e.printStackTrace();\r
282                         fail(e.getLocalizedMessage());\r
283                 } catch (UnknownFileFormatException e) {\r
284                         e.printStackTrace();\r
285                         fail(e.getLocalizedMessage());\r
286                 }\r
287         }\r
288 \r
289         @Test\r
290         public void testReadAAConResults() {\r
291                 try {\r
292                         InputStream inStream = new FileInputStream(\r
293                                         AllTestSuit.TEST_DATA_PATH + "aacon_results.txt");\r
294                         HashSet<Score> result = SequenceUtil.readAAConResults(inStream);\r
295                         inStream.close();\r
296                         assertNotNull(result);\r
297                         assertEquals(result.size(), 18);\r
298 \r
299                         inStream = new FileInputStream(AllTestSuit.TEST_DATA_PATH\r
300                                         + "aacon_result_single.out");\r
301                         result = SequenceUtil.readAAConResults(inStream);\r
302                         inStream.close();\r
303                         assertNotNull(result);\r
304                         assertEquals(result.size(), 1);\r
305                         assertEquals(result.iterator().next().getScores().size(), 568);\r
306                 } catch (IOException e) {\r
307                         e.printStackTrace();\r
308                         fail(e.getMessage());\r
309                 }\r
310         }\r
311 }\r