Parser for DisEMBL results and finished runner, few test cases. DisemblTester has...
[jabaws.git] / testsrc / compbio / data / sequence / SequenceUtilTester.java
1 /*\r
2  * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services\r
3  * (JABAWS) @version: 1.0 This library is free software; you can redistribute it\r
4  * and/or modify it under the terms of the Apache License version 2 as published\r
5  * by the Apache Software Foundation This library is distributed in the hope\r
6  * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied\r
7  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
8  * Apache License for more details. A copy of the license is in\r
9  * apache_license.txt. It is also available here:\r
10  * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or\r
11  * derived work distributed in source code form must include this copyright and\r
12  * license notice.\r
13  */\r
14 package compbio.data.sequence;\r
15 \r
16 import static org.testng.AssertJUnit.assertEquals;\r
17 import static org.testng.AssertJUnit.assertFalse;\r
18 import static org.testng.AssertJUnit.assertNotNull;\r
19 import static org.testng.AssertJUnit.assertTrue;\r
20 import static org.testng.AssertJUnit.fail;\r
21 \r
22 import java.io.FileInputStream;\r
23 import java.io.FileNotFoundException;\r
24 import java.io.FileOutputStream;\r
25 import java.io.IOException;\r
26 import java.io.InputStream;\r
27 import java.util.HashSet;\r
28 import java.util.List;\r
29 import java.util.Map;\r
30 import java.util.Set;\r
31 \r
32 import org.testng.annotations.Test;\r
33 \r
34 import compbio.metadata.AllTestSuit;\r
35 \r
36 public class SequenceUtilTester {\r
37 \r
38         @Test()\r
39         public void testisNonAmbNucleotideSequence() {\r
40                 String dnaseq = "atgatTGACGCTGCTGatgtcgtgagtgga";\r
41                 assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dnaseq));\r
42                 String dirtyDnaseq = "atgAGTggt\taGGTgc\ncgcACTgc gACtcgcGAt cgA ";\r
43                 assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dirtyDnaseq));\r
44                 String nonDna = "atgfctgatgcatgcatgatgctga";\r
45                 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
46 \r
47                 nonDna = "atgc1tgatgcatgcatgatgctga";\r
48                 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
49 \r
50                 nonDna = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
51                 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
52                 // String ambDna = "AGTCRYMKSWHBVDN"; // see IUPAC Nucleotide Code\r
53                 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
54 \r
55         }\r
56 \r
57         @Test()\r
58         public void testCleanSequence() {\r
59                 String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
60                 assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),\r
61                                 SequenceUtil.cleanSequence(dirtySeq));\r
62         }\r
63 \r
64         @Test()\r
65         public void testDeepCleanSequence() {\r
66                 String dirtySeq = "a!t?g.A;GTggt\ta12GGTgc\ncgc23AC\rTgc gAC<>.,?!|\\|/t@cg-c¬GA=_+(0){]}[:£$&^*\"t cgA ";\r
67                 assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),\r
68                                 SequenceUtil.deepCleanSequence(dirtySeq));\r
69         }\r
70 \r
71         @Test()\r
72         public void testisProteinSequence() {\r
73                 String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
74                 assertFalse(SequenceUtil.isProteinSequence(dirtySeq));\r
75                 String notaSeq = "atgc1tgatgcatgcatgatgctga";\r
76                 assertFalse(SequenceUtil.isProteinSequence(notaSeq));\r
77                 String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
78                 assertTrue(SequenceUtil.isProteinSequence(AAseq));\r
79                 AAseq += "XU";\r
80                 assertFalse(SequenceUtil.isProteinSequence(AAseq));\r
81 \r
82         }\r
83 \r
84         @Test()\r
85         public void testReadWriteFasta() {\r
86 \r
87                 try {\r
88                         FileInputStream fio = new FileInputStream(\r
89                                         AllTestSuit.TEST_DATA_PATH + "TO1381.fasta");\r
90                         assertNotNull(fio);\r
91                         List<FastaSequence> fseqs = SequenceUtil.readFasta(fio);\r
92                         assertNotNull(fseqs);\r
93                         assertEquals(3, fseqs.size());\r
94                         assertEquals(3, fseqs.size());\r
95                         fio.close();\r
96                         FileOutputStream fou = new FileOutputStream(\r
97                                         AllTestSuit.TEST_DATA_PATH + "TO1381.fasta.written");\r
98                         SequenceUtil.writeFasta(fou, fseqs);\r
99                         fou.close();\r
100                         FileOutputStream fou20 = new FileOutputStream(\r
101                                         AllTestSuit.TEST_DATA_PATH + "TO1381.fasta20.written");\r
102                         SequenceUtil.writeFasta(fou20, fseqs, 21);\r
103                         fou20.close();\r
104 \r
105                 } catch (FileNotFoundException e) {\r
106                         e.printStackTrace();\r
107                         fail(e.getLocalizedMessage());\r
108                 } catch (IOException e) {\r
109                         e.printStackTrace();\r
110                         fail(e.getLocalizedMessage());\r
111                 }\r
112         }\r
113 \r
114         /**\r
115          * This test tests the loading of horizontally formatted Jronn output file\r
116          */\r
117         @Test\r
118         public void loadJronnFile() {\r
119 \r
120                 FileInputStream fio;\r
121                 try {\r
122                         fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "jronn.out");\r
123                         Map<String, Score> aseqs = SequenceUtil.readJRonn(fio);\r
124                         assertNotNull(aseqs);\r
125                         assertEquals(aseqs.size(), 3);\r
126                         Score aseq = aseqs.get("Foobar");\r
127                         assertNotNull(aseq);\r
128                         assertNotNull(aseq.getScores());\r
129                         // System.out.println(aseq);\r
130                         assertEquals(aseq.getScores().size(), aseq.getScores().size());\r
131                         fio.close();\r
132                 } catch (FileNotFoundException e) {\r
133                         e.printStackTrace();\r
134                         fail(e.getLocalizedMessage());\r
135                 } catch (IOException e) {\r
136                         e.printStackTrace();\r
137                         fail(e.getLocalizedMessage());\r
138                 } catch (UnknownFileFormatException e) {\r
139                         e.printStackTrace();\r
140                         fail(e.getLocalizedMessage());\r
141                 }\r
142 \r
143         }\r
144 \r
145         enum Trial {\r
146                 one, two, three\r
147         };\r
148 \r
149         /**\r
150          * This test tests the loading of horizontally formatted Jronn output file\r
151          * \r
152          * First seq \r
153          * \r
154          * M    0.86010 0.88512 0.37094\r
155          * \r
156          * T    0.79983 0.85864 0.44331\r
157          * \r
158          */\r
159         @SuppressWarnings("unchecked")\r
160         @Test\r
161         public void testReadDisemblResults() {\r
162 \r
163                 FileInputStream fio;\r
164                 try {\r
165                         fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH\r
166                                         + "scores.out");\r
167                         Map<FastaSequence, Set<Score>> aseqs = SequenceUtil\r
168                                         .readDisembl(fio);\r
169                         assertNotNull(aseqs);\r
170                         assertEquals(aseqs.size(), 3); \r
171                         System.out.println(aseqs);\r
172                         for(FastaSequence fs: aseqs.keySet()) {\r
173                                 assertTrue(" Foobar_dundeefriends Foobar dundeefriends ".contains(fs.getId()));\r
174                                 Set<Score> scores = aseqs.get(fs); \r
175                                 assertEquals(scores.size(),3);\r
176                         }\r
177                           fio.close();\r
178                 } catch (FileNotFoundException e) {\r
179                         e.printStackTrace();\r
180                         fail(e.getLocalizedMessage());\r
181                 } catch (IOException e) {\r
182                         e.printStackTrace();\r
183                         fail(e.getLocalizedMessage());\r
184                 } catch (UnknownFileFormatException e) {\r
185                         e.printStackTrace();\r
186                         fail(e.getLocalizedMessage());\r
187                 }\r
188         }\r
189 \r
190         @Test\r
191         public void testReadAAConResults() {\r
192                 try {\r
193                         InputStream inStream = new FileInputStream(\r
194                                         AllTestSuit.TEST_DATA_PATH + "aacon_results.txt");\r
195                         HashSet<Score> result = SequenceUtil.readAAConResults(inStream);\r
196                         inStream.close();\r
197                         assertNotNull(result);\r
198                         assertEquals(result.size(), 18);\r
199 \r
200                         inStream = new FileInputStream(AllTestSuit.TEST_DATA_PATH\r
201                                         + "aacon_result_single.out");\r
202                         result = SequenceUtil.readAAConResults(inStream);\r
203                         inStream.close();\r
204                         assertNotNull(result);\r
205                         assertEquals(result.size(), 1);\r
206                         assertEquals(result.iterator().next().getScores().size(), 568);\r
207                 } catch (IOException e) {\r
208                         e.printStackTrace();\r
209                         fail(e.getMessage());\r
210                 }\r
211         }\r
212 }\r