Adding AAConWS
[jabaws.git] / testsrc / compbio / data / sequence / SequenceUtilTester.java
1 /*\r
2  * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services\r
3  * (JABAWS) @version: 1.0 This library is free software; you can redistribute it\r
4  * and/or modify it under the terms of the Apache License version 2 as published\r
5  * by the Apache Software Foundation This library is distributed in the hope\r
6  * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied\r
7  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
8  * Apache License for more details. A copy of the license is in\r
9  * apache_license.txt. It is also available here:\r
10  * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or\r
11  * derived work distributed in source code form must include this copyright and\r
12  * license notice.\r
13  */\r
14 package compbio.data.sequence;\r
15 \r
16 import static org.testng.AssertJUnit.assertEquals;\r
17 import static org.testng.AssertJUnit.assertFalse;\r
18 import static org.testng.AssertJUnit.assertNotNull;\r
19 import static org.testng.AssertJUnit.assertTrue;\r
20 import static org.testng.AssertJUnit.fail;\r
21 \r
22 import java.io.FileInputStream;\r
23 import java.io.FileNotFoundException;\r
24 import java.io.FileOutputStream;\r
25 import java.io.IOException;\r
26 import java.util.List;\r
27 \r
28 import org.testng.annotations.Test;\r
29 \r
30 import compbio.metadata.AllTestSuit;\r
31 \r
32 public class SequenceUtilTester {\r
33 \r
34         @Test()\r
35         public void testisNonAmbNucleotideSequence() {\r
36                 String dnaseq = "atgatTGACGCTGCTGatgtcgtgagtgga";\r
37                 assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dnaseq));\r
38                 String dirtyDnaseq = "atgAGTggt\taGGTgc\ncgcACTgc gACtcgcGAt cgA ";\r
39                 assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dirtyDnaseq));\r
40                 String nonDna = "atgfctgatgcatgcatgatgctga";\r
41                 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
42 \r
43                 nonDna = "atgc1tgatgcatgcatgatgctga";\r
44                 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
45 \r
46                 nonDna = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
47                 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
48                 // String ambDna = "AGTCRYMKSWHBVDN"; // see IUPAC Nucleotide Code\r
49                 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
50 \r
51         }\r
52 \r
53         @Test()\r
54         public void testCleanSequence() {\r
55                 String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
56                 assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),\r
57                                 SequenceUtil.cleanSequence(dirtySeq));\r
58         }\r
59 \r
60         @Test()\r
61         public void testDeepCleanSequence() {\r
62                 String dirtySeq = "a!t?g.A;GTggt\ta12GGTgc\ncgc23AC\rTgc gAC<>.,?!|\\|/t@cg-c¬GA=_+(0){]}[:£$&^*\"t cgA ";\r
63                 assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),\r
64                                 SequenceUtil.deepCleanSequence(dirtySeq));\r
65         }\r
66 \r
67         @Test()\r
68         public void testisProteinSequence() {\r
69                 String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
70                 assertFalse(SequenceUtil.isProteinSequence(dirtySeq));\r
71                 String notaSeq = "atgc1tgatgcatgcatgatgctga";\r
72                 assertFalse(SequenceUtil.isProteinSequence(notaSeq));\r
73                 String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
74                 assertTrue(SequenceUtil.isProteinSequence(AAseq));\r
75                 AAseq += "XU";\r
76                 assertFalse(SequenceUtil.isProteinSequence(AAseq));\r
77 \r
78         }\r
79 \r
80         @Test()\r
81         public void testReadWriteFasta() {\r
82 \r
83                 try {\r
84                         FileInputStream fio = new FileInputStream(\r
85                                         AllTestSuit.TEST_DATA_PATH + "TO1381.fasta");\r
86                         assertNotNull(fio);\r
87                         List<FastaSequence> fseqs = SequenceUtil.readFasta(fio);\r
88                         assertNotNull(fseqs);\r
89                         assertEquals(3, fseqs.size());\r
90                         assertEquals(3, fseqs.size());\r
91                         fio.close();\r
92                         FileOutputStream fou = new FileOutputStream(\r
93                                         AllTestSuit.TEST_DATA_PATH + "TO1381.fasta.written");\r
94                         SequenceUtil.writeFasta(fou, fseqs);\r
95                         fou.close();\r
96                         FileOutputStream fou20 = new FileOutputStream(\r
97                                         AllTestSuit.TEST_DATA_PATH + "TO1381.fasta20.written");\r
98                         SequenceUtil.writeFasta(fou20, fseqs, 21);\r
99                         fou20.close();\r
100 \r
101                 } catch (FileNotFoundException e) {\r
102                         e.printStackTrace();\r
103                         fail(e.getLocalizedMessage());\r
104                 } catch (IOException e) {\r
105                         e.printStackTrace();\r
106                         fail(e.getLocalizedMessage());\r
107                 }\r
108         }\r
109 \r
110         /**\r
111          * This test tests the loading of horizontally formatted Jronn output file\r
112          */\r
113         @Test\r
114         public void loadJronnFile() {\r
115 \r
116                 FileInputStream fio;\r
117                 try {\r
118                         fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "jronn.out");\r
119                         List<AnnotatedSequence> aseqs = SequenceUtil.readJRonn(fio);\r
120                         assertNotNull(aseqs);\r
121                         assertEquals(aseqs.size(), 3);\r
122                         AnnotatedSequence aseq = aseqs.get(0);\r
123                         assertNotNull(aseq);\r
124                         assertNotNull(aseq.getAnnotation());\r
125                         // System.out.println(aseq);\r
126                         assertEquals(aseq.getAnnotation().length, aseq.getSequence()\r
127                                         .length());\r
128                         fio.close();\r
129                 } catch (FileNotFoundException e) {\r
130                         e.printStackTrace();\r
131                         fail(e.getLocalizedMessage());\r
132                 } catch (IOException e) {\r
133                         e.printStackTrace();\r
134                         fail(e.getLocalizedMessage());\r
135                 } catch (UnknownFileFormatException e) {\r
136                         e.printStackTrace();\r
137                         fail(e.getLocalizedMessage());\r
138                 }\r
139 \r
140         }\r
141 \r
142         enum Trial {\r
143                 one, two, three\r
144         };\r
145 \r
146         /**\r
147          * This test tests the loading of horizontally formatted Jronn output file\r
148          */\r
149         @SuppressWarnings("unchecked")\r
150         @Test\r
151         public void testMultiAnnotatedSequence() {\r
152 \r
153                 FileInputStream fio;\r
154                 try {\r
155                         fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH\r
156                                         + "disembl.out");\r
157                         List<MultiAnnotatedSequence<DisemblResultAnnot>> aseqs = SequenceUtil\r
158                                         .readDisembl(fio);\r
159                         assertNotNull(aseqs);\r
160 \r
161                         /*\r
162                          * MultiAnnotatedSequence ma = new MultiAnnotatedSequence();\r
163                          * Map<Trial, List<Number>> val = ma.getInstance(Trial.class);\r
164                          * List<Number> list = new ArrayList<Number>(); list.add(new\r
165                          * Float(1.2)); list.add(new Double(5.662)); val.put(Trial.one,\r
166                          * list); val.put(Trial.two, Arrays.asList(6.22f, 1, 37.6f));\r
167                          * System.out.println(val); AnnotatedSequence aseq = aseqs.get(0);\r
168                          */\r
169                         fio.close();\r
170                 } catch (FileNotFoundException e) {\r
171                         e.printStackTrace();\r
172                         fail(e.getLocalizedMessage());\r
173                 } catch (IOException e) {\r
174                         e.printStackTrace();\r
175                         fail(e.getLocalizedMessage());\r
176                 } catch (UnknownFileFormatException e) {\r
177                         e.printStackTrace();\r
178                         fail(e.getLocalizedMessage());\r
179                 }\r
180 \r
181         }\r
182 }\r