2 * Copyright (c) 2009 Peter Troshin Java Bioinformatics Analysis Web Services
\r
3 * (JABAWS) @version: 1.0 This library is free software; you can redistribute it
\r
4 * and/or modify it under the terms of the Apache License version 2 as published
\r
5 * by the Apache Software Foundation. This library is distributed in the hope
\r
6 * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
\r
7 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
8 * Apache License for more details. A copy of the license is in
\r
9 * apache_license.txt. It is also available here:
\r
10 * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or
\r
11 * derived work distributed in source code form must include this copyright and
\r
14 package compbio.data.sequence;
\r
16 import static org.testng.AssertJUnit.assertEquals;
\r
17 import static org.testng.AssertJUnit.assertFalse;
\r
18 import static org.testng.AssertJUnit.assertNotNull;
\r
19 import static org.testng.AssertJUnit.assertTrue;
\r
20 import static org.testng.AssertJUnit.fail;
\r
22 import java.io.FileInputStream;
\r
23 import java.io.FileNotFoundException;
\r
24 import java.io.FileOutputStream;
\r
25 import java.io.IOException;
\r
26 import java.io.InputStream;
\r
27 import java.util.List;
\r
29 import org.testng.annotations.Test;
\r
31 import compbio.metadata.AllTestSuit;
\r
33 public class SequenceUtilTester {
\r
36 public void testisNonAmbNucleotideSequence() {
\r
37 String dnaseq = "atgatTGACGCTGCTGatgtcgtgagtgga";
\r
38 assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dnaseq));
\r
39 String dirtyDnaseq = "atgAGTggt\taGGTgc\ncgcACTgc gACtcgcGAt cgA ";
\r
40 assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dirtyDnaseq));
\r
41 String nonDna = "atgfctgatgcatgcatgatgctga";
\r
42 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));
\r
44 nonDna = "atgc1tgatgcatgcatgatgctga";
\r
45 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));
\r
47 nonDna = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";
\r
48 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));
\r
49 // String ambDna = "AGTCRYMKSWHBVDN"; // see IUPAC Nucleotide Code
\r
50 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));
\r
55 public void testCleanSequence() {
\r
56 String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";
\r
57 assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),
\r
58 SequenceUtil.cleanSequence(dirtySeq));
\r
62 public void testDeepCleanSequence() {
\r
63 String dirtySeq = "a!t?g.A;GTggt\ta12GGTgc\ncgc23AC\rTgc gAC<>.,?!|\\|/t@cg-c¬GA=_+(0){]}[:£$&^*\"t cgA ";
\r
64 assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),
\r
65 SequenceUtil.deepCleanSequence(dirtySeq));
\r
// Exercises SequenceUtil.isProteinSequence with a whitespace-littered
// nucleotide string, a string containing a digit, and an amino-acid string.
69 public void testisProteinSequence() {
\r
70 String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";
\r
// The nucleotide string is asserted NOT to be a protein sequence.
71 assertFalse(SequenceUtil.isProteinSequence(dirtySeq));
\r
72 String notaSeq = "atgc1tgatgcatgcatgatgctga";
\r
// The string containing the digit '1' is asserted NOT to be a protein sequence.
73 assertFalse(SequenceUtil.isProteinSequence(notaSeq));
\r
74 String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";
\r
75 assertTrue(SequenceUtil.isProteinSequence(AAseq));
\r
// NOTE(review): as visible here, AAseq is asserted true above and false below
// without any change in between. A statement modifying AAseq appears to be
// missing from this view (embedded numbering jumps from 75 to 77) - confirm
// against the full file before touching this assertion.
77 assertFalse(SequenceUtil.isProteinSequence(AAseq));
\r
82 public void testReadWriteFasta() {
\r
85 FileInputStream fio = new FileInputStream(
\r
86 AllTestSuit.TEST_DATA_PATH + "TO1381.fasta");
\r
88 List<FastaSequence> fseqs = SequenceUtil.readFasta(fio);
\r
89 assertNotNull(fseqs);
\r
90 assertEquals(3, fseqs.size());
\r
91 assertEquals(3, fseqs.size());
\r
93 FileOutputStream fou = new FileOutputStream(
\r
94 AllTestSuit.TEST_DATA_PATH + "TO1381.fasta.written");
\r
95 SequenceUtil.writeFasta(fou, fseqs);
\r
97 FileOutputStream fou20 = new FileOutputStream(
\r
98 AllTestSuit.TEST_DATA_PATH + "TO1381.fasta20.written");
\r
99 SequenceUtil.writeFasta(fou20, fseqs, 21);
\r
102 } catch (FileNotFoundException e) {
\r
103 e.printStackTrace();
\r
104 fail(e.getLocalizedMessage());
\r
105 } catch (IOException e) {
\r
106 e.printStackTrace();
\r
107 fail(e.getLocalizedMessage());
\r
112 * Tests the loading of a horizontally formatted Jronn output file
\r
// Opens "jronn.out" from AllTestSuit.TEST_DATA_PATH, parses it with
// SequenceUtil.readJRonn and checks the parsed list and its first element.
// FileNotFoundException, IOException and UnknownFileFormatException each
// print their stack trace and fail the test.
115 public void loadJronnFile() {
\r
// NOTE(review): fio is not closed anywhere in the visible lines - confirm
// against the full file.
117 FileInputStream fio;
\r
119 fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "jronn.out");
\r
120 List<AnnotatedSequence> aseqs = SequenceUtil.readJRonn(fio);
\r
121 assertNotNull(aseqs);
\r
// NOTE(review): AssertJUnit.assertEquals takes (expected, actual); the
// arguments here are in the opposite order, which only affects the wording of
// a failure message.
122 assertEquals(aseqs.size(), 3);
\r
123 AnnotatedSequence aseq = aseqs.get(0);
\r
124 assertNotNull(aseq);
\r
125 assertNotNull(aseq.getAnnotation());
\r
126 // System.out.println(aseq);
\r
// NOTE(review): this statement is cut off in this view (embedded numbering
// jumps from 127 to 130). It compares the annotation array length with
// something derived from aseq.getSequence() - confirm the rest of the
// expression against the full file.
127 assertEquals(aseq.getAnnotation().length, aseq.getSequence()
\r
130 } catch (FileNotFoundException e) {
\r
131 e.printStackTrace();
\r
132 fail(e.getLocalizedMessage());
\r
133 } catch (IOException e) {
\r
134 e.printStackTrace();
\r
135 fail(e.getLocalizedMessage());
\r
136 } catch (UnknownFileFormatException e) {
\r
137 e.printStackTrace();
\r
138 fail(e.getLocalizedMessage());
\r
148 * This test tests the loading of horizontally formatted Jronn output file
\r
// Opens a file under AllTestSuit.TEST_DATA_PATH, obtains a list of
// MultiAnnotatedSequence<DisemblResultAnnot> from SequenceUtil and asserts the
// list is not null. FileNotFoundException, IOException and
// UnknownFileFormatException each print their stack trace and fail the test.
// NOTE(review): the description preceding this method is the same text used
// for loadJronnFile and mentions Jronn output, while the declared result type
// here is DisemblResultAnnot - confirm which format this test loads.
150 @SuppressWarnings("unchecked")
\r
152 public void testMultiAnnotatedSequence() {
\r
154 FileInputStream fio;
\r
// NOTE(review): the file name (continuation of the next statement) and the
// SequenceUtil method being called are not visible in this view (embedded
// numbering skips 157 and 159).
156 fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH
\r
158 List<MultiAnnotatedSequence<DisemblResultAnnot>> aseqs = SequenceUtil
\r
160 assertNotNull(aseqs);
\r
// The lines numbered 163-168 below are a disabled experiment kept as comment text.
163 * MultiAnnotatedSequence ma = new MultiAnnotatedSequence();
\r
164 * Map<Trial, List<Number>> val = ma.getInstance(Trial.class);
\r
165 * List<Number> list = new ArrayList<Number>(); list.add(new
\r
166 * Float(1.2)); list.add(new Double(5.662)); val.put(Trial.one,
\r
167 * list); val.put(Trial.two, Arrays.asList(6.22f, 1, 37.6f));
\r
168 * System.out.println(val); AnnotatedSequence aseq = aseqs.get(0);
\r
171 } catch (FileNotFoundException e) {
\r
172 e.printStackTrace();
\r
173 fail(e.getLocalizedMessage());
\r
174 } catch (IOException e) {
\r
175 e.printStackTrace();
\r
176 fail(e.getLocalizedMessage());
\r
177 } catch (UnknownFileFormatException e) {
\r
178 e.printStackTrace();
\r
179 fail(e.getLocalizedMessage());
\r
// Opens "aacon_results.txt" from AllTestSuit.TEST_DATA_PATH and prints whatever
// SequenceUtil.readResults returns for it to standard output.
// NOTE(review): no assertion and no close() of inStream appear in the visible
// lines; the end of this method is not in view - confirm against the full file.
184 public void testReadResults() throws FileNotFoundException {
\r
185 InputStream inStream = new FileInputStream(AllTestSuit.TEST_DATA_PATH
\r
186 + "aacon_results.txt");
\r
187 System.out.println(SequenceUtil.readResults(inStream));
\r