1 /* Copyright (c) 2009 Peter Troshin
\r
3 * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0
\r
5 * This library is free software; you can redistribute it and/or modify it under the terms of the
\r
6 * Apache License version 2 as published by the Apache Software Foundation
\r
8 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
\r
9 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache
\r
10 * License for more details.
\r
12 * A copy of the license is in apache_license.txt. It is also available here:
\r
13 * @see: http://www.apache.org/licenses/LICENSE-2.0.txt
\r
15 * Any republication or derived work distributed in source code form
\r
16 * must include this copyright and license notice.
\r
19 package compbio.data.sequence;
\r
21 import static org.testng.AssertJUnit.assertEquals;
\r
22 import static org.testng.AssertJUnit.assertFalse;
\r
23 import static org.testng.AssertJUnit.assertNotNull;
\r
24 import static org.testng.AssertJUnit.assertTrue;
\r
25 import static org.testng.AssertJUnit.fail;
\r
27 import java.io.FileInputStream;
\r
28 import java.io.FileNotFoundException;
\r
29 import java.io.FileOutputStream;
\r
30 import java.io.IOException;
\r
31 import java.util.List;
\r
33 import org.testng.annotations.Test;
\r
35 import compbio.metadata.AllTestSuit;
\r
37 public class SequenceUtilTester {
\r
40 public void testisNonAmbNucleotideSequence() {
\r
41 String dnaseq = "atgatTGACGCTGCTGatgtcgtgagtgga";
\r
42 assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dnaseq));
\r
43 String dirtyDnaseq = "atgAGTggt\taGGTgc\ncgcACTgc gACtcgcGAt cgA ";
\r
44 assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dirtyDnaseq));
\r
45 String nonDna = "atgfctgatgcatgcatgatgctga";
\r
46 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));
\r
48 nonDna = "atgc1tgatgcatgcatgatgctga";
\r
49 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));
\r
51 nonDna = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";
\r
52 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));
\r
53 // String ambDna = "AGTCRYMKSWHBVDN"; // see IUPAC Nucleotide Code
\r
54 assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));
\r
59 public void testCleanSequence() {
\r
60 String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";
\r
61 assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),
\r
62 SequenceUtil.cleanSequence(dirtySeq));
\r
66 public void testDeepCleanSequence() {
\r
67 String dirtySeq = "a!t?g.A;GTggt\ta12GGTgc\ncgc23AC\rTgc gAC<>.,?!|\\|/t@cg-c¬GA=_+(0){]}[:£$&^*\"t cgA ";
\r
68 assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),
\r
69 SequenceUtil.deepCleanSequence(dirtySeq));
\r
73 public void testisProteinSequence() {
\r
74 String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";
\r
75 assertFalse(SequenceUtil.isProteinSequence(dirtySeq));
\r
76 String notaSeq = "atgc1tgatgcatgcatgatgctga";
\r
77 assertFalse(SequenceUtil.isProteinSequence(notaSeq));
\r
78 String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";
\r
79 assertTrue(SequenceUtil.isProteinSequence(AAseq));
\r
81 assertFalse(SequenceUtil.isProteinSequence(AAseq));
\r
86 public void testReadWriteFasta() {
\r
89 FileInputStream fio = new FileInputStream(
\r
90 AllTestSuit.TEST_DATA_PATH + "TO1381.fasta");
\r
92 List<FastaSequence> fseqs = SequenceUtil.readFasta(fio);
\r
93 assertNotNull(fseqs);
\r
94 assertEquals(3, fseqs.size());
\r
95 assertEquals(3, fseqs.size());
\r
97 FileOutputStream fou = new FileOutputStream(
\r
98 AllTestSuit.TEST_DATA_PATH + "TO1381.fasta.written");
\r
99 SequenceUtil.writeFasta(fou, fseqs);
\r
101 FileOutputStream fou20 = new FileOutputStream(
\r
102 AllTestSuit.TEST_DATA_PATH + "TO1381.fasta20.written");
\r
103 SequenceUtil.writeFasta(fou20, fseqs, 20);
\r
106 } catch (FileNotFoundException e) {
\r
107 e.printStackTrace();
\r
108 fail(e.getLocalizedMessage());
\r
109 } catch (IOException e) {
\r
110 e.printStackTrace();
\r
111 fail(e.getLocalizedMessage());
\r