/* * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services * (JABAWS) @version: 1.0 This library is free software; you can redistribute it * and/or modify it under the terms of the Apache License version 2 as published * by the Apache Software Foundation This library is distributed in the hope * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * Apache License for more details. A copy of the license is in * apache_license.txt. It is also available here: * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or * derived work distributed in source code form must include this copyright and * license notice. */ package compbio.data.sequence; import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertFalse; import static org.testng.AssertJUnit.assertNotNull; import static org.testng.AssertJUnit.assertTrue; import static org.testng.AssertJUnit.fail; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.util.List; import org.testng.annotations.Test; import compbio.metadata.AllTestSuit; public class SequenceUtilTester { @Test() public void testisNonAmbNucleotideSequence() { String dnaseq = "atgatTGACGCTGCTGatgtcgtgagtgga"; assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dnaseq)); String dirtyDnaseq = "atgAGTggt\taGGTgc\ncgcACTgc gACtcgcGAt cgA "; assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dirtyDnaseq)); String nonDna = "atgfctgatgcatgcatgatgctga"; assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna)); nonDna = "atgc1tgatgcatgcatgatgctga"; assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna)); nonDna = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL"; assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna)); // String ambDna = "AGTCRYMKSWHBVDN"; // see IUPAC Nucleotide Code assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna)); } @Test() public void testCleanSequence() { String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA "; assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(), SequenceUtil.cleanSequence(dirtySeq)); } @Test() public void testDeepCleanSequence() { String dirtySeq = "a!t?g.A;GTggt\ta12GGTgc\ncgc23AC\rTgc gAC<>.,?!|\\|/t@cg-c¬GA=_+(0){]}[:£$&^*\"t cgA "; assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(), SequenceUtil.deepCleanSequence(dirtySeq)); } @Test() public void testisProteinSequence() { String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA "; assertFalse(SequenceUtil.isProteinSequence(dirtySeq)); String notaSeq = "atgc1tgatgcatgcatgatgctga"; assertFalse(SequenceUtil.isProteinSequence(notaSeq)); String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL"; assertTrue(SequenceUtil.isProteinSequence(AAseq)); AAseq += "XU"; assertFalse(SequenceUtil.isProteinSequence(AAseq)); } @Test() public void testReadWriteFasta() { try { FileInputStream fio = new FileInputStream( AllTestSuit.TEST_DATA_PATH + "TO1381.fasta"); assertNotNull(fio); List fseqs = SequenceUtil.readFasta(fio); assertNotNull(fseqs); assertEquals(3, fseqs.size()); assertEquals(3, fseqs.size()); fio.close(); FileOutputStream fou = new FileOutputStream( AllTestSuit.TEST_DATA_PATH + "TO1381.fasta.written"); SequenceUtil.writeFasta(fou, fseqs); fou.close(); FileOutputStream fou20 = new FileOutputStream( AllTestSuit.TEST_DATA_PATH + "TO1381.fasta20.written"); SequenceUtil.writeFasta(fou20, fseqs, 21); fou20.close(); } catch (FileNotFoundException e) { e.printStackTrace(); fail(e.getLocalizedMessage()); } catch (IOException e) { e.printStackTrace(); fail(e.getLocalizedMessage()); } } /** * This test tests the loading of horizontally formatted Jronn output file */ @Test public void loadJronnFile() { FileInputStream fio; try { fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "jronn.out"); List aseqs = SequenceUtil.readJRonn(fio); assertNotNull(aseqs); assertEquals(aseqs.size(), 3); AnnotatedSequence aseq = aseqs.get(0); assertNotNull(aseq); assertNotNull(aseq.getAnnotation()); // System.out.println(aseq); assertEquals(aseq.getAnnotation().length, aseq.getSequence() .length()); fio.close(); } catch (FileNotFoundException e) { e.printStackTrace(); fail(e.getLocalizedMessage()); } catch (IOException e) { e.printStackTrace(); fail(e.getLocalizedMessage()); } catch (UnknownFileFormatException e) { e.printStackTrace(); fail(e.getLocalizedMessage()); } } enum Trial { one, two, three }; /** * This test tests the loading of horizontally formatted Jronn output file */ @SuppressWarnings("unchecked") @Test public void testMultiAnnotatedSequence() { FileInputStream fio; try { fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "disembl.out"); List> aseqs = SequenceUtil .readDisembl(fio); assertNotNull(aseqs); /* * MultiAnnotatedSequence ma = new MultiAnnotatedSequence(); * Map> val = ma.getInstance(Trial.class); * List list = new ArrayList(); list.add(new * Float(1.2)); list.add(new Double(5.662)); val.put(Trial.one, * list); val.put(Trial.two, Arrays.asList(6.22f, 1, 37.6f)); * System.out.println(val); AnnotatedSequence aseq = aseqs.get(0); */ fio.close(); } catch (FileNotFoundException e) { e.printStackTrace(); fail(e.getLocalizedMessage()); } catch (IOException e) { e.printStackTrace(); fail(e.getLocalizedMessage()); } catch (UnknownFileFormatException e) { e.printStackTrace(); fail(e.getLocalizedMessage()); } } }