X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=testsrc%2Fcompbio%2Fdata%2Fsequence%2FSequenceUtilTester.java;h=9468cdad20a87fcfbb8d098c6495e0cd24d5e8f9;hb=4b647613e686c4edffd44702d86662c6facb2d5a;hp=a4bf4c90c992d89e43a3e65c339867028abfe9e6;hpb=a79900255139dc7bcab23320de2a3630f6531107;p=jabaws.git diff --git a/testsrc/compbio/data/sequence/SequenceUtilTester.java b/testsrc/compbio/data/sequence/SequenceUtilTester.java index a4bf4c9..9468cda 100644 --- a/testsrc/compbio/data/sequence/SequenceUtilTester.java +++ b/testsrc/compbio/data/sequence/SequenceUtilTester.java @@ -1,15 +1,21 @@ -/* - * Copyright (c) 2009 Peter Troshin JAva Bioinformatics Analysis Web Services - * (JABAWS) @version: 1.0 This library is free software; you can redistribute it - * and/or modify it under the terms of the Apache License version 2 as published +/* Copyright (c) 2009 Peter Troshin + * Copyright (c) 2013 Alexander Sherstnev + * + * JAva Bioinformatics Analysis Web Services (JABAWS) + * @version: 2.5 + * + * This library is free software; you can redistribute it and/or modify it under + * the terms of the Apache License version 2 as published * by the Apache Software Foundation This library is distributed in the hope * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * Apache License for more details. A copy of the license is in * apache_license.txt. It is also available here: - * @see: http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or - * derived work distributed in source code form must include this copyright and - * license notice. + * + * @see: http://www.apache.org/licenses/LICENSE-2.0.txt + * + * Any republication or derived work distributed in source code form must include + * this copyright and license notice. */ package compbio.data.sequence; @@ -19,11 +25,13 @@ import static org.testng.AssertJUnit.assertNotNull; import static org.testng.AssertJUnit.assertTrue; import static org.testng.AssertJUnit.fail; +import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.PrintWriter; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -37,7 +45,7 @@ import compbio.metadata.AllTestSuit; public class SequenceUtilTester { @Test() - public void testisNonAmbNucleotideSequence() { + public void isNonAmbNucleotideSequence() { String dnaseq = "atgatTGACGCTGCTGatgtcgtgagtgga"; assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dnaseq)); String dirtyDnaseq = "atgAGTggt\taGGTgc\ncgcACTgc gACtcgcGAt cgA "; @@ -52,25 +60,22 @@ public class SequenceUtilTester { assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna)); // String ambDna = "AGTCRYMKSWHBVDN"; // see IUPAC Nucleotide Code assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna)); - } @Test() - public void testCleanSequence() { + public void CleanSequence() { String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA "; - assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(), - SequenceUtil.cleanSequence(dirtySeq)); + assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),SequenceUtil.cleanSequence(dirtySeq)); } @Test() - public void testDeepCleanSequence() { + public void DeepCleanSequence() { String dirtySeq = "a!t?g.A;GTggt\ta12GGTgc\ncgc23AC\rTgc gAC<>.,?!|\\|/t@cg-c¬GA=_+(0){]}[:£$&^*\"t cgA "; - assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(), - SequenceUtil.deepCleanSequence(dirtySeq)); + assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),SequenceUtil.deepCleanSequence(dirtySeq)); } @Test() - public void testisProteinSequence() { + public void isProteinSequence() { String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA "; assertFalse(SequenceUtil.isProteinSequence(dirtySeq)); String notaSeq = "atgc1tgatgcatgcatgatgctga"; @@ -83,26 +88,69 @@ public class SequenceUtilTester { } @Test() - public void testReadWriteFasta() { + public void CleanProteinSequence() { + String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA "; + assertFalse(SequenceUtil.isProteinSequence(dirtySeq)); + // This will still be NON protein sequence despite having only correct + // letters because the letters match perfectly the nucleotide sequence! + assertFalse(SequenceUtil.isProteinSequence(SequenceUtil.cleanProteinSequence(dirtySeq))); + + String notaSeq = "atgc1tgatgcatgcatgatgmctga"; + assertFalse(SequenceUtil.isProteinSequence(notaSeq)); + assertTrue(SequenceUtil.isProteinSequence(SequenceUtil.cleanProteinSequence(notaSeq))); + + String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL"; + assertTrue(SequenceUtil.isProteinSequence(AAseq)); + assertTrue(SequenceUtil.isProteinSequence(SequenceUtil.cleanProteinSequence(AAseq))); + AAseq += "XU"; + + assertFalse(SequenceUtil.isProteinSequence(AAseq)); + assertTrue(SequenceUtil.isProteinSequence(SequenceUtil.cleanProteinSequence(AAseq))); + } + @Test() + public void ReadWriteFasta() { try { - FileInputStream fio = new FileInputStream( - AllTestSuit.TEST_DATA_PATH + "TO1381.fasta"); + FileInputStream fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "TO1381.fasta"); assertNotNull(fio); List fseqs = SequenceUtil.readFasta(fio); assertNotNull(fseqs); assertEquals(3, fseqs.size()); assertEquals(3, fseqs.size()); fio.close(); - FileOutputStream fou = new FileOutputStream( - AllTestSuit.TEST_DATA_PATH + "TO1381.fasta.written"); + FileOutputStream fou = new FileOutputStream(AllTestSuit.TEST_DATA_PATH + "TO1381.fasta.written"); SequenceUtil.writeFasta(fou, fseqs); fou.close(); - FileOutputStream fou20 = new FileOutputStream( - AllTestSuit.TEST_DATA_PATH + "TO1381.fasta20.written"); + FileOutputStream fou20 = new FileOutputStream(AllTestSuit.TEST_DATA_PATH + "TO1381.fasta20.written"); SequenceUtil.writeFasta(fou20, fseqs, 21); fou20.close(); + } catch (FileNotFoundException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } catch (IOException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } + } + // Potential Bug :- Sequence names are shortened to 2-3 letters + @Test + public void testReadFastaWriteClustal() { + + try { + FileInputStream fio = new FileInputStream( + AllTestSuit.TEST_DATA_PATH + "TO1381.fasta"); + assertNotNull(fio); + List fseqs = SequenceUtil.readFasta(fio); + assertNotNull(fseqs); + fio.close(); + + char gapChar = '-'; + FileOutputStream fou = new FileOutputStream( + AllTestSuit.TEST_DATA_PATH + "TO1381.aln.written"); + SequenceUtil.writeClustal(fou, fseqs, gapChar); + fou.close(); + } catch (FileNotFoundException e) { e.printStackTrace(); fail(e.getLocalizedMessage()); @@ -116,7 +164,7 @@ public class SequenceUtilTester { * This test tests the loading of horizontally formatted Jronn output file */ @Test - public void loadJronnFile() { + public void LoadJronnFile() { FileInputStream fio; try { @@ -127,7 +175,6 @@ public class SequenceUtilTester { Score aseq = aseqs.get("Foobar"); assertNotNull(aseq); assertNotNull(aseq.getScores()); - // System.out.println(aseq); assertEquals(aseq.getScores().size(), aseq.getScores().size()); fio.close(); } catch (FileNotFoundException e) { @@ -140,7 +187,6 @@ public class SequenceUtilTester { e.printStackTrace(); fail(e.getLocalizedMessage()); } - } enum Trial { @@ -159,22 +205,50 @@ public class SequenceUtilTester { */ @SuppressWarnings("unchecked") @Test - public void testReadDisemblResults() { - + public void ReadDisemblResults() { + Map>> _ranges=new HashMap>>(); + Map> ranges=new HashMap>(); + Map> _values=new HashMap>(); + Map values = new HashMap(); + Set rset; + rset = new HashSet(); + for (String[] se:new String[][] { { "34","41"},{"50","58"},{"83","91"},{"118","127"},{" 160","169"},{" 191","220"},{" 243","252"},{" 287","343"},{" 350","391"},{" 429","485"},{" 497","506"},{"539","547"}}) { + rset.add(new Range(se)); + } + ranges.put(DisemblResult.COILS.toString(), rset); + values.put(DisemblResult.COILS.toString(), Float.valueOf(0.86010f)); + rset = new HashSet(); + for (String[] se:new String[][] { { "355","368"}}) { + rset.add(new Range(se)); + } + ranges.put(DisemblResult.REM465.toString(), rset); + values.put(DisemblResult.REM465.toString(), Float.valueOf(0.88512f)); + rset = new HashSet(); + for (String[] se:new String[][] { { "190","204"}}) { + rset.add(new Range(se)); + } + ranges.put(DisemblResult.HOTLOOPS.toString(), rset); + values.put(DisemblResult.HOTLOOPS.toString(), Float.valueOf(0.37094f)); + _ranges.put("Foobar_dundeefriends", ranges); + _values.put("Foobar_dundeefriends", values); FileInputStream fio; try { - fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH - + "disembl.out"); - Map> aseqs = SequenceUtil - .readDisembl(fio); + fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "disembl.out"); + Map> aseqs = SequenceUtil.readDisembl(fio); assertNotNull(aseqs); assertEquals(aseqs.size(), 3); - // System.out.println(aseqs); - for (FastaSequence fs : aseqs.keySet()) { - assertTrue(" Foobar_dundeefriends Foobar dundeefriends " - .contains(fs.getId())); + ScoreManager sman = ScoreManager.newInstance(aseqs); + + for (String fs : aseqs.keySet()) { + assertTrue(" Foobar_dundeefriends Foobar dundeefriends ".contains(fs)); Set scores = aseqs.get(fs); assertEquals(scores.size(), 3); + for (Score sc:scores) { + if (_ranges.containsKey(fs)) { + assertEquals("Checking range for Method "+sc.getMethod(),_ranges.get(fs).get(sc.getMethod()), sc.getRanges()); + assertEquals("Checking first value for Method "+sc.getMethod(), _values.get(fs).get(sc.getMethod()), sc.getScores().get(0)); + } + } } fio.close(); } catch (FileNotFoundException e) { @@ -190,7 +264,7 @@ public class SequenceUtilTester { } /** - * This test tests the loading of horizontally formatted Jronn output file + * This method tests the loading of horizontally formatted Jronn output file * * First sequence: * @@ -212,33 +286,34 @@ public class SequenceUtilTester { */ @SuppressWarnings("unchecked") @Test - public void testReadGlobPlotResults() { + public void ReadGlobPlotResults() { FileInputStream fio; try { - fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH - + "globplot.out"); - HashMap> aseqs = SequenceUtil - .readGlobPlot(fio); + fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "globplot.out"); + HashMap> aseqs = SequenceUtil.readGlobPlot(fio); assertNotNull(aseqs); assertEquals(aseqs.size(), 3); - FastaSequence fsdf = null; + String fsdf = null; Set scores = null; - for (FastaSequence fs : aseqs.keySet()) { - if ("Foobar_dundeefriends".contains(fs.getId())) { + for (String fs : aseqs.keySet()) { + if ("Foobar_dundeefriends".contains(fs)) { fsdf = fs; scores = aseqs.get(fs); } assertEquals(scores.size(), 5); } - for (Score score : scores) { - if (score.getMethod() == (Enum) GlobProtResult.Disorder) { + ScoreManager sm = ScoreManager.newInstanceSingleSequence(scores); + sm.writeOut(new PrintWriter(System.out, true)); + + for (Score score : scores) { + if (score.getMethod().equals(GlobProtResult.Disorder.toString())) { assertEquals(score.getRanges().size(), 7); assertTrue(score.getScores().isEmpty()); } - if (score.getMethod() == (Enum) GlobProtResult.Dydx) { + if (GlobProtResult.valueOf(score.getMethod()) == GlobProtResult.Dydx) { assertFalse(score.getScores().isEmpty()); assertTrue(score.getRanges().isEmpty()); } @@ -257,17 +332,73 @@ public class SequenceUtilTester { } @Test - public void testReadAAConResults() { + public void ReadIUPredForShortAndLongDisorder() { + try { + Map scores = SequenceUtil.readIUPred(new File(AllTestSuit.TEST_DATA_PATH, "out.long")); + ScoreManager man = ScoreManager.newInstanceSingleScore(scores); + assertNotNull(scores); + assertEquals(3, scores.size()); + + Score score = scores.get("Foobar_dundeefriends"); + assertNotNull(score); + assertEquals(0, score.getRanges().size()); + assertEquals(568, score.getScores().size()); + assertEquals("Long", score.getMethod()); + + score = scores.get("Foobar"); + assertNotNull(score); + assertEquals(0, score.getRanges().size()); + assertEquals(481, score.getScores().size()); + assertEquals("Long", score.getMethod()); + + score = scores.get("dundeefriends"); + assertNotNull(score); + assertEquals(0, score.getRanges().size()); + assertEquals(513, score.getScores().size()); + assertEquals("Long", score.getMethod()); + } catch (IOException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } catch (UnknownFileFormatException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } + } + + @Test + public void ReadIUPredForGlobDomain() { + try { + Map scores = SequenceUtil.readIUPred(new File(AllTestSuit.TEST_DATA_PATH, "output.glob")); + assertNotNull(scores); + assertEquals(2, scores.size()); + ScoreManager man = ScoreManager.newInstanceSingleScore(scores); + assertEquals(2, man.getNumberOfSeq()); + Score score = scores.get("P53_HUMA"); + assertNotNull(score); + assertEquals(2, score.getRanges().size()); + assertEquals(0, score.getScores().size()); + assertEquals("Glob", score.getMethod()); + score = scores.get("Foobar_dundeefriends"); + assertEquals(0, score.getRanges().size()); + } catch (IOException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } catch (UnknownFileFormatException e) { + e.printStackTrace(); + fail(e.getLocalizedMessage()); + } + } + + @Test + public void ReadAAConResults() { try { - InputStream inStream = new FileInputStream( - AllTestSuit.TEST_DATA_PATH + "aacon_results.txt"); + InputStream inStream = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "aacon_results.txt"); HashSet result = SequenceUtil.readAAConResults(inStream); inStream.close(); assertNotNull(result); assertEquals(result.size(), 18); - inStream = new FileInputStream(AllTestSuit.TEST_DATA_PATH - + "aacon_result_single.out"); + inStream = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "aacon_result_single.out"); result = SequenceUtil.readAAConResults(inStream); inStream.close(); assertNotNull(result); @@ -278,4 +409,18 @@ public class SequenceUtilTester { fail(e.getMessage()); } } + @Test + public void ReadJpredResults() { + try { + InputStream inStream = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "Jpred.test1.out"); + List result = SequenceUtil.readJpredFile(inStream); + inStream.close(); + assertNotNull(result); + assertEquals(result.size(), 19); + } catch (IOException e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } } +