-/* Copyright (c) 2009 Peter Troshin\r
- * Copyright (c) 2013 Alexander Sherstnev\r
- * \r
- * JAva Bioinformatics Analysis Web Services (JABAWS) \r
- * @version: 2.5\r
- * \r
- * This library is free software; you can redistribute it and/or modify it under \r
- * the terms of the Apache License version 2 as published\r
- * by the Apache Software Foundation This library is distributed in the hope\r
- * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied\r
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
- * Apache License for more details. A copy of the license is in\r
- * apache_license.txt. It is also available here:\r
- * \r
- * @see: http://www.apache.org/licenses/LICENSE-2.0.txt \r
- * \r
- * Any republication or derived work distributed in source code form must include \r
- * this copyright and license notice.\r
- */\r
-package compbio.data.sequence;\r
-\r
-import static org.testng.AssertJUnit.assertEquals;\r
-import static org.testng.AssertJUnit.assertFalse;\r
-import static org.testng.AssertJUnit.assertNotNull;\r
-import static org.testng.AssertJUnit.assertTrue;\r
-import static org.testng.AssertJUnit.fail;\r
-\r
-import java.io.File;\r
-import java.io.FileInputStream;\r
-import java.io.FileNotFoundException;\r
-import java.io.FileOutputStream;\r
-import java.io.IOException;\r
-import java.io.InputStream;\r
-import java.io.PrintWriter;\r
-import java.util.HashMap;\r
-import java.util.HashSet;\r
-import java.util.List;\r
-import java.util.Map;\r
-import java.util.Set;\r
-\r
-import org.testng.annotations.Test;\r
-\r
-import compbio.metadata.AllTestSuit;\r
-\r
-public class SequenceUtilTester {\r
-\r
- @Test()\r
- public void isNonAmbNucleotideSequence() {\r
- String dnaseq = "atgatTGACGCTGCTGatgtcgtgagtgga";\r
- assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dnaseq));\r
- String dirtyDnaseq = "atgAGTggt\taGGTgc\ncgcACTgc gACtcgcGAt cgA ";\r
- assertTrue(SequenceUtil.isNonAmbNucleotideSequence(dirtyDnaseq));\r
- String nonDna = "atgfctgatgcatgcatgatgctga";\r
- assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
-\r
- nonDna = "atgc1tgatgcatgcatgatgctga";\r
- assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
-\r
- nonDna = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
- assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
- // String ambDna = "AGTCRYMKSWHBVDN"; // see IUPAC Nucleotide Code\r
- assertFalse(SequenceUtil.isNonAmbNucleotideSequence(nonDna));\r
- }\r
-\r
- @Test()\r
- public void CleanSequence() {\r
- String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
- assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),SequenceUtil.cleanSequence(dirtySeq));\r
- }\r
-\r
- @Test()\r
- public void DeepCleanSequence() {\r
- String dirtySeq = "a!t?g.A;GTggt\ta12GGTgc\ncgc23AC\rTgc gAC<>.,?!|\\|/t@cg-c¬GA=_+(0){]}[:£$&^*\"t cgA ";\r
- assertEquals("atgAGTggtaGGTgccgcACTgcgACtcgcGAtcgA".toUpperCase(),SequenceUtil.deepCleanSequence(dirtySeq));\r
- }\r
-\r
- @Test()\r
- public void isProteinSequence() {\r
- String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
- assertFalse(SequenceUtil.isProteinSequence(dirtySeq));\r
- String notaSeq = "atgc1tgatgcatgcatgatgctga";\r
- assertFalse(SequenceUtil.isProteinSequence(notaSeq));\r
- String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
- assertTrue(SequenceUtil.isProteinSequence(AAseq));\r
- AAseq += "XU";\r
- assertFalse(SequenceUtil.isProteinSequence(AAseq));\r
-\r
- }\r
-\r
- @Test()\r
- public void CleanProteinSequence() {\r
- String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
- assertFalse(SequenceUtil.isProteinSequence(dirtySeq));\r
- // This will still be NON protein sequence despite having only correct\r
- // letters because the letters match perfectly the nucleotide sequence!\r
- assertFalse(SequenceUtil.isProteinSequence(SequenceUtil.cleanProteinSequence(dirtySeq)));\r
-\r
- String notaSeq = "atgc1tgatgcatgcatgatgmctga";\r
- assertFalse(SequenceUtil.isProteinSequence(notaSeq));\r
- assertTrue(SequenceUtil.isProteinSequence(SequenceUtil.cleanProteinSequence(notaSeq)));\r
-\r
- String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
- assertTrue(SequenceUtil.isProteinSequence(AAseq));\r
- assertTrue(SequenceUtil.isProteinSequence(SequenceUtil.cleanProteinSequence(AAseq)));\r
- AAseq += "XU";\r
-\r
- assertFalse(SequenceUtil.isProteinSequence(AAseq));\r
- assertTrue(SequenceUtil.isProteinSequence(SequenceUtil.cleanProteinSequence(AAseq)));\r
- }\r
-\r
- @Test()\r
- public void ReadWriteFasta() {\r
- try {\r
- FileInputStream fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "TO1381.fasta");\r
- assertNotNull(fio);\r
- List<FastaSequence> fseqs = SequenceUtil.readFasta(fio);\r
- assertNotNull(fseqs);\r
- assertEquals(3, fseqs.size());\r
- assertEquals(3, fseqs.size());\r
- fio.close();\r
- FileOutputStream fou = new FileOutputStream(AllTestSuit.TEST_DATA_PATH + "TO1381.fasta.written");\r
- SequenceUtil.writeFasta(fou, fseqs);\r
- fou.close();\r
- FileOutputStream fou20 = new FileOutputStream(AllTestSuit.TEST_DATA_PATH + "TO1381.fasta20.written");\r
- SequenceUtil.writeFasta(fou20, fseqs, 21);\r
- fou20.close();\r
- } catch (FileNotFoundException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
- } catch (IOException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
- }\r
- }\r
-\r
- // Potential Bug :- Sequence names are shortened to 2-3 letters\r
- @Test\r
- public void testReadFastaWriteClustal() {\r
- \r
- try {\r
- FileInputStream fio = new FileInputStream(\r
- AllTestSuit.TEST_DATA_PATH + "TO1381.fasta");\r
- assertNotNull(fio);\r
- List<FastaSequence> fseqs = SequenceUtil.readFasta(fio);\r
- assertNotNull(fseqs);\r
- fio.close();\r
- \r
- char gapChar = '-';\r
- FileOutputStream fou = new FileOutputStream(\r
- AllTestSuit.TEST_DATA_PATH + "TO1381.aln.written");\r
- SequenceUtil.writeClustal(fou, fseqs, gapChar);\r
- fou.close();\r
- \r
- } catch (FileNotFoundException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
- } catch (IOException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
- }\r
- }\r
-\r
- /**\r
- * This test tests the loading of horizontally formatted Jronn output file\r
- */\r
- @Test\r
- public void LoadJronnFile() {\r
-\r
- FileInputStream fio;\r
- try {\r
- fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "jronn.out");\r
- Map<String, Score> aseqs = SequenceUtil.readJRonn(fio);\r
- assertNotNull(aseqs);\r
- assertEquals(aseqs.size(), 3);\r
- Score aseq = aseqs.get("Foobar");\r
- assertNotNull(aseq);\r
- assertNotNull(aseq.getScores());\r
- assertEquals(aseq.getScores().size(), aseq.getScores().size());\r
- fio.close();\r
- } catch (FileNotFoundException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
- } catch (IOException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
- } catch (UnknownFileFormatException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
- }\r
- }\r
-\r
- enum Trial {\r
- one, two, three\r
- };\r
-\r
- /**\r
- * This test tests the loading of horizontally formatted Jronn output file\r
- * \r
- * First seq\r
- * \r
- * M 0.86010 0.88512 0.37094\r
- * \r
- * T 0.79983 0.85864 0.44331\r
- * \r
- */\r
- @SuppressWarnings("unchecked")\r
- @Test\r
- public void ReadDisemblResults() {\r
- Map<String, Map<String,Set<Range>>> _ranges=new HashMap<String, Map<String,Set<Range>>>();\r
- Map<String, Set<Range>> ranges=new HashMap<String,Set<Range>>();\r
- Map<String,Map<String, Float>> _values=new HashMap<String, Map<String,Float>>();\r
- Map<String, Float> values = new HashMap<String, Float>();\r
- Set<Range> rset;\r
- rset = new HashSet<Range>();\r
- for (String[] se:new String[][] { { "34","41"},{"50","58"},{"83","91"},{"118","127"},{" 160","169"},{" 191","220"},{" 243","252"},{" 287","343"},{" 350","391"},{" 429","485"},{" 497","506"},{"539","547"}}) {\r
- rset.add(new Range(se));\r
- }\r
- ranges.put(DisemblResult.COILS.toString(), rset);\r
- values.put(DisemblResult.COILS.toString(), Float.valueOf(0.86010f));\r
- rset = new HashSet<Range>();\r
- for (String[] se:new String[][] { { "355","368"}}) {\r
- rset.add(new Range(se));\r
- }\r
- ranges.put(DisemblResult.REM465.toString(), rset);\r
- values.put(DisemblResult.REM465.toString(), Float.valueOf(0.88512f));\r
- rset = new HashSet<Range>();\r
- for (String[] se:new String[][] { { "190","204"}}) {\r
- rset.add(new Range(se));\r
- }\r
- ranges.put(DisemblResult.HOTLOOPS.toString(), rset);\r
- values.put(DisemblResult.HOTLOOPS.toString(), Float.valueOf(0.37094f));\r
- _ranges.put("Foobar_dundeefriends", ranges);\r
- _values.put("Foobar_dundeefriends", values);\r
- FileInputStream fio;\r
- try {\r
- fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "disembl.out");\r
- Map<String, Set<Score>> aseqs = SequenceUtil.readDisembl(fio);\r
- assertNotNull(aseqs);\r
- assertEquals(aseqs.size(), 3);\r
- ScoreManager sman = ScoreManager.newInstance(aseqs);\r
-\r
- for (String fs : aseqs.keySet()) {\r
- assertTrue(" Foobar_dundeefriends Foobar dundeefriends ".contains(fs));\r
- Set<Score> scores = aseqs.get(fs);\r
- assertEquals(scores.size(), 3);\r
- for (Score sc:scores) {\r
- if (_ranges.containsKey(fs)) {\r
- assertEquals("Checking range for Method "+sc.getMethod(),_ranges.get(fs).get(sc.getMethod()), sc.getRanges());\r
- assertEquals("Checking first value for Method "+sc.getMethod(), _values.get(fs).get(sc.getMethod()), sc.getScores().get(0));\r
- }\r
- }\r
- }\r
- fio.close();\r
- } catch (FileNotFoundException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
- } catch (IOException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
- } catch (UnknownFileFormatException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
- }\r
- }\r
-\r
- /**\r
- * This method tests the loading of horizontally formatted Jronn output file\r
- * \r
- * First sequence:\r
- * \r
- * >Foobar_dundeefriends\r
- * \r
- * # GlobDoms 2-358, 373-568\r
- * \r
- * # Disorder 1-5, 206-218, 243-250, 288-300, 313-324, 359-372, 475-481\r
- * \r
- * # RESIDUE DYDX RAW SMOOTHED\r
- * \r
- * M 0.0044 -0.2259 -0.2259\r
- * \r
- * T -0.1308 -0.2170 -0.2170\r
- * \r
- * ............\r
- * \r
- * > Second sequence\r
- */\r
- @SuppressWarnings("unchecked")\r
- @Test\r
- public void ReadGlobPlotResults() {\r
-\r
- FileInputStream fio;\r
- try {\r
- fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "globplot.out");\r
- HashMap<String, Set<Score>> aseqs = SequenceUtil.readGlobPlot(fio);\r
- assertNotNull(aseqs);\r
- assertEquals(aseqs.size(), 3);\r
-\r
- String fsdf = null;\r
- Set<Score> scores = null;\r
- for (String fs : aseqs.keySet()) {\r
- if ("Foobar_dundeefriends".contains(fs)) {\r
- fsdf = fs;\r
- scores = aseqs.get(fs);\r
- }\r
- assertEquals(scores.size(), 5);\r
- }\r
-\r
- ScoreManager sm = ScoreManager.newInstanceSingleSequence(scores);\r
- sm.writeOut(new PrintWriter(System.out, true));\r
-\r
- for (Score score : scores) {\r
- if (score.getMethod().equals(GlobProtResult.Disorder.toString())) {\r
- assertEquals(score.getRanges().size(), 7);\r
- assertTrue(score.getScores().isEmpty());\r
- }\r
- if (GlobProtResult.valueOf(score.getMethod()) == GlobProtResult.Dydx) {\r
- assertFalse(score.getScores().isEmpty());\r
- assertTrue(score.getRanges().isEmpty());\r
- }\r
- }\r
- fio.close();\r
- } catch (FileNotFoundException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
- } catch (IOException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
- } catch (UnknownFileFormatException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
- }\r
- }\r
-\r
- @Test\r
- public void ReadIUPredForShortAndLongDisorder() {\r
- try {\r
- Map<String, Score> scores = SequenceUtil.readIUPred(new File(AllTestSuit.TEST_DATA_PATH, "out.long"));\r
- ScoreManager man = ScoreManager.newInstanceSingleScore(scores);\r
- assertNotNull(scores);\r
- assertEquals(3, scores.size());\r
-\r
- Score score = scores.get("Foobar_dundeefriends");\r
- assertNotNull(score);\r
- assertEquals(0, score.getRanges().size());\r
- assertEquals(568, score.getScores().size());\r
- assertEquals("Long", score.getMethod());\r
-\r
- score = scores.get("Foobar");\r
- assertNotNull(score);\r
- assertEquals(0, score.getRanges().size());\r
- assertEquals(481, score.getScores().size());\r
- assertEquals("Long", score.getMethod());\r
-\r
- score = scores.get("dundeefriends");\r
- assertNotNull(score);\r
- assertEquals(0, score.getRanges().size());\r
- assertEquals(513, score.getScores().size());\r
- assertEquals("Long", score.getMethod());\r
- } catch (IOException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
- } catch (UnknownFileFormatException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
- }\r
- }\r
-\r
- @Test\r
- public void ReadIUPredForGlobDomain() {\r
- try {\r
- Map<String, Score> scores = SequenceUtil.readIUPred(new File(AllTestSuit.TEST_DATA_PATH, "output.glob"));\r
- assertNotNull(scores);\r
- assertEquals(2, scores.size());\r
- ScoreManager man = ScoreManager.newInstanceSingleScore(scores);\r
- assertEquals(2, man.getNumberOfSeq());\r
- Score score = scores.get("P53_HUMA");\r
- assertNotNull(score);\r
- assertEquals(2, score.getRanges().size());\r
- assertEquals(0, score.getScores().size());\r
- assertEquals("Glob", score.getMethod());\r
- score = scores.get("Foobar_dundeefriends");\r
- assertEquals(0, score.getRanges().size());\r
- } catch (IOException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
- } catch (UnknownFileFormatException e) {\r
- e.printStackTrace();\r
- fail(e.getLocalizedMessage());\r
- }\r
- }\r
-\r
- @Test\r
- public void ReadAAConResults() {\r
- try {\r
- InputStream inStream = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "aacon_results.txt");\r
- HashSet<Score> result = SequenceUtil.readAAConResults(inStream);\r
- inStream.close();\r
- assertNotNull(result);\r
- assertEquals(result.size(), 18);\r
-\r
- inStream = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "aacon_result_single.out");\r
- result = SequenceUtil.readAAConResults(inStream);\r
- inStream.close();\r
- assertNotNull(result);\r
- assertEquals(result.size(), 1);\r
- assertEquals(result.iterator().next().getScores().size(), 568);\r
- } catch (IOException e) {\r
- e.printStackTrace();\r
- fail(e.getMessage());\r
- }\r
- }\r
- @Test\r
- public void ReadJpredResults() {\r
- try {\r
- InputStream inStream = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "Jpred.test1.out");\r
- List<FastaSequence> result = SequenceUtil.readJpredFile(inStream);\r
- inStream.close();\r
- assertNotNull(result);\r
- assertEquals(result.size(), 19);\r
- } catch (IOException e) {\r
- e.printStackTrace();\r
- fail(e.getMessage());\r
- }\r
- }\r
-}\r
-\r