test for JWS-35 - check that range and first value of prediction for each method...
[jabaws.git] / testsrc / compbio / data / sequence / SequenceUtilTester.java
index 7f45855..e646c1e 100644 (file)
@@ -19,11 +19,14 @@ import static org.testng.AssertJUnit.assertNotNull;
 import static org.testng.AssertJUnit.assertTrue;\r
 import static org.testng.AssertJUnit.fail;\r
 \r
+import java.io.File;\r
 import java.io.FileInputStream;\r
 import java.io.FileNotFoundException;\r
 import java.io.FileOutputStream;\r
 import java.io.IOException;\r
 import java.io.InputStream;\r
+import java.io.PrintWriter;\r
+import java.util.HashMap;\r
 import java.util.HashSet;\r
 import java.util.List;\r
 import java.util.Map;\r
@@ -32,6 +35,7 @@ import java.util.Set;
 import org.testng.annotations.Test;\r
 \r
 import compbio.metadata.AllTestSuit;\r
+import compbio.runner.disorder.Disembl;\r
 \r
 public class SequenceUtilTester {\r
 \r
@@ -82,6 +86,31 @@ public class SequenceUtilTester {
        }\r
 \r
        @Test()\r
+       public void testCleanProteinSequence() {\r
+               // Checks how SequenceUtil.cleanProteinSequence() changes the verdict of\r
+               // SequenceUtil.isProteinSequence() for three kinds of input.\r
+\r
+               // Case 1: mixed-case nucleotide letters interleaved with tab, newline,\r
+               // carriage return and space characters.\r
+               String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
+               assertFalse(SequenceUtil.isProteinSequence(dirtySeq));\r
+               // Even after cleaning this is still NOT reported as a protein sequence:\r
+               // only valid letters remain, but they all match a nucleotide sequence.\r
+               assertFalse(SequenceUtil.isProteinSequence(SequenceUtil\r
+                               .cleanProteinSequence(dirtySeq)));\r
+\r
+               // Case 2: mostly nucleotide letters plus a digit ('1') and an 'm'.\r
+               // Rejected as-is, accepted once cleaned (presumably the digit is\r
+               // stripped and the 'm' keeps it from matching a pure nucleotide\r
+               // sequence - TODO confirm against SequenceUtil).\r
+               String notaSeq = "atgc1tgatgcatgcatgatgmctga";\r
+               assertFalse(SequenceUtil.isProteinSequence(notaSeq));\r
+               assertTrue(SequenceUtil.isProteinSequence(SequenceUtil\r
+                               .cleanProteinSequence(notaSeq)));\r
+\r
+               // Case 3: an amino acid sequence is accepted both raw and cleaned.\r
+               String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
+               assertTrue(SequenceUtil.isProteinSequence(AAseq));\r
+               assertTrue(SequenceUtil.isProteinSequence(SequenceUtil\r
+                               .cleanProteinSequence(AAseq)));\r
+               // Appending "XU" makes the raw string fail the check, while the\r
+               // cleaned string passes again (presumably cleaning drops X and U -\r
+               // TODO confirm against SequenceUtil).\r
+               AAseq += "XU";\r
+\r
+               assertFalse(SequenceUtil.isProteinSequence(AAseq));\r
+               assertTrue(SequenceUtil.isProteinSequence(SequenceUtil\r
+                               .cleanProteinSequence(AAseq)));\r
+       }\r
+\r
+       @Test()\r
        public void testReadWriteFasta() {\r
 \r
                try {\r
@@ -160,20 +189,56 @@ public class SequenceUtilTester {
        @Test\r
        public void testReadDisemblResults() {\r
 \r
+\r
+               Map<String, Map<String,Set<Range>>> _ranges=new HashMap<String, Map<String,Set<Range>>>();\r
+               Map<String, Set<Range>> ranges=new HashMap<String,Set<Range>>();\r
+               Map<String,Map<String, Float>>  _values=new HashMap<String, Map<String,Float>>();\r
+               Map<String, Float> values = new HashMap<String, Float>();\r
+               Set<Range> rset;\r
+               rset = new HashSet<Range>();\r
+               for (String[] se:new String[][] { { "34","41"},{"50","58"},{"83","91"},{"118","127"},{" 160","169"},{" 191","220"},{" 243","252"},{" 287","343"},{" 350","391"},{" 429","485"},{" 497","506"},{"539","547"}})\r
+               {\r
+                       rset.add(new Range(se));\r
+               }\r
+               ranges.put(DisemblResult.COILS.toString(), rset);\r
+               values.put(DisemblResult.COILS.toString(), Float.valueOf(0.86010f));\r
+               rset = new HashSet<Range>();\r
+               for (String[] se:new String[][] { { "355","368"}})\r
+               {\r
+                       rset.add(new Range(se));\r
+               }\r
+               ranges.put(DisemblResult.REM465.toString(), rset);\r
+               values.put(DisemblResult.REM465.toString(), Float.valueOf(0.88512f));\r
+               rset = new HashSet<Range>();\r
+               for (String[] se:new String[][] { { "190","204"}})\r
+               {\r
+                       rset.add(new Range(se));\r
+               }\r
+               ranges.put(DisemblResult.HOTLOOPS.toString(), rset);\r
+               values.put(DisemblResult.HOTLOOPS.toString(), Float.valueOf(0.37094f));\r
+               _ranges.put("Foobar_dundeefriends", ranges);\r
+               _values.put("Foobar_dundeefriends", values);\r
                FileInputStream fio;\r
                try {\r
                        fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH\r
                                        + "disembl.out");\r
-                       Map<FastaSequence, Set<Score>> aseqs = SequenceUtil\r
-                                       .readDisembl(fio);\r
+                       Map<String, Set<Score>> aseqs = SequenceUtil.readDisembl(fio);\r
                        assertNotNull(aseqs);\r
                        assertEquals(aseqs.size(), 3);\r
-                       System.out.println(aseqs);\r
-                       for (FastaSequence fs : aseqs.keySet()) {\r
+                       ScoreManager sman = ScoreManager.newInstance(aseqs);\r
+\r
+                       for (String fs : aseqs.keySet()) {\r
                                assertTrue(" Foobar_dundeefriends Foobar dundeefriends "\r
-                                               .contains(fs.getId()));\r
+                                               .contains(fs));\r
                                Set<Score> scores = aseqs.get(fs);\r
                                assertEquals(scores.size(), 3);\r
+                               for (Score sc:scores) {\r
+                                       if (_ranges.containsKey(fs))\r
+                                       {\r
+                                       assertEquals("Checking range for Method "+sc.getMethod(),_ranges.get(fs).get(sc.getMethod()), sc.getRanges());\r
+                                       assertEquals("Checking first value for Method "+sc.getMethod(), _values.get(fs).get(sc.getMethod()), sc.getScores().get(0));\r
+                                       }\r
+                               }\r
                        }\r
                        fio.close();\r
                } catch (FileNotFoundException e) {\r
@@ -187,7 +252,6 @@ public class SequenceUtilTester {
                        fail(e.getLocalizedMessage());\r
                }\r
        }\r
-\r
        /**\r
         * This test tests the loading of horizontally formatted Jronn output file\r
         * \r
@@ -217,27 +281,31 @@ public class SequenceUtilTester {
                try {\r
                        fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH\r
                                        + "globplot.out");\r
-                       Map<FastaSequence, Set<Score>> aseqs = SequenceUtil\r
-                                       .readGlobPlot(fio);\r
+                       HashMap<String, Set<Score>> aseqs = SequenceUtil.readGlobPlot(fio);\r
                        assertNotNull(aseqs);\r
                        assertEquals(aseqs.size(), 3);\r
 \r
-                       FastaSequence fsdf = null;\r
+                       String fsdf = null;\r
                        Set<Score> scores = null;\r
-                       for (FastaSequence fs : aseqs.keySet()) {\r
-                               if ("Foobar_dundeefriends".contains(fs.getId())) {\r
+                       for (String fs : aseqs.keySet()) {\r
+                               if ("Foobar_dundeefriends".contains(fs)) {\r
                                        fsdf = fs;\r
                                        scores = aseqs.get(fs);\r
                                }\r
                                assertEquals(scores.size(), 5);\r
                        }\r
+\r
+                       ScoreManager sm = ScoreManager.newInstanceSingleSequence(scores);\r
+                       sm.writeOut(new PrintWriter(System.out, true));\r
+\r
                        for (Score score : scores) {\r
-                               \r
-                               if (score.getMethod() == (Enum<?>) GlobProtResult.Disorder) {\r
+\r
+                               if (score.getMethod()\r
+                                               .equals(GlobProtResult.Disorder.toString())) {\r
                                        assertEquals(score.getRanges().size(), 7);\r
                                        assertTrue(score.getScores().isEmpty());\r
                                }\r
-                               if (score.getMethod() == (Enum<?>)GlobProtResult.Dydx) {\r
+                               if (GlobProtResult.valueOf(score.getMethod()) == GlobProtResult.Dydx) {\r
                                        assertFalse(score.getScores().isEmpty());\r
                                        assertTrue(score.getRanges().isEmpty());\r
                                }\r
@@ -256,6 +324,69 @@ public class SequenceUtilTester {
        }\r
 \r
        @Test\r
+       public void testReadIUPredForShortAndLongDisorder() {\r
+               try {\r
+                       Map<String, Score> scores = SequenceUtil.readIUPred(new File(\r
+                                       AllTestSuit.TEST_DATA_PATH, "out.long"));\r
+                       ScoreManager man = ScoreManager.newInstanceSingleScore(scores);\r
+                       // man.writeOut(new PrintWriter(System.out, true));\r
+                       assertNotNull(scores);\r
+                       assertEquals(3, scores.size());\r
+\r
+                       Score score = scores.get("Foobar_dundeefriends");\r
+                       assertNotNull(score);\r
+                       assertEquals(0, score.getRanges().size());\r
+                       assertEquals(568, score.getScores().size());\r
+                       assertEquals("Long", score.getMethod());\r
+\r
+                       score = scores.get("Foobar");\r
+                       assertNotNull(score);\r
+                       assertEquals(0, score.getRanges().size());\r
+                       assertEquals(481, score.getScores().size());\r
+                       assertEquals("Long", score.getMethod());\r
+\r
+                       score = scores.get("dundeefriends");\r
+                       assertNotNull(score);\r
+                       assertEquals(0, score.getRanges().size());\r
+                       assertEquals(513, score.getScores().size());\r
+                       assertEquals("Long", score.getMethod());\r
+\r
+               } catch (IOException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               } catch (UnknownFileFormatException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               }\r
+       }\r
+\r
+       @Test\r
+       public void testReadIUPredForGlobDomain() {\r
+               try {\r
+                       Map<String, Score> scores = SequenceUtil.readIUPred(new File(\r
+                                       AllTestSuit.TEST_DATA_PATH, "output.glob"));\r
+                       assertNotNull(scores);\r
+                       assertEquals(2, scores.size());\r
+                       ScoreManager man = ScoreManager.newInstanceSingleScore(scores);\r
+                       // man.writeOut(new PrintWriter(System.out, true));\r
+                       assertEquals(2, man.getNumberOfSeq());\r
+                       Score score = scores.get("P53_HUMA");\r
+                       assertNotNull(score);\r
+                       assertEquals(2, score.getRanges().size());\r
+                       assertEquals(0, score.getScores().size());\r
+                       assertEquals("Glob", score.getMethod());\r
+\r
+                       score = scores.get("Foobar_dundeefriends");\r
+                       assertEquals(0, score.getRanges().size());\r
+               } catch (IOException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               } catch (UnknownFileFormatException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               }\r
+       }\r
+       @Test\r
        public void testReadAAConResults() {\r
                try {\r
                        InputStream inStream = new FileInputStream(\r