More work on the disorder prediction client & services.
[jabaws.git] / testsrc / compbio / data / sequence / SequenceUtilTester.java
index b726d88..7db01ee 100644 (file)
@@ -24,7 +24,12 @@ import java.io.FileNotFoundException;
 import java.io.FileOutputStream;\r
 import java.io.IOException;\r
 import java.io.InputStream;\r
+import java.io.PrintWriter;\r
+import java.util.HashMap;\r
+import java.util.HashSet;\r
 import java.util.List;\r
+import java.util.Map;\r
+import java.util.Set;\r
 \r
 import org.testng.annotations.Test;\r
 \r
@@ -79,6 +84,31 @@ public class SequenceUtilTester {
        }\r
 \r
        @Test()\r
+       public void testCleanProteinSequence() {\r
+               String dirtySeq = "atgAGTggt\taGGTgc\ncgcAC\rTgc gACtcgcGAt cgA ";\r
+               assertFalse(SequenceUtil.isProteinSequence(dirtySeq));\r
+               // This will still be a NON-protein sequence despite having only correct\r
+               // letters, because the letters perfectly match a nucleotide sequence!\r
+               assertFalse(SequenceUtil.isProteinSequence(SequenceUtil\r
+                               .cleanProteinSequence(dirtySeq)));\r
+\r
+               String notaSeq = "atgc1tgatgcatgcatgatgmctga";\r
+               assertFalse(SequenceUtil.isProteinSequence(notaSeq));\r
+               assertTrue(SequenceUtil.isProteinSequence(SequenceUtil\r
+                               .cleanProteinSequence(notaSeq)));\r
+\r
+               String AAseq = "ARLGRVRWTQQRHAEAAVLLQQASDAAPEHPGIALWLGHALEDAGQAEAAAAAYTRAHQL";\r
+               assertTrue(SequenceUtil.isProteinSequence(AAseq));\r
+               assertTrue(SequenceUtil.isProteinSequence(SequenceUtil\r
+                               .cleanProteinSequence(AAseq)));\r
+               AAseq += "XU";\r
+\r
+               assertFalse(SequenceUtil.isProteinSequence(AAseq));\r
+               assertTrue(SequenceUtil.isProteinSequence(SequenceUtil\r
+                               .cleanProteinSequence(AAseq)));\r
+       }\r
+\r
+       @Test()\r
        public void testReadWriteFasta() {\r
 \r
                try {\r
@@ -117,15 +147,14 @@ public class SequenceUtilTester {
                FileInputStream fio;\r
                try {\r
                        fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH + "jronn.out");\r
-                       List<AnnotatedSequence> aseqs = SequenceUtil.readJRonn(fio);\r
+                       Map<String, Score> aseqs = SequenceUtil.readJRonn(fio);\r
                        assertNotNull(aseqs);\r
                        assertEquals(aseqs.size(), 3);\r
-                       AnnotatedSequence aseq = aseqs.get(0);\r
+                       Score aseq = aseqs.get("Foobar");\r
                        assertNotNull(aseq);\r
-                       assertNotNull(aseq.getAnnotation());\r
+                       assertNotNull(aseq.getScores());\r
                        // System.out.println(aseq);\r
-                       assertEquals(aseq.getAnnotation().length, aseq.getSequence()\r
-                                       .length());\r
+                       assertEquals(aseq.getScores().size(), aseq.getScores().size());\r
                        fio.close();\r
                } catch (FileNotFoundException e) {\r
                        e.printStackTrace();\r
@@ -146,27 +175,33 @@ public class SequenceUtilTester {
 \r
        /**\r
         * This test tests the loading of a Disembl output file\r
+        * \r
+        * First seq\r
+        * \r
+        * M 0.86010 0.88512 0.37094\r
+        * \r
+        * T 0.79983 0.85864 0.44331\r
+        * \r
         */\r
        @SuppressWarnings("unchecked")\r
        @Test\r
-       public void testMultiAnnotatedSequence() {\r
+       public void testReadDisemblResults() {\r
 \r
                FileInputStream fio;\r
                try {\r
                        fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH\r
                                        + "disembl.out");\r
-                       List<MultiAnnotatedSequence<DisemblResultAnnot>> aseqs = SequenceUtil\r
-                                       .readDisembl(fio);\r
+                       Map<String, Set<Score>> aseqs = SequenceUtil.readDisembl(fio);\r
                        assertNotNull(aseqs);\r
+                       assertEquals(aseqs.size(), 3);\r
+                       ScoreManager sman = ScoreManager.newInstance(aseqs);\r
 \r
-                       /*\r
-                        * MultiAnnotatedSequence ma = new MultiAnnotatedSequence();\r
-                        * Map<Trial, List<Number>> val = ma.getInstance(Trial.class);\r
-                        * List<Number> list = new ArrayList<Number>(); list.add(new\r
-                        * Float(1.2)); list.add(new Double(5.662)); val.put(Trial.one,\r
-                        * list); val.put(Trial.two, Arrays.asList(6.22f, 1, 37.6f));\r
-                        * System.out.println(val); AnnotatedSequence aseq = aseqs.get(0);\r
-                        */\r
+                       for (String fs : aseqs.keySet()) {\r
+                               assertTrue(" Foobar_dundeefriends Foobar dundeefriends "\r
+                                               .contains(fs));\r
+                               Set<Score> scores = aseqs.get(fs);\r
+                               assertEquals(scores.size(), 3);\r
+                       }\r
                        fio.close();\r
                } catch (FileNotFoundException e) {\r
                        e.printStackTrace();\r
@@ -180,10 +215,97 @@ public class SequenceUtilTester {
                }\r
        }\r
 \r
+       /**\r
+        * This test tests the loading of a GlobPlot output file\r
+        * \r
+        * First sequence:\r
+        * \r
+        * >Foobar_dundeefriends\r
+        * \r
+        * # GlobDoms 2-358, 373-568\r
+        * \r
+        * # Disorder 1-5, 206-218, 243-250, 288-300, 313-324, 359-372, 475-481\r
+        * \r
+        * # RESIDUE DYDX RAW SMOOTHED\r
+        * \r
+        * M 0.0044 -0.2259 -0.2259\r
+        * \r
+        * T -0.1308 -0.2170 -0.2170\r
+        * \r
+        * ............\r
+        * \r
+        * > Second sequence\r
+        */\r
+       @SuppressWarnings("unchecked")\r
        @Test\r
-       public void testReadResults() throws FileNotFoundException {\r
-               InputStream inStream = new FileInputStream(AllTestSuit.TEST_DATA_PATH\r
-                               + "aacon_results.txt");\r
-               System.out.println(SequenceUtil.readResults(inStream));\r
+       public void testReadGlobPlotResults() {\r
+\r
+               FileInputStream fio;\r
+               try {\r
+                       fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH\r
+                                       + "globplot.out");\r
+                       HashMap<String, Set<Score>> aseqs = SequenceUtil.readGlobPlot(fio);\r
+                       assertNotNull(aseqs);\r
+                       assertEquals(aseqs.size(), 3);\r
+\r
+                       String fsdf = null;\r
+                       Set<Score> scores = null;\r
+                       for (String fs : aseqs.keySet()) {\r
+                               if ("Foobar_dundeefriends".contains(fs)) {\r
+                                       fsdf = fs;\r
+                                       scores = aseqs.get(fs);\r
+                               }\r
+                               assertEquals(scores.size(), 5);\r
+                       }\r
+\r
+                       ScoreManager sm = ScoreManager.newInstanceSingleSequence(scores);\r
+                       sm.writeOut(new PrintWriter(System.out, true));\r
+\r
+                       for (Score score : scores) {\r
+\r
+                               if (score.getMethod()\r
+                                               .equals(GlobProtResult.Disorder.toString())) {\r
+                                       assertEquals(score.getRanges().size(), 7);\r
+                                       assertTrue(score.getScores().isEmpty());\r
+                               }\r
+                               if (GlobProtResult.valueOf(score.getMethod()) == GlobProtResult.Dydx) {\r
+                                       assertFalse(score.getScores().isEmpty());\r
+                                       assertTrue(score.getRanges().isEmpty());\r
+                               }\r
+                       }\r
+                       fio.close();\r
+               } catch (FileNotFoundException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               } catch (IOException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               } catch (UnknownFileFormatException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getLocalizedMessage());\r
+               }\r
+       }\r
+\r
+       @Test\r
+       public void testReadAAConResults() {\r
+               try {\r
+                       InputStream inStream = new FileInputStream(\r
+                                       AllTestSuit.TEST_DATA_PATH + "aacon_results.txt");\r
+                       HashSet<Score> result = SequenceUtil.readAAConResults(inStream);\r
+                       inStream.close();\r
+                       assertNotNull(result);\r
+                       assertEquals(result.size(), 18);\r
+\r
+                       inStream = new FileInputStream(AllTestSuit.TEST_DATA_PATH\r
+                                       + "aacon_result_single.out");\r
+                       result = SequenceUtil.readAAConResults(inStream);\r
+                       inStream.close();\r
+                       assertNotNull(result);\r
+                       assertEquals(result.size(), 1);\r
+                       assertEquals(result.iterator().next().getScores().size(), 568);\r
+               } catch (IOException e) {\r
+                       e.printStackTrace();\r
+                       fail(e.getMessage());\r
+               }\r
        }\r
 }\r