IUPred result readers. IUpred binaries compilation scripts. IUpred executable descrip...
[jabaws.git] / datamodel / compbio / data / sequence / SequenceUtil.java
index e737575..f65ec9e 100644 (file)
@@ -311,6 +311,126 @@ public final class SequenceUtil {
                outWriter.close();\r
        }\r
 \r
+       /**\r
+        * Read IUPred output\r
+        * \r
+        * @param result\r
+        * @return\r
+        * @throws IOException\r
+        * @throws UnknownFileFormatException\r
+        */\r
+       public static Map<String, Score> readIUPred(final File result,\r
+                       IUPredResult type) throws IOException, UnknownFileFormatException {\r
+               InputStream input = new FileInputStream(result);\r
+               Map<String, Score> sequences = readIUPred(input, type);\r
+               input.close();\r
+               return sequences;\r
+       }\r
+\r
+       // Check the type of the file e.g. long| short or domain\r
+       // and read\r
+       /**\r
+        * ## Long Disorder\r
+        * \r
+        * # P53_HUMAN\r
+        * \r
+        * 1 M 0.9943\r
+        * \r
+        * 2 E 0.9917\r
+        * \r
+        * 3 E 0.9879\r
+        * \r
+        * (every line)\r
+        * \r
+        * @throws IOException\r
+        * @throws UnknownFileFormatException\r
+        * \r
+        * \r
+        */\r
+       private static Map<String, Score> readIUPred(InputStream input,\r
+                       IUPredResult type) throws IOException, UnknownFileFormatException {\r
+\r
+               Score score = null;\r
+               final Map<String, Score> seqs = new HashMap<String, Score>();\r
+               Scanner scan = new Scanner(input);\r
+               scan.useDelimiter("#");\r
+               while (scan.hasNext()) {\r
+                       String nextEntry = scan.next();\r
+                       Scanner entry = new Scanner(nextEntry);\r
+                       String name = entry.nextLine();\r
+                       // inside entry:\r
+                       if (IUPredResult.Glob == type) {\r
+                               // parse domains\r
+                               TreeSet<Range> ranges = parseIUPredDomains(entry);\r
+                               score = new Score(type, ranges);\r
+                       } else {\r
+                               // parse short | long\r
+                               float[] scores = parseIUPredScores(entry);\r
+                               score = new Score(type, scores);\r
+                       }\r
+                       entry.close();\r
+                       seqs.put(name, score);\r
+               }\r
+\r
+               scan.close();\r
+               return seqs;\r
+       }\r
+\r
+       /**\r
+        * # P53_HUMA\r
+        * \r
+        * Number of globular domains: 2\r
+        * \r
+        * globular domain 1. 98 - 269\r
+        * \r
+        * globular domain 2. 431 - 482\r
+        * \r
+        * >P53_HUMA\r
+        * \r
+        * meepqsdpsv epplsqetfs dlwkllpenn vlsplpsqam ddlmlspddi eqwftedpgp\r
+        * \r
+        * @param scan\r
+        */\r
+       private static TreeSet<Range> parseIUPredDomains(Scanner scan) {\r
+               String header = "Number of globular domains:";\r
+               String domainPref = "globular domain";\r
+               TreeSet<Range> ranges = new TreeSet<Range>();\r
+               String line = scan.nextLine().trim();\r
+               assert line.startsWith(header);\r
+               line = line.substring(header.length()).trim();\r
+               int domainNum = Integer.parseInt(line);\r
+               if (domainNum == 0) {\r
+                       return ranges;\r
+               }\r
+\r
+               for (int i = 0; i < domainNum; i++) {\r
+                       assert scan.hasNextLine();\r
+                       line = scan.nextLine();\r
+                       assert line.trim().startsWith(domainPref);\r
+                       line = line.substring(line.indexOf(".") + 1).trim();\r
+                       Range r = new Range(line.split("-"));\r
+                       ranges.add(r);\r
+               }\r
+\r
+               return ranges;\r
+       }\r
+       /*\r
+        * 1 M 0.9943\r
+        * \r
+        * 2 E 0.9917\r
+        */\r
+       private static float[] parseIUPredScores(Scanner scan)\r
+                       throws UnknownFileFormatException {\r
+               List<String> annotation = new ArrayList<String>();\r
+               while (scan.hasNextLine()) {\r
+                       String line = scan.nextLine().trim();\r
+                       String[] val = line.split("\\s+");\r
+                       annotation.add(val[2]);\r
+               }\r
+               return convertToNumber(annotation\r
+                               .toArray(new String[annotation.size()]));\r
+       }\r
+\r
        public static Map<String, Score> readJRonn(final File result)\r
                        throws IOException, UnknownFileFormatException {\r
                InputStream input = new FileInputStream(result);\r
@@ -711,4 +831,19 @@ enum GlobProtResult {
        SmoothedScore,\r
        /** This a score with no range */\r
        RawScore\r
+}\r
+\r
+/**\r
+ * The kinds of result IUPred output can hold. The IUPred readers in\r
+ * SequenceUtil use the value to decide how an entry is parsed: Glob entries are\r
+ * read as domain ranges, the other kinds as a list of scores.\r
+ */\r
+enum IUPredResult {\r
+       /**\r
+        * Short disorder; parsed as a list of scores\r
+        */\r
+       Short,\r
+       /**\r
+        * Long disorder; parsed as a list of scores\r
+        */\r
+       Long,\r
+       /**\r
+        * Globular domains; parsed as a set of ranges\r
+        */\r
+       Glob\r
 }
\ No newline at end of file