Change header template for a new version
[jabaws.git] / datamodel / compbio / data / sequence / SequenceUtil.java
index 149e0e0..16b65f2 100644 (file)
@@ -1,15 +1,19 @@
-/*\r
- * @(#)SequenceUtil.java 1.0 September 2009 Copyright (c) 2009 Peter Troshin\r
- * Jalview Web Services version: 2.0 This library is free software; you can\r
- * redistribute it and/or modify it under the terms of the Apache License\r
- * version 2 as published by the Apache Software Foundation This library is\r
- * distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;\r
- * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A\r
- * PARTICULAR PURPOSE. See the Apache License for more details. A copy of the\r
- * license is in apache_license.txt. It is also available here: see:\r
- * http://www.apache.org/licenses/LICENSE-2.0.txt Any republication or derived\r
- * work distributed in source code form must include this copyright and license\r
- * notice.\r
+/* Copyright (c) 2011 Peter Troshin\r
+ *  \r
+ *  JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0     \r
+ * \r
+ *  This library is free software; you can redistribute it and/or modify it under the terms of the\r
+ *  Apache License version 2 as published by the Apache Software Foundation\r
+ * \r
+ *  This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without\r
+ *  even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache \r
+ *  License for more details.\r
+ * \r
+ *  A copy of the license is in apache_license.txt. It is also available here:\r
+ * @see: http://www.apache.org/licenses/LICENSE-2.0.txt\r
+ * \r
+ * Any republication or derived work distributed in source code form\r
+ * must include this copyright and license notice.\r
  */\r
 \r
 package compbio.data.sequence;\r
@@ -25,17 +29,25 @@ import java.io.InputStreamReader;
 import java.io.OutputStream;\r
 import java.io.OutputStreamWriter;\r
 import java.util.ArrayList;\r
+import java.util.HashMap;\r
+import java.util.HashSet;\r
 import java.util.List;\r
+import java.util.Map;\r
 import java.util.Scanner;\r
+import java.util.Set;\r
+import java.util.TreeSet;\r
 import java.util.logging.Level;\r
 import java.util.regex.Matcher;\r
 import java.util.regex.Pattern;\r
 \r
+import compbio.util.Util;\r
+\r
 /**\r
  * Utility class for operations on sequences\r
  * \r
- * @author Petr Troshin\r
- * @version 1.0\r
+ * @author Peter Troshin\r
+ * @since 1.0\r
+ * @version 2.0 June 2011\r
  */\r
 public final class SequenceUtil {\r
 \r
@@ -92,19 +104,6 @@ public final class SequenceUtil {
        private SequenceUtil() {\r
        } // utility class, no instantiation\r
 \r
-       /*\r
-        * public static void write_PirSeq(OutputStream os, FastaSequence seq)\r
-        * throws IOException { BufferedWriter pir_out = new BufferedWriter(new\r
-        * OutputStreamWriter(os)); pir_out.write(">P1;" + seq.getId() +\r
-        * SysPrefs.newlinechar); pir_out.write(seq.getSequence() +\r
-        * SysPrefs.newlinechar); pir_out.close(); } public static void\r
-        * write_FastaSeq(OutputStream os, FastaSequence seq) throws IOException {\r
-        * BufferedWriter fasta_out = new BufferedWriter( new\r
-        * OutputStreamWriter(os)); fasta_out.write(">" + seq.getId() +\r
-        * SysPrefs.newlinechar); fasta_out.write(seq.getSequence() +\r
-        * SysPrefs.newlinechar); fasta_out.close(); }\r
-        */\r
-\r
        /**\r
         * @return true is the sequence contains only letters a,c, t, g, u\r
         */\r
@@ -163,6 +162,17 @@ public final class SequenceUtil {
        }\r
 \r
        /**\r
+        * Remove all non AA chars from the sequence\r
+        * \r
+        * @param sequence\r
+        *            the sequence to clean\r
+        * @return cleaned sequence\r
+        */\r
+       public static String cleanProteinSequence(String sequence) {\r
+               return SequenceUtil.NON_AA.matcher(sequence).replaceAll("");\r
+       }\r
+\r
+       /**\r
         * @param sequence\r
         * @return true is the sequence is a protein sequence, false overwise\r
         */\r
@@ -250,28 +260,11 @@ public final class SequenceUtil {
        public static List<FastaSequence> readFasta(final InputStream inStream)\r
                        throws IOException {\r
                final List<FastaSequence> seqs = new ArrayList<FastaSequence>();\r
-\r
-               final BufferedReader infasta = new BufferedReader(\r
-                               new InputStreamReader(inStream, "UTF8"), 16000);\r
-               final Pattern pattern = Pattern.compile("//s+");\r
-\r
-               String line;\r
-               String sname = "", seqstr = null;\r
-               do {\r
-                       line = infasta.readLine();\r
-                       if ((line == null) || line.startsWith(">")) {\r
-                               if (seqstr != null) {\r
-                                       seqs.add(new FastaSequence(sname.substring(1), seqstr));\r
-                               }\r
-                               sname = line; // remove >\r
-                               seqstr = "";\r
-                       } else {\r
-                               final String subseq = pattern.matcher(line).replaceAll("");\r
-                               seqstr += subseq;\r
-                       }\r
-               } while (line != null);\r
-\r
-               infasta.close();\r
+               FastaReader reader = new FastaReader(inStream);\r
+               while (reader.hasNext()) {\r
+                       seqs.add(reader.next());\r
+               }\r
+               inStream.close();\r
                return seqs;\r
        }\r
 \r
@@ -293,26 +286,157 @@ public final class SequenceUtil {
                outWriter.close();\r
        }\r
 \r
-       public static List<AnnotatedSequence> readJRonn(final File result)\r
+       /**\r
+        * Reads an IUPred output file. The kind of result stored in the file\r
+        * (long, short or glob) is derived from the file name by\r
+        * {@link IUPredResult#getType(File)}.\r
+        * \r
+        * @param result\r
+        *            the IUPred output file\r
+        * @return the parsed scores keyed by the entry name read from the file\r
+        * @throws IOException\r
+        *             if the file cannot be opened or closed\r
+        * @throws UnknownFileFormatException\r
+        *             if reported by the parser while reading the scores\r
+        */\r
+       public static Map<String, Score> readIUPred(final File result)\r
                        throws IOException, UnknownFileFormatException {\r
                InputStream input = new FileInputStream(result);\r
-               List<AnnotatedSequence> sequences = readJRonn(input);\r
-               input.close();\r
-               return sequences;\r
+               // try/finally: the file handle is released even when the type cannot\r
+               // be recognised or the content fails to parse\r
+               try {\r
+                       return readIUPred(input, IUPredResult.getType(result));\r
+               } finally {\r
+                       input.close();\r
+               }\r
        }\r
 \r
+       // Check the type of the file e.g. long| short or domain\r
+       // and read\r
        /**\r
-        * Reader for JRonn horizontal file format >Foobar M G D T T A G 0.48 0.42\r
-        * 0.42 0.48 0.52 0.53 0.54 All values are tab delimited\r
+        * Parses IUPred output. The input is cut into entries at every '#'\r
+        * character and the first line of an entry, trimmed, becomes its key in\r
+        * the returned map. For {@link IUPredResult#Glob} the rest of the entry\r
+        * is parsed as globular domain ranges, for any other type as per-residue\r
+        * scores. Example of a long disorder file:\r
+        * \r
+        * <pre>\r
+        * ## Long Disorder\r
+        * # P53_HUMAN\r
+        * 1 M 0.9943\r
+        * 2 E 0.9917\r
+        * 3 E 0.9879\r
+        * (every line)\r
+        * </pre>\r
+        * \r
+        * NOTE(review): every '#' is a delimiter, so a header such as\r
+        * "## Long Disorder" is split off as an entry of its own - confirm this\r
+        * is intended.\r
+        * \r
+        * @param input\r
+        *            stream with the IUPred output; it is wrapped in a Scanner that\r
+        *            is closed before this method returns\r
+        * @param type\r
+        *            the kind of result held by the stream\r
+        * @return entry name mapped to the score parsed for that entry\r
+        * @throws IOException\r
+        *             declared by the signature\r
+        * @throws UnknownFileFormatException\r
+        *             declared by the score parser\r
+        */\r
+       private static Map<String, Score> readIUPred(InputStream input,\r
+                       IUPredResult type) throws IOException, UnknownFileFormatException {\r
+\r
+               Score score = null;\r
+               final Map<String, Score> seqs = new HashMap<String, Score>();\r
+               Scanner scan = new Scanner(input);\r
+               scan.useDelimiter("#");\r
+               while (scan.hasNext()) {\r
+                       String nextEntry = scan.next();\r
+                       Scanner entry = new Scanner(nextEntry);\r
+                       String name = entry.nextLine().trim();\r
+                       // inside entry:\r
+                       if (IUPredResult.Glob == type) {\r
+                               // parse domains\r
+                               TreeSet<Range> ranges = parseIUPredDomains(entry);\r
+                               score = new Score(type, ranges);\r
+                       } else {\r
+                               // parse short | long\r
+                               float[] scores = parseIUPredScores(entry);\r
+                               score = new Score(type, scores);\r
+                       }\r
+                       entry.close();\r
+                       seqs.put(name, score);\r
+               }\r
+\r
+               scan.close();\r
+               return seqs;\r
+       }\r
+\r
+       /**\r
+        * Parses the globular domain section of an IUPred entry, for example:\r
+        * \r
+        * <pre>\r
+        * # P53_HUMA\r
+        * Number of globular domains: 2\r
+        * globular domain 1. 98 - 269\r
+        * globular domain 2. 431 - 482\r
+        * &gt;P53_HUMA\r
+        * meepqsdpsv epplsqetfs dlwkllpenn vlsplpsqam ddlmlspddi eqwftedpgp\r
+        * </pre>\r
+        * \r
+        * @param scan\r
+        *            scanner whose next line is the "Number of globular domains:"\r
+        *            line\r
+        * @return one range per announced domain, empty if none was announced\r
+        */\r
+       private static TreeSet<Range> parseIUPredDomains(Scanner scan) {\r
+               final String countPrefix = "Number of globular domains:";\r
+               final String domainPrefix = "globular domain";\r
+               final TreeSet<Range> domains = new TreeSet<Range>();\r
+\r
+               final String countLine = scan.nextLine().trim();\r
+               assert countLine.startsWith(countPrefix);\r
+               int remaining = Integer.parseInt(countLine.substring(\r
+                               countPrefix.length()).trim());\r
+\r
+               // one "globular domain N. from - to" line per announced domain\r
+               while (remaining-- > 0) {\r
+                       assert scan.hasNextLine();\r
+                       final String domainLine = scan.nextLine();\r
+                       assert domainLine.trim().startsWith(domainPrefix);\r
+                       final String bounds = domainLine.substring(\r
+                                       domainLine.indexOf(".") + 1).trim();\r
+                       domains.add(new Range(bounds.split("-")));\r
+               }\r
+               return domains;\r
+       }\r
+       /**\r
+        * Collects the per-residue scores of one IUPred entry. Each non-blank\r
+        * line is expected to hold three whitespace separated columns, of which\r
+        * the third is taken as the score, for example:\r
+        * \r
+        * <pre>\r
+        * 1 M 0.9943\r
+        * 2 E 0.9917\r
+        * </pre>\r
+        * \r
+        * @param scan\r
+        *            scanner positioned on the first score line of the entry\r
+        * @return the third column of every line, converted by convertToNumber\r
+        * @throws UnknownFileFormatException\r
+        *             if a non-blank line has fewer than three columns, or if\r
+        *             raised by the number conversion\r
+        */\r
+       private static float[] parseIUPredScores(Scanner scan)\r
+                       throws UnknownFileFormatException {\r
+               List<String> annotation = new ArrayList<String>();\r
+               while (scan.hasNextLine()) {\r
+                       String line = scan.nextLine().trim();\r
+                       if (line.length() == 0) {\r
+                               continue; // tolerate blank lines between or after the scores\r
+                       }\r
+                       String[] val = line.split("\\s+");\r
+                       if (val.length < 3) {\r
+                               // report a malformed line through the declared exception\r
+                               // instead of an ArrayIndexOutOfBoundsException\r
+                               throw new UnknownFileFormatException(\r
+                                               "IUPred score line is expected to have 3 columns but was: "\r
+                                                               + line);\r
+                       }\r
+                       annotation.add(val[2]);\r
+               }\r
+               return convertToNumber(annotation\r
+                               .toArray(new String[annotation.size()]));\r
+       }\r
+\r
+       /**\r
+        * Reads a JRonn horizontally formatted output file.\r
+        * \r
+        * @param result\r
+        *            the JRonn output file\r
+        * @return the result of {@link #readJRonn(InputStream)} for the file\r
+        *         content\r
+        * @throws IOException\r
+        *             if the file cannot be opened, read or closed\r
+        * @throws UnknownFileFormatException\r
+        *             if the content is not recognised as JRonn output\r
+        */\r
+       public static Map<String, Score> readJRonn(final File result)\r
+                       throws IOException, UnknownFileFormatException {\r
+               InputStream input = new FileInputStream(result);\r
+               // try/finally: do not leak the file handle when parsing fails\r
+               try {\r
+                       return readJRonn(input);\r
+               } finally {\r
+                       input.close();\r
+               }\r
+       }\r
+\r
+       /**\r
+        * Reader for JRonn horizontal file format\r
+        * \r
+        * <pre>\r
+        * &gt;Foobar M G D T T A G 0.48 0.42\r
+        * 0.42 0.48 0.52 0.53 0.54\r
+        * \r
+        * </pre>\r
+        * Where all values are tab delimited\r
         * \r
         * @param inStream\r
-        * @return\r
+        *            the InputStream connected to the JRonn output file\r
+        * @return Map of sequence name to the JRonn {@link Score} read for it\r
         * @throws IOException\r
+        *             is thrown if the inStream has problems accessing the data\r
         * @throws UnknownFileFormatException\r
+        *             is thrown if the inStream represents an unknown source of\r
+        * data, i.e. not a JRonn output\r
         */\r
-       public static List<AnnotatedSequence> readJRonn(final InputStream inStream)\r
+       public static Map<String, Score> readJRonn(final InputStream inStream)\r
                        throws IOException, UnknownFileFormatException {\r
-               final List<AnnotatedSequence> seqs = new ArrayList<AnnotatedSequence>();\r
+               final Map<String, Score> seqs = new HashMap<String, Score>();\r
 \r
                final BufferedReader infasta = new BufferedReader(\r
                                new InputStreamReader(inStream, "UTF8"), 16000);\r
@@ -340,7 +464,7 @@ public final class SequenceUtil {
                                                        "File does not look like Jronn horizontally formatted output file!\n"\r
                                                                        + JRONN_WRONG_FORMAT_MESSAGE);\r
                                }\r
-                               seqs.add(new AnnotatedSequence(sname, sequence, annotation));\r
+                               seqs.put(sname, new Score(DisorderMethod.JRonn, annotation));\r
                        }\r
                } while (line != null);\r
 \r
@@ -391,63 +515,329 @@ public final class SequenceUtil {
 \r
        /**\r
         * \r
-        * TODO complete!\r
+        * &gt; Foobar_dundeefriends\r
+        * \r
+        * # COILS 34-41, 50-58, 83-91, 118-127, 160-169, 191-220, 243-252, 287-343\r
+        * \r
+        * # REM465 355-368\r
+        * \r
+        * # HOTLOOPS 190-204\r
+        * \r
+        * # RESIDUE COILS REM465 HOTLOOPS\r
+        * \r
+        * M 0.86010 0.88512 0.37094\r
+        * \r
+        * T 0.79983 0.85864 0.44331\r
+        * \r
+        * >Next Sequence name\r
         * \r
-        * # RESIDUE COILS REM465 HOTLOOPS M 0.86010 0.88512 0.37094 T 0.79983\r
-        * 0.85864 0.44331 .... # RESIDUE COILS REM465 HOTLOOPS M 0.86010 0.88512\r
-        * 0.37094\r
         * \r
         * @param input\r
         * @return\r
         * @throws IOException\r
         * @throws UnknownFileFormatException\r
         */\r
-       public static List<MultiAnnotatedSequence<DisemblResultAnnot>> readDisembl(\r
+       public static HashMap<String, Set<Score>> readDisembl(\r
                        final InputStream input) throws IOException,\r
                        UnknownFileFormatException {\r
                Scanner scan = new Scanner(input);\r
-               scan.useDelimiter("# RESIDUE COILS REM465 HOTLOOPS\n");\r
+               scan.useDelimiter(">");\r
                if (!scan.hasNext()) {\r
                        throw new UnknownFileFormatException(\r
-                                       "In Disembl score format each seqeunce score is expected to start from the line: "\r
-                                                       + "'# RESIDUE COILS REM465 HOTLOOPS\\n'."\r
+                                       "In Disembl score format each sequence score is expected "\r
+                                                       + "to start from the line: >Sequence name "\r
                                                        + " No such line was found!");\r
                }\r
 \r
-               List<MultiAnnotatedSequence<DisemblResultAnnot>> results = new ArrayList<MultiAnnotatedSequence<DisemblResultAnnot>>();\r
+               HashMap<String, Set<Score>> results = new HashMap<String, Set<Score>>();\r
                int seqCounter = 0;\r
                while (scan.hasNext()) {\r
                        seqCounter++;\r
                        String singleSeq = scan.next();\r
                        Scanner scansingle = new Scanner(singleSeq);\r
+                       if (!scansingle.hasNextLine()) {\r
+                               throw new RuntimeException(\r
+                                               "The input looks like an incomplete disembl file - cannot parse!");\r
+                       }\r
+\r
                        StringBuffer seqbuffer = new StringBuffer();\r
-                       List<Float> coils = new ArrayList<Float>();\r
-                       List<Float> rem = new ArrayList<Float>();\r
-                       List<Float> hotloops = new ArrayList<Float>();\r
-\r
-                       MultiAnnotatedSequence<DisemblResultAnnot> disemblRes = new MultiAnnotatedSequence<DisemblResultAnnot>(\r
-                                       DisemblResultAnnot.class);\r
-\r
-                       while (scansingle.hasNextLine()) {\r
-                               String valueLine = scansingle.nextLine();\r
-                               Scanner values = new Scanner(valueLine);\r
-                               seqbuffer.append(values.next());\r
-                               coils.add(values.nextFloat());\r
-                               rem.add(values.nextFloat());\r
-                               hotloops.add(values.nextFloat());\r
-                               values.close();\r
+                       ArrayList<Float> coils = new ArrayList<Float>();\r
+                       ArrayList<Float> rem = new ArrayList<Float>();\r
+                       ArrayList<Float> hotloops = new ArrayList<Float>();\r
+\r
+                       // header: name line, then one range line per result type in the\r
+                       // order COILS, REM465, HOTLOOPS\r
+                       String sequenceName = scansingle.nextLine().trim();\r
+                       TreeSet<Range> coilsR = parseRanges(DisemblResult.COILS,\r
+                                       scansingle.nextLine());\r
+                       TreeSet<Range> rem465R = parseRanges(DisemblResult.REM465,\r
+                                       scansingle.nextLine());\r
+                       TreeSet<Range> loopsR = parseRanges(DisemblResult.HOTLOOPS,\r
+                                       scansingle.nextLine());\r
+\r
+                       String title = scansingle.nextLine();\r
+                       assert title.startsWith("# RESIDUE COILS REM465 HOTLOOPS") : ">Sequence_name must follow column title: # RESIDUE COILS REM465 HOTLOOPS!";\r
+\r
+                       // body: residue letter followed by its COILS, REM465 and HOTLOOPS\r
+                       // values\r
+                       while (scansingle.hasNext()) {\r
+                               seqbuffer.append(scansingle.next());\r
+                               coils.add(scansingle.nextFloat());\r
+                               rem.add(scansingle.nextFloat());\r
+                               hotloops.add(scansingle.nextFloat());\r
                        }\r
-                       disemblRes.addAnnotation(DisemblResultAnnot.COILS, coils);\r
-                       disemblRes.addAnnotation(DisemblResultAnnot.REM465, rem);\r
-                       disemblRes.addAnnotation(DisemblResultAnnot.HOTLOOPS, hotloops);\r
-                       // TODO\r
-                       // disemblRes.sequence = seqbuffer.toString();\r
+                       /*\r
+                        * Also possible FastaSequence fs = new FastaSequence(sequenceName,\r
+                        * seqbuffer.toString());\r
+                        */\r
+                       HashSet<Score> scores = new HashSet<Score>();\r
+                       // every score column is paired with the ranges parsed from the\r
+                       // header line of the same result type\r
+                       scores.add(new Score(DisemblResult.COILS, coils, coilsR));\r
+                       scores.add(new Score(DisemblResult.HOTLOOPS, hotloops, loopsR));\r
+                       scores.add(new Score(DisemblResult.REM465, rem, rem465R));\r
+                       results.put(sequenceName, scores);\r
+\r
                        scansingle.close();\r
-                       results.add(disemblRes);\r
                }\r
+               scan.close();\r
+               input.close();\r
+               return results;\r
+       }\r
+\r
+       /**\r
+        * Parses one range header line such as\r
+        * \r
+        * <pre>\r
+        * # COILS 34-41, 50-58, 83-91, 118-127, 160-169, 191-220, 243-252, 287-343\r
+        * # REM465 355-368\r
+        * # HOTLOOPS 190-204\r
+        * </pre>\r
+        * \r
+        * @param resultType\r
+        *            the result type the line is expected to be labelled with\r
+        * @param lines\r
+        *            the header line to parse\r
+        * @return the ranges listed after the label, empty if there are none\r
+        */\r
+       private static TreeSet<Range> parseRanges(Enum resultType, String lines) {\r
+               final TreeSet<Range> parsed = new TreeSet<Range>();\r
+               final Scanner tokens = new Scanner(lines);\r
+\r
+               // the line opens with the '#' marker followed by the result name\r
+               assert tokens.hasNext();\r
+               final String marker = tokens.next();\r
+               assert "#".equals(marker);\r
+               final String typeName = tokens.next();\r
+               assert resultType.toString().equalsIgnoreCase(typeName) : "Unknown result type: "\r
+                               + resultType.toString();\r
+\r
+               // what is left is a comma separated list of from-to pairs\r
+               tokens.useDelimiter(",");\r
+               while (tokens.hasNext()) {\r
+                       final String fromTo = tokens.next();\r
+                       if (Util.isEmpty(fromTo)) {\r
+                               continue;\r
+                       }\r
+                       parsed.add(new Range(fromTo.split("-")));\r
+               }\r
+               return parsed;\r
+       }\r
 \r
+       /**\r
+        * Parses GlobPlot output. Records are separated by the '&gt;' character\r
+        * and each record is read in this order: the sequence name line, a\r
+        * GlobDoms range line, a Disorder range line, the column title line and\r
+        * then one row per residue holding the residue followed by its dydx, raw\r
+        * and smoothed values:\r
+        * \r
+        * <pre>\r
+        * &gt;Sequence name\r
+        * # GlobDoms from-to, from-to\r
+        * # Disorder from-to, from-to\r
+        * # RESIDUE DYDX RAW SMOOTHED\r
+        * M 0.0 0.0 0.0\r
+        * &gt;Next Sequence name\r
+        * </pre>\r
+        * \r
+        * NOTE(review): the layout above is reconstructed from the parsing code\r
+        * and the numbers are placeholders - confirm against a real GlobPlot\r
+        * file.\r
+        * \r
+        * NOTE(review): the scores are collected in a TreeSet here while\r
+        * readDisembl uses a HashSet; a TreeSet presumably needs Score to be\r
+        * Comparable - confirm.\r
+        * \r
+        * @param input\r
+        *            stream with the GlobPlot output, closed by this method\r
+        * @return sequence name mapped to its five scores: the Disorder and\r
+        *         GlobDoms ranges and the Dydx, RawScore and SmoothedScore values\r
+        * @throws IOException\r
+        *             if closing the stream fails\r
+        * @throws UnknownFileFormatException\r
+        *             if the input holds no record at all\r
+        */\r
+       public static HashMap<String, Set<Score>> readGlobPlot(\r
+                       final InputStream input) throws IOException,\r
+                       UnknownFileFormatException {\r
+               Scanner scan = new Scanner(input);\r
+               scan.useDelimiter(">");\r
+               if (!scan.hasNext()) {\r
+                       throw new UnknownFileFormatException(\r
+                                       "In GlobPlot score format each sequence score is expected "\r
+                                                       + "to start from the line: >Sequence name "\r
+                                                       + " No such line was found!");\r
+               }\r
+\r
+               HashMap<String, Set<Score>> results = new HashMap<String, Set<Score>>();\r
+               int seqCounter = 0;\r
+               while (scan.hasNext()) {\r
+                       seqCounter++;\r
+                       String singleSeq = scan.next();\r
+                       Scanner scansingle = new Scanner(singleSeq);\r
+                       if (!scansingle.hasNextLine()) {\r
+                               throw new RuntimeException(\r
+                                               "The input looks like an incomplete GlobPlot file - cannot parse!");\r
+                       }\r
+\r
+                       StringBuffer seqbuffer = new StringBuffer();\r
+                       ArrayList<Float> dydxScore = new ArrayList<Float>();\r
+                       ArrayList<Float> rawScore = new ArrayList<Float>();\r
+                       ArrayList<Float> smoothedScore = new ArrayList<Float>();\r
+\r
+                       String sequenceName = scansingle.nextLine().trim();\r
+                       TreeSet<Range> domsR = parseRanges(GlobProtResult.GlobDoms,\r
+                                       scansingle.nextLine());\r
+                       TreeSet<Range> disorderR = parseRanges(GlobProtResult.Disorder,\r
+                                       scansingle.nextLine());\r
+\r
+                       String title = scansingle.nextLine();\r
+                       assert title.startsWith("# RESIDUE      DYDX") : ">Sequence_name must follow column title: # RESIDUE DYDX RAW SMOOTHED!";\r
+\r
+                       while (scansingle.hasNext()) {\r
+                               seqbuffer.append(scansingle.next());\r
+                               dydxScore.add(scansingle.nextFloat());\r
+                               rawScore.add(scansingle.nextFloat());\r
+                               smoothedScore.add(scansingle.nextFloat());\r
+                       }\r
+                       /*\r
+                        * Also possible FastaSequence fs = new FastaSequence(sequenceName,\r
+                        * seqbuffer.toString());\r
+                        */\r
+                       Set<Score> scores = new TreeSet<Score>();\r
+                       scores.add(new Score(GlobProtResult.Disorder, disorderR));\r
+                       scores.add(new Score(GlobProtResult.GlobDoms, domsR));\r
+                       scores.add(new Score(GlobProtResult.Dydx, dydxScore));\r
+                       scores.add(new Score(GlobProtResult.RawScore, rawScore));\r
+                       scores.add(new Score(GlobProtResult.SmoothedScore, smoothedScore));\r
+                       results.put(sequenceName, scores);\r
+\r
+                       scansingle.close();\r
+               }\r
+               scan.close();\r
                input.close();\r
                return results;\r
        }\r
+       /**\r
+        * Reads AACon results that come without an alignment. Records are\r
+        * separated by '#'; each record is a method name, a space and the\r
+        * values calculated by that method. This method leaves the incoming\r
+        * InputStream open!\r
+        * \r
+        * @param results\r
+        *            output of AAConservation, must not be null\r
+        * @return one {@link Score} per method found in the input\r
+        * @throws NullPointerException\r
+        *             if results is null\r
+        */\r
+       public static HashSet<Score> readAAConResults(InputStream results) {\r
+               if (results == null) {\r
+                       throw new NullPointerException(\r
+                                       "InputStream with results must be provided");\r
+               }\r
+               final HashSet<Score> scores = new HashSet<Score>();\r
+               // never closed on purpose: closing the scanner would close the stream\r
+               final Scanner records = new Scanner(results);\r
+               records.useDelimiter("#");\r
+               while (records.hasNext()) {\r
+                       final String record = records.next();\r
+                       final int nameEnd = record.indexOf(" ");\r
+                       assert nameEnd > 0 : "Space is expected as delimited between method "\r
+                                       + "name and values!";\r
+                       final String methodName = record.substring(0, nameEnd);\r
+                       final ConservationMethod method = ConservationMethod\r
+                                       .getMethod(methodName);\r
+                       assert method != null : "Method " + methodName\r
+                                       + " is not recognized! ";\r
+\r
+                       // everything after the method name is read as numbers\r
+                       final Scanner numbers = new Scanner(record.substring(nameEnd));\r
+                       final ArrayList<Float> values = new ArrayList<Float>();\r
+                       while (numbers.hasNextDouble()) {\r
+                               values.add((float) numbers.nextDouble());\r
+                       }\r
+                       scores.add(new Score(method, values));\r
+               }\r
+               return scores;\r
+       }\r
+\r
+       /**\r
+        * Reads and parses Fasta or Clustal formatted file into a list of\r
+        * FastaSequence objects. The file is opened twice: once to find out\r
+        * whether it is a Clustal alignment and once to parse it. Both streams\r
+        * are closed before the method returns, whether it succeeds or fails.\r
+        * \r
+        * @param inFilePath\r
+        *            the path to the input file\r
+        * @throws IOException\r
+        *             if the file denoted by inFilePath cannot be read\r
+        * @throws UnknownFileFormatException\r
+        *             if the inFilePath points to the file which format cannot be\r
+        *             recognised\r
+        * @return the List of FastaSequence objects\r
+        * \r
+        */\r
+       public static List<FastaSequence> openInputStream(String inFilePath)\r
+                       throws IOException, UnknownFileFormatException {\r
+\r
+               // The validation pass gets a stream of its own; closing it here as\r
+               // well is harmless if the validator has already closed it\r
+               InputStream inStrForValidation = new FileInputStream(inFilePath);\r
+               boolean isClustal;\r
+               try {\r
+                       isClustal = ClustalAlignmentUtil\r
+                                       .isValidClustalFile(inStrForValidation);\r
+               } finally {\r
+                       inStrForValidation.close();\r
+               }\r
+\r
+               // Opened only after validation so that nothing is left open when\r
+               // validation fails\r
+               InputStream inStr = new FileInputStream(inFilePath);\r
+               try {\r
+                       if (isClustal) {\r
+                               // alignment cannot be null see\r
+                               // ClustalAlignmentUtil.readClustalFile(inStr);\r
+                               Alignment al = ClustalAlignmentUtil.readClustalFile(inStr);\r
+                               return al.getSequences();\r
+                       }\r
+                       return SequenceUtil.readFasta(inStr);\r
+               } finally {\r
+                       inStr.close();\r
+               }\r
+       }\r
+\r
+}\r
 \r
+enum DisemblResult {\r
+       /** Each of these results carries both ranges and scores */\r
+       COILS, REM465, HOTLOOPS\r
+}\r
+enum GlobProtResult {\r
+       /** This is a range with no scores */\r
+       GlobDoms,\r
+       /** This is a range with no scores */\r
+       Disorder,\r
+       /** This is a score with no range */\r
+       Dydx,\r
+       /** This is a score with no range */\r
+       SmoothedScore,\r
+       /** This is a score with no range */\r
+       RawScore\r
 }\r
+\r
+enum IUPredResult {\r
+       /** Short disorder */\r
+       Short,\r
+       /** Long disorder */\r
+       Long,\r
+       /** Globular domains */\r
+       Glob;\r
+\r
+       /**\r
+        * Derives the result type from the end of the file name, which is\r
+        * compared with the lower-cased constant names.\r
+        * \r
+        * @param file\r
+        *            the IUPred result file\r
+        * @return the type whose lower-cased name ends the file name\r
+        * @throws AssertionError\r
+        *             if the name ends with none of long, short or glob\r
+        */\r
+       static IUPredResult getType(File file) {\r
+               assert file != null;\r
+               final String fileName = file.getName();\r
+               // probed in the order long, short, glob\r
+               final IUPredResult[] candidates = { Long, Short, Glob };\r
+               for (IUPredResult candidate : candidates) {\r
+                       final String suffix = candidate.toString().toLowerCase();\r
+                       if (fileName.endsWith(suffix)) {\r
+                               return candidate;\r
+                       }\r
+               }\r
+               throw new AssertionError(\r
+                               "IUPred result file type cannot be recognised! "\r
+                                               + "\nFile must ends with one of [glob, long or short]"\r
+                                               + "\n but given file name was: " + file.getName());\r
+       }\r
+}
\ No newline at end of file