More work on the disorder prediction client & services.
[jabaws.git] / datamodel / compbio / data / sequence / SequenceUtil.java
index c4e1def..e737575 100644 (file)
@@ -30,6 +30,7 @@ import java.util.HashSet;
 import java.util.List;\r
 import java.util.Map;\r
 import java.util.Scanner;\r
+import java.util.Set;\r
 import java.util.TreeSet;\r
 import java.util.logging.Level;\r
 import java.util.regex.Matcher;\r
@@ -169,6 +170,17 @@ public final class SequenceUtil {
        }\r
 \r
        /**\r
+        * Strips from the sequence every match of the NON_AA pattern, i.e. all\r
+        * characters that are not amino acid (AA) codes.\r
+        * \r
+        * @param sequence the sequence to clean\r
+        * @return the sequence with every NON_AA match replaced by ""\r
+        */\r
+       public static String cleanProteinSequence(String sequence) {\r
+               return SequenceUtil.NON_AA.matcher(sequence).replaceAll("");\r
+       }\r
+\r
+       /**\r
         * @param sequence\r
         * @return true is the sequence is a protein sequence, false overwise\r
         */\r
@@ -429,7 +441,7 @@ public final class SequenceUtil {
         * @throws IOException\r
         * @throws UnknownFileFormatException\r
         */\r
-       public static HashMap<FastaSequence, HashSet<Score>> readDisembl(\r
+       public static HashMap<String, Set<Score>> readDisembl(\r
                        final InputStream input) throws IOException,\r
                        UnknownFileFormatException {\r
                Scanner scan = new Scanner(input);\r
@@ -441,7 +453,7 @@ public final class SequenceUtil {
                                                        + " No such line was found!");\r
                }\r
 \r
-               HashMap<FastaSequence, HashSet<Score>> results = new HashMap<FastaSequence, HashSet<Score>>();\r
+               HashMap<String, Set<Score>> results = new HashMap<String, Set<Score>>();\r
                int seqCounter = 0;\r
                while (scan.hasNext()) {\r
                        seqCounter++;\r
@@ -474,13 +486,15 @@ public final class SequenceUtil {
                                rem.add(scansingle.nextFloat());\r
                                hotloops.add(scansingle.nextFloat());\r
                        }\r
-                       FastaSequence fs = new FastaSequence(sequenceName,\r
-                                       seqbuffer.toString());\r
+                       /*\r
+                        * Alternatively, results could be keyed by: FastaSequence fs = new\r
+                        * FastaSequence(sequenceName, seqbuffer.toString());\r
+                        */\r
                        HashSet<Score> scores = new HashSet<Score>();\r
                        scores.add(new Score(DisemblResult.COILS, coils, coilsR));\r
                        scores.add(new Score(DisemblResult.HOTLOOPS, hotloops, rem465R));\r
                        scores.add(new Score(DisemblResult.REM465, rem, loopsR));\r
-                       results.put(fs, scores);\r
+                       results.put(sequenceName, scores);\r
 \r
                        scansingle.close();\r
                }\r
@@ -525,16 +539,6 @@ public final class SequenceUtil {
                return ranges;\r
        }\r
 \r
-       public static HashMap<String, HashSet<Score>> removeSequences(\r
-                       HashMap<FastaSequence, HashSet<Score>> disemblResults) {\r
-               HashMap<String, HashSet<Score>> seqNameScores = new HashMap<String, HashSet<Score>>();\r
-               for (Map.Entry<FastaSequence, HashSet<Score>> dres : disemblResults\r
-                               .entrySet()) {\r
-                       seqNameScores.put(dres.getKey().getId(), dres.getValue());\r
-               }\r
-               return seqNameScores;\r
-       }\r
-\r
        /**\r
         * \r
         > Foobar_dundeefriends\r
@@ -559,7 +563,7 @@ public final class SequenceUtil {
         * @throws IOException\r
         * @throws UnknownFileFormatException\r
         */\r
-       public static HashMap<FastaSequence, HashSet<Score>> readGlobPlot(\r
+       public static HashMap<String, Set<Score>> readGlobPlot(\r
                        final InputStream input) throws IOException,\r
                        UnknownFileFormatException {\r
                Scanner scan = new Scanner(input);\r
@@ -571,7 +575,7 @@ public final class SequenceUtil {
                                                        + " No such line was found!");\r
                }\r
 \r
-               HashMap<FastaSequence, HashSet<Score>> results = new HashMap<FastaSequence, HashSet<Score>>();\r
+               HashMap<String, Set<Score>> results = new HashMap<String, Set<Score>>();\r
                int seqCounter = 0;\r
                while (scan.hasNext()) {\r
                        seqCounter++;\r
@@ -602,15 +606,17 @@ public final class SequenceUtil {
                                rawScore.add(scansingle.nextFloat());\r
                                smoothedScore.add(scansingle.nextFloat());\r
                        }\r
-                       FastaSequence fs = new FastaSequence(sequenceName,\r
-                                       seqbuffer.toString());\r
-                       HashSet<Score> scores = new HashSet<Score>();\r
+                       /*\r
+                        * Alternatively, results could be keyed by: FastaSequence fs = new\r
+                        * FastaSequence(sequenceName, seqbuffer.toString());\r
+                        */\r
+                       Set<Score> scores = new TreeSet<Score>();\r
                        scores.add(new Score(GlobProtResult.Disorder, disorderR));\r
                        scores.add(new Score(GlobProtResult.GlobDoms, domsR));\r
                        scores.add(new Score(GlobProtResult.Dydx, dydxScore));\r
                        scores.add(new Score(GlobProtResult.RawScore, rawScore));\r
                        scores.add(new Score(GlobProtResult.SmoothedScore, smoothedScore));\r
-                       results.put(fs, scores);\r
+                       results.put(sequenceName, scores);\r
 \r
                        scansingle.close();\r
                }\r
@@ -620,7 +626,7 @@ public final class SequenceUtil {
        }\r
        /**\r
         * Read AACon result with no alignment files. This method leaves incoming\r
-        * the InputStream results open!\r
+        * InputStream open!\r
         * \r
         * @param results\r
         *            output file of AAConservation\r