Replace Map<String, HashSet<Score>> with ScoreManager and ScoreHolder classes to...
author pvtroshin <pvtroshin@e3abac25-378b-4346-85de-24260fe3988d>
Thu, 10 Feb 2011 11:36:12 +0000 (11:36 +0000)
committer pvtroshin <pvtroshin@e3abac25-378b-4346-85de-24260fe3988d>
Thu, 10 Feb 2011 11:36:12 +0000 (11:36 +0000)
git-svn-id: link to svn.lifesci.dundee.ac.uk/svn/barton/ptroshin/JABA2@3733 e3abac25-378b-4346-85de-24260fe3988d

datamodel/compbio/data/sequence/ScoreManager.java
datamodel/compbio/data/sequence/SequenceUtil.java
runner/compbio/runner/conservation/AACon.java
runner/compbio/runner/disorder/Disembl.java
runner/compbio/runner/disorder/GlobPlot.java
runner/compbio/runner/disorder/Jronn.java
testsrc/compbio/data/sequence/SequenceUtilTester.java
testsrc/compbio/ws/client/TestAAConWS.java

index b0f87a6..b0305e6 100644 (file)
@@ -4,25 +4,87 @@ import java.io.BufferedWriter;
 import java.io.IOException;\r
 import java.io.OutputStream;\r
 import java.io.OutputStreamWriter;\r
+import java.util.ArrayList;\r
+import java.util.HashMap;\r
 import java.util.HashSet;\r
 import java.util.List;\r
+import java.util.Map;\r
+import java.util.Set;\r
 \r
+import javax.xml.bind.annotation.XmlAccessType;\r
+import javax.xml.bind.annotation.XmlAccessorType;\r
+\r
+@XmlAccessorType(XmlAccessType.FIELD)\r
 public class ScoreManager {\r
 \r
+       public static final String SINGLE_ENTRY_KEY = "Alignment";\r
+\r
        private List<ScoreHolder> seqScores;\r
 \r
        private ScoreManager() {\r
                // Default JAXB constructor\r
        }\r
 \r
-       public int getNumberOfSeq() {\r
-               return seqScores.size();\r
+       private ScoreManager(String id, Set<Score> data) {\r
+               seqScores = new ArrayList<ScoreManager.ScoreHolder>();\r
+               seqScores.add(new ScoreHolder(id, data));\r
        }\r
 \r
-       public ScoreHolder getFirstSequenceScores() {\r
-               if (seqScores.isEmpty())\r
-                       return null;\r
-               return seqScores.get(0);\r
+       private ScoreManager(Map<String, Set<Score>> data) {\r
+               List<ScoreHolder> seqScores = new ArrayList<ScoreHolder>();\r
+               for (Map.Entry<String, Set<Score>> singleSeqScores : data.entrySet()) {\r
+                       seqScores.add(new ScoreHolder(singleSeqScores.getKey(),\r
+                                       singleSeqScores.getValue()));\r
+               }\r
+               this.seqScores = seqScores;\r
+       }\r
+\r
+       public static ScoreManager newInstance(Map<String, Set<Score>> data) {\r
+               return new ScoreManager(data);\r
+       }\r
+\r
+       public static ScoreManager newInstanceSingleScore(\r
+                       Map<String, Score> seqScoresMap) {\r
+               Map<String, Set<Score>> multipleScoresMap = new HashMap<String, Set<Score>>();\r
+               for (Map.Entry<String, Score> seqScore : seqScoresMap.entrySet()) {\r
+                       Set<Score> scores = new HashSet<Score>();\r
+                       scores.add(seqScore.getValue());\r
+                       multipleScoresMap.put(seqScore.getKey(), scores);\r
+               }\r
+               return new ScoreManager(multipleScoresMap);\r
+       }\r
+\r
+       public static ScoreManager newInstanceSingleSequence(Set<Score> data) {\r
+               return new ScoreManager(ScoreManager.SINGLE_ENTRY_KEY, data);\r
+       }\r
+\r
+       public Map<String, Set<Score>> asMap() {\r
+               Map<String, Set<Score>> seqScoresMap = new HashMap<String, Set<Score>>();\r
+               for (ScoreHolder sch : this.seqScores) {\r
+                       Set<Score> oldValue = seqScoresMap.put(sch.id, sch.scores);\r
+                       if (oldValue != null) {\r
+                               throw new IllegalStateException(\r
+                                               "Cannot represent this ScoreManager instance "\r
+                                                               + "as a Map as it contains duplicated keys: "\r
+                                                               + sch.id);\r
+                       }\r
+               }\r
+               return seqScoresMap;\r
+       }\r
+\r
+       public Set<Score> asSet() {\r
+               if (seqScores.size() == 0 || seqScores.size() > 1) {\r
+                       throw new IllegalStateException(\r
+                                       "This ScoreManager has no or multiple sequence entries and thus "\r
+                                                       + "cannot be represented as a Set. Number of entries are: "\r
+                                                       + seqScores.size());\r
+               }\r
+               ScoreHolder sch = seqScores.get(0);\r
+               return sch.scores;\r
+       }\r
+\r
+       public int getNumberOfSeq() {\r
+               return seqScores.size();\r
        }\r
 \r
        public ScoreHolder getAnnotationForSequence(String seqId) {\r
@@ -43,10 +105,20 @@ public class ScoreManager {
                writer.flush();\r
        }\r
 \r
+       @XmlAccessorType(XmlAccessType.FIELD)\r
        public static class ScoreHolder {\r
 \r
                public String id;\r
-               public HashSet<Score> scores;\r
+               public Set<Score> scores;\r
+\r
+               private ScoreHolder() {\r
+                       // JAXB Default constructor should not be used otherwise\r
+               }\r
+\r
+               ScoreHolder(String id, Set<Score> scores) {\r
+                       this.id = id;\r
+                       this.scores = scores;\r
+               }\r
 \r
                public void writeOut(OutputStream outStream) throws IOException {\r
                        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(\r
@@ -55,12 +127,6 @@ public class ScoreManager {
                        Score.write(scores, outStream);\r
                }\r
 \r
-               public Score getFirstScore() {\r
-                       if (scores.isEmpty())\r
-                               return null;\r
-                       return scores.iterator().next();\r
-               }\r
-\r
                public Score getScoreByMethod(Enum<?> method) {\r
                        for (Score sc : scores) {\r
                                if (method == sc.getMethod()) {\r
index c4e1def..d0a6cd7 100644 (file)
@@ -30,6 +30,7 @@ import java.util.HashSet;
 import java.util.List;\r
 import java.util.Map;\r
 import java.util.Scanner;\r
+import java.util.Set;\r
 import java.util.TreeSet;\r
 import java.util.logging.Level;\r
 import java.util.regex.Matcher;\r
@@ -429,7 +430,7 @@ public final class SequenceUtil {
         * @throws IOException\r
         * @throws UnknownFileFormatException\r
         */\r
-       public static HashMap<FastaSequence, HashSet<Score>> readDisembl(\r
+       public static HashMap<String, Set<Score>> readDisembl(\r
                        final InputStream input) throws IOException,\r
                        UnknownFileFormatException {\r
                Scanner scan = new Scanner(input);\r
@@ -441,7 +442,7 @@ public final class SequenceUtil {
                                                        + " No such line was found!");\r
                }\r
 \r
-               HashMap<FastaSequence, HashSet<Score>> results = new HashMap<FastaSequence, HashSet<Score>>();\r
+               HashMap<String, Set<Score>> results = new HashMap<String, Set<Score>>();\r
                int seqCounter = 0;\r
                while (scan.hasNext()) {\r
                        seqCounter++;\r
@@ -474,13 +475,15 @@ public final class SequenceUtil {
                                rem.add(scansingle.nextFloat());\r
                                hotloops.add(scansingle.nextFloat());\r
                        }\r
-                       FastaSequence fs = new FastaSequence(sequenceName,\r
-                                       seqbuffer.toString());\r
+                       /*\r
+                        * Also possible FastaSequence fs = new FastaSequence(sequenceName,\r
+                        * seqbuffer.toString());\r
+                        */\r
                        HashSet<Score> scores = new HashSet<Score>();\r
                        scores.add(new Score(DisemblResult.COILS, coils, coilsR));\r
                        scores.add(new Score(DisemblResult.HOTLOOPS, hotloops, rem465R));\r
                        scores.add(new Score(DisemblResult.REM465, rem, loopsR));\r
-                       results.put(fs, scores);\r
+                       results.put(sequenceName, scores);\r
 \r
                        scansingle.close();\r
                }\r
@@ -525,16 +528,6 @@ public final class SequenceUtil {
                return ranges;\r
        }\r
 \r
-       public static HashMap<String, HashSet<Score>> removeSequences(\r
-                       HashMap<FastaSequence, HashSet<Score>> disemblResults) {\r
-               HashMap<String, HashSet<Score>> seqNameScores = new HashMap<String, HashSet<Score>>();\r
-               for (Map.Entry<FastaSequence, HashSet<Score>> dres : disemblResults\r
-                               .entrySet()) {\r
-                       seqNameScores.put(dres.getKey().getId(), dres.getValue());\r
-               }\r
-               return seqNameScores;\r
-       }\r
-\r
        /**\r
         * \r
         > Foobar_dundeefriends\r
@@ -559,7 +552,7 @@ public final class SequenceUtil {
         * @throws IOException\r
         * @throws UnknownFileFormatException\r
         */\r
-       public static HashMap<FastaSequence, HashSet<Score>> readGlobPlot(\r
+       public static HashMap<String, Set<Score>> readGlobPlot(\r
                        final InputStream input) throws IOException,\r
                        UnknownFileFormatException {\r
                Scanner scan = new Scanner(input);\r
@@ -571,7 +564,7 @@ public final class SequenceUtil {
                                                        + " No such line was found!");\r
                }\r
 \r
-               HashMap<FastaSequence, HashSet<Score>> results = new HashMap<FastaSequence, HashSet<Score>>();\r
+               HashMap<String, Set<Score>> results = new HashMap<String, Set<Score>>();\r
                int seqCounter = 0;\r
                while (scan.hasNext()) {\r
                        seqCounter++;\r
@@ -602,15 +595,17 @@ public final class SequenceUtil {
                                rawScore.add(scansingle.nextFloat());\r
                                smoothedScore.add(scansingle.nextFloat());\r
                        }\r
-                       FastaSequence fs = new FastaSequence(sequenceName,\r
-                                       seqbuffer.toString());\r
+                       /*\r
+                        * Also possible FastaSequence fs = new FastaSequence(sequenceName,\r
+                        * seqbuffer.toString());\r
+                        */\r
                        HashSet<Score> scores = new HashSet<Score>();\r
                        scores.add(new Score(GlobProtResult.Disorder, disorderR));\r
                        scores.add(new Score(GlobProtResult.GlobDoms, domsR));\r
                        scores.add(new Score(GlobProtResult.Dydx, dydxScore));\r
                        scores.add(new Score(GlobProtResult.RawScore, rawScore));\r
                        scores.add(new Score(GlobProtResult.SmoothedScore, smoothedScore));\r
-                       results.put(fs, scores);\r
+                       results.put(sequenceName, scores);\r
 \r
                        scansingle.close();\r
                }\r
@@ -620,7 +615,7 @@ public final class SequenceUtil {
        }\r
        /**\r
         * Read AACon result with no alignment files. This method leaves incoming\r
-        * the InputStream results open!\r
+        * InputStream open!\r
         * \r
         * @param results\r
         *            output file of AAConservation\r
index f43db89..fd76e55 100644 (file)
@@ -20,13 +20,12 @@ import java.io.FileNotFoundException;
 import java.io.IOException;\r
 import java.io.InputStream;\r
 import java.util.Arrays;\r
-import java.util.HashSet;\r
 import java.util.List;\r
 \r
 import org.apache.log4j.Logger;\r
 \r
 import compbio.data.sequence.SMERFSConstraints;\r
-import compbio.data.sequence.Score;\r
+import compbio.data.sequence.ScoreManager;\r
 import compbio.data.sequence.SequenceUtil;\r
 import compbio.engine.client.CommandBuilder;\r
 import compbio.engine.client.Executable;\r
@@ -72,13 +71,14 @@ public class AACon extends SkeletalExecutable<AACon> {
        }\r
        // HashMap<Method, float[]>\r
        @Override\r
-       public HashSet<Score> getResults(String workDirectory)\r
+       public ScoreManager getResults(String workDirectory)\r
                        throws ResultNotAvailableException {\r
-               HashSet<Score> annotations = null;\r
+               ScoreManager annotations = null;\r
                try {\r
                        InputStream inStream = new FileInputStream(new File(workDirectory,\r
                                        getOutput()));\r
-                       annotations = SequenceUtil.readAAConResults(inStream);\r
+                       annotations = ScoreManager.newInstanceSingleSequence(SequenceUtil\r
+                                       .readAAConResults(inStream));\r
                        inStream.close();\r
                } catch (FileNotFoundException e) {\r
                        log.error(e.getMessage(), e.getCause());\r
index 616b552..6b014ba 100644 (file)
@@ -19,12 +19,10 @@ import java.io.FileInputStream;
 import java.io.FileNotFoundException;\r
 import java.io.IOException;\r
 import java.io.InputStream;\r
-import java.util.HashMap;\r
-import java.util.HashSet;\r
 \r
 import org.apache.log4j.Logger;\r
 \r
-import compbio.data.sequence.Score;\r
+import compbio.data.sequence.ScoreManager;\r
 import compbio.data.sequence.SequenceUtil;\r
 import compbio.data.sequence.UnknownFileFormatException;\r
 import compbio.engine.client.Executable;\r
@@ -85,15 +83,15 @@ public class Disembl extends SkeletalExecutable<Disembl>
 \r
        @SuppressWarnings("unchecked")\r
        @Override\r
-       public HashMap<String, HashSet<Score>> getResults(String workDirectory)\r
+       public ScoreManager getResults(String workDirectory)\r
                        throws ResultNotAvailableException {\r
 \r
                InputStream inStream = null;\r
-               HashMap<String, HashSet<Score>> results = null;\r
+               ScoreManager results = null;\r
 \r
                try {\r
                        inStream = new FileInputStream(new File(workDirectory, getOutput()));\r
-                       results = SequenceUtil.removeSequences(SequenceUtil\r
+                       results = ScoreManager.newInstance(SequenceUtil\r
                                        .readDisembl(inStream));\r
                        inStream.close();\r
                } catch (FileNotFoundException e) {\r
@@ -112,7 +110,6 @@ public class Disembl extends SkeletalExecutable<Disembl>
                log.trace("DRESULTS: " + results);\r
                return results;\r
        }\r
-\r
        @Override\r
        public Disembl setInput(String inFile) {\r
                super.setInput(inFile);\r
index 99420f5..ce08077 100644 (file)
@@ -19,12 +19,10 @@ import java.io.FileInputStream;
 import java.io.FileNotFoundException;\r
 import java.io.IOException;\r
 import java.io.InputStream;\r
-import java.util.HashMap;\r
-import java.util.HashSet;\r
 \r
 import org.apache.log4j.Logger;\r
 \r
-import compbio.data.sequence.Score;\r
+import compbio.data.sequence.ScoreManager;\r
 import compbio.data.sequence.SequenceUtil;\r
 import compbio.data.sequence.UnknownFileFormatException;\r
 import compbio.engine.client.Executable;\r
@@ -70,15 +68,15 @@ public class GlobPlot extends SkeletalExecutable<GlobPlot>
        }\r
 \r
        @SuppressWarnings("unchecked")\r
-       public HashMap<String, HashSet<Score>> getResults(String workDirectory)\r
+       public ScoreManager getResults(String workDirectory)\r
                        throws ResultNotAvailableException {\r
 \r
                InputStream inStream = null;\r
-               HashMap<String, HashSet<Score>> results = null;\r
+               ScoreManager results = null;\r
                // How about getting ranges?\r
                try {\r
                        inStream = new FileInputStream(new File(workDirectory, getOutput()));\r
-                       results = SequenceUtil.removeSequences(SequenceUtil\r
+                       results = ScoreManager.newInstance(SequenceUtil\r
                                        .readGlobPlot(inStream));\r
                        inStream.close();\r
                } catch (FileNotFoundException e) {\r
index a436456..617d3bb 100644 (file)
@@ -25,11 +25,10 @@ import java.io.IOException;
 import java.io.InputStream;\r
 import java.util.Arrays;\r
 import java.util.List;\r
-import java.util.Map;\r
 \r
 import org.apache.log4j.Logger;\r
 \r
-import compbio.data.sequence.Score;\r
+import compbio.data.sequence.ScoreManager;\r
 import compbio.data.sequence.SequenceUtil;\r
 import compbio.data.sequence.UnknownFileFormatException;\r
 import compbio.engine.client.CommandBuilder;\r
@@ -73,13 +72,14 @@ public class Jronn extends SkeletalExecutable<Jronn> {
 \r
        @SuppressWarnings("unchecked")\r
        @Override\r
-       public Map<String, Score> getResults(String workDirectory)\r
+       public ScoreManager getResults(String workDirectory)\r
                        throws ResultNotAvailableException {\r
-               Map<String, Score> sequences = null;\r
+               ScoreManager sequences = null;\r
                try {\r
                        InputStream inStream = new FileInputStream(new File(workDirectory,\r
                                        getOutput()));\r
-                       sequences = SequenceUtil.readJRonn(inStream);\r
+                       sequences = ScoreManager.newInstanceSingleScore(SequenceUtil\r
+                                       .readJRonn(inStream));\r
                        inStream.close();\r
                } catch (FileNotFoundException e) {\r
                        log.error(e.getMessage(), e.getCause());\r
index a4bf4c9..682bcad 100644 (file)
@@ -165,14 +165,13 @@ public class SequenceUtilTester {
                try {\r
                        fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH\r
                                        + "disembl.out");\r
-                       Map<FastaSequence, HashSet<Score>> aseqs = SequenceUtil\r
-                                       .readDisembl(fio);\r
+                       Map<String, Set<Score>> aseqs = SequenceUtil.readDisembl(fio);\r
                        assertNotNull(aseqs);\r
                        assertEquals(aseqs.size(), 3);\r
                        // System.out.println(aseqs);\r
-                       for (FastaSequence fs : aseqs.keySet()) {\r
+                       for (String fs : aseqs.keySet()) {\r
                                assertTrue(" Foobar_dundeefriends Foobar dundeefriends "\r
-                                               .contains(fs.getId()));\r
+                                               .contains(fs));\r
                                Set<Score> scores = aseqs.get(fs);\r
                                assertEquals(scores.size(), 3);\r
                        }\r
@@ -218,15 +217,14 @@ public class SequenceUtilTester {
                try {\r
                        fio = new FileInputStream(AllTestSuit.TEST_DATA_PATH\r
                                        + "globplot.out");\r
-                       HashMap<FastaSequence, HashSet<Score>> aseqs = SequenceUtil\r
-                                       .readGlobPlot(fio);\r
+                       HashMap<String, Set<Score>> aseqs = SequenceUtil.readGlobPlot(fio);\r
                        assertNotNull(aseqs);\r
                        assertEquals(aseqs.size(), 3);\r
 \r
-                       FastaSequence fsdf = null;\r
+                       String fsdf = null;\r
                        Set<Score> scores = null;\r
-                       for (FastaSequence fs : aseqs.keySet()) {\r
-                               if ("Foobar_dundeefriends".contains(fs.getId())) {\r
+                       for (String fs : aseqs.keySet()) {\r
+                               if ("Foobar_dundeefriends".contains(fs)) {\r
                                        fsdf = fs;\r
                                        scores = aseqs.get(fs);\r
                                }\r
index 6476311..0954a2b 100644 (file)
@@ -77,12 +77,11 @@ public class TestAAConWS {
                        System.out.println("J: " + jobId);\r
                        ScoreManager result = msaws.getAnnotation(jobId);\r
                        assertNotNull(result);\r
-                       assertEquals(result.getFirstSequenceScores().getNumberOfScores(), 1);\r
+                       assertEquals(result.asSet().size(), 1);\r
 \r
-                       assertEquals(result.getFirstSequenceScores().getFirstScore()\r
-                                       .getMethod(), ConservationMethod.SHENKIN);\r
-                       List<Float> scores = result.getFirstSequenceScores()\r
-                                       .getFirstScore().getScores();\r
+                       assertEquals(result.asSet().iterator().next().getMethod(),\r
+                                       ConservationMethod.SHENKIN);\r
+                       List<Float> scores = result.asSet().iterator().next().getScores();\r
                        assertNotNull(scores);\r
                        assertEquals(scores.size(), 568);\r
 \r
@@ -92,21 +91,19 @@ public class TestAAConWS {
                                        presets.getPresetByName("Quick conservation"));\r
                        result = msaws.getAnnotation(jobId);\r
                        assertNotNull(result);\r
-                       assertEquals(result.getFirstSequenceScores().getNumberOfScores(),\r
-                                       13);\r
+                       assertEquals(result.asSet().size(), 13);\r
 \r
                        jobId = msaws.presetAnalize(fsl,\r
                                        presets.getPresetByName("Slow conservation"));\r
                        result = msaws.getAnnotation(jobId);\r
                        assertNotNull(result);\r
-                       assertEquals(result.getFirstSequenceScores().getNumberOfScores(), 5);\r
+                       assertEquals(result.asSet().size(), 5);\r
 \r
                        jobId = msaws.presetAnalize(fsl,\r
                                        presets.getPresetByName("Complete conservation"));\r
                        result = msaws.getAnnotation(jobId);\r
                        assertNotNull(result);\r
-                       assertEquals(result.getFirstSequenceScores().getNumberOfScores(),\r
-                                       18);\r
+                       assertEquals(result.asSet().size(), 18);\r
 \r
                } catch (UnsupportedRuntimeException e) {\r
                        e.printStackTrace();\r
@@ -145,21 +142,19 @@ public class TestAAConWS {
                                        presets.getPresetByName("Quick conservation"));\r
                        ScoreManager result = msaws.getAnnotation(jobId);\r
                        assertNotNull(result);\r
-                       assertEquals(result.getFirstSequenceScores().getNumberOfScores(),\r
-                                       13);\r
+                       assertEquals(result.asSet().size(), 13);\r
 \r
                        jobId = msaws.presetAnalize(fsl,\r
                                        presets.getPresetByName("Slow conservation"));\r
                        result = msaws.getAnnotation(jobId);\r
                        assertNotNull(result);\r
-                       assertEquals(result.getFirstSequenceScores().getNumberOfScores(), 5);\r
+                       assertEquals(result.asSet().size(), 5);\r
 \r
                        jobId = msaws.presetAnalize(fsl,\r
                                        presets.getPresetByName("Complete conservation"));\r
                        result = msaws.getAnnotation(jobId);\r
                        assertNotNull(result);\r
-                       assertEquals(result.getFirstSequenceScores().getNumberOfScores(),\r
-                                       18);\r
+                       assertEquals(result.asSet().size(), 18);\r
 \r
                } catch (UnsupportedRuntimeException e) {\r
                        e.printStackTrace();\r
@@ -202,9 +197,9 @@ public class TestAAConWS {
                        String jobId = msaws.customAnalize(fsl, options.getArguments());\r
                        ScoreManager result = msaws.getAnnotation(jobId);\r
                        assertNotNull(result);\r
-                       assertEquals(result.getFirstSequenceScores().getNumberOfScores(), 1);\r
-                       assertEquals(result.getFirstSequenceScores().getFirstScore()\r
-                                       .getScores().get(0), 0.698f);\r
+                       assertEquals(result.asSet().size(), 1);\r
+                       assertEquals(result.asSet().iterator().next().getScores().get(0),\r
+                                       0.698f);\r
 \r
                        options.getArgument("Calculation method").setDefaultValue("SMERFS");\r
                        options.removeArgument("Normalize");\r
@@ -212,9 +207,9 @@ public class TestAAConWS {
                        jobId = msaws.customAnalize(fsl, options.getArguments());\r
                        result = msaws.getAnnotation(jobId);\r
                        assertNotNull(result);\r
-                       assertEquals(result.getFirstSequenceScores().getNumberOfScores(), 1);\r
-                       assertEquals(result.getFirstSequenceScores().getFirstScore()\r
-                                       .getScores().get(0), 0.401f);\r
+                       assertEquals(result.asSet().size(), 1);\r
+                       assertEquals(result.asSet().iterator().next().getScores().get(0),\r
+                                       0.401f);\r
 \r
                } catch (WrongParameterException e) {\r
                        e.printStackTrace();\r