Changes from JWS3 are merged
[jabaws.git] / runner / compbio / pipeline / _jpred / BlastParser.java
index 73d9a99..7039588 100644 (file)
  * must include this copyright and license notice.\r
  */\r
 package compbio.pipeline._jpred;\r
-import java.io.*;\r
-import java.util.*;\r
+\r
+import java.io.BufferedInputStream;\r
+import java.io.File;\r
+import java.io.FileInputStream;\r
+import java.io.FileNotFoundException;\r
+import java.util.Collection;\r
+import java.util.HashMap;\r
+import java.util.HashSet;\r
+import java.util.Map;\r
+import java.util.Set;\r
 \r
 import javax.xml.stream.XMLInputFactory;\r
 import javax.xml.stream.XMLStreamException;\r
 import javax.xml.stream.XMLStreamReader;\r
 \r
-\r
 public class BlastParser {\r
-       \r
-       static class Psiseq {\r
-               String id;\r
-               String seq;\r
+\r
+    Map<Integer, Set<Hit>> iters;\r
+    /**\r
+     * Reads the XML file at the given path with a StAX stream reader and\r
+     * groups the hits it finds by iteration number in {@link #iters}.\r
+     * \r
+     * A new Hit is created on every {@code Hit} start tag and filled from the\r
+     * {@code Hit_num}, {@code Hit_accession}, {@code Hit_def}, {@code Hsp_hseq}\r
+     * and {@code Hsp_evalue} elements; it is added to the current set on the\r
+     * {@code Hit} end tag. On every {@code Iteration} end tag the current set\r
+     * is stored under the most recently parsed {@code Iteration_iter-num}\r
+     * value and a fresh set is started. Each iteration number is also printed\r
+     * to standard output as "Iter n".\r
+     * \r
+     * NOTE(review): the stream reader and the underlying file stream are never\r
+     * closed in this constructor. A hit-level element that appears outside a\r
+     * {@code Hit} element would hit a null psi and fail with\r
+     * NullPointerException.\r
+     * \r
+     * @param file\r
+     *            path of the Blast XML output file to parse\r
+     * @throws FileNotFoundException\r
+     *             if the file cannot be opened for reading\r
+     * @throws XMLStreamException\r
+     *             if the stream reader cannot be created or reading fails\r
+     * @throws NumberFormatException\r
+     *             if the Iteration_iter-num text is not an integer\r
+     */\r
+    public BlastParser(String file) throws FileNotFoundException,\r
+           XMLStreamException {\r
+       XMLInputFactory f = XMLInputFactory.newInstance();\r
+       XMLStreamReader r = f.createXMLStreamReader(new BufferedInputStream(\r
+               new FileInputStream(new File(file))));\r
+       Set<Hit> pl = new HashSet<Hit>();\r
+       Hit psi = null;\r
+       this.iters = new HashMap<Integer, Set<Hit>>();\r
+       Integer iternum = null;\r
+       while (r.hasNext()) {\r
+           int idx = r.next(); /* advances the reader; idx itself is never read */\r
+           if (r.isStartElement()) {\r
+               String name = r.getLocalName();\r
+               if (name.equals("Iteration_iter-num")) {\r
+                   iternum = Integer.parseInt(r.getElementText().trim());\r
+                   System.out.println("Iter " + iternum);\r
+               }\r
+               if (name.equals("Hit")) {\r
+                   psi = new Hit();\r
+               }\r
+               if (name.equals("Hit_num")) {\r
+                   psi.number = r.getElementText();\r
+               }\r
+               if (name.equals("Hit_accession")) {\r
+                   psi.accession = r.getElementText();\r
+                   // the accession text is stored verbatim (not trimmed)\r
+               }\r
+               if (name.equals("Hit_def")) {\r
+                   // only the first whitespace-delimited token is kept as name\r
+                   psi.name = r.getElementText().split("\\s+")[0].trim();\r
+                   // the remainder of the Hit_def text is discarded\r
+               }\r
+               if (name.equals("Hsp_hseq")) {\r
+                   psi.seq = r.getElementText();\r
+                   // a later Hsp_hseq inside the same Hit overwrites this value\r
+               }\r
+               if (name.equals("Hsp_evalue")) {\r
+                   psi.evalue = r.getElementText();\r
+                   // kept as a String; a later Hsp_evalue overwrites this value\r
+               }\r
+\r
+           }\r
+\r
+           if (r.isEndElement()) {\r
+               String name = r.getLocalName();\r
+               if (name.equals("Hit")) {\r
+                   boolean replaced = pl.add(psi); /* add() is true when the hit was NOT already in the set */\r
+                   assert replaced : "Expect unique elements only!";\r
+                   psi = null;\r
+               }\r
+               if (name.equals("Iteration")) {\r
+                   iters.put(iternum, pl);\r
+                   pl = new HashSet<Hit>();\r
+               }\r
+           }\r
        }\r
-       /**\r
-        * args[0] is assumed to be the name of a Blast output file\r
-        * @throws XMLStreamException \r
-        * @throws FileNotFoundException \r
-        */\r
-       public static void main(String[] args) throws FileNotFoundException, XMLStreamException {\r
-               XMLInputFactory f = XMLInputFactory.newInstance();\r
-               XMLStreamReader r = f.createXMLStreamReader( new BufferedInputStream(new FileInputStream(new File(args[0]))));\r
-               List<Psiseq> pl = new ArrayList<Psiseq>();\r
-               Psiseq psi = null;\r
-               while(r.hasNext()) {\r
-                       int idx = r.next(); \r
-                       //System.out.println(idx);\r
 \r
-                       if(r.isStartElement()) {\r
-                               String name = r.getLocalName();\r
-                               if(name.equals("Hit") ) {\r
-                                       psi = new Psiseq();\r
-                               }\r
-                               if(name.equals("Hit_id") ) {\r
-                                       //System.out.println(r.getElementText());\r
-                                       psi.id = r.getElementText();\r
-                                       System.out.println(psi.id);\r
-                               } \r
-                               if(name.equals("Hsp_hseq")) {\r
-                                       psi.seq = r.getElementText();\r
-                                       System.out.println(psi.seq);\r
-                               } \r
-                       }\r
-                       \r
-                       if(r.isEndElement()) {\r
-                               String name = r.getLocalName();\r
-                               if(name.equals("Hit") ) {\r
-                                       pl.add(psi);\r
-                                       psi = null;\r
-                               }\r
-                       }\r
+    }\r
 \r
-               } \r
+    /**\r
+     * Command-line entry point: builds a BlastParser for the file named by\r
+     * args[0] and prints the hits of every parsed iteration to standard\r
+     * output.\r
+     * \r
+     * @param args\r
+     *            args[0] is assumed to be the name of a Blast output file; the\r
+     *            array is indexed without a length check, so running with no\r
+     *            arguments fails with ArrayIndexOutOfBoundsException\r
+     * @throws XMLStreamException\r
+     *             if the XML stream reader cannot be created or reading fails\r
+     * @throws FileNotFoundException\r
+     *             if the named file cannot be opened\r
+     */\r
+    public static void main(String[] args) throws FileNotFoundException,\r
+           XMLStreamException {\r
+       BlastParser parser = new BlastParser(args[0]);\r
+       printHits(parser.iters);\r
+    }\r
 \r
+    static final void printHits(Map<Integer, Set<Hit>> iterNumPsiSeqs) { /*\r
+        * For every key of the map, prints an "Iteration " header line with\r
+        * the key and then hands the set stored under that key to\r
+        * printHits(Collection). Keys are visited in the order that keySet()\r
+        * returns them.\r
+        */\r
+       for (Integer iterNum : iterNumPsiSeqs.keySet()) {\r
+           System.out.println("Iteration " + iterNum);\r
+           printHits(iterNumPsiSeqs.get(iterNum));\r
        }\r
+    }\r
+    /**\r
+     * Prints "Total hits: " followed by the size of the collection, then one\r
+     * line per hit showing its number, accession and name.\r
+     * \r
+     * @param psiseqs\r
+     *            hits to print; null is rejected only by an assert, so with\r
+     *            assertions disabled a null argument fails with\r
+     *            NullPointerException on the size() call\r
+     */\r
+    static final void printHits(Collection<Hit> psiseqs) {\r
+       assert psiseqs != null;\r
+       System.out.println("Total hits: " + psiseqs.size());\r
+       for (Hit pseq : psiseqs) {\r
+           System.out.println("Hit: " + pseq.number + " Accession: "\r
+                   + pseq.accession + " name " + pseq.name);\r
+       }\r
+    }\r
+    /**\r
+     * Prints "Total hits: " followed by the size of the collection, then one\r
+     * line per hit holding the hit number, a run of spaces and the hit name.\r
+     * \r
+     * @param psiseqs\r
+     *            hits to print; null is rejected only by an assert, so with\r
+     *            assertions disabled a null argument fails with\r
+     *            NullPointerException on the size() call\r
+     */\r
+    static final void printNames(Collection<Hit> psiseqs) {\r
+       assert psiseqs != null;\r
+       System.out.println("Total hits: " + psiseqs.size());\r
+       for (Hit pseq : psiseqs) {\r
+           System.out.print(pseq.number + "     ");\r
+           System.out.println(pseq.name);\r
+       }\r
+    }\r
+\r
 }\r