-/* Copyright (c) 2009 Peter Troshin\r
+/* Copyright (c) 2011 Peter Troshin\r
* \r
- * Jalview Web Services @version: 2.0 \r
+ * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0 \r
* \r
* This library is free software; you can redistribute it and/or modify it under the terms of the\r
* Apache License version 2 as published by the Apache Software Foundation\r
* must include this copyright and license notice.\r
*/\r
package compbio.pipeline._jpred;\r
-import java.io.*;\r
-import java.util.*;\r
+\r
+import java.io.BufferedInputStream;\r
+import java.io.File;\r
+import java.io.FileInputStream;\r
+import java.io.FileNotFoundException;\r
+import java.util.Collection;\r
+import java.util.HashMap;\r
+import java.util.HashSet;\r
+import java.util.Map;\r
+import java.util.Set;\r
\r
import javax.xml.stream.XMLInputFactory;\r
import javax.xml.stream.XMLStreamException;\r
import javax.xml.stream.XMLStreamReader;\r
\r
-\r
public class BlastParser {\r
- \r
- static class Psiseq {\r
- String id;\r
- String seq;\r
+\r
+ /**\r
+ * Hits grouped by iteration: maps the number read from each\r
+ * Iteration_iter-num element to the set of hits collected up to the end of\r
+ * that Iteration element. Populated by the constructor.\r
+ */\r
+ Map<Integer, Set<Hit>> iters;\r
+\r
+ /**\r
+  * Parses a BLAST XML report and stores its hits, grouped by iteration\r
+  * number, in {@link #iters}.\r
+  * \r
+  * The input stream and the XML reader are always released before the\r
+  * constructor returns, whether parsing succeeds or fails.\r
+  * \r
+  * @param file\r
+  *            path of the BLAST XML output file to read\r
+  * @throws FileNotFoundException\r
+  *             if the file cannot be opened for reading\r
+  * @throws XMLStreamException\r
+  *             if the XML reader fails to process the document\r
+  */\r
+ public BlastParser(String file) throws FileNotFoundException,\r
+         XMLStreamException {\r
+     this.iters = new HashMap<Integer, Set<Hit>>();\r
+     XMLInputFactory f = XMLInputFactory.newInstance();\r
+     BufferedInputStream in = new BufferedInputStream(\r
+             new FileInputStream(new File(file)));\r
+     XMLStreamReader r = null;\r
+     try {\r
+         r = f.createXMLStreamReader(in);\r
+         // Hits of the iteration currently being read.\r
+         Set<Hit> pl = new HashSet<Hit>();\r
+         // Hit currently being filled in; null while outside a Hit element.\r
+         Hit psi = null;\r
+         Integer iternum = null;\r
+         while (r.hasNext()) {\r
+             r.next();\r
+             if (r.isStartElement()) {\r
+                 String name = r.getLocalName();\r
+                 // getElementText() advances the reader to the matching end\r
+                 // tag, which is then examined by the end-element check below.\r
+                 if (name.equals("Iteration_iter-num")) {\r
+                     iternum = Integer.parseInt(r.getElementText().trim());\r
+                     System.out.println("Iter " + iternum);\r
+                 } else if (name.equals("Hit")) {\r
+                     psi = new Hit();\r
+                 } else if (name.equals("Hit_num")) {\r
+                     psi.number = r.getElementText();\r
+                 } else if (name.equals("Hit_accession")) {\r
+                     psi.accession = r.getElementText();\r
+                 } else if (name.equals("Hit_def")) {\r
+                     // Only the first whitespace-delimited token is kept as the name.\r
+                     psi.name = r.getElementText().split("\\s+")[0].trim();\r
+                 } else if (name.equals("Hsp_hseq")) {\r
+                     psi.seq = r.getElementText();\r
+                 } else if (name.equals("Hsp_evalue")) {\r
+                     psi.evalue = r.getElementText();\r
+                 }\r
+             }\r
+             if (r.isEndElement()) {\r
+                 String name = r.getLocalName();\r
+                 if (name.equals("Hit")) {\r
+                     // Set.add returns false when an equal hit is already present.\r
+                     boolean added = pl.add(psi);\r
+                     assert added : "Expect unique elements only!";\r
+                     psi = null;\r
+                 } else if (name.equals("Iteration")) {\r
+                     iters.put(iternum, pl);\r
+                     pl = new HashSet<Hit>();\r
+                 }\r
+             }\r
}\r
- /**\r
- * args[0] is assumed to be the name of a Blast output file\r
- * @throws XMLStreamException \r
- * @throws FileNotFoundException \r
- */\r
- public static void main(String[] args) throws FileNotFoundException, XMLStreamException {\r
- XMLInputFactory f = XMLInputFactory.newInstance();\r
- XMLStreamReader r = f.createXMLStreamReader( new BufferedInputStream(new FileInputStream(new File(args[0]))));\r
- List<Psiseq> pl = new ArrayList<Psiseq>();\r
- Psiseq psi = null;\r
- while(r.hasNext()) {\r
- int idx = r.next(); \r
- //System.out.println(idx);\r
\r
- if(r.isStartElement()) {\r
- String name = r.getLocalName();\r
- if(name.equals("Hit") ) {\r
- psi = new Psiseq();\r
- }\r
- if(name.equals("Hit_id") ) {\r
- //System.out.println(r.getElementText());\r
- psi.id = r.getElementText();\r
- System.out.println(psi.id);\r
- } \r
- if(name.equals("Hsp_hseq")) {\r
- psi.seq = r.getElementText();\r
- System.out.println(psi.seq);\r
- } \r
- }\r
- \r
- if(r.isEndElement()) {\r
- String name = r.getLocalName();\r
- if(name.equals("Hit") ) {\r
- pl.add(psi);\r
- psi = null;\r
- }\r
- }\r
+     } finally {\r
+         // XMLStreamReader.close() does not close the underlying input\r
+         // source, so the reader and the stream are released separately.\r
+         if (r != null) {\r
+             try {\r
+                 r.close();\r
+             } catch (XMLStreamException ignored) {\r
+                 // Parsing has already finished or failed; nothing to recover.\r
+             }\r
+         }\r
+         try {\r
+             in.close();\r
+         } catch (java.io.IOException ignored) {\r
+             // Stream was only read from; a failed close cannot lose data.\r
+         }\r
+     }\r
+ }\r
\r
- } \r
+ /**\r
+  * Command line entry point: parses the file named by args[0] and prints\r
+  * the hits of every iteration to standard output.\r
+  * \r
+  * @param args\r
+  *            args[0] is assumed to be the name of a Blast output file\r
+  * @throws XMLStreamException\r
+  *             if the XML reader fails to process the file\r
+  * @throws FileNotFoundException\r
+  *             if the file cannot be opened for reading\r
+  */\r
+ public static void main(String[] args) throws FileNotFoundException,\r
+         XMLStreamException {\r
+     // Give a usage hint instead of failing with ArrayIndexOutOfBoundsException.\r
+     if (args == null || args.length < 1) {\r
+         System.err.println("Usage: BlastParser <blast-xml-output-file>");\r
+         return;\r
+     }\r
+     BlastParser parser = new BlastParser(args[0]);\r
+     printHits(parser.iters);\r
+ }\r
\r
+ /**\r
+  * Prints each iteration number followed by the hits stored for it.\r
+  * \r
+  * @param iterNumPsiSeqs\r
+  *            hits keyed by iteration number\r
+  */\r
+ static final void printHits(Map<Integer, Set<Hit>> iterNumPsiSeqs) {\r
+     for (Map.Entry<Integer, Set<Hit>> iteration : iterNumPsiSeqs.entrySet()) {\r
+         System.out.println("Iteration " + iteration.getKey());\r
+         printHits(iteration.getValue());\r
}\r
+ }\r
+\r
+ /**\r
+  * Prints the number of hits, then one line per hit with its number,\r
+  * accession and name.\r
+  * \r
+  * @param psiseqs\r
+  *            hits to print; must not be null\r
+  */\r
+ static final void printHits(Collection<Hit> psiseqs) {\r
+     assert psiseqs != null;\r
+     System.out.println("Total hits: " + psiseqs.size());\r
+     for (Hit hit : psiseqs) {\r
+         StringBuilder line = new StringBuilder("Hit: ");\r
+         line.append(hit.number).append(" Accession: ");\r
+         line.append(hit.accession).append(" name ").append(hit.name);\r
+         System.out.println(line);\r
+     }\r
+ }\r
+\r
+ /**\r
+  * Prints the number of hits, then one line per hit holding its number\r
+  * and name separated by a space.\r
+  * \r
+  * @param psiseqs\r
+  *            hits to print; must not be null\r
+  */\r
+ static final void printNames(Collection<Hit> psiseqs) {\r
+     assert psiseqs != null;\r
+     final int total = psiseqs.size();\r
+     System.out.println("Total hits: " + total);\r
+     for (Hit hit : psiseqs) {\r
+         System.out.println(hit.number + " " + hit.name);\r
+     }\r
+ }\r
+\r
}\r