X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=runner%2Fcompbio%2Fpipeline%2F_jpred%2FBlastParser.java;fp=runner%2Fcompbio%2Fpipeline%2F_jpred%2FBlastParser.java;h=7039588f7a05fc33280472efcaec83267257b1a6;hb=3ce02f963043c66aedd16f12c789592e86149398;hp=73d9a99f63e3411e5919dcbf5c1333f796fc55ca;hpb=535359a3d592ee41bda72e7356f0181f6cee9d07;p=jabaws.git diff --git a/runner/compbio/pipeline/_jpred/BlastParser.java b/runner/compbio/pipeline/_jpred/BlastParser.java index 73d9a99..7039588 100644 --- a/runner/compbio/pipeline/_jpred/BlastParser.java +++ b/runner/compbio/pipeline/_jpred/BlastParser.java @@ -16,59 +16,119 @@ * must include this copyright and license notice. */ package compbio.pipeline._jpred; -import java.io.*; -import java.util.*; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; - public class BlastParser { - - static class Psiseq { - String id; - String seq; + + Map> iters; + + public BlastParser(String file) throws FileNotFoundException, + XMLStreamException { + XMLInputFactory f = XMLInputFactory.newInstance(); + XMLStreamReader r = f.createXMLStreamReader(new BufferedInputStream( + new FileInputStream(new File(file)))); + Set pl = new HashSet(); + Hit psi = null; + this.iters = new HashMap>(); + Integer iternum = null; + while (r.hasNext()) { + int idx = r.next(); + if (r.isStartElement()) { + String name = r.getLocalName(); + if (name.equals("Iteration_iter-num")) { + iternum = Integer.parseInt(r.getElementText().trim()); + System.out.println("Iter " + iternum); + } + if (name.equals("Hit")) { + psi = new Hit(); + } + if (name.equals("Hit_num")) { + psi.number = r.getElementText(); + } + if (name.equals("Hit_accession")) { + psi.accession = r.getElementText(); + // System.out.println(psi.id); + } + if (name.equals("Hit_def")) { + // System.out.println(r.getElementText()); + psi.name = r.getElementText().split("\\s+")[0].trim(); + // System.out.println(psi.id); + } + if (name.equals("Hsp_hseq")) { + psi.seq = r.getElementText(); + // System.out.println(psi.seq); + } + if (name.equals("Hsp_evalue")) { + psi.evalue = r.getElementText(); + // System.out.println(psi.seq); + } + + } + + if (r.isEndElement()) { + String name = r.getLocalName(); + if (name.equals("Hit")) { + boolean replaced = pl.add(psi); + assert replaced : "Expect unique elements only!"; + psi = null; + } + if (name.equals("Iteration")) { + iters.put(iternum, pl); + pl = new HashSet(); + } + } } - /** - * args[0] is assumed to be the name of a Blast output file - * @throws XMLStreamException - * @throws FileNotFoundException - */ - public static void main(String[] args) throws FileNotFoundException, XMLStreamException { - XMLInputFactory f = XMLInputFactory.newInstance(); - XMLStreamReader r = f.createXMLStreamReader( new BufferedInputStream(new FileInputStream(new File(args[0])))); - List pl = new ArrayList(); - Psiseq psi = null; - while(r.hasNext()) { - int idx = r.next(); - //System.out.println(idx); - if(r.isStartElement()) { - String name = r.getLocalName(); - if(name.equals("Hit") ) { - psi = new Psiseq(); - } - if(name.equals("Hit_id") ) { - //System.out.println(r.getElementText()); - psi.id = r.getElementText(); - System.out.println(psi.id); - } - if(name.equals("Hsp_hseq")) { - psi.seq = r.getElementText(); - System.out.println(psi.seq); - } - } - - if(r.isEndElement()) { - String name = r.getLocalName(); - if(name.equals("Hit") ) { - pl.add(psi); - psi = null; - } - } + } - } + /** + * args[0] is assumed to be the name of a Blast output file + * + * @throws XMLStreamException + * @throws FileNotFoundException + */ + public static void main(String[] args) throws FileNotFoundException, + XMLStreamException { + BlastParser parser = new BlastParser(args[0]); + printHits(parser.iters); + } + static final void printHits(Map> iterNumPsiSeqs) { + for (Integer iterNum : iterNumPsiSeqs.keySet()) { + System.out.println("Iteration " + iterNum); + printHits(iterNumPsiSeqs.get(iterNum)); } + } + + static final void printHits(Collection psiseqs) { + assert psiseqs != null; + System.out.println("Total hits: " + psiseqs.size()); + for (Hit pseq : psiseqs) { + System.out.println("Hit: " + pseq.number + " Accession: " + + pseq.accession + " name " + pseq.name); + } + } + + static final void printNames(Collection psiseqs) { + assert psiseqs != null; + System.out.println("Total hits: " + psiseqs.size()); + for (Hit pseq : psiseqs) { + System.out.print(pseq.number + " "); + System.out.println(pseq.name); + } + } + }