/* Copyright (c) 2011 Peter Troshin
 *
 *  JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0
 *
 *  This library is free software; you can redistribute it and/or modify it under the terms of the
 *  Apache License version 2 as published by the Apache Software Foundation
 *
 *  This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 *  even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache
 *  License for more details.
 *
 *  A copy of the license is in apache_license.txt. It is also available here:
 * @see: http://www.apache.org/licenses/LICENSE-2.0.txt
 *
 * Any republication or derived work distributed in source code form
 * must include this copyright and license notice.
 */
package compbio.pipeline._jpred;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;

/**
 * Streaming (StAX) reader for a BLAST XML report.
 * <p>
 * The whole document is consumed in the constructor. Every {@code Hit} element
 * becomes one {@link Hit} object and the hits are grouped per {@code Iteration}
 * element into {@link #iters}.
 */
public class BlastParser {

    /**
     * Hits of each iteration, keyed by the number read from the
     * {@code Iteration_iter-num} element. The key is {@code null} for an
     * iteration that closed before any iteration number was seen.
     */
    Map<Integer, Set<Hit>> iters;

    /**
     * Parses the given file and fills {@link #iters}.
     *
     * @param file
     *            path of the XML file to read
     * @throws FileNotFoundException
     *             if the file cannot be opened for reading
     * @throws XMLStreamException
     *             if the document is not well formed, or a hit field element
     *             occurs outside of a {@code Hit} element
     */
    public BlastParser(String file) throws FileNotFoundException,
            XMLStreamException {
        XMLInputFactory factory = XMLInputFactory.newInstance();
        // Only element text is needed, so DTD processing and external
        // entities are switched off: nothing referenced from the DOCTYPE of
        // the report is resolved while parsing.
        factory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
        factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES,
                Boolean.FALSE);

        this.iters = new HashMap<Integer, Set<Hit>>();

        InputStream in = new BufferedInputStream(new FileInputStream(
                new File(file)));
        try {
            XMLStreamReader reader = factory.createXMLStreamReader(in);
            try {
                readIterations(reader);
            } finally {
                // XMLStreamReader.close() releases parser resources only; the
                // underlying stream is closed separately below.
                reader.close();
            }
        } finally {
            try {
                in.close();
            } catch (IOException ignored) {
                // The stream was opened read-only, so a failure to close it
                // cannot affect what has already been parsed.
            }
        }
    }

    /**
     * Walks the event stream and collects the hits of every iteration into
     * {@link #iters}.
     *
     * @param r
     *            reader positioned before the first event of interest
     * @throws XMLStreamException
     *             on malformed XML or a hit field outside of a {@code Hit}
     */
    private void readIterations(XMLStreamReader r) throws XMLStreamException {
        Set<Hit> hits = new HashSet<Hit>();
        Hit hit = null;
        Integer iterNum = null;

        while (r.hasNext()) {
            r.next();
            if (r.isStartElement()) {
                String name = r.getLocalName();
                if (name.equals("Iteration_iter-num")) {
                    iterNum = Integer.parseInt(r.getElementText().trim());
                    System.out.println("Iter " + iterNum);
                } else if (name.equals("Hit")) {
                    hit = new Hit();
                } else if (name.equals("Hit_num")) {
                    openHit(hit, r).number = r.getElementText();
                } else if (name.equals("Hit_accession")) {
                    openHit(hit, r).accession = r.getElementText();
                } else if (name.equals("Hit_def")) {
                    // Keep only the first whitespace-delimited token.
                    openHit(hit, r).name = r.getElementText().split("\\s+")[0]
                            .trim();
                } else if (name.equals("Hsp_hseq")) {
                    openHit(hit, r).seq = r.getElementText();
                } else if (name.equals("Hsp_evalue")) {
                    openHit(hit, r).evalue = r.getElementText();
                }
            } else if (r.isEndElement()) {
                String name = r.getLocalName();
                if (name.equals("Hit")) {
                    boolean added = hits.add(hit);
                    assert added : "Expect unique elements only!";
                    hit = null;
                } else if (name.equals("Iteration")) {
                    // Hand the finished set over and start a fresh one for
                    // the next iteration.
                    iters.put(iterNum, hits);
                    hits = new HashSet<Hit>();
                }
            }
        }
    }

    /**
     * Returns the hit currently being filled, or fails with the parser
     * location when the current element is not nested in a {@code Hit}.
     *
     * @param hit
     *            the hit under construction, may be {@code null}
     * @param r
     *            reader positioned on the start tag of the offending element
     * @return {@code hit}, never {@code null}
     * @throws XMLStreamException
     *             if {@code hit} is {@code null}
     */
    private static Hit openHit(Hit hit, XMLStreamReader r)
            throws XMLStreamException {
        if (hit == null) {
            throw new XMLStreamException("Element <" + r.getLocalName()
                    + "> found outside of a <Hit> element", r.getLocation());
        }
        return hit;
    }

    /**
     * Parses the file named by the first argument and prints its hits.
     *
     * @param args
     *            args[0] is assumed to be the name of a Blast output file
     * @throws XMLStreamException
     *             if the file cannot be parsed
     * @throws FileNotFoundException
     *             if the file cannot be opened
     * @throws IllegalArgumentException
     *             if no file name is supplied
     */
    public static void main(String[] args) throws FileNotFoundException,
            XMLStreamException {
        if (args == null || args.length < 1) {
            throw new IllegalArgumentException(
                    "Usage: BlastParser <blast xml output file>");
        }
        BlastParser parser = new BlastParser(args[0]);
        printHits(parser.iters);
    }

    /**
     * Prints, for every iteration in the map, its number followed by its hits.
     *
     * @param iterNumPsiSeqs
     *            hits keyed by iteration number
     */
    static final void printHits(Map<Integer, Set<Hit>> iterNumPsiSeqs) {
        for (Map.Entry<Integer, Set<Hit>> entry : iterNumPsiSeqs.entrySet()) {
            System.out.println("Iteration " + entry.getKey());
            printHits(entry.getValue());
        }
    }

    /**
     * Prints the size of the collection and one line per hit with its number,
     * accession and name.
     *
     * @param psiseqs
     *            hits to print, must not be {@code null}
     */
    static final void printHits(Collection<Hit> psiseqs) {
        assert psiseqs != null;
        System.out.println("Total hits: " + psiseqs.size());
        for (Hit pseq : psiseqs) {
            System.out.println("Hit: " + pseq.number + " Accession: "
                    + pseq.accession + " name " + pseq.name);
        }
    }

    /**
     * Prints the size of the collection and one line per hit with its number
     * and name.
     *
     * @param psiseqs
     *            hits to print, must not be {@code null}
     */
    static final void printNames(Collection<Hit> psiseqs) {
        assert psiseqs != null;
        System.out.println("Total hits: " + psiseqs.size());
        for (Hit pseq : psiseqs) {
            System.out.print(pseq.number + " ");
            System.out.println(pseq.name);
        }
    }
}