/* Copyright (c) 2009 Peter Troshin
 *
 * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0
 *
 * This library is free software; you can redistribute it and/or modify it under the terms of the
 * Apache License version 2 as published by the Apache Software Foundation
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache
 * License for more details.
 *
 * A copy of the license is in apache_license.txt. It is also available here:
 * @see: http://www.apache.org/licenses/LICENSE-2.0.txt
 *
 * Any republication or derived work distributed in source code form
 * must include this copyright and license notice.
 */
\r
18 package compbio.pipeline._jpred;
\r
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
\r
34 public class BlastParser {
\r
36 Map<Integer, Set<Hit>> iters;
\r
38 public BlastParser(String file) throws FileNotFoundException,
\r
39 XMLStreamException {
\r
40 XMLInputFactory f = XMLInputFactory.newInstance();
\r
41 XMLStreamReader r = f.createXMLStreamReader(new BufferedInputStream(
\r
42 new FileInputStream(new File(file))));
\r
43 Set<Hit> pl = new HashSet<Hit>();
\r
45 this.iters = new HashMap<Integer, Set<Hit>>();
\r
46 Integer iternum = null;
\r
47 while (r.hasNext()) {
\r
49 if (r.isStartElement()) {
\r
50 String name = r.getLocalName();
\r
51 if (name.equals("Iteration_iter-num")) {
\r
52 iternum = Integer.parseInt(r.getElementText().trim());
\r
53 System.out.println("Iter " + iternum);
\r
55 if (name.equals("Hit")) {
\r
58 if (name.equals("Hit_num")) {
\r
59 psi.number = r.getElementText();
\r
61 if (name.equals("Hit_accession")) {
\r
62 psi.accession = r.getElementText();
\r
63 // System.out.println(psi.id);
\r
65 if (name.equals("Hit_def")) {
\r
66 // System.out.println(r.getElementText());
\r
67 psi.name = r.getElementText().split("\\s+")[0].trim();
\r
68 // System.out.println(psi.id);
\r
70 if (name.equals("Hsp_hseq")) {
\r
71 psi.seq = r.getElementText();
\r
72 // System.out.println(psi.seq);
\r
74 if (name.equals("Hsp_evalue")) {
\r
75 psi.evalue = r.getElementText();
\r
76 // System.out.println(psi.seq);
\r
81 if (r.isEndElement()) {
\r
82 String name = r.getLocalName();
\r
83 if (name.equals("Hit")) {
\r
84 boolean replaced = pl.add(psi);
\r
85 assert replaced : "Expect unique elements only!";
\r
88 if (name.equals("Iteration")) {
\r
89 iters.put(iternum, pl);
\r
90 pl = new HashSet<Hit>();
\r
98 * args[0] is assumed to be the name of a Blast output file
\r
100 * @throws XMLStreamException
\r
101 * @throws FileNotFoundException
\r
103 public static void main(String[] args) throws FileNotFoundException,
\r
104 XMLStreamException {
\r
105 BlastParser parser = new BlastParser(args[0]);
\r
106 printHits(parser.iters);
\r
109 static final void printHits(Map<Integer, Set<Hit>> iterNumPsiSeqs) {
\r
110 for (Integer iterNum : iterNumPsiSeqs.keySet()) {
\r
111 System.out.println("Iteration " + iterNum);
\r
112 printHits(iterNumPsiSeqs.get(iterNum));
\r
116 static final void printHits(Collection<Hit> psiseqs) {
\r
117 assert psiseqs != null;
\r
118 System.out.println("Total hits: " + psiseqs.size());
\r
119 for (Hit pseq : psiseqs) {
\r
120 System.out.println("Hit: " + pseq.number + " Accession: "
\r
121 + pseq.accession + " name " + pseq.name);
\r
125 static final void printNames(Collection<Hit> psiseqs) {
\r
126 assert psiseqs != null;
\r
127 System.out.println("Total hits: " + psiseqs.size());
\r
128 for (Hit pseq : psiseqs) {
\r
129 System.out.print(pseq.number + " ");
\r
130 System.out.println(pseq.name);
\r