1 /* Copyright (c) 2009 Peter Troshin
\r
3 * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0
\r
5 * This library is free software; you can redistribute it and/or modify it under the terms of the
\r
6 * Apache License version 2 as published by the Apache Software Foundation
\r
8 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
\r
9 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache
\r
10 * License for more details.
\r
12 * A copy of the license is in apache_license.txt. It is also available here:
\r
13 * @see: http://www.apache.org/licenses/LICENSE-2.0.txt
\r
15 * Any republication or derived work distributed in source code form
\r
16 * must include this copyright and license notice.
\r
18 package compbio.pipeline._jpred;
\r
20 import java.io.BufferedReader;
\r
21 import java.io.FileInputStream;
\r
22 import java.io.IOException;
\r
23 import java.io.InputStreamReader;
\r
24 import java.util.ArrayList;
\r
25 import java.util.Collections;
\r
26 import java.util.HashSet;
\r
27 import java.util.List;
\r
28 import java.util.Scanner;
\r
29 import java.util.Set;
\r
32 * Parser for the following files:
\r
37 public class JackHmmerHitParser {
\r
38 //# --- full sequence ---- --- best 1 domain ---- --- domain number estimation ----
\r
39 //# target name accession query name accession E-value score bias E-value score bias exp reg clu ov env dom rep inc description of target
\r
40 //# ------------------- ---------- -------------------- ---------- --------- ------ ----- --------- ------ ----- --- --- --- --- --- --- --- --- ---------------------
\r
41 //tr|Q6TVU2|Q6TVU2_ORFV - gi_74230740_gb_ABA00545.1 - 4.5e-271 910.4 0.0 5.1e-271 910.2 0.0 1.0 1 0 0 1 1 1 1 Putative uncharacterized protein OS=Orf virus PE=4 SV=1
\r
/**
 * Opens {@code file}, decodes it as ISO-8859-1 and hands every line read by
 * the loop below to {@code extractData}, which fills {@code hits}.
 *
 * NOTE(review): this listing looks incomplete - {@code hitc} is neither
 * declared nor updated in the visible lines and the closing braces are
 * missing. Confirm against the complete source before changing any logic.
 *
 * @param file path of the hit table to parse
 * @throws IOException if the file cannot be opened or read
 */
45 public JackHmmerHitParser(String file) throws IOException {
\r
// Reader with an explicit charset and a 64000-character buffer.
// NOTE(review): no close() call is visible for this reader - verify.
47 BufferedReader bfr = new BufferedReader(new InputStreamReader(
\r
48 new FileInputStream(file), "ISO-8859-1"), 64000);
\r
49 // throw away first three lines;
\r
50 this.hits = new HashSet<Hit>();
\r
// The value read here is overwritten by the loop condition, so this line of
// input is discarded. Only one such read is visible although the comment
// above speaks of three lines - TODO confirm.
51 String line = bfr.readLine();
\r
55 while ((line = bfr.readLine()) != null) {
\r
// One scanner per input line, splitting on runs of whitespace.
57 Scanner scan = new Scanner(line);
\r
58 scan.useDelimiter("\\s+");
\r
59 extractData(scan, hitc);
\r
// Sorted copy of the collected hits. Nothing in the visible lines reads
// lhits afterwards - presumably used for output or debugging; verify.
61 List<Hit> lhits = new ArrayList<Hit>(hits);
\r
62 Collections.sort(lhits, new Hit.NumberComporator());
\r
65 void extractData(Scanner scan, int hitcounter) {
\r
66 Hit pseq = new Hit();
\r
68 String tname = scan.next();
\r
70 //System.out.println(tname);
\r
72 String tacc = scan.next();
\r
73 pseq.accession = tacc;
\r
74 //System.out.println(tacc);
\r
75 String qname = scan.next();
\r
76 //System.out.println(qname);
\r
77 String qacc = scan.next();
\r
78 //System.out.println(qacc);
\r
80 Double evalue = scan.nextDouble();
\r
81 //System.out.println(evalue);
\r
82 pseq.evalue = evalue.toString();
\r
84 Double score = scan.nextDouble();
\r
85 //System.out.println(score);
\r
86 pseq.evalue = evalue.toString();
\r
87 pseq.number = new Integer(hitcounter).toString();
\r
88 boolean unique = hits.add(pseq);
\r
89 assert unique : "Unique hits are expected!";
\r
92 public static void main(String[] args) throws IOException {
\r
93 assert args[0] != null;
\r
94 JackHmmerHitParser parser = new JackHmmerHitParser(args[0]);
\r
95 BlastParser.printHits(parser.hits);
\r