/* Copyright (c) 2011 Peter Troshin
 *
 * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0
 *
 * This library is free software; you can redistribute it and/or modify it under the terms of the
 * Apache License version 2 as published by the Apache Software Foundation
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache
 * License for more details.
 *
 * A copy of the license is in apache_license.txt. It is also available here:
 * @see: http://www.apache.org/licenses/LICENSE-2.0.txt
 *
 * Any republication or derived work distributed in source code form
 * must include this copyright and license notice.
 */
package compbio.pipeline._jpred;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Scanner;
import java.util.Set;

/**
 * Parser for whitespace-separated hit tables of the form shown in the sample
 * at the top of the class body (presumably the tabular per-target output of
 * jackhmmer -- confirm against the producing pipeline).
 * <p>
 * Every data line becomes one {@link Hit} carrying the target name, the
 * target accession, the full-sequence E-value and the 1-based position of the
 * hit within the file.
 *
 * @author pvtroshin
 */
public class JackHmmerHitParser {

    //# --- full sequence ---- --- best 1 domain ---- --- domain number estimation ----
    //# target name accession query name accession E-value score bias E-value score bias exp reg clu ov env dom rep inc description of target
    //# ------------------- ---------- -------------------- ---------- --------- ------ ----- --------- ------ ----- --- --- --- --- --- --- --- --- ---------------------
    //tr|Q6TVU2|Q6TVU2_ORFV - gi_74230740_gb_ABA00545.1 - 4.5e-271 910.4 0.0 5.1e-271 910.2 0.0 1.0 1 0 0 1 1 1 1 Putative uncharacterized protein OS=Orf virus PE=4 SV=1

    /** Number of leading lines that are always discarded as the table header. */
    private static final int HEADER_LINES = 3;

    /**
     * Parsed hits. Backed by a {@link LinkedHashSet}, so iteration follows the
     * order of the hits in the file, i.e. ascending hit number.
     */
    Set<Hit> hits;

    /**
     * Reads the given hit table and collects its hits into {@link #hits}.
     * <p>
     * The first three lines are discarded unconditionally as the header. After
     * that, blank lines and lines starting with {@code #} are skipped, and
     * every remaining line is parsed as one hit. The file is decoded as
     * ISO-8859-1 and is closed before the constructor returns, whether or not
     * parsing succeeds.
     *
     * @param file
     *            path of the hit table to parse
     * @throws IOException
     *             if the file cannot be opened or read
     * @throws java.util.NoSuchElementException
     *             if a data line has fewer columns than expected or a
     *             non-numeric E-value / score
     */
    public JackHmmerHitParser(String file) throws IOException {
        this.hits = new LinkedHashSet<Hit>();
        BufferedReader bfr = new BufferedReader(new InputStreamReader(
                new FileInputStream(file), "ISO-8859-1"), 64000);
        try {
            // throw away first three lines
            for (int i = 0; i < HEADER_LINES; i++) {
                bfr.readLine();
            }
            int hitc = 0;
            String line;
            while ((line = bfr.readLine()) != null) {
                String trimmed = line.trim();
                // Blank lines and '#' comment lines carry no hit data; parsing
                // them would fail with a NoSuchElementException.
                if (trimmed.length() == 0 || trimmed.charAt(0) == '#') {
                    continue;
                }
                hitc++;
                Scanner scan = new Scanner(line);
                try {
                    scan.useDelimiter("\\s+");
                    extractData(scan, hitc);
                } finally {
                    scan.close();
                }
            }
        } finally {
            // Release the file handle even when a line fails to parse.
            bfr.close();
        }
        // Hits are inserted in file order with increasing hit numbers, so the
        // insertion-ordered set is already ordered by hit number.
    }

    /**
     * Parses one data line and adds the resulting {@link Hit} to
     * {@link #hits}.
     * <p>
     * Columns are consumed in this order: target name, target accession,
     * query name, query accession, full-sequence E-value, full-sequence
     * score. Only the target name, target accession and E-value are stored.
     * The scanner's locale is switched to {@link Locale#US} so that numbers
     * written with a {@code .} decimal separator parse regardless of the
     * platform default locale.
     *
     * @param scan
     *            scanner positioned at the start of a data line, tokenising
     *            on whitespace
     * @param hitcounter
     *            position of the hit within the file; stored as the hit's
     *            number
     * @throws java.util.NoSuchElementException
     *             if a column is missing or a numeric column is not a number
     */
    void extractData(Scanner scan, int hitcounter) {
        // nextDouble() is locale sensitive; the table always uses '.'.
        scan.useLocale(Locale.US);

        Hit pseq = new Hit();
        pseq.name = scan.next();
        pseq.accession = scan.next();
        scan.next(); // query name - not stored
        scan.next(); // query accession - not stored
        pseq.evalue = Double.toString(scan.nextDouble());
        // The score is read only to validate the column.
        // NOTE(review): it was never stored in the Hit; confirm whether Hit
        // should carry it.
        scan.nextDouble();
        pseq.number = Integer.toString(hitcounter);

        boolean unique = hits.add(pseq);
        assert unique : "Unique hits are expected!";
    }

    /**
     * Parses the hit table named by the first argument and prints the hits
     * via {@code BlastParser.printHits}.
     *
     * @param args
     *            {@code args[0]} is the path of the hit table
     * @throws IOException
     *             if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 1 || args[0] == null) {
            System.err.println("Usage: java "
                    + JackHmmerHitParser.class.getName() + " <hit table file>");
            System.exit(1);
        }
        JackHmmerHitParser parser = new JackHmmerHitParser(args[0]);
        BlastParser.printHits(parser.hits);
    }
}