+++ /dev/null
-/* Copyright (c) 2011 Peter Troshin\r
- * \r
- * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0 \r
- * \r
- * This library is free software; you can redistribute it and/or modify it under the terms of the\r
- * Apache License version 2 as published by the Apache Software Foundation\r
- * \r
- * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without\r
- * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache \r
- * License for more details.\r
- * \r
- * A copy of the license is in apache_license.txt. It is also available here:\r
- * @see: http://www.apache.org/licenses/LICENSE-2.0.txt\r
- * \r
- * Any republication or derived work distributed in source code form\r
- * must include this copyright and license notice.\r
- */\r
-package compbio.pipeline._jpred;\r
-\r
-import java.io.BufferedReader;\r
-import java.io.FileInputStream;\r
-import java.io.IOException;\r
-import java.io.InputStreamReader;\r
-import java.util.ArrayList;\r
-import java.util.Collections;\r
-import java.util.HashSet;\r
-import java.util.List;\r
-import java.util.Scanner;\r
-import java.util.Set;\r
-\r
-/**\r
- * Parser for the following files:\r
- * \r
- * @author pvtroshin\r
- * \r
- */\r
-public class JackHmmerHitParser {\r
- //# --- full sequence ---- --- best 1 domain ---- --- domain number estimation ----\r
- //# target name accession query name accession E-value score bias E-value score bias exp reg clu ov env dom rep inc description of target\r
- //# ------------------- ---------- -------------------- ---------- --------- ------ ----- --------- ------ ----- --- --- --- --- --- --- --- --- ---------------------\r
- //tr|Q6TVU2|Q6TVU2_ORFV - gi_74230740_gb_ABA00545.1 - 4.5e-271 910.4 0.0 5.1e-271 910.2 0.0 1.0 1 0 0 1 1 1 1 Putative uncharacterized protein OS=Orf virus PE=4 SV=1\r
-\r
- Set<Hit> hits;\r
-\r
- public JackHmmerHitParser(String file) throws IOException {\r
-\r
- BufferedReader bfr = new BufferedReader(new InputStreamReader(\r
- new FileInputStream(file), "ISO-8859-1"), 64000);\r
- // throw away first three lines; \r
- this.hits = new HashSet<Hit>();\r
- String line = bfr.readLine();\r
- bfr.readLine();\r
- bfr.readLine();\r
- int hitc = 0;\r
- while ((line = bfr.readLine()) != null) {\r
- hitc++;\r
- Scanner scan = new Scanner(line);\r
- scan.useDelimiter("\\s+");\r
- extractData(scan, hitc);\r
- }\r
- List<Hit> lhits = new ArrayList<Hit>(hits);\r
- Collections.sort(lhits, new Hit.NumberComporator());\r
- }\r
-\r
- void extractData(Scanner scan, int hitcounter) {\r
- Hit pseq = new Hit();\r
-\r
- String tname = scan.next();\r
- pseq.name = tname;\r
- //System.out.println(tname);\r
-\r
- String tacc = scan.next();\r
- pseq.accession = tacc;\r
- //System.out.println(tacc);\r
- String qname = scan.next();\r
- //System.out.println(qname);\r
- String qacc = scan.next();\r
- //System.out.println(qacc);\r
-\r
- Double evalue = scan.nextDouble();\r
- //System.out.println(evalue);\r
- pseq.evalue = evalue.toString();\r
-\r
- Double score = scan.nextDouble();\r
- //System.out.println(score);\r
- pseq.evalue = evalue.toString();\r
- pseq.number = new Integer(hitcounter).toString();\r
- boolean unique = hits.add(pseq);\r
- assert unique : "Unique hits are expected!";\r
- }\r
-\r
- public static void main(String[] args) throws IOException {\r
- assert args[0] != null;\r
- JackHmmerHitParser parser = new JackHmmerHitParser(args[0]);\r
- BlastParser.printHits(parser.hits);\r
- }\r
-}\r