2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
26 import jalview.datamodel.*;
29 * Parses a simple BLAST report. Attempts to cope with query-anchored and pairwise alignments.
35 public class SimpleBlastFile extends AlignFile
38 * header and footer info goes into alignment annotation.
40 StringBuffer headerLines, footerLines;
43 * hold sequence ids in order of appearance in file
/**
 * No-argument constructor. Its body is not visible in this view.
 */
47 public SimpleBlastFile()
/**
 * Constructor taking two strings. Its body is not visible in this view.
 * NOTE(review): presumably inFile identifies the input and type says how it
 * should be opened - confirm against the AlignFile superclass.
 *
 * @throws IOException
 *           declared by the signature
 */
51 public SimpleBlastFile(String inFile, String type) throws IOException
/**
 * Constructor taking an existing FileParse. Its body is not visible in this
 * view.
 * NOTE(review): presumably the new parser reads from the given source -
 * confirm against the AlignFile superclass.
 *
 * @throws IOException
 *           declared by the signature
 */
56 public SimpleBlastFile(FileParse source) throws IOException
/**
 * Gives this parser fresh state of its own: an empty header buffer, an empty
 * footer buffer and an empty list of sequence ids.
 * NOTE(review): lines of this method may be missing from this view.
 */
61 public void initData()
// start with empty header and footer text
64 headerLines = new StringBuffer();
65 footerLines = new StringBuffer();
// ids are recorded here in the order they are first encountered
66 seqids = new Vector();
/**
 * Reads the input one line at a time through nextLine() and collects
 * alignment rows into per-id sequence buffers. After the last line it builds
 * one Sequence per recorded id and adds it to seqs. Header and footer text
 * longer than one character is stored through setAlignmentProperty as the
 * HEADER and FOOTER properties.
 *
 * NOTE(review): several statements of this method are not visible in this
 * view, so the comments added below describe only what the visible lines
 * establish.
 *
 * @throws IOException
 *           declared by the signature
 */
69 public void parse() throws IOException
72 char gapc = ' '; // nominal gap character
// seqhash is keyed by sequence id (see the put calls further down)
73 Hashtable seqhash = new Hashtable();
74 boolean inAlignments = false;
// column markers, -1 while unknown: the id field ends at numcol, the start
// index lies between numcol and aligcol, the aligned segment lies between
// aligcol and padding, and the end index starts at lastcol
75 int padding = -1, numcol = -1, aligcol = -1, lastcol = -1;
76 long qlen = 0, rstart, rend; // total number of query bases so far
77 boolean padseq = false;
78 while ((line = nextLine()) != null)
// NOTE(review): what happens for an ALIGNMENTS line and for a blank line is
// not visible in this view
80 if (line.indexOf("ALIGNMENTS") == 0)
88 if (line.trim().length() == 0)
92 // parse out the sequences
93 // query anchored means that we use the query sequence as the
// a line that starts with Query is used to work out the column positions
95 if (line.indexOf("Query") == 0)
98 // reset column markers for this block
102 // init or reset the column positions
// scan the line from character index 5 onwards, testing each character for
// digit and letter ranges to locate the columns
103 for (int p = 5, mLen = line.length(); p < mLen; p++)
105 char c = line.charAt(p);
106 if (c >= '0' && c <= '9')
112 else if (aligcol != -1 && lastcol == -1)
119 if (c >= 'A' && c <= 'z')
131 padding = p; // beginning of last stretch of whitespace
// stop treating lines as alignment rows when the line contains Database:,
// when any column marker is still unset, or when the line is shorter than
// lastcol
141 if (line.indexOf("Database:") > -1
142 || (aligcol == -1 || numcol == -1 || lastcol == -1)
143 || line.length() < lastcol)
145 inAlignments = false;
149 // now extract the alignment.
// slice the row into id, start index, aligned segment and end index using
// the column markers
150 String sqid = line.substring(0, numcol).trim();
151 String stindx = line.substring(numcol, aligcol).trim();
152 String aligseg = line.substring(aligcol, padding);
153 String endindx = line.substring(lastcol).trim();
154 // init start/end prior to parsing
155 rstart = 1; // best guess we have
156 rend = 0; // if zero at end of parsing, then we count non-gaps
// a start or end index that fails to parse is reported on stderr and the
// default assigned above is left in place
159 rstart = Long.parseLong(stindx);
160 } catch (Exception e)
162 System.err.println("Couldn't parse '" + stindx
163 + "' as start of row");
164 // inAlignments = false;
165 // warn for this line
169 rend = Long.parseLong(endindx);
170 } catch (Exception e)
172 System.err.println("Couldn't parse '" + endindx
173 + "' as end of row");
174 // inAlignments = false;
176 // warn for this line
// fetch what is stored for this id; the first time an id is seen an empty
// Vector is stored for it and the id is appended to seqids
178 Vector seqentries = (Vector) seqhash.get(sqid);
179 if (seqentries == null)
181 seqentries = new Vector();
182 seqhash.put(sqid, seqentries);
183 seqids.addElement(sqid);
186 Object[] seqentry = null;
187 Enumeration sqent = seqentries.elements();
// walk the stored entries, comparing each recorded end + 1 with this row
// start (presumably to find the entry this row continues - the action
// taken on a mismatch is not visible here)
188 while (seqentry == null && sqent.hasMoreElements())
190 seqentry = (Object[]) sqent.nextElement();
191 if (((long[]) seqentry[1])[1] + 1 != rstart)
// no entry selected: create a new one holding an empty buffer and a long
// array
197 if (seqentry == null)
199 padseq = true; // prepend gaps to new sequences in this block
200 seqentry = new Object[]
201 { new StringBuffer(), new long[]
203 seqentries.addElement(seqentry);
// NOTE(review): this stores the Object[] entry under the same key that was
// given the Vector above; the cast to Vector at the lookup would then fail
// the next time a row with this id reaches it, while the final loop below
// casts the stored value to Object[] - confirm which shape is intended
204 seqhash.put(sqid, seqentry);
207 if (sqid.equals("Query"))
209 // update current block length in case we need to pad
210 qlen = ((StringBuffer) seqentry[0]).length();
212 StringBuffer sqs = ((StringBuffer) seqentry[0]);
// loop from the current buffer length up to the Query length (loop body not
// visible here; presumably it pads the buffer with gapc)
215 for (long c = sqs.length(); c < qlen; c++)
// record the end index of this row as the end of the entry
223 ((long[]) seqentry[1])[1] = rend;
226 // end of parsing out the sequences
228 // if we haven't parsed the line as an alignment, then
229 // add to the sequence header
232 String ln = line.trim();
233 // save any header stuff for the user
// footerLines is chosen once seqhash holds at least one id (the other
// branch of this conditional is not visible here)
236 StringBuffer addto = (seqhash.size() > 0) ? footerLines
244 if (seqhash.size() > 0)
246 // make the sequence vector
247 Enumeration seqid = seqids.elements();
248 while (seqid.hasMoreElements())
250 String idstring = (String) seqid.nextElement();
251 Object[] seqentry = (Object[]) seqhash.get(idstring);
// build the Sequence from the buffered residues; the long start and end
// values are narrowed to int here
254 Sequence newseq = new Sequence(idstring,
256 ((StringBuffer) seqentry[0]).toString(),
257 (int) ((long[]) seqentry[1])[0],
258 (int) ((long[]) seqentry[1])[1]);
// an end of zero means no end index was recorded, so derive it from the
// sequence length
259 if (newseq.getEnd() == 0)
261 // assume there are no deletions in the sequence.
262 newseq.setEnd(newseq.findPosition(newseq.getLength()));
264 seqs.addElement(newseq);
// a failure for one id is appended to warningMessage
265 } catch (Exception e)
267 if (warningMessage == null)
271 warningMessage += "Couldn't add Sequence - ID is '" + idstring
272 + "' : Exception was " + e.toString() + "\n";
275 // add any annotation
// only buffers longer than one character are stored as alignment properties
276 if (headerLines.length() > 1)
278 setAlignmentProperty("HEADER", headerLines.toString());
280 if (footerLines.length() > 1)
282 setAlignmentProperty("FOOTER", footerLines.toString());
287 public String print(SequenceI[] s)
289 return new String("Not Implemented.");
292 public String print()
294 return print(getSeqsAsArray());