2 * Jalview - A Sequence Alignment Editor and Viewer (Development Version 2.4.1)
3 * Copyright (C) 2009 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 import jalview.datamodel.*;
25 import jalview.util.*;
28 * parse a simple blast report. Attempt to cope with query anchored and pairwise
34 public class SimpleBlastFile extends AlignFile
37 * header and footer info goes into alignment annotation.
39 StringBuffer headerLines, footerLines;
41 * hold sequence ids in order of appearance in file
/**
 * No-argument constructor.
 */
44 public SimpleBlastFile()
/**
 * Constructs a parser for the given input.
 *
 * @param inFile
 *          presumably the input location or data - TODO confirm against
 *          AlignFile
 * @param type
 *          presumably how inFile should be interpreted - TODO confirm
 *          against AlignFile
 * @throws IOException
 *           declared by this constructor
 */
48 public SimpleBlastFile(String inFile, String type) throws IOException
/**
 * Constructs a parser from a FileParse source.
 *
 * @param source
 *          the FileParse object supplying the input
 * @throws IOException
 *           declared by this constructor
 */
53 public SimpleBlastFile(FileParse source) throws IOException
/**
 * Allocates fresh, empty buffers for the header and footer text and an empty
 * list for the sequence ids.
 */
58 public void initData()
61 headerLines = new StringBuffer();
62 footerLines = new StringBuffer();
// ids are kept in a Vector; parse() later walks it to build the sequences
63 seqids = new Vector();
/**
 * Reads the input line by line via nextLine(), accumulating the aligned
 * segment of each row into a per-id buffer held in a local Hashtable. When the
 * input is exhausted, one Sequence per id is built (ids are walked in the
 * order stored in seqids) and appended to seqs. Any collected header and
 * footer text is stored as the "HEADER" and "FOOTER" alignment properties.
 *
 * Rows whose start or end index cannot be parsed are still used: a message is
 * written to System.err and the defaults (start 1, end 0) are kept.
 *
 * @throws IOException
 *           declared by this method; presumably raised by nextLine() - TODO
 *           confirm
 */
66 public void parse() throws IOException
69 char gapc = ' '; // nominal gap character
// maps sequence id -> Object[] { StringBuffer of residues, long[] range }
70 Hashtable seqhash = new Hashtable();
71 boolean inAlignments = false;
// column markers for the current block; -1 means "not yet located"
72 int padding = -1, numcol = -1, aligcol = -1, lastcol = -1;
73 long qlen = 0, rstart, rend; // total number of query bases so far
74 boolean padseq = false;
75 while ((line = nextLine()) != null)
// a line beginning with "ALIGNMENTS" - presumably switches inAlignments on;
// TODO confirm
77 if (line.indexOf("ALIGNMENTS") == 0)
85 if (line.trim().length() == 0)
89 // parse out the sequences
// a row whose id is "Query" triggers re-detection of the column layout
90 if (line.indexOf("Query") == 0)
93 // reset column markers for this block
97 // init or reset the column positions
// scanning starts at index 5, i.e. just past the five characters of "Query"
98 for (int p = 5, mLen = line.length(); p < mLen; p++)
100 char c = line.charAt(p);
101 if (c >= '0' && c <= '9')
107 else if (aligcol!=-1 && lastcol == -1)
// NOTE(review): the range 'A'..'z' also admits the six ASCII characters
// between 'Z' and 'a' ([ \ ] ^ _ `) - confirm that is intended
114 if (c >= 'A' && c <= 'z')
126 padding = p; // beginning of last stretch of whitespace
// leave alignment mode at the "Database:" trailer, when any column marker is
// still unset, or when the line is too short to reach lastcol
136 if (line.indexOf("Database:")>-1 || (aligcol == -1 || numcol == -1 || lastcol == -1)
137 || line.length() < lastcol)
139 inAlignments = false;
143 // now extract the alignment.
// row layout: [0,numcol) id, [numcol,aligcol) start index,
// [aligcol,padding) aligned residues, [lastcol,end) end index
144 String sqid = line.substring(0, numcol).trim();
145 String stindx = line.substring(numcol, aligcol).trim();
// NOTE(review): padding is not part of the guard above; substring throws if
// padding is still -1 or is less than aligcol - confirm it is always set
146 String aligseg = line.substring(aligcol, padding);
147 String endindx = line.substring(lastcol).trim();
148 // init start/end prior to parsing
149 rstart = 1; // best guess we have
150 rend = 0; // if zero at end of parsing, then we count non-gaps
153 rstart = Long.parseLong(stindx);
154 } catch (Exception e)
// unparseable start index: report it and keep the default assigned above
156 System.err.println("Couldn't parse '"+stindx+"' as start of row");
157 // inAlignments = false;
158 // warn for this line
162 rend = Long.parseLong(endindx);
163 } catch (Exception e)
// unparseable end index: report it and keep the default assigned above
165 System.err.println("Couldn't parse '"+endindx+"' as end of row");
166 // inAlignments = false;
168 // warn for this line
170 Object[] seqentry = (Object[]) seqhash.get(sqid);
// first sighting of this id: create its entry and record the id order
172 if (seqentry == null)
174 padseq = true; // prepend gaps to new sequences in this block
175 seqentry = new Object[]
176 { new StringBuffer(), new long[]
178 seqhash.put(sqid, seqentry);
179 seqids.addElement(sqid);
182 if (sqid.equals("Query"))
184 // update current block length in case we need to pad
185 qlen = ((StringBuffer) seqentry[0]).length();
187 StringBuffer sqs = ((StringBuffer) seqentry[0]);
// runs once per position by which this row's buffer is shorter than qlen;
// presumably appends gapc each time - TODO confirm
190 for (long c = sqs.length(); c < qlen; c++)
// slot 1 of the range array always holds the most recently parsed end index
198 ((long[]) seqentry[1])[1] = rend;
201 // end of parsing out the sequences
203 // if we haven't parsed the line as an alignment, then
204 // add to the sequence header
207 String ln = line.trim();
208 // save any header stuff for the user
// once at least one sequence row has been seen, text goes to the footer;
// otherwise presumably to headerLines - TODO confirm
211 StringBuffer addto = (seqhash.size() > 0) ? footerLines
219 if (seqhash.size() > 0)
221 // make the sequence vector
222 Enumeration seqid = seqids.elements();
223 while (seqid.hasMoreElements())
225 String idstring = (String) seqid.nextElement();
226 Object[] seqentry = (Object[]) seqhash.get(idstring);
// NOTE(review): the start and end values are narrowed from long to int here;
// values beyond the int range would wrap silently
229 Sequence newseq = new Sequence(idstring,
231 ((StringBuffer) seqentry[0]).toString(),
232 (int) ((long[]) seqentry[1])[0],
233 (int) ((long[]) seqentry[1])[1]);
// an end of 0 is the "no end index parsed" default set while reading rows
234 if (newseq.getEnd() == 0)
236 // assume there are no deletions in the sequence.
237 newseq.setEnd(newseq.findPosition(newseq.getLength()));
239 seqs.addElement(newseq);
240 } catch (Exception e)
// a failure for one sequence is recorded in warningMessage, not rethrown
242 if (warningMessage == null)
246 warningMessage += "Couldn't add Sequence - ID is '" + idstring
247 + "' : Exception was " + e.toString() + "\n";
250 // add any annotation
// the buffers are only stored when they hold more than one character
251 if (headerLines.length() > 1)
253 setAlignmentProperty("HEADER", headerLines.toString());
255 if (footerLines.length() > 1)
257 setAlignmentProperty("FOOTER", footerLines.toString());
262 public String print(SequenceI[] s)
264 return new String("Not Implemented.");
267 public String print()
269 return print(getSeqsAsArray());