2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8)
3 * Copyright (C) 2012 J Procter, AM Waterhouse, LM Lui, J Engelhardt, G Barton, M Clamp, S Searle
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
23 import jalview.datamodel.*;
26 * Parses a simple BLAST report. Attempts to cope with query-anchored and pairwise alignments.
// Reader for simple BLAST reports, built on AlignFile.
// NOTE(review): this listing omits some lines of the class (see the gaps in the
// embedded numbering); the comments here describe only what is shown.
32 public class SimpleBlastFile extends AlignFile
35 * header and footer info goes into alignment annotation.
// Both buffers are created in initData(). parse() publishes them as the
// "HEADER" and "FOOTER" alignment properties, and routes non-alignment lines to
// footerLines once at least one sequence id has been recorded.
37 StringBuffer headerLines, footerLines;
40 * hold sequence ids in order of appearance in file
// NOTE(review): the declaration this comment belongs to is not visible here;
// initData() assigns `seqids = new Vector()`, so it is presumably a raw Vector
// field named seqids - confirm.
// No-argument constructor.
// NOTE(review): the constructor body is not visible in this listing.
44 public SimpleBlastFile()
// Constructs a reader from two strings (inFile, type); declared to throw
// IOException.
// NOTE(review): the body is not visible in this listing - presumably it hands
// both arguments to the AlignFile constructor; confirm which values `type`
// accepts.
48 public SimpleBlastFile(String inFile, String type) throws IOException
// Constructs a reader from an existing FileParse; declared to throw
// IOException.
// NOTE(review): the body is not visible in this listing - presumably it
// forwards `source` to the AlignFile constructor; confirm.
53 public SimpleBlastFile(FileParse source) throws IOException
// Resets this reader's own state: fresh, empty header and footer buffers and an
// empty list of sequence ids.
// NOTE(review): a call to the superclass initData(), if any, is not visible in
// this listing - confirm.
58 public void initData()
61 headerLines = new StringBuffer();
62 footerLines = new StringBuffer();
// raw Vector; parse() adds each sequence id to it the first time the id is seen
63 seqids = new Vector();
// Reads the report one line at a time via nextLine() and converts the rows of
// its alignment section into sequences.
//
// While inAlignments is set, each row is cut into four pieces using column
// positions (numcol, aligcol, padding, lastcol) taken from the most recent line
// that starts with "Query": the sequence id, the start index, the aligned
// segment and the end index. Pieces are accumulated per id in seqhash. After the
// loop one Sequence per id (in order of first appearance) is added to seqs, and
// the header/footer buffers are stored as the "HEADER" and "FOOTER" alignment
// properties.
//
// Declared to throw IOException; number-format problems on individual rows are
// only reported on System.err, and failures while building a Sequence are
// appended to warningMessage.
//
// NOTE(review): a number of statements of this method are missing from this
// listing (see the gaps in the embedded numbering, e.g. the declaration of
// `line`). Comments below describe only the visible lines; anything about a
// missing branch body is marked as an assumption.
66 public void parse() throws IOException
// NOTE(review): no use of gapc is visible in this listing - presumably it is
// what the padding loop further down appends; confirm.
69 char gapc = ' '; // nominal gap character
// raw Hashtable keyed by sequence id; see the NOTE(review) on its value type
// where entries are stored below
70 Hashtable seqhash = new Hashtable();
71 boolean inAlignments = false;
// column markers for the current block; -1 means "not found yet"
72 int padding = -1, numcol = -1, aligcol = -1, lastcol = -1;
73 long qlen = 0, rstart, rend; // total number of query bases so far
74 boolean padseq = false;
75 while ((line = nextLine()) != null)
// a line that begins with "ALIGNMENTS" is treated specially
// NOTE(review): the branch body is not visible - presumably it sets
// inAlignments to true; confirm.
77 if (line.indexOf("ALIGNMENTS") == 0)
// whitespace-only lines are detected here (the action taken is not visible)
85 if (line.trim().length() == 0)
89 // parse out the sequences
90 // query anchored means that we use the query sequence as the
// a line starting with "Query" (re)defines the column layout for its block
92 if (line.indexOf("Query") == 0)
95 // reset column markers for this block
99 // init or reset the column positions
// the scan starts at index 5, i.e. just past the five characters of "Query"
100 for (int p = 5, mLen = line.length(); p < mLen; p++)
102 char c = line.charAt(p);
// ASCII digit
103 if (c >= '0' && c <= '9')
// a digit seen after the alignment column is known but before lastcol is set
// NOTE(review): presumably this records lastcol = p; the assignment is not
// visible in this listing.
109 else if (aligcol != -1 && lastcol == -1)
// NOTE(review): the range 'A'..'z' covers the letters but also the six
// punctuation characters that sit between 'Z' and 'a' in ASCII - confirm that
// this is intended.
116 if (c >= 'A' && c <= 'z')
128 padding = p; // beginning of last stretch of whitespace
// Stop treating lines as alignment rows when the line mentions "Database:",
// when any column marker is still unset, or when the line is too short to reach
// the end-index column.
138 if (line.indexOf("Database:") > -1
139 || (aligcol == -1 || numcol == -1 || lastcol == -1)
140 || line.length() < lastcol)
142 inAlignments = false;
146 // now extract the alignment.
// id = text before numcol; start index = numcol..aligcol; aligned segment =
// aligcol..padding; end index = everything from lastcol onwards
147 String sqid = line.substring(0, numcol).trim();
148 String stindx = line.substring(numcol, aligcol).trim();
149 String aligseg = line.substring(aligcol, padding);
150 String endindx = line.substring(lastcol).trim();
151 // init start/end prior to parsing
152 rstart = 1; // best guess we have
153 rend = 0; // if zero at end of parsing, then we count non-gaps
// a start index that does not parse leaves rstart at the default of 1 and is
// only reported on System.err
156 rstart = Long.parseLong(stindx);
157 } catch (Exception e)
159 System.err.println("Couldn't parse '" + stindx
160 + "' as start of row");
161 // inAlignments = false;
162 // warn for this line
// an end index that does not parse leaves rend at the default of 0 and is only
// reported on System.err
166 rend = Long.parseLong(endindx);
167 } catch (Exception e)
169 System.err.println("Couldn't parse '" + endindx
170 + "' as end of row");
171 // inAlignments = false;
173 // warn for this line
// NOTE(review): seqhash values are used with two different types. The value
// read here is cast to Vector and a Vector is stored for a new id, but further
// down `seqhash.put(sqid, seqentry)` replaces it with an Object[] under the same
// key, and the sequence-building loop after the main loop casts the value to
// Object[]. A later row with the same id would therefore appear to fail this
// Vector cast with ClassCastException - confirm and decide which value type the
// table should hold.
175 Vector seqentries = (Vector) seqhash.get(sqid);
176 if (seqentries == null)
178 seqentries = new Vector();
179 seqhash.put(sqid, seqentries);
// first sighting of this id: remember it so output order follows the file
180 seqids.addElement(sqid);
// each entry is an Object[]: element 0 is a StringBuffer of residues, element 1
// is a long[] whose index 0 / index 1 are later used as sequence start / end
183 Object[] seqentry = null;
184 Enumeration sqent = seqentries.elements();
185 while (seqentry == null && sqent.hasMoreElements())
187 seqentry = (Object[]) sqent.nextElement();
// an existing entry continues into this row only if its recorded end + 1 equals
// this row's start
// NOTE(review): presumably seqentry is reset to null otherwise so the search
// goes on; that statement is not visible in this listing.
188 if (((long[]) seqentry[1])[1] + 1 != rstart)
// no entry continues into this row: start a new one
194 if (seqentry == null)
196 padseq = true; // prepend gaps to new sequences in this block
197 seqentry = new Object[]
198 { new StringBuffer(), new long[]
200 seqentries.addElement(seqentry);
// overwrites the Vector stored under sqid (see the NOTE(review) above)
201 seqhash.put(sqid, seqentry);
204 if (sqid.equals("Query"))
206 // update current block length in case we need to pad
// length of the query buffer as it stands at this point
207 qlen = ((StringBuffer) seqentry[0]).length();
209 StringBuffer sqs = ((StringBuffer) seqentry[0]);
// runs once for every position by which this buffer is shorter than qlen
// NOTE(review): the loop body is not visible - presumably it appends a gap
// character; confirm.
212 for (long c = sqs.length(); c < qlen; c++)
// record this row's end index as the entry's current end
220 ((long[]) seqentry[1])[1] = rend;
223 // end of parsing out the sequences
225 // if we haven't parsed the line as an alignment, then
226 // add to the sequence header
229 String ln = line.trim();
230 // save any header stuff for the user
// once at least one id has been stored the text is routed to footerLines
// NOTE(review): the other arm of this conditional is not visible - presumably
// headerLines; confirm.
233 StringBuffer addto = (seqhash.size() > 0) ? footerLines
// only build sequences if at least one id was collected
241 if (seqhash.size() > 0)
243 // make the sequence vector
244 Enumeration seqid = seqids.elements();
245 while (seqid.hasMoreElements())
247 String idstring = (String) seqid.nextElement();
// relies on the table holding an Object[] for this id (see the NOTE(review) in
// the main loop)
248 Object[] seqentry = (Object[]) seqhash.get(idstring);
// the long start/end values are narrowed to int for the Sequence constructor
251 Sequence newseq = new Sequence(idstring,
253 ((StringBuffer) seqentry[0]).toString(),
254 (int) ((long[]) seqentry[1])[0],
255 (int) ((long[]) seqentry[1])[1]);
// an end of 0 means no end index was recorded; derive it from the sequence
256 if (newseq.getEnd() == 0)
258 // assume there are no deletions in the sequence.
259 newseq.setEnd(newseq.findPosition(newseq.getLength()));
261 seqs.addElement(newseq);
// a failure for one id is recorded in warningMessage rather than rethrown here
262 } catch (Exception e)
// NOTE(review): the body of this null check is not visible - presumably it
// initialises warningMessage before the append below; confirm.
264 if (warningMessage == null)
268 warningMessage += "Couldn't add Sequence - ID is '" + idstring
269 + "' : Exception was " + e.toString() + "\n";
272 // add any annotation
// each property is set only when its buffer holds more than one character
273 if (headerLines.length() > 1)
275 setAlignmentProperty("HEADER", headerLines.toString());
277 if (footerLines.length() > 1)
279 setAlignmentProperty("FOOTER", footerLines.toString());
// Output is not implemented for this format: the argument is ignored and the
// fixed text "Not Implemented." is returned.
284 public String print(SequenceI[] s)
// NOTE(review): new String(...) makes a separate copy of the literal on every
// call; returning the literal itself would yield the same text.
286 return new String("Not Implemented.");
// Convenience overload: passes the result of getSeqsAsArray() to
// print(SequenceI[]) and returns whatever that returns.
289 public String print()
291 return print(getSeqsAsArray());