2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.datamodel.Sequence;
24 import jalview.datamodel.SequenceI;
26 import java.io.IOException;
27 import java.util.Enumeration;
28 import java.util.Hashtable;
29 import java.util.Vector;
32 * parse a simple blast report. Attempt to cope with query anchored and pairwise
// Reader for simple BLAST reports, built on AlignFile.
// NOTE(review): this view of the class is missing lines (braces, comment
// delimiters and at least one field declaration); confirm against the
// complete file before relying on these notes.
38 public class SimpleBlastFile extends AlignFile
41 * header and footer info goes into alignment annotation.
// Text buffers that parse() publishes as the "HEADER" and "FOOTER"
// alignment properties when they hold more than one character.
43 StringBuffer headerLines, footerLines;
46 * hold sequence ids in order of appearance in file
// NOTE(review): the field this comment describes is not visible here;
// initData() assigns seqids a new Vector and parse() appends each newly seen
// id to it, so it is presumably declared as a Vector - confirm.
// No-argument constructor; no body statements are visible in this view.
50 public SimpleBlastFile()
// Constructor taking two strings (inFile, type); declared to throw IOException.
// NOTE(review): the body is not visible in this view - presumably it forwards
// both arguments to the AlignFile constructor; confirm.
54 public SimpleBlastFile(String inFile, String type) throws IOException
// Constructor taking an existing FileParse source; declared to throw IOException.
// NOTE(review): the body is not visible in this view - presumably it forwards
// the source to the AlignFile constructor; confirm.
59 public SimpleBlastFile(FileParse source) throws IOException
// Resets the parser's own state: fresh, empty header and footer buffers and
// an empty list of sequence ids.
// NOTE(review): a statement line between the signature and the first
// assignment appears to be missing from this view (possibly a call to the
// superclass initialiser) - confirm against the complete file.
64 public void initData()
67 headerLines = new StringBuffer();
68 footerLines = new StringBuffer();
69 seqids = new Vector();
// Reads the report line by line through nextLine() until it returns null.
// Lines recognised as alignment rows are split into id / start / residues /
// end and accumulated per sequence id; other lines are trimmed and kept as
// header or footer text. Afterwards one Sequence per recorded id is added to
// seqs and the HEADER / FOOTER alignment properties are set.
// Declared to throw IOException.
// NOTE(review): this view of the method is missing lines (braces, try/else
// keywords and several loop/if bodies). The comments below describe only
// what is visible and hedge wherever a body cannot be seen.
72 public void parse() throws IOException
75 char gapc = ' '; // nominal gap character
// Keyed by sequence id. See the NOTE(review) further down about the two
// different value types stored under the same key.
76 Hashtable seqhash = new Hashtable();
77 boolean inAlignments = false;
// Column markers for the current block; -1 is treated as "not yet determined"
// by the checks further down.
78 int padding = -1, numcol = -1, aligcol = -1, lastcol = -1;
79 long qlen = 0, rstart, rend; // total number of query bases so far
80 boolean padseq = false;
81 while ((line = nextLine()) != null)
// True when the line begins with the text "ALIGNMENTS"; the body of this
// branch is not visible here.
83 if (line.indexOf("ALIGNMENTS") == 0)
// Blank (whitespace-only) line; the body of this branch is not visible here.
91 if (line.trim().length() == 0)
95 // parse out the sequences
96 // query anchored means that we use the query sequence as the
// A line beginning with "Query" is used to (re)derive the column positions.
98 if (line.indexOf("Query") == 0)
101 // reset column markers for this block
105 // init or reset the column positions
// Scan starts at index 5, i.e. just past the five characters of "Query".
106 for (int p = 5, mLen = line.length(); p < mLen; p++)
108 char c = line.charAt(p);
// Decimal digit; the branch body is not visible here.
109 if (c >= '0' && c <= '9')
115 else if (aligcol != -1 && lastcol == -1)
// Range 'A'..'z' covers upper- and lower-case letters and also the six
// punctuation characters that sit between 'Z' and 'a' in ASCII.
122 if (c >= 'A' && c <= 'z')
134 padding = p; // beginning of last stretch of whitespace
// Stop treating lines as alignment rows when the line mentions "Database:",
// when any of the three column markers is still unset, or when the line is
// shorter than lastcol.
144 if (line.indexOf("Database:") > -1
145 || (aligcol == -1 || numcol == -1 || lastcol == -1)
146 || line.length() < lastcol)
148 inAlignments = false;
152 // now extract the alignment.
// Fixed-column split: [0,numcol) id, [numcol,aligcol) start index,
// [aligcol,padding) aligned residues, [lastcol,end) end index.
// NOTE(review): the guard above does not test padding; confirm it is always
// set to a value >= aligcol and <= line.length() whenever lastcol is set.
153 String sqid = line.substring(0, numcol).trim();
154 String stindx = line.substring(numcol, aligcol).trim();
155 String aligseg = line.substring(aligcol, padding);
156 String endindx = line.substring(lastcol).trim();
157 // init start/end prior to parsing
158 rstart = 1; // best guess we have
159 rend = 0; // if zero at end of parsing, then we count non-gaps
// Start index: on a parse failure a message is written to System.err and,
// as far as is visible here, rstart keeps the default assigned above.
162 rstart = Long.parseLong(stindx);
163 } catch (Exception e)
165 System.err.println("Couldn't parse '" + stindx
166 + "' as start of row");
167 // inAlignments = false;
168 // warn for this line
// End index: handled the same way as the start index; rend keeps 0 on failure
// as far as is visible here.
172 rend = Long.parseLong(endindx);
173 } catch (Exception e)
175 System.err.println("Couldn't parse '" + endindx
176 + "' as end of row");
177 // inAlignments = false;
179 // warn for this line
// Look up the entries recorded for this id; a first-seen id gets a new Vector
// and is appended to seqids so that ids keep their order of appearance.
181 Vector seqentries = (Vector) seqhash.get(sqid);
182 if (seqentries == null)
184 seqentries = new Vector();
185 seqhash.put(sqid, seqentries);
186 seqids.addElement(sqid);
// Each entry is an Object[] of { StringBuffer residues, long[] {start, end} }.
// Walk the entries looking for one whose recorded end + 1 equals rstart.
189 Object[] seqentry = null;
190 Enumeration sqent = seqentries.elements();
191 while (seqentry == null && sqent.hasMoreElements())
193 seqentry = (Object[]) sqent.nextElement();
// Body not visible here - presumably clears seqentry so the search continues
// when this entry is not contiguous with rstart; confirm.
194 if (((long[]) seqentry[1])[1] + 1 != rstart)
// No matching entry found: start a new one for this row.
200 if (seqentry == null)
202 padseq = true; // prepend gaps to new sequences in this block
203 seqentry = new Object[] { new StringBuffer(),
204 new long[] { rstart, rend } };
205 seqentries.addElement(seqentry);
// NOTE(review): this stores the Object[] under the same key that held the
// Vector above. The final loop below reads the value back as Object[], but
// the (Vector) cast above would then fail with a ClassCastException on a
// later row carrying the same id - confirm the intended behaviour.
206 seqhash.put(sqid, seqentry);
209 if (sqid.equals("Query"))
211 // update current block length in case we need to pad
212 qlen = ((StringBuffer) seqentry[0]).length();
214 StringBuffer sqs = ((StringBuffer) seqentry[0]);
// Loop body not visible here - presumably pads sqs up to qlen; confirm.
217 for (long c = sqs.length(); c < qlen; c++)
// Record the end index of this row on the entry.
225 ((long[]) seqentry[1])[1] = rend;
228 // end of parsing out the sequences
230 // if we haven't parsed the line as an alignment, then
231 // add to the sequence header
234 String ln = line.trim();
235 // save any header stuff for the user
// footerLines is chosen once at least one id has been recorded; the
// alternative arm of this conditional is not visible here (presumably
// headerLines - confirm).
238 StringBuffer addto = (seqhash.size() > 0) ? footerLines
// Build the result only when at least one id was recorded.
246 if (seqhash.size() > 0)
248 // make the sequence vector
249 Enumeration seqid = seqids.elements();
250 while (seqid.hasMoreElements())
252 String idstring = (String) seqid.nextElement();
253 Object[] seqentry = (Object[]) seqhash.get(idstring);
// Start and end are narrowed from long to int for the Sequence constructor.
256 Sequence newseq = new Sequence(idstring,
258 ((StringBuffer) seqentry[0]).toString(),
259 (int) ((long[]) seqentry[1])[0],
260 (int) ((long[]) seqentry[1])[1]);
// An end of 0 means no end index was recorded; derive it from the sequence.
261 if (newseq.getEnd() == 0)
263 // assume there are no deletions in the sequence.
264 newseq.setEnd(newseq.findPosition(newseq.getLength()));
266 seqs.addElement(newseq);
// Failures while building or adding a sequence are recorded in warningMessage
// rather than propagated.
267 } catch (Exception e)
// Body not visible here - presumably initialises warningMessage before the
// append below; confirm.
269 if (warningMessage == null)
273 warningMessage += "Couldn't add Sequence - ID is '" + idstring
274 + "' : Exception was " + e.toString() + "\n";
277 // add any annotation
// Header/footer text is published only when longer than one character.
278 if (headerLines.length() > 1)
280 setAlignmentProperty("HEADER", headerLines.toString());
282 if (footerLines.length() > 1)
284 setAlignmentProperty("FOOTER", footerLines.toString());
// Placeholder: the supplied sequences are not used; always returns the fixed
// text "Not Implemented." as a newly constructed String.
289 public String print(SequenceI[] s)
291 return new String("Not Implemented.");
// Convenience overload: passes the result of getSeqsAsArray() to
// print(SequenceI[]) and returns whatever that returns.
294 public String print()
296 return print(getSeqsAsArray());