2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.datamodel.Sequence;
24 import jalview.datamodel.SequenceI;
26 import java.io.IOException;
27 import java.util.Enumeration;
28 import java.util.Hashtable;
29 import java.util.Vector;
32 * Parses a simple BLAST report. Attempts to cope with query anchored and pairwise
// Reader for simple BLAST report files; builds on AlignFile.
38 public class SimpleBlastFile extends AlignFile
// The two buffers declared below are created in initData() and, when they hold
// more than one character, are published by parse() as the "HEADER" and
// "FOOTER" alignment properties.
41 * header and footer info goes into alignment annotation.
43 StringBuffer headerLines, footerLines;
// NOTE(review): the field this comment describes is not visible in this listing;
// initData() assigns a Vector to 'seqids' and parse() appends each new id to it.
46 * hold sequence ids in order of appearance in file
// No-argument constructor. Its body is not visible in this listing.
50 public SimpleBlastFile()
// Constructs a reader from inFile and sourceType; both arguments are handed
// unchanged to the AlignFile constructor.
54 public SimpleBlastFile(String inFile, DataSourceType sourceType)
57 super(inFile, sourceType);
// Constructs a reader from a FileParse source; declared to throw IOException.
// NOTE(review): the body is not visible here - presumably it delegates to the
// superclass constructor; confirm.
60 public SimpleBlastFile(FileParse source) throws IOException
// Resets this reader's own parsing state: empty header and footer buffers and
// an empty list of sequence ids.
// NOTE(review): a statement may precede the first assignment below (it is not
// visible in this listing) - confirm whether the superclass state is reset too.
66 public void initData()
69 headerLines = new StringBuffer();
70 footerLines = new StringBuffer();
// parse() appends an id to seqids the first time that id is encountered.
71 seqids = new Vector();
// Reads the report line by line through nextLine(). Alignment rows are split
// into id / start index / aligned segment / end index using column positions
// derived from the "Query" row, and accumulated per sequence id. Afterwards one
// Sequence per id is added to 'seqs', and the collected header / footer text is
// stored as the "HEADER" / "FOOTER" alignment properties.
// Throws IOException as declared (nextLine() is the only visible read call).
75 public void parse() throws IOException
// NOTE(review): gapc is declared here but its use is not visible in this
// listing - presumably it is the character used when padding rows; confirm.
78 char gapc = ' '; // nominal gap character
// Keyed by sequence id (see the get/put calls further down).
79 Hashtable seqhash = new Hashtable();
80 boolean inAlignments = false;
// Column markers for the current block, as used when a row is sliced below:
// [0, numcol) is the id, [numcol, aligcol) the start index,
// [aligcol, padding) the aligned segment, and [lastcol, end) the end index.
81 int padding = -1, numcol = -1, aligcol = -1, lastcol = -1;
82 long qlen = 0, rstart, rend; // total number of query bases so far
83 boolean padseq = false;
84 while ((line = nextLine()) != null)
// A line that starts with "ALIGNMENTS" is treated specially.
// NOTE(review): the branch body is not visible - presumably it switches
// inAlignments on; confirm.
86 if (line.indexOf("ALIGNMENTS") == 0)
// Whitespace-only lines get their own branch (body not visible here).
94 if (line.trim().length() == 0)
98 // parse out the sequences
99 // query anchored means that we use the query sequence as the
// A row starting with "Query" is used to (re)derive the column markers.
101 if (line.indexOf("Query") == 0)
104 // reset column markers for this block
108 // init or reset the column positions
// The scan starts at index 5, i.e. just past the five characters of "Query".
109 for (int p = 5, mLen = line.length(); p < mLen; p++)
111 char c = line.charAt(p);
// Digits mark the numeric index columns.
112 if (c >= '0' && c <= '9')
// A digit seen after the alignment column is known, while lastcol is still
// unset, satisfies this condition.
118 else if (aligcol != -1 && lastcol == -1)
// NOTE(review): the range 'A'..'z' also admits the six ASCII punctuation
// characters that sit between 'Z' and 'a'.
125 if (c >= 'A' && c <= 'z')
137 padding = p; // beginning of last stretch of whitespace
// Stop treating lines as alignment rows when the line contains "Database:",
// when any column marker is still unset, or when the line is shorter than
// lastcol.
147 if (line.indexOf("Database:") > -1
148 || (aligcol == -1 || numcol == -1 || lastcol == -1)
149 || line.length() < lastcol)
151 inAlignments = false;
155 // now extract the alignment.
156 String sqid = line.substring(0, numcol).trim();
157 String stindx = line.substring(numcol, aligcol).trim();
// Unlike the other three fields, the aligned segment is not trimmed.
158 String aligseg = line.substring(aligcol, padding);
159 String endindx = line.substring(lastcol).trim();
160 // init start/end prior to parsing
161 rstart = 1; // best guess we have
162 rend = 0; // if zero at end of parsing, then we count non-gaps
// If the start index cannot be parsed, a message goes to System.err and
// rstart keeps the default assigned above.
165 rstart = Long.parseLong(stindx);
166 } catch (Exception e)
168 System.err.println("Couldn't parse '" + stindx
169 + "' as start of row");
170 // inAlignments = false;
171 // warn for this line
// Same treatment for the end index: on failure rend keeps its default of 0.
175 rend = Long.parseLong(endindx);
176 } catch (Exception e)
178 System.err.println("Couldn't parse '" + endindx
179 + "' as end of row");
180 // inAlignments = false;
182 // warn for this line
// Look up the entries recorded so far for this id; an id seen for the first
// time gets an empty Vector and is appended to seqids.
184 Vector seqentries = (Vector) seqhash.get(sqid);
185 if (seqentries == null)
187 seqentries = new Vector();
188 seqhash.put(sqid, seqentries);
189 seqids.addElement(sqid);
// Each entry is an Object[] { StringBuffer residues, long[] { start, end } }.
192 Object[] seqentry = null;
193 Enumeration sqent = seqentries.elements();
194 while (seqentry == null && sqent.hasMoreElements())
196 seqentry = (Object[]) sqent.nextElement();
// Tests whether this row does NOT continue directly from the entry's
// recorded end (end + 1 != rstart).
// NOTE(review): the branch body is not visible - presumably it clears
// seqentry so the search continues; confirm.
197 if (((long[]) seqentry[1])[1] + 1 != rstart)
// No usable entry was found: start a new one for this row.
203 if (seqentry == null)
205 padseq = true; // prepend gaps to new sequences in this block
206 seqentry = new Object[] { new StringBuffer(),
207 new long[] { rstart, rend } };
208 seqentries.addElement(seqentry);
// NOTE(review): this replaces the Vector stored under sqid with the Object[]
// entry itself. The sequence-building loop at the end of this method reads the
// value back as Object[], but the "(Vector) seqhash.get(sqid)" lookup above
// looks like it would throw ClassCastException on a later row with the same
// id - confirm against a multi-block report.
209 seqhash.put(sqid, seqentry);
212 if (sqid.equals("Query"))
214 // update current block length in case we need to pad
215 qlen = ((StringBuffer) seqentry[0]).length();
217 StringBuffer sqs = ((StringBuffer) seqentry[0]);
// Iterates from the buffer's current length up to qlen.
// NOTE(review): the loop body is not visible - presumably it pads the buffer
// up to the query length; confirm.
220 for (long c = sqs.length(); c < qlen; c++)
// Record this row's end index as the entry's latest end.
228 ((long[]) seqentry[1])[1] = rend;
231 // end of parsing out the sequences
233 // if we haven't parsed the line as an alignment, then
234 // add to the sequence header
237 String ln = line.trim();
238 // save any header stuff for the user
// Once at least one sequence id has been stored, footerLines is selected.
// NOTE(review): the other arm of this conditional is not visible - presumably
// headerLines; confirm.
241 StringBuffer addto = (seqhash.size() > 0) ? footerLines
// Post-processing only happens when at least one id was collected.
249 if (seqhash.size() > 0)
251 // make the sequence vector
// Walk the ids in the order in which they were first recorded.
252 Enumeration seqid = seqids.elements();
253 while (seqid.hasMoreElements())
255 String idstring = (String) seqid.nextElement();
256 Object[] seqentry = (Object[]) seqhash.get(idstring);
// Start and end were tracked as long and are narrowed to int here.
259 Sequence newseq = new Sequence(idstring,
261 ((StringBuffer) seqentry[0]).toString(),
262 (int) ((long[]) seqentry[1])[0],
263 (int) ((long[]) seqentry[1])[1]);
// An end of 0 is replaced with a position computed from the sequence itself.
264 if (newseq.getEnd() == 0)
266 // assume there are no deletions in the sequence.
267 newseq.setEnd(newseq.findPosition(newseq.getLength()));
269 seqs.addElement(newseq);
// Any failure for this id is appended to warningMessage instead of being
// rethrown.
270 } catch (Exception e)
272 if (warningMessage == null)
276 warningMessage += "Couldn't add Sequence - ID is '" + idstring
277 + "' : Exception was " + e.toString() + "\n";
280 // add any annotation
// Header and footer text is only stored when the buffer holds more than one
// character.
281 if (headerLines.length() > 1)
283 setAlignmentProperty("HEADER", headerLines.toString());
285 if (footerLines.length() > 1)
287 setAlignmentProperty("FOOTER", footerLines.toString());
// Writing this format is not supported: the visible body returns the fixed text
// "Not Implemented." without consulting sqs or jvsuffix.
293 public String print(SequenceI[] sqs, boolean jvsuffix)
// NOTE(review): the new String(...) wrapper is redundant; the literal alone has
// the same content.
295 return new String("Not Implemented.");