2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
23 import jalview.datamodel.Sequence;
24 import jalview.datamodel.SequenceI;
26 import java.io.IOException;
27 import java.util.Enumeration;
28 import java.util.Hashtable;
29 import java.util.Vector;
32 * parse a simple blast report. Attempt to cope with query anchored and pairwise
// NOTE(review): this listing looks like a lossy extraction of the original
// file. Every line carries its original line number as a prefix, and the gaps
// in that numbering mean that braces, comment delimiters and some statements
// are not visible here. The comments added below describe only what the
// visible lines show; anything inferred is marked as an assumption.
//
// Reader for BLAST report text built on AlignFile. The visible code gathers
// aligned row segments per sequence id while scanning the input line by line,
// then turns the gathered text into Sequence objects at the end of parse().
38 public class SimpleBlastFile extends AlignFile
41 * header and footer info goes into alignment annotation.
// Both buffers are created in initData() and, when longer than one character,
// published at the end of parse() as the HEADER and FOOTER alignment
// properties.
43 StringBuffer headerLines, footerLines;
46 * hold sequence ids in order of appearance in file
// NOTE(review): the declaration this comment belongs to is not visible;
// presumably it is the seqids Vector that initData() allocates - confirm.
// No-argument constructor; its body is not visible in this listing.
50 public SimpleBlastFile()
// Constructor taking an input location and its DataSourceType; both are
// handed straight to the AlignFile constructor.
54 public SimpleBlastFile(String inFile, DataSourceType sourceType)
57 super(inFile, sourceType);
// Constructor taking an existing FileParse source; its body is not visible.
// NOTE(review): presumably it delegates to the matching super constructor.
60 public SimpleBlastFile(FileParse source) throws IOException
// Allocates fresh header and footer buffers and a fresh id list.
// NOTE(review): any call to the superclass initData() is not visible here.
66 public void initData()
69 headerLines = new StringBuffer();
70 footerLines = new StringBuffer();
71 seqids = new Vector();
// Reads the input through nextLine() until it returns null, extracts aligned
// rows into per-id entries held in seqhash, then builds one Sequence per id
// in order of first appearance. Lines that are not consumed as alignment rows
// are kept as header or footer text.
75 public void parse() throws IOException
78 char gapc = ' '; // nominal gap character
// seqhash is keyed by the sequence id string taken from the start of a row.
79 Hashtable seqhash = new Hashtable();
80 boolean inAlignments = false;
// Column markers for the current block; -1 means not yet determined.
// numcol and aligcol bound the start-index field, padding ends the aligned
// segment and lastcol begins the end-index field (see the substring calls
// further down).
81 int padding = -1, numcol = -1, aligcol = -1, lastcol = -1;
82 long qlen = 0, rstart, rend; // total number of query bases so far
83 boolean padseq = false;
84 while ((line = nextLine()) != null)
// True only when the line begins with the ALIGNMENTS marker (index 0).
// NOTE(review): the branch body is not visible; presumably it switches
// inAlignments on - confirm.
86 if (line.indexOf("ALIGNMENTS") == 0)
// Test for a line that is empty once trimmed; the action taken is not
// visible here.
94 if (line.trim().length() == 0)
98 // parse out the sequences
99 // query anchored means that we use the query sequence as the
// A row that begins with Query triggers a rescan of the column layout.
101 if (line.indexOf("Query") == 0)
104 // reset column markers for this block
108 // init or reset the column positions
// The scan starts at index 5, which is the length of the word Query.
109 for (int p = 5, mLen = line.length(); p < mLen; p++)
111 char c = line.charAt(p);
// ASCII digit test.
112 if (c >= '0' && c <= '9')
// Reached for a non-digit once the alignment column is known but the last
// column is still unset; the branch body is not visible.
118 else if (aligcol != -1 && lastcol == -1)
// NOTE(review): this range covers the ASCII letters but also the six
// punctuation characters that sit between Z and a in ASCII.
125 if (c >= 'A' && c <= 'z')
137 padding = p; // beginning of last stretch of whitespace
// Leave alignment mode when the line mentions the Database: trailer, when any
// column marker is still unset, or when the line is shorter than lastcol.
147 if (line.indexOf("Database:") > -1
148 || (aligcol == -1 || numcol == -1 || lastcol == -1)
149 || line.length() < lastcol)
151 inAlignments = false;
155 // now extract the alignment.
// Fixed-column slicing: id, start index, aligned segment, end index.
// The aligned segment is taken untrimmed, unlike the other three fields.
156 String sqid = line.substring(0, numcol).trim();
157 String stindx = line.substring(numcol, aligcol).trim();
158 String aligseg = line.substring(aligcol, padding);
159 String endindx = line.substring(lastcol).trim();
160 // init start/end prior to parsing
161 rstart = 1; // best guess we have
162 rend = 0; // if zero at end of parsing, then we count non-gaps
// A start index that fails to parse is caught and a message is built from it.
// NOTE(review): where that message is sent is not visible; rstart would keep
// the default assigned above.
165 rstart = Long.parseLong(stindx);
166 } catch (Exception e)
169 "Couldn't parse '" + stindx + "' as start of row");
170 // inAlignments = false;
171 // warn for this line
// Same handling for the end index; on failure rend would keep its 0 default.
175 rend = Long.parseLong(endindx);
176 } catch (Exception e)
179 "Couldn't parse '" + endindx + "' as end of row");
180 // inAlignments = false;
182 // warn for this line
// Look up the entries already recorded for this id, creating and registering
// an empty Vector the first time the id is seen. seqids keeps first-seen
// order.
184 Vector seqentries = (Vector) seqhash.get(sqid);
185 if (seqentries == null)
187 seqentries = new Vector();
188 seqhash.put(sqid, seqentries);
189 seqids.addElement(sqid);
// Walk the existing entries until one is accepted. The visible test compares
// an entry's recorded end plus one with this row's start, i.e. whether the
// row continues that entry.
// NOTE(review): the action for a non-matching entry is not visible;
// presumably seqentry is set back to null so the search continues.
192 Object[] seqentry = null;
193 Enumeration sqent = seqentries.elements();
194 while (seqentry == null && sqent.hasMoreElements())
196 seqentry = (Object[]) sqent.nextElement();
197 if (((long[]) seqentry[1])[1] + 1 != rstart)
// No entry accepted: start a new one. Element 0 is the StringBuffer holding
// the sequence text; element 1 is read elsewhere as a long array whose index
// 1 holds the end position.
// NOTE(review): index 0 is presumably the start position; the rest of the
// initialiser is not visible.
203 if (seqentry == null)
205 padseq = true; // prepend gaps to new sequences in this block
206 seqentry = new Object[] { new StringBuffer(),
209 seqentries.addElement(seqentry);
// NOTE(review): this stores an Object array under the same key that was given
// a Vector above and that is read back with a Vector cast at the top of this
// section. The final loop of parse() reads an Object array for the key, so
// that read relies on this put, but a later row with the same id would then
// hit the Vector cast with an Object array - confirm whether multi-block
// input is affected.
210 seqhash.put(sqid, seqentry);
// For the Query row, remember the current length of its text buffer.
213 if (sqid.equals("Query"))
215 // update current block length in case we need to pad
216 qlen = ((StringBuffer) seqentry[0]).length();
218 StringBuffer sqs = ((StringBuffer) seqentry[0]);
// Counts from the current buffer length up to qlen.
// NOTE(review): the loop body is not visible; presumably it appends gapc to
// pad the buffer to the query length.
221 for (long c = sqs.length(); c < qlen; c++)
// Record this row's end index as the entry's end position.
229 ((long[]) seqentry[1])[1] = rend;
232 // end of parsing out the sequences
234 // if we haven't parsed the line as an alignment, then
235 // add to the sequence header
238 String ln = line.trim();
239 // save any header stuff for the user
// Once at least one sequence id has been recorded the text goes to
// footerLines.
// NOTE(review): the other arm of the conditional is not visible; presumably
// headerLines.
242 StringBuffer addto = (seqhash.size() > 0) ? footerLines
// After the read loop: build the alignment only if something was collected.
250 if (seqhash.size() > 0)
252 // make the sequence vector
253 Enumeration seqid = seqids.elements();
254 while (seqid.hasMoreElements())
256 String idstring = (String) seqid.nextElement();
257 Object[] seqentry = (Object[]) seqhash.get(idstring);
// The long positions are narrowed to int here without a range check.
260 Sequence newseq = new Sequence(idstring,
262 ((StringBuffer) seqentry[0]).toString(),
263 (int) ((long[]) seqentry[1])[0],
264 (int) ((long[]) seqentry[1])[1]);
// An end of 0 means no end index was parsed, so derive it from the sequence
// length.
265 if (newseq.getEnd() == 0)
267 // assume there are no deletions in the sequence.
268 newseq.setEnd(newseq.findPosition(newseq.getLength()));
270 seqs.addElement(newseq);
// Any failure while building or adding a sequence is appended to
// warningMessage together with the id and the exception text; the loop then
// carries on with the next id.
271 } catch (Exception e)
273 if (warningMessage == null)
277 warningMessage += "Couldn't add Sequence - ID is '" + idstring
278 + "' : Exception was " + e.toString() + "\n";
281 // add any annotation
// A length greater than 1 is required, so a buffer holding a single character
// is not published.
282 if (headerLines.length() > 1)
284 setAlignmentProperty("HEADER", headerLines.toString());
286 if (footerLines.length() > 1)
288 setAlignmentProperty("FOOTER", footerLines.toString());
// Writing is not supported: both arguments are ignored by the visible code
// and a fixed placeholder string is returned.
294 public String print(SequenceI[] sqs, boolean jvsuffix)
296 return new String("Not Implemented.");