2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.5)
3 * Copyright (C) 2010 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
23 import jalview.datamodel.*;
24 import jalview.util.*;
27 * parse a simple blast report. Attempt to cope with query anchored and pairwise
33 public class SimpleBlastFile extends AlignFile
36 * header and footer info goes into alignment annotation.
38 StringBuffer headerLines, footerLines;
41 * hold sequence ids in order of appearance in file
/**
 * No-argument constructor.
 * NOTE(review): presumably leaves the input source unset and relies on
 * initData() for buffer setup - confirm against the AlignFile superclass.
 */
45 public SimpleBlastFile()
/**
 * Constructor taking an input identifier and a type string.
 * NOTE(review): presumably both arguments are handed to the AlignFile
 * superclass, which decides how to open the input - confirm there.
 *
 * @param inFile input identifier (TODO confirm: file name, URL or pasted text?)
 * @param type how inFile is to be interpreted (TODO confirm allowed values)
 * @throws IOException declared by this constructor; the failing conditions are
 *           not established in this class
 */
49 public SimpleBlastFile(String inFile, String type) throws IOException
/**
 * Constructor taking an existing FileParse as the input source.
 * NOTE(review): presumably the source is handed to the AlignFile superclass -
 * confirm there.
 *
 * @param source the FileParse to read from
 * @throws IOException declared by this constructor; the failing conditions are
 *           not established in this class
 */
54 public SimpleBlastFile(FileParse source) throws IOException
/**
 * Allocates the buffers that parse() fills: an empty header text buffer, an
 * empty footer text buffer and an empty list of sequence ids.
 */
59 public void initData()
62 headerLines = new StringBuffer();
63 footerLines = new StringBuffer();
// parse() appends each id the first time it is seen and later walks this list
// to build the sequences, so output order follows first appearance
64 seqids = new Vector();
/**
 * Reads the report line by line via nextLine() and builds the sequences it
 * contains.
 *
 * Lines that start with "Query" are scanned character by character to
 * (re)establish the column positions used to cut up alignment rows. Each
 * alignment row is split into an id, a start index, an aligned segment and an
 * end index; per-id entries pair a StringBuffer with a long[] whose element 1
 * holds the end position of the last row added. Text that is not consumed as
 * an alignment row is collected for the header/footer buffers. Once input is
 * exhausted, one Sequence per recorded id is added to seqs, and header/footer
 * text longer than one character is stored through setAlignmentProperty under
 * the keys "HEADER" and "FOOTER".
 *
 * Problems parsing row indices are reported on System.err; failures while
 * building a Sequence are appended to warningMessage instead of being thrown.
 *
 * @throws IOException declared; NOTE(review): presumably propagated from
 *           nextLine() - confirm
 */
67 public void parse() throws IOException
70 char gapc = ' '; // nominal gap character
// keyed by sequence id; see the NOTE(review) further down about the two
// different value types that are stored under the same key
71 Hashtable seqhash = new Hashtable();
72 boolean inAlignments = false;
// column markers; -1 means "not yet located"
73 int padding = -1, numcol = -1, aligcol = -1, lastcol = -1;
74 long qlen = 0, rstart, rend; // total number of query bases so far
75 boolean padseq = false;
76 while ((line = nextLine()) != null)
// a line starting with "ALIGNMENTS" is treated specially
// NOTE(review): presumably this is what switches inAlignments on - confirm
78 if (line.indexOf("ALIGNMENTS") == 0)
// lines that are empty after trimming get their own handling
86 if (line.trim().length() == 0)
90 // parse out the sequences
91 // query anchored means that we use the query sequence as the
// a row starting with "Query" opens a block and defines its column layout
93 if (line.indexOf("Query") == 0)
96 // reset column markers for this block
100 // init or reset the column positions
// scanning starts at index 5, i.e. just past the five characters of "Query"
101 for (int p = 5, mLen = line.length(); p < mLen; p++)
103 char c = line.charAt(p);
// decimal digit
104 if (c >= '0' && c <= '9')
// alignment column already found but end-index column not yet found
110 else if (aligcol != -1 && lastcol == -1)
// NOTE(review): the range 'A'..'z' also matches the six ASCII characters
// between 'Z' and 'a' ([ \ ] ^ _ `) - confirm that is intended
117 if (c >= 'A' && c <= 'z')
129 padding = p; // beginning of last stretch of whitespace
// stop treating lines as alignment rows when the "Database:" trailer is
// seen, when any of the three column markers is still unset, or when the
// line is too short to reach the end-index column
139 if (line.indexOf("Database:") > -1
140 || (aligcol == -1 || numcol == -1 || lastcol == -1)
141 || line.length() < lastcol)
143 inAlignments = false;
147 // now extract the alignment.
// fixed-column cuts: [0,numcol) id, [numcol,aligcol) start index,
// [aligcol,padding) aligned residues, [lastcol,end) end index
148 String sqid = line.substring(0, numcol).trim();
149 String stindx = line.substring(numcol, aligcol).trim();
150 String aligseg = line.substring(aligcol, padding);
151 String endindx = line.substring(lastcol).trim();
152 // init start/end prior to parsing
153 rstart = 1; // best guess we have
154 rend = 0; // if zero at end of parsing, then we count non-gaps
// on a parse failure the default assigned above is kept and the problem is
// only reported on stderr
157 rstart = Long.parseLong(stindx);
158 } catch (Exception e)
160 System.err.println("Couldn't parse '" + stindx
161 + "' as start of row");
162 // inAlignments = false;
163 // warn for this line
// same policy for the end index: keep the default of 0 and report
167 rend = Long.parseLong(endindx);
168 } catch (Exception e)
170 System.err.println("Couldn't parse '" + endindx
171 + "' as end of row");
172 // inAlignments = false;
174 // warn for this line
// look up the entries recorded so far for this id; on first sight of an id
// create its entry list and remember the id in order of appearance
176 Vector seqentries = (Vector) seqhash.get(sqid);
177 if (seqentries == null)
179 seqentries = new Vector();
180 seqhash.put(sqid, seqentries);
181 seqids.addElement(sqid);
// search for an existing entry that this row continues, i.e. one whose
// recorded end position is exactly one less than this row's start
184 Object[] seqentry = null;
185 Enumeration sqent = seqentries.elements();
186 while (seqentry == null && sqent.hasMoreElements())
188 seqentry = (Object[]) sqent.nextElement();
// NOTE(review): presumably a non-contiguous candidate is rejected by
// resetting seqentry to null so that the loop continues - confirm
189 if (((long[]) seqentry[1])[1] + 1 != rstart)
// nothing to continue: start a fresh entry for this id
195 if (seqentry == null)
197 padseq = true; // prepend gaps to new sequences in this block
// entry layout: [0] StringBuffer of residues, [1] long[] of positions
// (element 1 is the end position; element 0 is presumably the start -
// TODO confirm)
198 seqentry = new Object[]
199 { new StringBuffer(), new long[]
201 seqentries.addElement(seqentry);
// NOTE(review): this replaces the Vector stored under sqid with the Object[]
// entry itself. The sequence-building loop below relies on finding an
// Object[] here, but the (Vector) cast on seqhash.get(sqid) above would then
// appear to fail with a ClassCastException on the next row carrying the
// same id - confirm whether that can happen
202 seqhash.put(sqid, seqentry);
205 if (sqid.equals("Query"))
207 // update current block length in case we need to pad
208 qlen = ((StringBuffer) seqentry[0]).length();
210 StringBuffer sqs = ((StringBuffer) seqentry[0]);
// runs once per position by which this buffer is shorter than qlen
// NOTE(review): presumably appends gapc each time so the row lines up with
// the query - confirm
213 for (long c = sqs.length(); c < qlen; c++)
// remember where this row ended so the next block can be matched to it
221 ((long[]) seqentry[1])[1] = rend;
224 // end of parsing out the sequences
226 // if we haven't parsed the line as an alignment, then
227 // add to the sequence header
230 String ln = line.trim();
231 // save any header stuff for the user
// once at least one sequence id has been recorded, text goes to the footer
// NOTE(review): presumably the header buffer is used otherwise - confirm
234 StringBuffer addto = (seqhash.size() > 0) ? footerLines
// end of input: turn the collected entries into Sequence objects
242 if (seqhash.size() > 0)
244 // make the sequence vector
245 Enumeration seqid = seqids.elements();
246 while (seqid.hasMoreElements())
248 String idstring = (String) seqid.nextElement();
// expects the Object[] entry stored by the last seqhash.put for this id
249 Object[] seqentry = (Object[]) seqhash.get(idstring);
// NOTE(review): the two positions are narrowed from long to int without a
// range check - confirm values always fit
252 Sequence newseq = new Sequence(idstring,
254 ((StringBuffer) seqentry[0]).toString(),
255 (int) ((long[]) seqentry[1])[0],
256 (int) ((long[]) seqentry[1])[1]);
// an end of 0 means no end index was parsed; derive one from the sequence
257 if (newseq.getEnd() == 0)
259 // assume there are no deletions in the sequence.
260 newseq.setEnd(newseq.findPosition(newseq.getLength()));
262 seqs.addElement(newseq);
// a failure for one id is recorded in warningMessage and does not abort
// the remaining ids
263 } catch (Exception e)
// NOTE(review): presumably initialises warningMessage before the append
// below - confirm
265 if (warningMessage == null)
269 warningMessage += "Couldn't add Sequence - ID is '" + idstring
270 + "' : Exception was " + e.toString() + "\n";
273 // add any annotation
// only buffers holding more than one character are stored
274 if (headerLines.length() > 1)
276 setAlignmentProperty("HEADER", headerLines.toString());
278 if (footerLines.length() > 1)
280 setAlignmentProperty("FOOTER", footerLines.toString());
/**
 * Placeholder: always returns the fixed text "Not Implemented.", whatever
 * sequences are passed in.
 *
 * @param s sequences to print (not used)
 * @return the string "Not Implemented."
 */
285 public String print(SequenceI[] s)
// NOTE(review): wrapping the literal in new String(...) allocates a fresh copy
// on every call; the bare literal would do unless a caller depends on object
// identity
287 return new String("Not Implemented.");
/**
 * Convenience overload: passes the result of getSeqsAsArray() to
 * print(SequenceI[]).
 *
 * @return whatever print(SequenceI[]) returns for that array
 */
290 public String print()
292 return print(getSeqsAsArray());