2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8)
3 * Copyright (C) 2012 J Procter, AM Waterhouse, LM Lui, J Engelhardt, G Barton, M Clamp, S Searle
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
23 import javax.xml.parsers.ParserConfigurationException;
25 import org.xml.sax.SAXException;
27 import fr.orsay.lri.varna.exceptions.ExceptionFileFormatOrSyntax;
28 import fr.orsay.lri.varna.exceptions.ExceptionLoadingFailed;
29 import fr.orsay.lri.varna.exceptions.ExceptionPermissionDenied;
30 import fr.orsay.lri.varna.exceptions.ExceptionUnmatchedClosingParentheses;
32 import jalview.datamodel.*;
35 * parse a simple blast report. Attempt to cope with query anchored and pairwise
41 public class SimpleBlastFile extends AlignFile
44 * header and footer info goes into alignment annotation.
46 StringBuffer headerLines, footerLines;
49 * hold sequence ids in order of appearance in file
/**
 * No-argument constructor.
 * NOTE(review): the constructor body is not visible in this view; confirm
 * whether it performs any initialisation.
 */
53 public SimpleBlastFile()
/**
 * Constructs a parser from a file reference and a type string.
 * NOTE(review): the body is not visible in this view; presumably both
 * arguments are forwarded to the AlignFile superclass - confirm.
 *
 * @param inFile
 *          identifies the input to read
 * @param type
 *          how inFile should be interpreted - TODO confirm accepted values
 * @throws Exception
 *           declared by this constructor; the raising statement is not
 *           visible here
 */
57 public SimpleBlastFile(String inFile, String type) throws Exception
/**
 * Constructs a parser on top of an existing FileParse source.
 * NOTE(review): the body is not visible in this view; presumably the source
 * is forwarded to the AlignFile superclass - confirm.
 *
 * @param source
 *          the already-opened input to parse
 * @throws Exception
 *           declared by this constructor; the raising statement is not
 *           visible here
 */
62 public SimpleBlastFile(FileParse source) throws Exception
/**
 * Resets the parser's own state: a fresh, empty header buffer, a fresh, empty
 * footer buffer and an empty list of sequence ids.
 * NOTE(review): some lines of this method are not visible in this view;
 * confirm whether superclass state is initialised as well.
 */
67 public void initData()
// text collected for the alignment header and footer starts out empty
70 headerLines = new StringBuffer();
71 footerLines = new StringBuffer();
// ids are appended here by parse() in the order they are first seen
72 seqids = new Vector();
/**
 * Parses a BLAST text report, consuming the input one line at a time via
 * nextLine().
 * <p>
 * From the visible statements: rows are sliced into id / start index /
 * aligned segment / end index using column positions derived from a line that
 * begins with "Query"; per-id entries are tracked in a local Hashtable; once
 * the input is exhausted a Sequence is built for every id recorded in seqids
 * and added to seqs, and header/footer text longer than one character is
 * stored as the "HEADER" and "FOOTER" alignment properties.
 * <p>
 * NOTE(review): braces and several branch bodies of this method are not
 * visible in this view, so the exact nesting of the statements below should
 * be confirmed against the complete file.
 *
 * @throws IOException
 *           declared by this method; presumably raised while reading lines -
 *           confirm
 */
75 public void parse() throws IOException
78 char gapc = ' '; // nominal gap character
// per-sequence-id bookkeeping for the rows seen so far
79 Hashtable seqhash = new Hashtable();
// NOTE(review): only the reset to false is visible below; presumably set to
// true once the "ALIGNMENTS" marker line has been seen - confirm
80 boolean inAlignments = false;
// column markers for the current block; -1 is tested below as "not yet set"
81 int padding = -1, numcol = -1, aligcol = -1, lastcol = -1;
82 long qlen = 0, rstart, rend; // total number of query bases so far
83 boolean padseq = false;
// main loop: one iteration per input line, until nextLine() returns null
84 while ((line = nextLine()) != null)
// a line that starts with "ALIGNMENTS" is handled specially (body not
// visible in this view)
86 if (line.indexOf("ALIGNMENTS") == 0)
// blank / whitespace-only lines are detected here (body not visible)
94 if (line.trim().length() == 0)
98 // parse out the sequences
99 // query anchored means that we use the query sequence as the
// a line that starts with "Query" is used to (re)compute the column markers
101 if (line.indexOf("Query") == 0)
104 // reset column markers for this block
108 // init or reset the column positions
// scan character by character, starting at index 5, i.e. just past the
// five-character "Query" label
109 for (int p = 5, mLen = line.length(); p < mLen; p++)
111 char c = line.charAt(p);
// decimal digit
112 if (c >= '0' && c <= '9')
// alignment column already known but the end-index column is not yet
118 else if (aligcol != -1 && lastcol == -1)
// NOTE(review): the range 'A'..'z' also matches the ASCII punctuation
// characters that sit between 'Z' and 'a' - confirm this is intended
125 if (c >= 'A' && c <= 'z')
137 padding = p; // beginning of last stretch of whitespace
// stop treating lines as alignment rows when the line mentions "Database:",
// when any column marker is still unset, or when the line is shorter than
// lastcol (which would make the substring calls below fail)
147 if (line.indexOf("Database:") > -1
148 || (aligcol == -1 || numcol == -1 || lastcol == -1)
149 || line.length() < lastcol)
151 inAlignments = false;
155 // now extract the alignment.
// slice the row with the column markers: [0,numcol) is the id,
// [numcol,aligcol) the start index, [aligcol,padding) the aligned segment
// and [lastcol,end) the end index
156 String sqid = line.substring(0, numcol).trim();
157 String stindx = line.substring(numcol, aligcol).trim();
158 String aligseg = line.substring(aligcol, padding);
159 String endindx = line.substring(lastcol).trim();
160 // init start/end prior to parsing
161 rstart = 1; // best guess we have
162 rend = 0; // if zero at end of parsing, then we count non-gaps
// if the start index is not a valid number, rstart keeps the default above
// and a warning is written to stderr
165 rstart = Long.parseLong(stindx);
166 } catch (Exception e)
168 System.err.println("Couldn't parse '" + stindx
169 + "' as start of row");
170 // inAlignments = false;
171 // warn for this line
// same treatment for the end index: on failure rend keeps the default above
175 rend = Long.parseLong(endindx);
176 } catch (Exception e)
178 System.err.println("Couldn't parse '" + endindx
179 + "' as end of row");
180 // inAlignments = false;
182 // warn for this line
// look up what is stored for this id; on first sighting create a new Vector
// and remember the id in seqids so that file order is preserved
184 Vector seqentries = (Vector) seqhash.get(sqid);
185 if (seqentries == null)
187 seqentries = new Vector();
188 seqhash.put(sqid, seqentries);
189 seqids.addElement(sqid);
// search the existing entries for one whose recorded end + 1 equals this
// row's start, i.e. an entry that this row continues
192 Object[] seqentry = null;
193 Enumeration sqent = seqentries.elements();
194 while (seqentry == null && sqent.hasMoreElements())
196 seqentry = (Object[]) sqent.nextElement();
// entries that do not end immediately before rstart fail this check
// (body not visible; presumably seqentry is reset so the search continues)
197 if (((long[]) seqentry[1])[1] + 1 != rstart)
// no entry to continue: start a fresh one
203 if (seqentry == null)
205 padseq = true; // prepend gaps to new sequences in this block
// an entry is a two-element array: [0] the residue buffer, [1] a long[]
// whose elements [0] and [1] are later read as start and end
206 seqentry = new Object[]
207 { new StringBuffer(), new long[]
209 seqentries.addElement(seqentry);
// NOTE(review): this stores the Object[] entry under the same key that
// held the Vector created above, while the lookup above casts the stored
// value to Vector and the final loop below casts it to Object[] - confirm
// that a repeated id cannot hit the Vector cast with an Object[] value
210 seqhash.put(sqid, seqentry);
// the row whose id is exactly "Query" defines the reference length qlen
213 if (sqid.equals("Query"))
215 // update current block length in case we need to pad
216 qlen = ((StringBuffer) seqentry[0]).length();
218 StringBuffer sqs = ((StringBuffer) seqentry[0]);
// iterate over the positions between the buffer's current length and qlen
// (body not visible; presumably pads the buffer with the gap character)
221 for (long c = sqs.length(); c < qlen; c++)
// record this row's end index as the end of the entry
229 ((long[]) seqentry[1])[1] = rend;
232 // end of parsing out the sequences
234 // if we haven't parsed the line as an alignment, then
235 // add to the sequence header
238 String ln = line.trim();
239 // save any header stuff for the user
// once at least one id has been stored, text is directed to footerLines
// (the alternative operand is not visible; presumably headerLines)
242 StringBuffer addto = (seqhash.size() > 0) ? footerLines
// build Sequence objects only if at least one id was stored
250 if (seqhash.size() > 0)
252 // make the sequence vector
// iterate ids in the order they were first seen
253 Enumeration seqid = seqids.elements();
254 while (seqid.hasMoreElements())
256 String idstring = (String) seqid.nextElement();
// expects the value stored under this id to be an Object[] entry
257 Object[] seqentry = (Object[]) seqhash.get(idstring);
// NOTE(review): start and end are narrowed from long to int here; values
// beyond the int range would be truncated
260 Sequence newseq = new Sequence(idstring,
262 ((StringBuffer) seqentry[0]).toString(),
263 (int) ((long[]) seqentry[1])[0],
264 (int) ((long[]) seqentry[1])[1]);
// an end of 0 means no end index was parsed: derive it from the sequence
265 if (newseq.getEnd() == 0)
267 // assume there are no deletions in the sequence.
268 newseq.setEnd(newseq.findPosition(newseq.getLength()));
270 seqs.addElement(newseq);
// failures for one sequence are recorded in warningMessage instead of
// being propagated to the caller
271 } catch (Exception e)
273 if (warningMessage == null)
277 warningMessage += "Couldn't add Sequence - ID is '" + idstring
278 + "' : Exception was " + e.toString() + "\n";
281 // add any annotation
// header/footer text is only published when longer than one character
282 if (headerLines.length() > 1)
284 setAlignmentProperty("HEADER", headerLines.toString());
286 if (footerLines.length() > 1)
288 setAlignmentProperty("FOOTER", footerLines.toString());
/**
 * Writing this format is not supported: the visible statement returns the
 * fixed text "Not Implemented." without using the argument.
 *
 * @param s
 *          sequences to print (not used by the visible statement)
 * @return the literal string "Not Implemented."
 */
293 public String print(SequenceI[] s)
295 return new String("Not Implemented.");
/**
 * Convenience overload: passes the array returned by getSeqsAsArray() to
 * print(SequenceI[]) and returns its result.
 *
 * @return whatever print(SequenceI[]) returns for the current sequences
 */
298 public String print()
300 return print(getSeqsAsArray());