2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
3 * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
23 import javax.xml.parsers.ParserConfigurationException;
25 import org.xml.sax.SAXException;
27 import fr.orsay.lri.varna.exceptions.ExceptionFileFormatOrSyntax;
28 import fr.orsay.lri.varna.exceptions.ExceptionLoadingFailed;
29 import fr.orsay.lri.varna.exceptions.ExceptionPermissionDenied;
30 import fr.orsay.lri.varna.exceptions.ExceptionUnmatchedClosingParentheses;
32 import jalview.datamodel.*;
33 import jalview.util.*;
36 * parse a simple blast report. Attempt to cope with query anchored and pairwise
// Reader for simple BLAST text reports, implemented as a subclass of AlignFile.
// parse() turns the alignment rows of the report into sequences and keeps the
// remaining report text as alignment properties.
42 public class SimpleBlastFile extends AlignFile
45 * header and footer info goes into alignment annotation.
// Text collected by parse() from lines that are not treated as alignment rows;
// published at the end of parse() as the HEADER and FOOTER alignment properties.
47 StringBuffer headerLines, footerLines;
50 * hold sequence ids in order of appearance in file
// No-argument constructor.
// NOTE(review): the constructor body is not visible at this point in the file - confirm what, if anything, it initialises.
54 public SimpleBlastFile()
// Constructor taking an input location and a type string.
// NOTE(review): presumably forwards inFile and type to the AlignFile constructor, which would explain the
// long throws clause (I/O, XML parser, SAX, VARNA and interruption exceptions) - the body is not visible here, confirm.
58 public SimpleBlastFile(String inFile, String type) throws IOException, ExceptionFileFormatOrSyntax, ParserConfigurationException, SAXException, ExceptionPermissionDenied, ExceptionLoadingFailed, InterruptedException, ExceptionUnmatchedClosingParentheses
// Constructor taking an already prepared FileParse source.
// NOTE(review): presumably forwards source to the AlignFile constructor and declares the same exception
// list as the (String, String) constructor for that reason - the body is not visible here, confirm.
63 public SimpleBlastFile(FileParse source) throws IOException, ExceptionFileFormatOrSyntax, ParserConfigurationException, SAXException, ExceptionPermissionDenied, ExceptionLoadingFailed, InterruptedException, ExceptionUnmatchedClosingParentheses
// Resets the parser state owned by this class: fresh, empty header and footer
// buffers and a fresh, empty list of sequence ids.
// NOTE(review): any call to the superclass initData is not visible here - confirm it is still made.
68 public void initData()
71 headerLines = new StringBuffer();
72 footerLines = new StringBuffer();
// ids are appended by parse() the first time each row id is seen
73 seqids = new Vector();
// Reads the report one line at a time through nextLine() and builds one
// Sequence per row id found in the alignment rows, in order of first appearance.
// Lines that are not handled as alignment rows are collected as header or footer
// text and published as the HEADER and FOOTER alignment properties.
// Sequences that cannot be constructed are reported through warningMessage
// instead of aborting the parse.
// Declares IOException; NOTE(review): presumably raised by nextLine() - confirm.
// NOTE(review): braces, else branches and try keywords of this method are not
// visible here, so the comments below describe only the visible statements.
76 public void parse() throws IOException
79 char gapc = ' '; // nominal gap character
// row id -> data collected for that id (see the review note near the second put below)
80 Hashtable seqhash = new Hashtable();
// cleared again further down when a line no longer looks like an alignment row
81 boolean inAlignments = false;
// Column markers for the current block, as used by the substring calls below:
// [0, numcol) is the row id, [numcol, aligcol) the start index,
// [aligcol, padding) the aligned residues and [lastcol, end) the end index.
// -1 means the marker has not been located yet.
82 int padding = -1, numcol = -1, aligcol = -1, lastcol = -1;
83 long qlen = 0, rstart, rend; // total number of query bases so far
84 boolean padseq = false;
85 while ((line = nextLine()) != null)
// a line that starts with the word ALIGNMENTS is tested for here;
// NOTE(review): presumably this switches inAlignments on - the branch body is not visible, confirm
87 if (line.indexOf("ALIGNMENTS") == 0)
// blank line test; NOTE(review): the action taken for blank lines is not visible here
95 if (line.trim().length() == 0)
99 // parse out the sequences
100 // query anchored means that we use the query sequence as the
// a row whose text starts with Query is used to (re)locate the column markers
102 if (line.indexOf("Query") == 0)
105 // reset column markers for this block
109 // init or reset the column positions
// the scan starts at index 5, i.e. just past the five characters of the word Query
110 for (int p = 5, mLen = line.length(); p < mLen; p++)
112 char c = line.charAt(p);
// digit test
113 if (c >= '0' && c <= '9')
119 else if (aligcol != -1 && lastcol == -1)
// NOTE(review): the range A..z also admits the six ASCII punctuation characters
// that sit between Z and a - confirm this is intended
126 if (c >= 'A' && c <= 'z')
138 padding = p; // beginning of last stretch of whitespace
// Stop treating lines as alignment rows when the line mentions Database:,
// when any column marker is still unset, or when the line is shorter than lastcol.
148 if (line.indexOf("Database:") > -1
149 || (aligcol == -1 || numcol == -1 || lastcol == -1)
150 || line.length() < lastcol)
152 inAlignments = false;
156 // now extract the alignment.
157 String sqid = line.substring(0, numcol).trim();
158 String stindx = line.substring(numcol, aligcol).trim();
159 String aligseg = line.substring(aligcol, padding);
160 String endindx = line.substring(lastcol).trim();
161 // init start/end prior to parsing
162 rstart = 1; // best guess we have
163 rend = 0; // if zero at end of parsing, then we count non-gaps
// a start index that is not a number is only reported on System.err; rstart keeps its default
166 rstart = Long.parseLong(stindx);
167 } catch (Exception e)
169 System.err.println("Couldn't parse '" + stindx
170 + "' as start of row");
171 // inAlignments = false;
172 // warn for this line
// same treatment for the end index; rend keeps its default of zero on failure
176 rend = Long.parseLong(endindx);
177 } catch (Exception e)
179 System.err.println("Couldn't parse '" + endindx
180 + "' as end of row");
181 // inAlignments = false;
183 // warn for this line
// fetch the list of entries already collected for this id, creating and
// registering it (and recording the id order) on first sight
185 Vector seqentries = (Vector) seqhash.get(sqid);
186 if (seqentries == null)
188 seqentries = new Vector();
189 seqhash.put(sqid, seqentries);
190 seqids.addElement(sqid);
// look through the existing entries, comparing each recorded end + 1 with this row start;
// NOTE(review): presumably a mismatch resets seqentry to null so the search continues - confirm
193 Object[] seqentry = null;
194 Enumeration sqent = seqentries.elements();
195 while (seqentry == null && sqent.hasMoreElements())
197 seqentry = (Object[]) sqent.nextElement();
198 if (((long[]) seqentry[1])[1] + 1 != rstart)
// no usable entry: start a new one; slot 0 holds the residue buffer, slot 1 a long array
// whose element 0 is later read as the start and element 1 as the end
204 if (seqentry == null)
206 padseq = true; // prepend gaps to new sequences in this block
207 seqentry = new Object[]
208 { new StringBuffer(), new long[]
210 seqentries.addElement(seqentry);
// NOTE(review): this replaces the Vector stored under sqid above with the Object[] entry.
// The final loop of this method reads the value back as Object[], but the cast to Vector
// at the top of this section looks like it would throw ClassCastException for the next
// row carrying the same id - confirm and fix.
211 seqhash.put(sqid, seqentry);
214 if (sqid.equals("Query"))
216 // update current block length in case we need to pad
217 qlen = ((StringBuffer) seqentry[0]).length();
219 StringBuffer sqs = ((StringBuffer) seqentry[0]);
// NOTE(review): loop body not visible; presumably appends gapc until sqs is as long as
// the query text collected so far, guarded by padseq - confirm
222 for (long c = sqs.length(); c < qlen; c++)
// remember where this row ended so the next block can be matched against it
230 ((long[]) seqentry[1])[1] = rend;
233 // end of parsing out the sequences
235 // if we haven't parsed the line as an alignment, then
236 // add to the sequence header
239 String ln = line.trim();
240 // save any header stuff for the user
// once at least one row id has been stored the text is directed to the footer buffer;
// NOTE(review): the alternative branch is not visible, presumably headerLines - confirm
243 StringBuffer addto = (seqhash.size() > 0) ? footerLines
// end of input: convert what was collected into Sequence objects
251 if (seqhash.size() > 0)
253 // make the sequence vector
254 Enumeration seqid = seqids.elements();
255 while (seqid.hasMoreElements())
257 String idstring = (String) seqid.nextElement();
258 Object[] seqentry = (Object[]) seqhash.get(idstring);
// start and end are narrowed from long to int here
261 Sequence newseq = new Sequence(idstring,
263 ((StringBuffer) seqentry[0]).toString(),
264 (int) ((long[]) seqentry[1])[0],
265 (int) ((long[]) seqentry[1])[1]);
// an end of zero means no end index was recorded for the row; derive it from the sequence itself
266 if (newseq.getEnd() == 0)
268 // assume there are no deletions in the sequence.
269 newseq.setEnd(newseq.findPosition(newseq.getLength()));
271 seqs.addElement(newseq);
// a failure for one id is appended to warningMessage and does not stop the remaining ids
272 } catch (Exception e)
274 if (warningMessage == null)
278 warningMessage += "Couldn't add Sequence - ID is '" + idstring
279 + "' : Exception was " + e.toString() + "\n";
282 // add any annotation
// buffers holding at most one character are not published
283 if (headerLines.length() > 1)
285 setAlignmentProperty("HEADER", headerLines.toString());
287 if (footerLines.length() > 1)
289 setAlignmentProperty("FOOTER", footerLines.toString());
// Writing this format is not supported: the visible statement ignores the
// argument s and returns a fixed placeholder message.
// NOTE(review): the explicit String constructor call only copies the literal; returning the literal directly would be equivalent for callers comparing by equals.
294 public String print(SequenceI[] s)
296 return new String("Not Implemented.");
// Convenience overload: passes the array returned by getSeqsAsArray() to
// print(SequenceI[]) and returns its result.
299 public String print()
301 return print(getSeqsAsArray());