2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8)
3 * Copyright (C) 2012 J Procter, AM Waterhouse, LM Lui, J Engelhardt, G Barton, M Clamp, S Searle
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
20 * JalviewX / Vamsas Project
21 * JPred.seq.concise reader
28 import javax.xml.parsers.ParserConfigurationException;
30 import org.xml.sax.SAXException;
32 import fr.orsay.lri.varna.exceptions.ExceptionFileFormatOrSyntax;
33 import fr.orsay.lri.varna.exceptions.ExceptionLoadingFailed;
34 import fr.orsay.lri.varna.exceptions.ExceptionPermissionDenied;
35 import fr.orsay.lri.varna.exceptions.ExceptionUnmatchedClosingParentheses;
37 import jalview.datamodel.*;
40 * Parser for the JPred/JNet concise format. This is a series of CSV lines, each
41 * line is either a sequence (QUERY), a sequence profile (align;), or jnet
42 * prediction annotation (anything else). Automagic translation happens for
43 * annotation called 'JNETPRED' (translated to 'Predicted Secondary Structure'),
44 * or 'JNETCONF' (translated to 'Prediction Confidence'). Numeric scores are
45 * differentiated from symbolic by being parseable into a float vector. They are
46 * put in Scores. Symscores gets the others. JNetAnnotationMaker translates the
47 * data parsed by this object into annotation on an alignment. It is
48 * automatically called but can be used to transfer the annotation onto a
49 * sequence in another alignment (and insert gaps where necessary)
// Parser for JPred/JNet concise output, built on top of AlignFile.
54 public class JPredFile extends AlignFile
// Maps an annotation id to the Vector of values read for it; parse() fills it
// with Scores.put(id, scores) and initData() replaces it with an empty table.
60 Hashtable Scores; // Hash of names and score vectors
// NOTE(review): parse() declares a local variable that is also called
// Symscores and writes its entries there. No assignment to this field is
// visible in this listing, so it may stay null - confirm.
62 Hashtable Symscores; // indexes of symbol annotation properties in sequenceI
// Index of the query entry. parse() resets it to -1 and assigns an index
// once it recognises a query entry.
66 private int QuerySeqPosition;
69 * Creates a new JPredFile object.
78 * @throws SAXException
79 * @throws ParserConfigurationException
80 * @throws ExceptionFileFormatOrSyntax
81 * @throws ExceptionLoadingFailed
82 * @throws ExceptionPermissionDenied
83 * @throws InterruptedException
84 * @throws ExceptionUnmatchedClosingParentheses
// Builds a parser from a source string and a type string (main() in this
// class passes args[0] and the literal File). The constructor body is not part
// of this listing.
// NOTE(review): the XML and VARNA exception types in the throws clause are
// presumably propagated from the superclass constructor - confirm.
86 public JPredFile(String inFile, String type) throws IOException, ExceptionFileFormatOrSyntax, ParserConfigurationException, SAXException, ExceptionPermissionDenied, ExceptionLoadingFailed, InterruptedException, ExceptionUnmatchedClosingParentheses
// Builds a parser over an existing FileParse source. The constructor body is
// not part of this listing, so how the source is consumed cannot be stated here.
91 public JPredFile(FileParse source) throws IOException, ExceptionFileFormatOrSyntax, ParserConfigurationException, SAXException, ExceptionPermissionDenied, ExceptionLoadingFailed, InterruptedException, ExceptionUnmatchedClosingParentheses
99 * @param QuerySeqPosition the value to store as the query sequence position
// Stores the given value as the query sequence position, unchecked.
102 public void setQuerySeqPosition(int QuerySeqPosition)
// The this-qualifier is needed because the parameter has the same name as the field.
104 this.QuerySeqPosition = QuerySeqPosition;
110 * @return the stored query sequence position; parse() leaves it at -1 when no query entry was recognised
// Plain accessor for the QuerySeqPosition field.
112 public int getQuerySeqPosition()
114 return QuerySeqPosition;
120 * @return the hash of names and score vectors (see the Scores field)
// Accessor for the score table. The body is not part of this listing.
// NOTE(review): presumably returns the Scores field that initData() creates
// and parse() fills - confirm.
122 public Hashtable getScores()
130 * @return the hash of symbol annotation names and their indexes (see the Symscores field)
// Accessor for the symbol-annotation index table. The body is not part of this listing.
// NOTE(review): parse() fills a local Hashtable that is also named Symscores.
// No assignment to the Symscores field is visible in this listing, so this
// accessor may return null - confirm.
132 public Hashtable getSymscores()
// Initialises parser state. Only the Scores assignment is visible in this
// listing; any other statements of this method are not shown.
140 public void initData()
// Replace Scores with a new, empty table.
143 Scores = new Hashtable();
149 * parse a JPred concise file into a sequence-alignment like object.
// Reads the input line by line, splitting each line at the colon into an id
// and a comma-separated symbol list. Depending on the id and the shape of the
// symbol list, the entry is stored as a score vector in Scores, as the conf
// vector, or as a sequence-like string in seq_entries.
// Afterwards one Sequence per collected id is appended to seqs. When seqs is
// not empty and a query position is known, JnetAnnotationMaker is called and
// the resulting alignment annotation is copied into this.annotations.
// Throws IOException when an entry length differs from the first entry length.
// NOTE(review): several short lines of this method are missing from this
// listing; the comments below describe only the statements that are visible.
151 public void parse() throws IOException
153 // JBPNote log.System.out.println("all read in ");
// -1 marks that no query entry has been found yet.
155 QuerySeqPosition = -1;
// Sequence-like strings collected while reading.
158 Vector seq_entries = new Vector();
// Names collected while reading; the loop further down indexes seq_entries by
// the positions of this Vector.
159 Vector ids = new Vector();
// NOTE(review): this local shadows the Symscores field of the class, so the
// put() call further down fills the local table, not the field. Looks
// unintended - confirm.
160 Hashtable Symscores = new Hashtable();
162 while ((line = nextLine()) != null)
164 // Concise format allows no comments or non comma-formatted data
165 StringTokenizer str = new StringTokenizer(line, ":");
// Lines without any token are tested here; the action taken is not part of this listing.
168 if (!str.hasMoreTokens())
173 id = str.nextToken();
// NOTE(review): no hasMoreTokens() check is visible before this second
// nextToken(). A line with nothing after the colon would raise
// NoSuchElementException - confirm whether a guard exists in the missing lines.
175 String seqsym = str.nextToken();
176 StringTokenizer symbols = new StringTokenizer(seqsym, ",");
178 // decide if we have more than just alphanumeric symbols
179 int numSymbols = symbols.countTokens();
// A list of single-character tokens, each followed by a comma, is exactly
// 2 * numSymbols characters long. Any other length is handled as a list of
// scalar values (presumably relies on a trailing comma - confirm).
186 if (seqsym.length() != (2 * numSymbols))
188 // Set of scalars for some property
// The id is already used as a key: suffixed keys of the form id_i are probed
// while they are taken (the statements that pick the new id are not shown).
189 if (Scores.containsKey(id))
193 while (Scores.containsKey(id + "_" + i))
201 Vector scores = new Vector();
203 // Typecheck from first entry
205 String ascore = "dead";
209 // store elements as floats...
210 while (symbols.hasMoreTokens())
212 ascore = symbols.nextToken();
// A token that is not a valid float raises NumberFormatException here, which
// is handled by the catch below.
214 Float score = new Float(ascore);
215 scores.addElement((Object) score);
218 Scores.put(id, scores);
219 } catch (Exception e)
221 // or just keep them as strings
// Fallback: the Float values stored so far are converted back to strings,
// then the token that failed and all remaining tokens are appended as strings.
224 for (int j = 0; j < i; j++)
227 (Object) ((Float) scores.elementAt(j)).toString(), j);
230 scores.addElement((Object) ascore);
232 while (symbols.hasMoreTokens())
234 ascore = symbols.nextToken();
235 scores.addElement((Object) ascore);
238 Scores.put(id, scores);
241 else if (id.equals("jnetconf"))
243 // log.debug System.out.println("here");
244 id = "Prediction Confidence";
// NOTE(review): the Vector(int) constructor only sets the initial capacity; the
// size stays 0. setElementAt(obj, i) requires i to be smaller than the size, so
// the loop below is expected to throw ArrayIndexOutOfBoundsException on its
// first pass. addElement looks like the intended call - confirm.
245 this.conf = new Vector(numSymbols);
247 for (int i = 0; i < numSymbols; i++)
249 conf.setElementAt(symbols.nextToken(), i);
254 // Sequence or a prediction string (rendered as sequence)
// Concatenate all symbols into one string.
255 StringBuffer newseq = new StringBuffer();
257 for (int i = 0; i < numSymbols; i++)
259 newseq.append(symbols.nextToken());
// An id containing a semicolon: the text after the first semicolon becomes the name.
262 if (id.indexOf(";") > -1)
264 seq_entries.addElement(newseq);
267 String name = id.substring(id.indexOf(";") + 1);
// Make the name unique within ids by appending an underscore and an increasing counter.
269 while (ids.lastIndexOf(name) > -1)
271 name = id.substring(id.indexOf(";") + 1) + "_" + ++i;
// The first entry of this kind defines the query position.
274 if (QuerySeqPosition == -1)
275 QuerySeqPosition = ids.size();
276 ids.addElement(name);
// Rename the JNETPRED entry before it is stored.
281 if (id.equals("JNETPRED"))
283 id = "Predicted Secondary Structure";
286 seq_entries.addElement(newseq.toString());
// Records the index ids.size() - 1 under the id; see the shadowing note at the
// declaration of the local Symscores above.
288 Symscores.put((Object) id, (Object) new Integer(ids.size() - 1));
293 * leave it to the parser user to actually check this. if (noSeqs < 1) {
294 * throw new IOException( "JpredFile Parser: No sequence in the
// The length of the first entry is the reference length for all entries.
// NOTE(review): elementAt(0) throws ArrayIndexOutOfBoundsException when
// seq_entries is empty and no guard is visible in this listing - confirm.
298 maxLength = seq_entries.elementAt(0).toString().length();
300 for (int i = 0; i < ids.size(); i++)
302 // Add all sequence like objects
// Build a Sequence from the i-th name and string; 1 and the string length are
// passed as the last two arguments (presumably start and end - confirm).
303 Sequence newSeq = new Sequence(ids.elementAt(i).toString(),
304 seq_entries.elementAt(i).toString(), 1, seq_entries
305 .elementAt(i).toString().length());
// Every entry must have the same length as the first one.
307 if (maxLength != seq_entries.elementAt(i).toString().length())
309 throw new IOException("JPredConcise: Entry ("
310 + ids.elementAt(i).toString()
311 + ") has an unexpected number of columns");
// Second way to find the query: the first name starting with QUERY or align;
// is used when no query position has been set so far.
314 if ((newSeq.getName().startsWith("QUERY") || newSeq.getName()
315 .startsWith("align;")) && (QuerySeqPosition == -1))
317 QuerySeqPosition = seqs.size();
320 seqs.addElement(newSeq);
322 if (seqs.size() > 0 && QuerySeqPosition > -1)
324 // try to make annotation for a prediction only input (default if no
325 // alignment is given and prediction contains a QUERY or align;sequence_id
327 Alignment tal = new Alignment(this.getSeqsAsArray());
330 JnetAnnotationMaker.add_annotation(this, tal, QuerySeqPosition,
332 } catch (Exception e)
// Any failure is turned into an IOException and the original stack trace is
// printed. Whether ex is thrown afterwards is not visible in this listing.
335 IOException ex = new IOException(
336 "Couldn't parse concise annotation for prediction profile.\n"
338 e.printStackTrace(); // java 1.1 does not have :
339 // ex.setStackTrace(e.getStackTrace());
// Copy the annotation rows of the temporary alignment into this parser;
// the null test in the loop header covers an alignment without annotation.
342 this.annotations = new Vector();
343 AlignmentAnnotation[] aan = tal.getAlignmentAnnotation();
344 for (int aai = 0; aan != null && aai < aan.length; aai++)
346 annotations.addElement(aan[aai]);
// Writing this format is not implemented: the method always returns the fixed
// placeholder text below and never produces file content.
356 public String print()
358 return "Not Supported";
366 * @throws SAXException
367 * @throws ParserConfigurationException
368 * @throws ExceptionFileFormatOrSyntax
369 * @throws ExceptionLoadingFailed
370 * @throws ExceptionPermissionDenied
371 * @throws InterruptedException
372 * @throws ExceptionUnmatchedClosingParentheses
// Command-line test driver: parses the file named by args[0] and prints the
// name and the sequence string of every parsed entry to standard output.
// An IOException is reported on standard error.
// NOTE(review): args[0] is read without a visible length check - confirm.
374 public static void main(String[] args) throws ExceptionFileFormatOrSyntax, ParserConfigurationException, SAXException, ExceptionPermissionDenied, ExceptionLoadingFailed, InterruptedException, ExceptionUnmatchedClosingParentheses
378 JPredFile blc = new JPredFile(args[0], "File");
380 for (int i = 0; i < blc.seqs.size(); i++)
382 System.out.println(((Sequence) blc.seqs.elementAt(i)).getName()
384 + ((Sequence) blc.seqs.elementAt(i)).getSequenceAsString()
387 } catch (java.io.IOException e)
// Only the exception text is printed; the stack trace is deliberately left out.
389 System.err.println("Exception " + e);
390 // e.printStackTrace(); not java 1.1 compatible!
// Entries moved out of seqs by removeNonSequences(); stays null until that
// method assigns a Vector.
394 Vector annotSeqs = null;
// Separates annotation-like entries from the entries kept as sequences.
// Entries located before QuerySeqPosition are added to annotSeqs. A trailing
// entry whose upper-cased name starts with JPRED is also added to annotSeqs and
// removed from seqs. Remaining entries are collected in newseqs and seqs is
// then emptied.
// NOTE(review): the loop headers, the declarations of i and j and the
// statements that refill seqs are not part of this listing - confirm that
// newseqs is copied back into seqs.
399 public void removeNonSequences()
// A non-null annotSeqs means the split has been done before (the action taken
// in that case is not shown; presumably an early return - confirm).
401 if (annotSeqs != null)
405 annotSeqs = new Vector();
406 Vector newseqs = new Vector();
// Everything in front of the query entry is treated as annotation.
409 for (; i < QuerySeqPosition; i++)
411 annotSeqs.addElement(seqs.elementAt(i));
413 // check that no stray annotations have been added at the end.
415 SequenceI sq = (SequenceI) seqs.elementAt(j - 1);
416 if (sq.getName().toUpperCase().startsWith("JPRED"))
418 annotSeqs.addElement(sq);
// The pre-decrement shrinks j together with seqs, so j - 1 keeps addressing the last entry.
419 seqs.removeElementAt(--j);
424 newseqs.addElement(seqs.elementAt(i));
427 seqs.removeAllElements();
433 * StringBuffer out = new StringBuffer();
435 * out.append("START PRED\n"); for (int i = 0; i < s[0].sequence.length(); i++)
436 * { out.append(s[0].sequence.substring(i, i + 1) + " ");
437 * out.append(s[1].sequence.substring(i, i + 1) + " ");
438 * out.append(s[1].score[0].elementAt(i) + " ");
439 * out.append(s[1].score[1].elementAt(i) + " ");
440 * out.append(s[1].score[2].elementAt(i) + " ");
441 * out.append(s[1].score[3].elementAt(i) + " ");
443 * out.append("\n"); } out.append("END PRED\n"); return out.toString(); }
445 * public static void main(String[] args) { try { BLCFile blc = new
446 * BLCFile(args[0], "File"); DrawableSequence[] s = new
447 * DrawableSequence[blc.seqs.size()]; for (int i = 0; i < blc.seqs.size(); i++)
448 * { s[i] = new DrawableSequence( (Sequence) blc.seqs.elementAt(i)); } String
449 * out = BLCFile.print(s);
451 * AlignFrame af = new AlignFrame(null, s); af.resize(700, 500); af.show();
452 * System.out.println(out); } catch (java.io.IOException e) {
453 * System.out.println("Exception " + e); } } }