2 * Jalview - A Sequence Alignment Editor and Viewer (Development Version 2.4.1)
3 * Copyright (C) 2009 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
21 * JalviewX / Vamsas Project
22 * JPred.seq.concise reader
29 import jalview.datamodel.*;
32 * Parser for the JPred/JNet concise format. This is a series of CSV lines, each
33 * line is either a sequence (QUERY), a sequence profile (align;), or jnet
34 * prediction annotation (anything else). Automagic translation happens for
35 * annotation called 'JNETPRED' (translated to 'Predicted Secondary Structure'),
36 * or 'JNETCONF' (translated to 'Prediction Confidence'). Numeric scores are
37 * differentiated from symbolic by being parseable into a float vector. They are
38 * put in Scores. Symscores gets the others. JnetAnnotationMaker translates the
39 * data parsed by this object into annotation on an alignment. It is
40 * automatically called but can be used to transfer the annotation onto a
41 * sequence in another alignment (and insert gaps where necessary)
46 public class JPredFile extends AlignFile
52 Hashtable Scores; // Hash of names and score vectors
54 Hashtable Symscores; // indexes of symbol annotation properties in sequenceI
57 private int QuerySeqPosition;
60 * Creates a new JPredFile object.
70 public JPredFile(String inFile, String type) throws IOException
75 public JPredFile(FileParse source) throws IOException
83 * @param QuerySeqPosition
86 public void setQuerySeqPosition(int QuerySeqPosition)
88 this.QuerySeqPosition = QuerySeqPosition;
94 * @return the stored query sequence position; parse() resets it to -1 before reading
96 public int getQuerySeqPosition()
98 return QuerySeqPosition;
104 * @return DOCUMENT ME!
106 public Hashtable getScores()
114 * @return DOCUMENT ME!
116 public Hashtable getSymscores()
124 public void initData()
127 Scores = new Hashtable();
133 * parse a JPred concise file into a sequence-alignment like object.
135 public void parse() throws IOException
137 // JBPNote log.System.out.println("all read in ");
139 QuerySeqPosition = -1;
142 Vector seq_entries = new Vector();
143 Vector ids = new Vector();
144 Hashtable Symscores = new Hashtable();
146 while ((line = nextLine()) != null)
148 // Concise format allows no comments or non comma-formatted data
149 StringTokenizer str = new StringTokenizer(line, ":");
152 if (!str.hasMoreTokens())
157 id = str.nextToken();
159 String seqsym = str.nextToken();
160 StringTokenizer symbols = new StringTokenizer(seqsym, ",");
162 // decide if we have more than just alphanumeric symbols
163 int numSymbols = symbols.countTokens();
170 if (seqsym.length() != (2 * numSymbols))
172 // Set of scalars for some property
173 if (Scores.containsKey(id))
177 while (Scores.containsKey(id + "_" + i))
185 Vector scores = new Vector();
187 // Typecheck from first entry
189 String ascore = "dead";
193 // store elements as floats...
194 while (symbols.hasMoreTokens())
196 ascore = symbols.nextToken();
198 Float score = new Float(ascore);
199 scores.addElement((Object) score);
202 Scores.put(id, scores);
203 } catch (Exception e)
205 // or just keep them as strings
208 for (int j = 0; j < i; j++)
210 scores.setElementAt((Object) ((Float) scores.elementAt(j))
214 scores.addElement((Object) ascore);
216 while (symbols.hasMoreTokens())
218 ascore = symbols.nextToken();
219 scores.addElement((Object) ascore);
222 Scores.put(id, scores);
225 else if (id.equals("jnetconf"))
227 // log.debug System.out.println("here");
228 id = "Prediction Confidence";
229 this.conf = new Vector(numSymbols);
231 for (int i = 0; i < numSymbols; i++)
233 conf.setElementAt(symbols.nextToken(), i);
238 // Sequence or a prediction string (rendered as sequence)
239 StringBuffer newseq = new StringBuffer();
241 for (int i = 0; i < numSymbols; i++)
243 newseq.append(symbols.nextToken());
246 if (id.indexOf(";") > -1)
248 seq_entries.addElement(newseq);
251 String name = id.substring(id.indexOf(";") + 1);
253 while (ids.lastIndexOf(name) > -1)
255 name = id.substring(id.indexOf(";") + 1) + "_" + ++i;
258 if (QuerySeqPosition == -1)
259 QuerySeqPosition = ids.size();
260 ids.addElement(name);
265 if (id.equals("JNETPRED"))
267 id = "Predicted Secondary Structure";
270 seq_entries.addElement(newseq.toString());
272 Symscores.put((Object) id, (Object) new Integer(ids.size() - 1));
277 * leave it to the parser user to actually check this. if (noSeqs < 1) {
278 * throw new IOException( "JpredFile Parser: No sequence in the
282 maxLength = seq_entries.elementAt(0).toString().length();
284 for (int i = 0; i < ids.size(); i++)
286 // Add all sequence like objects
287 Sequence newSeq = new Sequence(ids.elementAt(i).toString(),
288 seq_entries.elementAt(i).toString(), 1, seq_entries
289 .elementAt(i).toString().length());
291 if (maxLength != seq_entries.elementAt(i).toString().length())
293 throw new IOException("JPredConcise: Entry ("
294 + ids.elementAt(i).toString()
295 + ") has an unexpected number of columns");
298 if ((newSeq.getName().startsWith("QUERY") || newSeq.getName()
299 .startsWith("align;"))
300 && (QuerySeqPosition == -1))
302 QuerySeqPosition = seqs.size();
305 seqs.addElement(newSeq);
307 if (seqs.size() > 0 && QuerySeqPosition>-1)
309 // try to make annotation for a prediction only input (default if no
310 // alignment is given and prediction contains a QUERY or align;sequence_id line)
311 Alignment tal = new Alignment(this.getSeqsAsArray());
314 JnetAnnotationMaker.add_annotation(this, tal, QuerySeqPosition,
316 } catch (Exception e)
319 IOException ex = new IOException(
320 "Couldn't parse concise annotation for prediction profile.\n"
322 e.printStackTrace(); // java 1.1 does not have : ex.setStackTrace(e.getStackTrace());
325 this.annotations = new Vector();
326 AlignmentAnnotation[] aan = tal.getAlignmentAnnotation();
327 for (int aai = 0; aan != null && aai < aan.length; aai++)
329 annotations.addElement(aan[aai]);
339 public String print()
341 return "Not Supported";
350 public static void main(String[] args)
354 JPredFile blc = new JPredFile(args[0], "File");
356 for (int i = 0; i < blc.seqs.size(); i++)
358 System.out.println(((Sequence) blc.seqs.elementAt(i)).getName()
360 + ((Sequence) blc.seqs.elementAt(i)).getSequenceAsString()
363 } catch (java.io.IOException e)
365 System.err.println("Exception " + e);
366 // e.printStackTrace(); not java 1.1 compatible!
370 Vector annotSeqs = null;
375 public void removeNonSequences()
377 if (annotSeqs != null)
381 annotSeqs = new Vector();
382 Vector newseqs = new Vector();
385 for (; i < QuerySeqPosition; i++)
387 annotSeqs.addElement(seqs.elementAt(i));
389 // check that no stray annotations have been added at the end.
391 SequenceI sq = (SequenceI) seqs.elementAt(j - 1);
392 if (sq.getName().toUpperCase().startsWith("JPRED"))
394 annotSeqs.addElement(sq);
395 seqs.removeElementAt(--j);
400 newseqs.addElement(seqs.elementAt(i));
403 seqs.removeAllElements();
409 * StringBuffer out = new StringBuffer();
411 * out.append("START PRED\n"); for (int i = 0; i < s[0].sequence.length(); i++) {
412 * out.append(s[0].sequence.substring(i, i + 1) + " ");
413 * out.append(s[1].sequence.substring(i, i + 1) + " ");
414 * out.append(s[1].score[0].elementAt(i) + " ");
415 * out.append(s[1].score[1].elementAt(i) + " ");
416 * out.append(s[1].score[2].elementAt(i) + " ");
417 * out.append(s[1].score[3].elementAt(i) + " ");
419 * out.append("\n"); } out.append("END PRED\n"); return out.toString(); }
421 * public static void main(String[] args) { try { BLCFile blc = new
422 * BLCFile(args[0], "File"); DrawableSequence[] s = new
423 * DrawableSequence[blc.seqs.size()]; for (int i = 0; i < blc.seqs.size(); i++) {
424 * s[i] = new DrawableSequence( (Sequence) blc.seqs.elementAt(i)); } String out =
427 * AlignFrame af = new AlignFrame(null, s); af.resize(700, 500); af.show();
428 * System.out.println(out); } catch (java.io.IOException e) {
429 * System.out.println("Exception " + e); } }