2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.5)
3 * Copyright (C) 2010 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
20 * JalviewX / Vamsas Project
21 * JPred.seq.concise reader
28 import jalview.datamodel.*;
31 * Parser for the JPred/JNet concise format. This is a series of lines of the
32 * form "id:v1,v2,...", where each line is either a sequence (QUERY), a sequence
33 * profile (align;), or JNet prediction annotation (anything else). Automatic
34 * translation happens for annotation called 'JNETPRED' (renamed to 'Predicted
35 * Secondary Structure') or 'jnetconf' (renamed to 'Prediction Confidence').
36 * Numeric scores are differentiated from symbolic ones by being parseable into a
37 * float vector; they are put in Scores. Symscores gets the others.
38 * JnetAnnotationMaker translates the data parsed by this object into annotation
39 * on an alignment. It is called automatically, but can also be used to transfer
40 * the annotation onto a sequence in another alignment (inserting gaps where necessary).
45 public class JPredFile extends AlignFile
// Maps an annotation name to the Vector of values read for it; parse() stores into this
// table and initData() replaces it with a new, empty Hashtable.
51 Hashtable Scores; // Hash of names and score vectors
// NOTE(review): parse() declares a local Hashtable with this same name and fills that one;
// no visible line assigns this field — confirm that getSymscores() can return anything useful.
53 Hashtable Symscores; // indexes of symbol annotation properties in sequenceI
// Index of the query sequence among the parsed entries; parse() resets it to -1 before
// reading, and -1 is treated there as "not found yet".
57 private int QuerySeqPosition;
60 * Creates a new JPredFile object.
// Creates a parser from an input location and a type string (main() in this class passes
// args[0] and "File").
// NOTE(review): the constructor body is not visible in this copy — presumably it forwards
// both arguments to the AlignFile superclass; confirm.
70 public JPredFile(String inFile, String type) throws IOException
// Creates a parser that takes its input from an existing FileParse object.
// NOTE(review): the constructor body is not visible in this copy — presumably it forwards
// source to the AlignFile superclass; confirm.
75 public JPredFile(FileParse source) throws IOException
83 * @param QuerySeqPosition
86 public void setQuerySeqPosition(int QuerySeqPosition)
88 this.QuerySeqPosition = QuerySeqPosition;
94 * @return DOCUMENT ME!
96 public int getQuerySeqPosition()
98 return QuerySeqPosition;
104 * @return DOCUMENT ME!
// Accessor for the score data gathered by parse().
// NOTE(review): the body is not visible in this copy — presumably it returns the Scores
// field (annotation name -> Vector of values); confirm.
106 public Hashtable getScores()
114 * @return DOCUMENT ME!
// Accessor for the symbolic-annotation index table.
// NOTE(review): the body is not visible in this copy — presumably it returns the Symscores
// field. parse() fills a local variable of the same name rather than the field, so verify
// what callers actually receive.
116 public Hashtable getSymscores()
// Initialises parser state: Scores is replaced by a new, empty Hashtable.
// NOTE(review): other lines of this method are not visible in this copy, so further
// initialisation may happen here.
124 public void initData()
127 Scores = new Hashtable();
133 * Parses a JPred concise file into a sequence-alignment-like object.
// Reads the input line by line (via nextLine()) and sorts each "id:values" record into one
// of three kinds: a score vector stored in Scores, a confidence vector stored in conf, or a
// sequence-like entry collected in the local ids / seq_entries vectors. After the loop the
// collected entries are turned into Sequence objects and appended to seqs and, when a query
// position is known, annotation is generated through JnetAnnotationMaker and copied into
// this.annotations.
// Throws IOException when an entry's length differs from the length of the first entry.
// NOTE(review): several short lines of this method are not visible in this copy (for example
// the declarations of line, id and i, and the try that pairs with each catch), so the
// comments below describe only what the visible lines show.
135 public void parse() throws IOException
137 // JBPNote log.System.out.println("all read in ");
// -1 marks "query sequence not found yet".
139 QuerySeqPosition = -1;
// seq_entries and ids are read in step with each other by the indexed loop further down.
142 Vector seq_entries = new Vector();
143 Vector ids = new Vector();
// NOTE(review): this local has the same name as the Symscores field, so the put() further
// down fills the local; no visible line copies it to the field — confirm this is intended.
144 Hashtable Symscores = new Hashtable();
146 while ((line = nextLine()) != null)
148 // Concise format allows no comments or non comma-formatted data
// The first ':'-delimited token is the record id, the second is the comma-separated payload.
149 StringTokenizer str = new StringTokenizer(line, ":");
152 if (!str.hasMoreTokens())
157 id = str.nextToken();
// NOTE(review): no hasMoreTokens() check is visible before this second call; a line with an
// id but no payload would make nextToken() throw NoSuchElementException.
159 String seqsym = str.nextToken();
160 StringTokenizer symbols = new StringTokenizer(seqsym, ",");
162 // decide if we have more than just alphanumeric symbols
163 int numSymbols = symbols.countTokens();
// Heuristic: when every value is a single character followed by a comma, the payload length
// is exactly 2 * numSymbols; any other length is handled as a list of scalar values.
170 if (seqsym.length() != (2 * numSymbols))
172 // Set of scalars for some property
// When the id is already a key in Scores, suffixed keys of the form id_<i> are probed
// (the loop body and the renaming of id are not visible here).
173 if (Scores.containsKey(id))
177 while (Scores.containsKey(id + "_" + i))
185 Vector scores = new Vector();
187 // Typecheck from first entry
189 String ascore = "dead";
193 // store elements as floats...
194 while (symbols.hasMoreTokens())
196 ascore = symbols.nextToken();
// new Float(String) throws NumberFormatException for a non-numeric token; presumably that
// is what the catch below is for.
198 Float score = new Float(ascore);
199 scores.addElement((Object) score);
202 Scores.put(id, scores);
203 } catch (Exception e)
205 // or just keep them as strings
// The elements before index i are cast to Float and written back in place (the end of that
// statement is not visible); the token that failed to parse and all remaining tokens are
// then appended as String.
208 for (int j = 0; j < i; j++)
210 scores.setElementAt((Object) ((Float) scores.elementAt(j))
214 scores.addElement((Object) ascore);
216 while (symbols.hasMoreTokens())
218 ascore = symbols.nextToken();
219 scores.addElement((Object) ascore);
222 Scores.put(id, scores);
// NOTE(review): this comparison is case-sensitive and lower-case, while the prediction id
// tested further down is the upper-case "JNETPRED" — confirm which spelling the files use.
225 else if (id.equals("jnetconf"))
227 // log.debug System.out.println("here");
228 id = "Prediction Confidence";
// NOTE(review): new Vector(n) only sets the initial capacity, the size stays 0, so
// setElementAt(..., i) on it throws ArrayIndexOutOfBoundsException; addElement looks like
// what was meant — confirm and fix.
229 this.conf = new Vector(numSymbols);
231 for (int i = 0; i < numSymbols; i++)
233 conf.setElementAt(symbols.nextToken(), i);
238 // Sequence or a prediction string (rendered as sequence)
// Concatenate all values of the payload into one string.
239 StringBuffer newseq = new StringBuffer();
241 for (int i = 0; i < numSymbols; i++)
243 newseq.append(symbols.nextToken());
// An id containing ';' is stored under the text that follows its first ';'.
246 if (id.indexOf(";") > -1)
// NOTE(review): the StringBuffer itself is stored here, whereas the other branch stores
// newseq.toString(); every later read goes through toString(), so both forms work.
248 seq_entries.addElement(newseq);
251 String name = id.substring(id.indexOf(";") + 1);
// While the name is already present in ids, retry with a "_<n>" suffix.
253 while (ids.lastIndexOf(name) > -1)
255 name = id.substring(id.indexOf(";") + 1) + "_" + ++i;
// The first ';' entry seen since the reset to -1 becomes the query sequence position.
258 if (QuerySeqPosition == -1)
259 QuerySeqPosition = ids.size();
260 ids.addElement(name);
265 if (id.equals("JNETPRED"))
267 id = "Predicted Secondary Structure";
270 seq_entries.addElement(newseq.toString());
// Stores ids.size() - 1 under the id in the local Symscores table.
272 Symscores.put((Object) id, (Object) new Integer(ids.size() - 1));
277 * leave it to the parser user to actually check this. if (noSeqs < 1) {
278 * throw new IOException( "JpredFile Parser: No sequence in the
// NOTE(review): elementAt(0) throws ArrayIndexOutOfBoundsException when no entry was
// collected; no guard is visible in this copy.
282 maxLength = seq_entries.elementAt(0).toString().length();
284 for (int i = 0; i < ids.size(); i++)
286 // Add all sequence like objects
287 Sequence newSeq = new Sequence(ids.elementAt(i).toString(),
288 seq_entries.elementAt(i).toString(), 1, seq_entries
289 .elementAt(i).toString().length());
// Every entry must have the same length as the first entry.
291 if (maxLength != seq_entries.elementAt(i).toString().length())
293 throw new IOException("JPredConcise: Entry ("
294 + ids.elementAt(i).toString()
295 + ") has an unexpected number of columns");
// Fallback when no ';' entry set the query position: the first sequence whose name starts
// with "QUERY" or "align;" supplies it.
298 if ((newSeq.getName().startsWith("QUERY") || newSeq.getName()
299 .startsWith("align;"))
300 && (QuerySeqPosition == -1))
302 QuerySeqPosition = seqs.size();
305 seqs.addElement(newSeq);
307 if (seqs.size() > 0 && QuerySeqPosition > -1)
309 // try to make annotation for a prediction only input (default if no
310 // alignment is given and prediction contains a QUERY or align;sequence_id
// A temporary alignment over the parsed sequences receives the generated annotation.
312 Alignment tal = new Alignment(this.getSeqsAsArray());
315 JnetAnnotationMaker.add_annotation(this, tal, QuerySeqPosition,
317 } catch (Exception e)
// An IOException with a fixed message is built here and the original stack trace is printed
// rather than chained (whether the new exception is thrown is not visible in this copy).
320 IOException ex = new IOException(
321 "Couldn't parse concise annotation for prediction profile.\n"
323 e.printStackTrace(); // java 1.1 does not have :
324 // ex.setStackTrace(e.getStackTrace());
// Copy the annotation attached to the temporary alignment, if any, into this.annotations.
327 this.annotations = new Vector();
328 AlignmentAnnotation[] aan = tal.getAlignmentAnnotation();
329 for (int aai = 0; aan != null && aai < aan.length; aai++)
331 annotations.addElement(aan[aai]);
341 public String print()
343 return "Not Supported";
// Command-line check: builds a JPredFile from args[0] with type "File", then prints output
// for each parsed sequence that includes its name and its sequence string.
// NOTE(review): args[0] is read without a visible length check, so running with no
// arguments would raise ArrayIndexOutOfBoundsException.
352 public static void main(String[] args)
356 JPredFile blc = new JPredFile(args[0], "File");
358 for (int i = 0; i < blc.seqs.size(); i++)
360 System.out.println(((Sequence) blc.seqs.elementAt(i)).getName()
362 + ((Sequence) blc.seqs.elementAt(i)).getSequenceAsString()
// An IOException from construction or parsing is reported on standard error.
365 } catch (java.io.IOException e)
367 System.err.println("Exception " + e);
368 // e.printStackTrace(); not java 1.1 compatible!
// Entries taken out of seqs by removeNonSequences(); stays null until that method has
// allocated it, and that method tests it for null on entry.
372 Vector annotSeqs = null;
// Separates annotation-like entries from seqs: entries before QuerySeqPosition, and entries
// whose upper-cased name starts with "JPRED", are added to annotSeqs; other entries are
// collected in newseqs, and seqs is then emptied.
// NOTE(review): several lines are not visible in this copy (the loop headers around j and
// the second use of i, and whatever puts newseqs back into seqs), so the exact iteration
// bounds and the final content of seqs cannot be confirmed from here.
377 public void removeNonSequences()
// A non-null annotSeqs means it was already allocated by an earlier call; presumably the
// method returns early here (the body of this if is not visible).
379 if (annotSeqs != null)
383 annotSeqs = new Vector();
384 Vector newseqs = new Vector();
// Everything stored before the query sequence is treated as annotation.
387 for (; i < QuerySeqPosition; i++)
389 annotSeqs.addElement(seqs.elementAt(i));
391 // check that no stray annotations have been added at the end.
393 SequenceI sq = (SequenceI) seqs.elementAt(j - 1);
394 if (sq.getName().toUpperCase().startsWith("JPRED"))
396 annotSeqs.addElement(sq);
// Pre-decrement: j is lowered first, then the element at the new j (the one just examined
// as j - 1) is removed.
397 seqs.removeElementAt(--j);
402 newseqs.addElement(seqs.elementAt(i));
405 seqs.removeAllElements();
411 * StringBuffer out = new StringBuffer();
413 * out.append("START PRED\n"); for (int i = 0; i < s[0].sequence.length(); i++)
414 * { out.append(s[0].sequence.substring(i, i + 1) + " ");
415 * out.append(s[1].sequence.substring(i, i + 1) + " ");
416 * out.append(s[1].score[0].elementAt(i) + " ");
417 * out.append(s[1].score[1].elementAt(i) + " ");
418 * out.append(s[1].score[2].elementAt(i) + " ");
419 * out.append(s[1].score[3].elementAt(i) + " ");
421 * out.append("\n"); } out.append("END PRED\n"); return out.toString(); }
423 * public static void main(String[] args) { try { BLCFile blc = new
424 * BLCFile(args[0], "File"); DrawableSequence[] s = new
425 * DrawableSequence[blc.seqs.size()]; for (int i = 0; i < blc.seqs.size(); i++)
426 * { s[i] = new DrawableSequence( (Sequence) blc.seqs.elementAt(i)); } String
427 * out = BLCFile.print(s);
429 * AlignFrame af = new AlignFrame(null, s); af.resize(700, 500); af.show();
430 * System.out.println(out); } catch (java.io.IOException e) {
431 * System.out.println("Exception " + e); } } }