2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.0b1)
3 * Copyright (C) 2014 The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
17 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 * JalviewX / Vamsas Project
22 * JPred.seq.concise reader
29 import jalview.datamodel.*;
32 * Parser for the JPred/JNet concise format. This is a series of CSV lines, each
33 * line is either a sequence (QUERY), a sequence profile (align;), or jnet
34 * prediction annotation (anything else). Automagic translation happens for
35 * annotation called 'JNETPRED' (translated to 'Predicted Secondary Structure'),
36 * or 'jnetconf' (translated to 'Prediction Confidence'). Numeric scores are
37 * differentiated from symbolic by being parseable into a float vector. They are
38 * put in Scores. Symscores gets the others. JNetAnnotationMaker translates the
39 * data parsed by this object into annotation on an alignment. It is
40 * automatically called but can be used to transfer the annotation onto a
41 * sequence in another alignment (and insert gaps where necessary)
// Reader for the JPred/JNet concise format, built on AlignFile.
46 public class JPredFile extends AlignFile
// Maps an entry id to the Vector of values parsed for it; parse() fills each Vector with
// Float objects, or with Strings when a value could not be read as a Float.
52 Hashtable Scores; // Hash of names and score vectors
// NOTE(review): parse() declares a local Hashtable with this same name and writes to that
// local; no assignment to this field is visible in this listing - confirm it is ever populated.
54 Hashtable Symscores; // indexes of symbol annotation properties in sequenceI
// Position of the query sequence; parse() resets it to -1 before reading any input.
58 private int QuerySeqPosition;
61 * Creates a new JPredFile object.
// Constructs a reader from an input name and a type string; main() in this class calls it
// as new JPredFile(args[0], "File"). May throw IOException.
// NOTE(review): the constructor body is not visible in this listing; presumably both
// arguments are handed on to the AlignFile superclass - confirm.
71 public JPredFile(String inFile, String type) throws IOException
// Constructs a reader from an existing FileParse source. May throw IOException.
// NOTE(review): the constructor body is not visible in this listing; presumably the
// source is handed on to the AlignFile superclass - confirm.
76 public JPredFile(FileParse source) throws IOException
84 * @param QuerySeqPosition the value to store as the query sequence position
// Stores the given value as this reader's query sequence position.
// Note that parse() resets the field to -1 before it reads any input.
87 public void setQuerySeqPosition(int QuerySeqPosition)
// 'this.' is needed because the parameter carries the same name as the field.
89 this.QuerySeqPosition = QuerySeqPosition;
95 * @return the current query sequence position; parse() resets it to -1 and sets it when it finds a query sequence
// Returns the current query sequence position. parse() starts from -1 and records the
// index of the first entry it treats as the query, so -1 after parsing means none was found.
97 public int getQuerySeqPosition()
99 return QuerySeqPosition;
105 * @return DOCUMENT ME!
// Accessor returning a Hashtable.
// NOTE(review): the method body is not visible in this listing; presumably it returns the
// Scores field (entry id -> Vector of parsed values) - confirm.
107 public Hashtable getScores()
115 * @return DOCUMENT ME!
// Accessor returning a Hashtable.
// NOTE(review): the method body is not visible in this listing; presumably it returns the
// Symscores field. parse() fills a local variable of the same name rather than the field,
// so verify that callers actually receive the parsed data.
117 public Hashtable getSymscores()
// Prepares this reader's state for parsing.
// NOTE(review): only the statement below is visible in this listing; other statements of
// this method may exist.
125 public void initData()
// Start from an empty score table; parse() adds one Vector per scored entry.
128 Scores = new Hashtable();
134 * Parses a JPred concise file into a sequence-alignment-like object.
// Reads the input line by line through nextLine() and splits every line at ':' into an id
// and a comma-separated symbol list. Depending on the shape of that list, the entry is kept
// as a vector of values in Scores, as prediction confidence in conf, or as a sequence-like
// string that is later turned into a Sequence and appended to seqs. When at least one
// sequence exists and a query position was found, annotation is generated through
// JnetAnnotationMaker and collected in this.annotations.
// Throws IOException when an entry's length differs from the length of the first entry;
// the catch block near the end also builds an IOException.
// NOTE(review): a number of statements of this method (declarations, braces, else/try
// keywords) are not visible in this listing; the comments below describe only what is shown.
136 public void parse() throws IOException
138 // JBPNote log.System.out.println("all read in ");
// -1 means "no query sequence seen yet".
140 QuerySeqPosition = -1;
// seq_entries and ids are filled in step: position k of each describes the same entry.
143 Vector seq_entries = new Vector();
144 Vector ids = new Vector();
// NOTE(review): this local has the same name as the Symscores field of the class, so the
// put() further down fills the local. No copy back to the field is visible here - confirm
// that getSymscores() can see this data.
145 Hashtable Symscores = new Hashtable();
147 while ((line = nextLine()) != null)
149 // Concise format allows no comments or non comma-formatted data
150 StringTokenizer str = new StringTokenizer(line, ":");
// A line without any token (e.g. an empty line) has no id.
153 if (!str.hasMoreTokens())
158 id = str.nextToken();
// NOTE(review): no hasMoreTokens() check is visible before this second nextToken();
// StringTokenizer.nextToken() throws NoSuchElementException when a line carries an id but
// nothing after the ':' - confirm such lines cannot occur or add a guard.
160 String seqsym = str.nextToken();
161 StringTokenizer symbols = new StringTokenizer(seqsym, ",");
163 // decide if we have more than just alphanumeric symbols
164 int numSymbols = symbols.countTokens();
// A list of one-character symbols that are each followed by a comma (e.g. "H,E,-,") has
// length 2 * numSymbols; any other length is handled as a list of scalar values.
171 if (seqsym.length() != (2 * numSymbols))
173 // Set of scalars for some property
// The id is already used as a key: probe suffixed keys of the form id_<i> as well.
174 if (Scores.containsKey(id))
178 while (Scores.containsKey(id + "_" + i))
186 Vector scores = new Vector();
188 // Typecheck from first entry
// ascore always holds the token read most recently, so the catch block can re-use the
// token that failed to convert.
190 String ascore = "dead";
194 // store elements as floats...
195 while (symbols.hasMoreTokens())
197 ascore = symbols.nextToken();
// new Float(String) throws NumberFormatException for non-numeric text; that is what sends
// control to the catch block below.
199 Float score = new Float(ascore);
200 scores.addElement((Object) score);
203 Scores.put(id, scores);
204 } catch (Exception e)
206 // or just keep them as strings
// Fallback path: the Floats stored so far (indices below i) are turned back into Strings,
// then the failing token and every remaining token are appended as Strings.
// NOTE(review): the start of the statement that continues on the next visible line is
// not shown; presumably it writes the String back at index j - confirm.
209 for (int j = 0; j < i; j++)
212 (Object) ((Float) scores.elementAt(j)).toString(), j);
215 scores.addElement((Object) ascore);
217 while (symbols.hasMoreTokens())
219 ascore = symbols.nextToken();
220 scores.addElement((Object) ascore);
223 Scores.put(id, scores);
// Confidence line: note that the comparison is case-sensitive and uses lower case.
226 else if (id.equals("jnetconf"))
228 // log.debug System.out.println("here");
229 id = "Prediction Confidence";
// NOTE(review): new Vector(n) only sets the initial capacity; the Vector still has size 0.
// Vector.setElementAt(obj, index) throws ArrayIndexOutOfBoundsException when
// index >= size(), so the loop below would fail on its first iteration as written.
// addElement(...) looks like what was intended - confirm and fix.
230 this.conf = new Vector(numSymbols);
232 for (int i = 0; i < numSymbols; i++)
234 conf.setElementAt(symbols.nextToken(), i);
239 // Sequence or a prediction string (rendered as sequence)
// Concatenate the symbols, dropping the commas.
240 StringBuffer newseq = new StringBuffer();
242 for (int i = 0; i < numSymbols; i++)
244 newseq.append(symbols.nextToken());
// An id containing ';' is a sequence entry; its name is the text after the first ';'.
247 if (id.indexOf(";") > -1)
// The StringBuffer itself is stored here; later reads go through toString().
249 seq_entries.addElement(newseq);
252 String name = id.substring(id.indexOf(";") + 1);
// Make the name unique among the ids collected so far by appending _<n>.
254 while (ids.lastIndexOf(name) > -1)
256 name = id.substring(id.indexOf(";") + 1) + "_" + ++i;
// The first entry of this kind fixes the query sequence position.
259 if (QuerySeqPosition == -1)
260 QuerySeqPosition = ids.size();
261 ids.addElement(name);
// Entry without ';' in its id: JNETPRED is renamed before the entry is stored.
266 if (id.equals("JNETPRED"))
268 id = "Predicted Secondary Structure";
271 seq_entries.addElement(newseq.toString());
// Records ids.size() - 1 under the (possibly renamed) id in the local Symscores table.
273 Symscores.put((Object) id, (Object) new Integer(ids.size() - 1));
278 * leave it to the parser user to actually check this. if (noSeqs < 1) {
279 * throw new IOException( "JpredFile Parser: No sequence in the
// The length of the first entry is the reference length for every other entry.
// NOTE(review): Vector.elementAt(0) throws ArrayIndexOutOfBoundsException when no
// sequence-like entry was read (for example on empty input); no guard is visible here.
283 maxLength = seq_entries.elementAt(0).toString().length();
285 for (int i = 0; i < ids.size(); i++)
287 // Add all sequence like objects
// Each Sequence is created with start 1 and end equal to the length of its string.
288 Sequence newSeq = new Sequence(ids.elementAt(i).toString(),
289 seq_entries.elementAt(i).toString(), 1, seq_entries
290 .elementAt(i).toString().length());
// All entries must have exactly as many columns as the first one.
292 if (maxLength != seq_entries.elementAt(i).toString().length())
294 throw new IOException("JPredConcise: Entry ("
295 + ids.elementAt(i).toString()
296 + ") has an unexpected number of columns");
// If no query position is known yet, the first sequence whose name starts with QUERY or
// align; takes that role; its index is the current size of seqs.
299 if ((newSeq.getName().startsWith("QUERY") || newSeq.getName()
300 .startsWith("align;")) && (QuerySeqPosition == -1))
302 QuerySeqPosition = seqs.size();
305 seqs.addElement(newSeq);
// Annotation is only attempted when there is at least one sequence and a query position.
307 if (seqs.size() > 0 && QuerySeqPosition > -1)
309 // try to make annotation for a prediction only input (default if no
310 // alignment is given and prediction contains a QUERY or align;sequence_id
312 Alignment tal = new Alignment(this.getSeqsAsArray());
315 JnetAnnotationMaker.add_annotation(this, tal, QuerySeqPosition,
317 } catch (Exception e)
// NOTE(review): the original exception is only printed; it is not attached to ex as its
// cause (see the commented-out setStackTrace below). On current Java,
// new IOException(message, e) would keep it - confirm the minimum Java version first.
320 IOException ex = new IOException(
321 "Couldn't parse concise annotation for prediction profile.\n"
323 e.printStackTrace(); // java 1.1 does not have :
324 // ex.setStackTrace(e.getStackTrace());
// Copy whatever annotation the temporary alignment now holds into this.annotations;
// the loop condition tolerates a null array.
327 this.annotations = new Vector();
328 AlignmentAnnotation[] aan = tal.getAlignmentAnnotation();
329 for (int aai = 0; aan != null && aai < aan.length; aai++)
331 annotations.addElement(aan[aai]);
// Writing this format is not implemented: the method always returns the fixed
// string "Not Supported".
341 public String print()
343 return "Not Supported";
// Command-line test driver: builds a JPredFile from args[0] with type "File" and prints,
// for every element of its seqs vector, the sequence name and sequence string to stdout.
// An IOException is reported on stderr instead of being propagated.
// NOTE(review): no check of args.length is visible, so running without arguments would
// raise ArrayIndexOutOfBoundsException at args[0] - confirm.
352 public static void main(String[] args)
356 JPredFile blc = new JPredFile(args[0], "File");
358 for (int i = 0; i < blc.seqs.size(); i++)
360 System.out.println(((Sequence) blc.seqs.elementAt(i)).getName()
362 + ((Sequence) blc.seqs.elementAt(i)).getSequenceAsString()
365 } catch (java.io.IOException e)
// Only the exception's string form is printed; the stack trace is deliberately left out.
367 System.err.println("Exception " + e);
368 // e.printStackTrace(); not java 1.1 compatible!
// Entries taken out of seqs by removeNonSequences(); stays null until that method
// creates the Vector.
372 Vector annotSeqs = null;
// Separates annotation-like entries from the sequences held in seqs. Entries located
// before QuerySeqPosition, and a trailing entry whose upper-cased name starts with
// "JPRED", are collected in annotSeqs; other entries are gathered in newseqs and seqs is
// then emptied.
// NOTE(review): several statements (loop headers, index declarations, whatever refills
// seqs from newseqs) are not visible in this listing - the description covers only what
// is shown.
377 public void removeNonSequences()
// A non-null annotSeqs shows this method has already created the Vector once;
// presumably the method returns early in that case - confirm.
379 if (annotSeqs != null)
383 annotSeqs = new Vector();
384 Vector newseqs = new Vector();
// Everything in front of the query sequence is treated as annotation.
387 for (; i < QuerySeqPosition; i++)
389 annotSeqs.addElement(seqs.elementAt(i));
391 // check that no stray annotations have been added at the end.
// Looks at the entry just below index j; a name starting with JPRED (compared in upper
// case) marks it as annotation, and it is moved from seqs to annotSeqs.
393 SequenceI sq = (SequenceI) seqs.elementAt(j - 1);
394 if (sq.getName().toUpperCase().startsWith("JPRED"))
396 annotSeqs.addElement(sq);
// Pre-decrement: j is lowered first, so the element removed is the one just inspected.
397 seqs.removeElementAt(--j);
402 newseqs.addElement(seqs.elementAt(i));
405 seqs.removeAllElements();
411 * StringBuffer out = new StringBuffer();
413 * out.append("START PRED\n"); for (int i = 0; i < s[0].sequence.length(); i++)
414 * { out.append(s[0].sequence.substring(i, i + 1) + " ");
415 * out.append(s[1].sequence.substring(i, i + 1) + " ");
416 * out.append(s[1].score[0].elementAt(i) + " ");
417 * out.append(s[1].score[1].elementAt(i) + " ");
418 * out.append(s[1].score[2].elementAt(i) + " ");
419 * out.append(s[1].score[3].elementAt(i) + " ");
421 * out.append("\n"); } out.append("END PRED\n"); return out.toString(); }
423 * public static void main(String[] args) { try { BLCFile blc = new
424 * BLCFile(args[0], "File"); DrawableSequence[] s = new
425 * DrawableSequence[blc.seqs.size()]; for (int i = 0; i < blc.seqs.size(); i++)
426 * { s[i] = new DrawableSequence( (Sequence) blc.seqs.elementAt(i)); } String
427 * out = BLCFile.print(s);
429 * AlignFrame af = new AlignFrame(null, s); af.resize(700, 500); af.show();
430 * System.out.println(out); } catch (java.io.IOException e) {
431 * System.out.println("Exception " + e); } } }