X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FJPredFile.java;h=dfe1ecf920550e02e1c2dc65e89133f6b2195fe8;hb=f683f4d6c8af43be0ffeb96513b52572619efed5;hp=754b539eead65bcb98709fd2470a42850e67fb2c;hpb=950f8f47f9eff65f5f39501789b03f03591f1819;p=jalview.git diff --git a/src/jalview/io/JPredFile.java b/src/jalview/io/JPredFile.java index 754b539..dfe1ecf 100755 --- a/src/jalview/io/JPredFile.java +++ b/src/jalview/io/JPredFile.java @@ -1,3 +1,22 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer + * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + /** * PredFile.java * JalviewX / Vamsas Project @@ -5,12 +24,23 @@ */ package jalview.io; -import jalview.datamodel.*; -import jalview.util.*; - import java.io.*; import java.util.*; +import jalview.datamodel.*; + +/** + * Parser for the JPred/JNet concise format. This is a series of CSV lines, + * each line is either a sequence (QUERY), a sequence profile (align;), or + * jnet prediction annotation (anything else). + * Automagic translation happens for annotation called 'JNETPRED' (translated to Secondary Structure Prediction), or 'JNETCONF' (translates to 'Prediction Confidence'). + * Numeric scores are differentiated from symbolic by being parseable into a float vector. They are put in Scores. + * Symscores gets the others. + * JNetAnnotationMaker translates the data parsed by this object into annotation on an alignment. It is automatically called + * but can be used to transfer the annotation onto a sequence in another alignment (and insert gaps where necessary) + * @author jprocter + * @version $Revision$ + */ public class JPredFile extends AlignFile { @@ -18,57 +48,112 @@ public class JPredFile Vector conf; Hashtable Scores; // Hash of names and score vectors Hashtable Symscores; // indexes of symbol annotation properties in sequenceI vector - public JPredFile(String inStr) + private int QuerySeqPosition; + + /** + * Creates a new JPredFile object. + * + * @param inFile DOCUMENT ME! + * @param type DOCUMENT ME! + * + * @throws IOException DOCUMENT ME! + */ + public JPredFile(String inFile, String type) + throws IOException + { + super(inFile, type); + } + public JPredFile(FileParse source) throws IOException + { + super(source); + } + /** + * DOCUMENT ME! + * + * @param QuerySeqPosition DOCUMENT ME! + */ + public void setQuerySeqPosition(int QuerySeqPosition) { - super(inStr); + this.QuerySeqPosition = QuerySeqPosition; } - public void initData() + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public int getQuerySeqPosition() { + return QuerySeqPosition; + } - super.initData(); - Scores = new Hashtable(); + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public Hashtable getScores() + { + return Scores; } - public JPredFile(String inFile, String type) - throws IOException + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public Hashtable getSymscores() { + return Symscores; + } - super(inFile, type); + /** + * DOCUMENT ME! + */ + public void initData() + { + super.initData(); + Scores = new Hashtable(); + ids = null; + conf = null; } /** * parse a JPred concise file into a sequence-alignment like object. */ - public void parse() throws IOException { - + // JBPNote log.System.out.println("all read in "); String line; - + QuerySeqPosition = -1; noSeqs = 0; + Vector seq_entries = new Vector(); Vector ids = new Vector(); Hashtable Symscores = new Hashtable(); + while ( (line = nextLine()) != null) { // Concise format allows no comments or non comma-formatted data StringTokenizer str = new StringTokenizer(line, ":"); String id = ""; - String seq = ""; + if (!str.hasMoreTokens()) { continue; } id = str.nextToken(); + String seqsym = str.nextToken(); StringTokenizer symbols = new StringTokenizer(seqsym, ","); + // decide if we have more than just alphanumeric symbols int numSymbols = symbols.countTokens(); - if (numSymbols==0) { + if (numSymbols == 0) + { continue; } @@ -78,193 +163,274 @@ public class JPredFile if (Scores.containsKey(id)) { int i = 1; + while (Scores.containsKey(id + "_" + i)) { i++; } + id = id + "_" + i; } + Vector scores = new Vector(); + // Typecheck from first entry int i = 0; - String ascore="dead"; + String ascore = "dead"; + try { // store elements as floats... - while (symbols.hasMoreTokens()) { + while (symbols.hasMoreTokens()) + { ascore = symbols.nextToken(); + Float score = new Float(ascore); scores.addElement( (Object) score); } + Scores.put(id, scores); } catch (Exception e) { // or just keep them as strings i = scores.size(); + for (int j = 0; j < i; j++) { - scores.set(j, - (Object) ( (Float) scores.get(j)).toString()); + scores.setElementAt( + (Object) ( (Float) scores.elementAt(j)).toString(), j); } - scores.addElement((Object) ascore); - while (symbols.hasMoreTokens()) { - { - ascore = symbols.nextToken(); - scores.addElement( (Object) ascore); - } + + scores.addElement( (Object) ascore); + + while (symbols.hasMoreTokens()) + { + ascore = symbols.nextToken(); + scores.addElement( (Object) ascore); } + Scores.put(id, scores); } - } else - if (id.equals("jnetconf")) + } + else if (id.equals("jnetconf")) { + // log.debug System.out.println("here"); id = "Prediction Confidence"; this.conf = new Vector(numSymbols); + for (int i = 0; i < numSymbols; i++) { - conf.set(i, (Object) symbols.nextToken()); + conf.setElementAt(symbols.nextToken(), i); } } else + { + // Sequence or a prediction string (rendered as sequence) + StringBuffer newseq = new StringBuffer(); + + for (int i = 0; i < numSymbols; i++) { - // Sequence or a prediction string (rendered as sequence) + newseq.append(symbols.nextToken()); + } - StringBuffer newseq = new StringBuffer(); + if (id.indexOf(";") > -1) + { + seq_entries.addElement(newseq); - for (int i = 0; i < numSymbols; i++) { - newseq.append(symbols.nextToken()); - } + int i = 1; + String name = id.substring(id.indexOf(";") + 1); - if (id.indexOf(";") > -1) { - seq_entries.addElement(newseq); - int i=1; - String name = id.substring(id.indexOf(";")+1); - while (ids.lastIndexOf(name)>-1) { - name = id.substring(id.indexOf(";")+1)+"_"+1; - } - ids.addElement(name); - noSeqs++; + while (ids.lastIndexOf(name) > -1) + { + name = id.substring(id.indexOf(";") + 1) + "_" + ++i; } - else + + if (QuerySeqPosition==-1) + QuerySeqPosition = ids.size(); + ids.addElement(name); + noSeqs++; + } + else + { + if (id.equals("JNETPRED")) { - if (id.equals("JNETPRED")) { - id = "Predicted Secondary Structure"; - } - seq_entries.addElement( newseq.toString() ); - ids.addElement(id); - Symscores.put((Object) id, (Object) new Integer(ids.size()-1)); + id = "Predicted Secondary Structure"; } + + seq_entries.addElement(newseq.toString()); + ids.addElement(id); + Symscores.put( (Object) id, + (Object)new Integer(ids.size() - 1)); + } } } + /* leave it to the parser user to actually check this. + if (noSeqs < 1) + { + throw new IOException( + "JpredFile Parser: No sequence in the prediction!"); + }*/ - - if (noSeqs < 1) - { - throw new IOException( - "JpredFile Parser: No sequence in the prediction!"); - } maxLength = seq_entries.elementAt(0).toString().length(); + for (int i = 0; i < ids.size(); i++) { // Add all sequence like objects - Sequence newSeq = new Sequence(ids.elementAt(i).toString(), seq_entries.elementAt(i).toString(), 1, - seq_entries.elementAt(i).toString(). - length()); - if (!Symscores.containsKey(ids.elementAt(i)) - && !isValidProteinSequence(newSeq.getSequence())) - { - throw new IOException( - "JPredConcise: Not a valid protein sequence - (" - + ids.elementAt(i).toString() + ")"); - } + seq_entries.elementAt(i).toString().length()); if (maxLength != seq_entries.elementAt(i).toString().length()) { throw new IOException("JPredConcise: Entry (" + - ids.elementAt(i).toString() - + ") has an unexpected number of columns"); + ids.elementAt(i).toString() + + ") has an unexpected number of columns"); } - seqs.addElement(newSeq); + if ((newSeq.getName().startsWith("QUERY") || newSeq.getName().startsWith("align;"))&& + (QuerySeqPosition == -1)) + { + QuerySeqPosition = seqs.size(); + } + + seqs.addElement(newSeq); } + if (seqs.size()>0) + { + // try to make annotation for a prediction only input (default if no alignment is given) + Alignment tal = new Alignment(this.getSeqsAsArray()); + try { + JnetAnnotationMaker.add_annotation(this, tal, QuerySeqPosition, true); + } catch (Exception e) + { + tal = null; + IOException ex = new IOException("Couldn't parse concise annotation for prediction profile.\n"+e); + throw ex; + } + this.annotations = new Vector(); + AlignmentAnnotation[] aan = tal.getAlignmentAnnotation(); + for (int aai = 0; aan!=null && aai