X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FJPredFile.java;h=451ece808444c0d4dbbdf9f2a8622a7987991cc6;hb=ee198b3ca3687f18a2ee186f4e7c7330f4ea30f0;hp=c431cc4ef51686f25614952749df7cf691a5f7a0;hpb=451619e33c0a90c8130c7d79ffa38161af1c6e0f;p=jalview.git diff --git a/src/jalview/io/JPredFile.java b/src/jalview/io/JPredFile.java index c431cc4..451ece8 100755 --- a/src/jalview/io/JPredFile.java +++ b/src/jalview/io/JPredFile.java @@ -1,22 +1,23 @@ /* -* Jalview - A Sequence Alignment Editor and Viewer -* Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version 2 -* of the License, or (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with this program; if not, write to the Free Software -* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA -*/ - + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ /** * PredFile.java * JalviewX / Vamsas Project @@ -24,383 +25,426 @@ */ package jalview.io; -import java.io.*; -import java.util.*; - -import jalview.datamodel.*; +import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentAnnotation; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceI; +import jalview.util.MessageManager; +import java.io.IOException; +import java.util.Hashtable; +import java.util.StringTokenizer; +import java.util.Vector; /** - * DOCUMENT ME! - * - * @author $author$ + * Parser for the JPred/JNet concise format. This is a series of CSV lines, each + * line is either a sequence (QUERY), a sequence profile (align;), or jnet + * prediction annotation (anything else). Automagic translation happens for + * annotation called 'JNETPRED' (translated to Secondary Structure Prediction), + * or 'JNETCONF' (translates to 'Prediction Confidence'). Numeric scores are + * differentiated from symbolic by being parseable into a float vector. They are + * put in Scores. Symscores gets the others. JNetAnnotationMaker translates the + * data parsed by this object into annotation on an alignment. It is + * automatically called but can be used to transfer the annotation onto a + * sequence in another alignment (and insert gaps where necessary) + * + * @author jprocter * @version $Revision$ */ public class JPredFile extends AlignFile { - Vector ids; - Vector conf; - Hashtable Scores; // Hash of names and score vectors - Hashtable Symscores; // indexes of symbol annotation properties in sequenceI vector - private int QuerySeqPosition; - - - /** - * Creates a new JPredFile object. - * - * @param inFile DOCUMENT ME! - * @param type DOCUMENT ME! - * - * @throws IOException DOCUMENT ME! - */ - public JPredFile(String inFile, String type) throws IOException - { - super(inFile, type); - } + Vector ids; - /** - * DOCUMENT ME! - * - * @param QuerySeqPosition DOCUMENT ME! - */ - public void setQuerySeqPosition(int QuerySeqPosition) - { - this.QuerySeqPosition = QuerySeqPosition; - } + Vector conf; - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public int getQuerySeqPosition() - { - return QuerySeqPosition; - } + Hashtable Scores; // Hash of names and score vectors - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public Hashtable getScores() - { - return Scores; - } + Hashtable Symscores; // indexes of symbol annotation properties in sequenceI - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public Hashtable getSymscores() - { - return Symscores; - } + // vector - /** - * DOCUMENT ME! - */ - public void initData() - { - super.initData(); - Scores = new Hashtable(); - ids = null; - conf = null; - } + private int QuerySeqPosition; - /** - * parse a JPred concise file into a sequence-alignment like object. - */ - public void parse() throws IOException + /** + * Creates a new JPredFile object. + * + * @param inFile + * DOCUMENT ME! + * @param sourceType + * DOCUMENT ME! + * + * @throws IOException + * DOCUMENT ME! + */ + public JPredFile(String inFile, DataSourceType sourceType) + throws IOException + { + super(inFile, sourceType); + } + + public JPredFile(FileParse source) throws IOException + { + super(source); + } + + /** + * DOCUMENT ME! + * + * @param QuerySeqPosition + * DOCUMENT ME! + */ + public void setQuerySeqPosition(int QuerySeqPosition) + { + this.QuerySeqPosition = QuerySeqPosition; + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public int getQuerySeqPosition() + { + return QuerySeqPosition; + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public Hashtable getScores() + { + return Scores; + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public Hashtable getSymscores() + { + return Symscores; + } + + /** + * DOCUMENT ME! + */ + @Override + public void initData() + { + super.initData(); + Scores = new Hashtable(); + ids = null; + conf = null; + } + + /** + * parse a JPred concise file into a sequence-alignment like object. + */ + @Override + public void parse() throws IOException + { + // JBPNote log.System.out.println("all read in "); + String line; + QuerySeqPosition = -1; + noSeqs = 0; + + Vector seq_entries = new Vector(); + Vector ids = new Vector(); + Hashtable Symscores = new Hashtable(); + + while ((line = nextLine()) != null) { - // JBPNote log.System.out.println("all read in "); - String line; - QuerySeqPosition = -1; - noSeqs = 0; + // Concise format allows no comments or non comma-formatted data + StringTokenizer str = new StringTokenizer(line, ":"); + String id = ""; + + if (!str.hasMoreTokens()) + { + continue; + } + + id = str.nextToken(); + + String seqsym = str.nextToken(); + StringTokenizer symbols = new StringTokenizer(seqsym, ","); + + // decide if we have more than just alphanumeric symbols + int numSymbols = symbols.countTokens(); + + if (numSymbols == 0) + { + continue; + } + + if (seqsym.length() != (2 * numSymbols)) + { + // Set of scalars for some property + if (Scores.containsKey(id)) + { + int i = 1; + + while (Scores.containsKey(id + "_" + i)) + { + i++; + } + + id = id + "_" + i; + } + + Vector scores = new Vector(); - Vector seq_entries = new Vector(); - Vector ids = new Vector(); - Hashtable Symscores = new Hashtable(); + // Typecheck from first entry + int i = 0; + String ascore = "dead"; - while ((line = nextLine()) != null) + try { - // Concise format allows no comments or non comma-formatted data - StringTokenizer str = new StringTokenizer(line, ":"); - String id = ""; - - if (!str.hasMoreTokens()) - { - continue; - } - - id = str.nextToken(); - - String seqsym = str.nextToken(); - StringTokenizer symbols = new StringTokenizer(seqsym, ","); - - // decide if we have more than just alphanumeric symbols - int numSymbols = symbols.countTokens(); - - if (numSymbols == 0) - { - continue; - } - - if (seqsym.length() != (2 * numSymbols)) - { - // Set of scalars for some property - if (Scores.containsKey(id)) - { - int i = 1; - - while (Scores.containsKey(id + "_" + i)) - { - i++; - } - - id = id + "_" + i; - } - - Vector scores = new Vector(); - - // Typecheck from first entry - int i = 0; - String ascore = "dead"; - - try - { - // store elements as floats... - while (symbols.hasMoreTokens()) - { - ascore = symbols.nextToken(); - - Float score = new Float(ascore); - scores.addElement((Object) score); - } - - Scores.put(id, scores); - } - catch (Exception e) - { - // or just keep them as strings - i = scores.size(); - - for (int j = 0; j < i; j++) - { - scores.setElementAt( - (Object) ((Float) scores.elementAt(j)).toString(), j); - } - - scores.addElement((Object) ascore); - - while (symbols.hasMoreTokens()) - { - ascore = symbols.nextToken(); - scores.addElement((Object) ascore); - } - - Scores.put(id, scores); - } - } - else if (id.equals("jnetconf")) - { - // log.debug System.out.println("here"); - id = "Prediction Confidence"; - this.conf = new Vector(numSymbols); - - for (int i = 0; i < numSymbols; i++) - { - conf.setElementAt( symbols.nextToken(), i); - } - } - else - { - // Sequence or a prediction string (rendered as sequence) - StringBuffer newseq = new StringBuffer(); - - for (int i = 0; i < numSymbols; i++) - { - newseq.append(symbols.nextToken()); - } - - if (id.indexOf(";") > -1) - { - seq_entries.addElement(newseq); - - int i = 1; - String name = id.substring(id.indexOf(";") + 1); - - while (ids.lastIndexOf(name) > -1) - { - name = id.substring(id.indexOf(";") + 1) + "_" + ++i; - } - - ids.addElement(name); - - noSeqs++; - } - else - { - if (id.equals("JNETPRED")) - { - id = "Predicted Secondary Structure"; - } - - seq_entries.addElement(newseq.toString()); - ids.addElement(id); - Symscores.put((Object) id, - (Object) new Integer(ids.size() - 1)); - } - } + // store elements as floats... + while (symbols.hasMoreTokens()) + { + ascore = symbols.nextToken(); + + Float score = new Float(ascore); + scores.addElement(score); + } + + Scores.put(id, scores); + } catch (Exception e) + { + // or just keep them as strings + i = scores.size(); + + for (int j = 0; j < i; j++) + { + scores.setElementAt( + ((Float) scores.elementAt(j)).toString(), j); + } + + scores.addElement(ascore); + + while (symbols.hasMoreTokens()) + { + ascore = symbols.nextToken(); + scores.addElement(ascore); + } + + Scores.put(id, scores); } - /* leave it to the parser user to actually check this. - if (noSeqs < 1) + } + else if (id.equals("jnetconf")) + { + // log.debug System.out.println("here"); + id = "Prediction Confidence"; + this.conf = new Vector(numSymbols); + + for (int i = 0; i < numSymbols; i++) { - throw new IOException( - "JpredFile Parser: No sequence in the prediction!"); - }*/ + conf.setElementAt(symbols.nextToken(), i); + } + } + else + { + // Sequence or a prediction string (rendered as sequence) + StringBuffer newseq = new StringBuffer(); - maxLength = seq_entries.elementAt(0).toString().length(); + for (int i = 0; i < numSymbols; i++) + { + newseq.append(symbols.nextToken()); + } - for (int i = 0; i < ids.size(); i++) + if (id.indexOf(";") > -1) + { + seq_entries.addElement(newseq); + + int i = 1; + String name = id.substring(id.indexOf(";") + 1); + + while (ids.lastIndexOf(name) > -1) + { + name = id.substring(id.indexOf(";") + 1) + "_" + ++i; + } + + if (QuerySeqPosition == -1) + { + QuerySeqPosition = ids.size(); + } + ids.addElement(name); + noSeqs++; + } + else { - // Add all sequence like objects - Sequence newSeq = new Sequence(ids.elementAt(i).toString(), - seq_entries.elementAt(i).toString(), 1, - seq_entries.elementAt(i).toString().length()); - - if (!Symscores.containsKey(ids.elementAt(i)) && - !isValidProteinSequence(newSeq.getSequence())) - { - throw new IOException("JPredConcise: " - +AppletFormatAdapter.INVALID_CHARACTERS +" : " - +ids.elementAt(i).toString() + ")"); - } - - if (maxLength != seq_entries.elementAt(i).toString().length()) - { - throw new IOException("JPredConcise: Entry (" + - ids.elementAt(i).toString() + - ") has an unexpected number of columns"); - } - - if (newSeq.getName().startsWith("QUERY") && - (QuerySeqPosition == -1)) - { - QuerySeqPosition = seqs.size(); - } - - seqs.addElement(newSeq); + if (id.equals("JNETPRED")) + { + id = "Predicted Secondary Structure"; + } + + seq_entries.addElement(newseq.toString()); + ids.addElement(id); + Symscores.put(id, new Integer(ids.size() - 1)); } + } } + /* + * leave it to the parser user to actually check this. if (noSeqs < 1) { + * throw new IOException( "JpredFile Parser: No sequence in the + * prediction!"); } + */ - /** - * print - * - * @return String - */ - public String print() + maxLength = seq_entries.elementAt(0).toString().length(); + + for (int i = 0; i < ids.size(); i++) { - return "Not Supported"; + // Add all sequence like objects + Sequence newSeq = new Sequence(ids.elementAt(i).toString(), + seq_entries.elementAt(i).toString(), 1, seq_entries + .elementAt(i).toString().length()); + + if (maxLength != seq_entries.elementAt(i).toString().length()) + { + throw new IOException( + MessageManager + .formatMessage( + "exception.jpredconcide_entry_has_unexpected_number_of_columns", + new String[] { ids.elementAt(i).toString() })); + } + + if ((newSeq.getName().startsWith("QUERY") || newSeq.getName() + .startsWith("align;")) && (QuerySeqPosition == -1)) + { + QuerySeqPosition = seqs.size(); + } + + seqs.addElement(newSeq); } + if (seqs.size() > 0 && QuerySeqPosition > -1) + { + // try to make annotation for a prediction only input (default if no + // alignment is given and prediction contains a QUERY or align;sequence_id + // line) + Alignment tal = new Alignment(this.getSeqsAsArray()); + try + { + JnetAnnotationMaker.add_annotation(this, tal, QuerySeqPosition, + true); + } catch (Exception e) + { + tal = null; + IOException ex = new IOException( + MessageManager + .formatMessage( + "exception.couldnt_parse_concise_annotation_for_prediction", + new String[] { e.getMessage() })); + e.printStackTrace(); // java 1.1 does not have : + // ex.setStackTrace(e.getStackTrace()); + throw ex; + } + this.annotations = new Vector(); + AlignmentAnnotation[] aan = tal.getAlignmentAnnotation(); + for (int aai = 0; aan != null && aai < aan.length; aai++) + { + annotations.addElement(aan[aai]); + } + } + } - /** - * DOCUMENT ME! - * - * @param args DOCUMENT ME! - */ - public static void main(String[] args) + /** + * print + * + * @return String + */ + @Override + public String print() + { + return "Not Supported"; + } + + /** + * DOCUMENT ME! + * + * @param args + * DOCUMENT ME! + */ + public static void main(String[] args) + { + try { - try - { - JPredFile blc = new JPredFile(args[0], "File"); - - for (int i = 0; i < blc.seqs.size(); i++) - { - System.out.println(((Sequence) blc.seqs.elementAt(i)).getName() + - "\n" + ((Sequence) blc.seqs.elementAt(i)).getSequenceAsString() + - "\n"); - } - } - catch (java.io.IOException e) - { - System.err.println("Exception " + e); - e.printStackTrace(); - } + JPredFile jpred = new JPredFile(args[0], DataSourceType.FILE); + + for (int i = 0; i < jpred.seqs.size(); i++) + { + System.out.println(((Sequence) jpred.seqs.elementAt(i)).getName() + + "\n" + + ((Sequence) jpred.seqs.elementAt(i)).getSequenceAsString() + + "\n"); + } + } catch (java.io.IOException e) + { + System.err.println("Exception " + e); + // e.printStackTrace(); not java 1.1 compatible! } - Vector annotSeqs=null; + } + + Vector annotSeqs = null; + /** * removeNonSequences */ public void removeNonSequences() { - if (annotSeqs!=null) + if (annotSeqs != null) + { return; + } annotSeqs = new Vector(); Vector newseqs = new Vector(); - int i=0; - int j=seqs.size(); - for (; i