X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FJPredFile.java;h=59a010dcc456264cf64c73311dd6e5e246fc9dbc;hb=a45774ee31d9f35d4eff46d54d7deab719afb092;hp=ac181ec60861d446de17f64a507b6ef08b37a246;hpb=24ad8bbb87343e317452c0dcb22efd800e7986d6;p=jalview.git diff --git a/src/jalview/io/JPredFile.java b/src/jalview/io/JPredFile.java index ac181ec..59a010d 100755 --- a/src/jalview/io/JPredFile.java +++ b/src/jalview/io/JPredFile.java @@ -1,3 +1,20 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7) + * Copyright (C) 2011 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with Jalview. If not, see . + */ /** * PredFile.java * JalviewX / Vamsas Project @@ -5,88 +22,148 @@ */ package jalview.io; -import jalview.datamodel.*; -import jalview.util.*; - import java.io.*; import java.util.*; -public class JPredFile - extends AlignFile +import jalview.datamodel.*; + +/** + * Parser for the JPred/JNet concise format. This is a series of CSV lines, each + * line is either a sequence (QUERY), a sequence profile (align;), or jnet + * prediction annotation (anything else). Automagic translation happens for + * annotation called 'JNETPRED' (translated to Secondary Structure Prediction), + * or 'JNETCONF' (translates to 'Prediction Confidence'). Numeric scores are + * differentiated from symbolic by being parseable into a float vector. They are + * put in Scores. Symscores gets the others. JNetAnnotationMaker translates the + * data parsed by this object into annotation on an alignment. It is + * automatically called but can be used to transfer the annotation onto a + * sequence in another alignment (and insert gaps where necessary) + * + * @author jprocter + * @version $Revision$ + */ +public class JPredFile extends AlignFile { Vector ids; + Vector conf; + Hashtable Scores; // Hash of names and score vectors - Hashtable Symscores; // indexes of symbol annotation properties in sequenceI vector - private int QuerySeqPosition = -1; + + Hashtable Symscores; // indexes of symbol annotation properties in sequenceI + + // vector + + private int QuerySeqPosition; + + /** + * Creates a new JPredFile object. + * + * @param inFile + * DOCUMENT ME! + * @param type + * DOCUMENT ME! + * + * @throws IOException + * DOCUMENT ME! + */ + public JPredFile(String inFile, String type) throws IOException + { + super(inFile, type); + } + + public JPredFile(FileParse source) throws IOException + { + super(source); + } + + /** + * DOCUMENT ME! + * + * @param QuerySeqPosition + * DOCUMENT ME! + */ public void setQuerySeqPosition(int QuerySeqPosition) { this.QuerySeqPosition = QuerySeqPosition; } + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ public int getQuerySeqPosition() { return QuerySeqPosition; } - public Hashtable getScores() { + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public Hashtable getScores() + { return Scores; } - public Hashtable getSymscores() { - return Symscores; - } - public JPredFile(String inStr) + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public Hashtable getSymscores() { - super(inStr); + return Symscores; } + /** + * DOCUMENT ME! + */ public void initData() { - super.initData(); Scores = new Hashtable(); ids = null; conf = null; - QuerySeqPosition = -1; - } - - public JPredFile(String inFile, String type) - throws IOException - { - - super(inFile, type); } /** * parse a JPred concise file into a sequence-alignment like object. */ - public void parse() - throws IOException + public void parse() throws IOException { -System.out.println("all read in "); + // JBPNote log.System.out.println("all read in "); String line; QuerySeqPosition = -1; noSeqs = 0; + Vector seq_entries = new Vector(); Vector ids = new Vector(); Hashtable Symscores = new Hashtable(); - while ( (line = nextLine()) != null) + + while ((line = nextLine()) != null) { // Concise format allows no comments or non comma-formatted data StringTokenizer str = new StringTokenizer(line, ":"); String id = ""; + if (!str.hasMoreTokens()) { continue; } id = str.nextToken(); + String seqsym = str.nextToken(); StringTokenizer symbols = new StringTokenizer(seqsym, ","); + // decide if we have more than just alphanumeric symbols int numSymbols = symbols.countTokens(); - if (numSymbols==0) { + if (numSymbols == 0) + { continue; } @@ -96,199 +173,259 @@ System.out.println("all read in "); if (Scores.containsKey(id)) { int i = 1; + while (Scores.containsKey(id + "_" + i)) { i++; } + id = id + "_" + i; } + Vector scores = new Vector(); + // Typecheck from first entry int i = 0; - String ascore="dead"; + String ascore = "dead"; + try { // store elements as floats... - while (symbols.hasMoreTokens()) { + while (symbols.hasMoreTokens()) + { ascore = symbols.nextToken(); + Float score = new Float(ascore); - scores.addElement( (Object) score); + scores.addElement((Object) score); } + Scores.put(id, scores); - } - catch (Exception e) + } catch (Exception e) { // or just keep them as strings i = scores.size(); + for (int j = 0; j < i; j++) { - scores.set(j, - (Object) ( (Float) scores.get(j)).toString()); + scores.setElementAt( + (Object) ((Float) scores.elementAt(j)).toString(), j); } + scores.addElement((Object) ascore); - while (symbols.hasMoreTokens()) { - { - ascore = symbols.nextToken(); - scores.addElement( (Object) ascore); - } + + while (symbols.hasMoreTokens()) + { + ascore = symbols.nextToken(); + scores.addElement((Object) ascore); } + Scores.put(id, scores); } } else if (id.equals("jnetconf")) { - System.out.println("here"); + // log.debug System.out.println("here"); id = "Prediction Confidence"; this.conf = new Vector(numSymbols); + for (int i = 0; i < numSymbols; i++) { - conf.set(i, (Object) symbols.nextToken()); + conf.setElementAt(symbols.nextToken(), i); } } else - { - // Sequence or a prediction string (rendered as sequence) + { + // Sequence or a prediction string (rendered as sequence) + StringBuffer newseq = new StringBuffer(); - StringBuffer newseq = new StringBuffer(); + for (int i = 0; i < numSymbols; i++) + { + newseq.append(symbols.nextToken()); + } - for (int i = 0; i < numSymbols; i++) { - newseq.append(symbols.nextToken()); - } + if (id.indexOf(";") > -1) + { + seq_entries.addElement(newseq); - if (id.indexOf(";") > -1) { - seq_entries.addElement(newseq); - int i=1; - String name = id.substring(id.indexOf(";")+1); - while (ids.lastIndexOf(name)>-1) { - name = id.substring(id.indexOf(";")+1)+"_"+1; - } - ids.addElement(name); + int i = 1; + String name = id.substring(id.indexOf(";") + 1); - noSeqs++; + while (ids.lastIndexOf(name) > -1) + { + name = id.substring(id.indexOf(";") + 1) + "_" + ++i; } - else + + if (QuerySeqPosition == -1) + QuerySeqPosition = ids.size(); + ids.addElement(name); + noSeqs++; + } + else + { + if (id.equals("JNETPRED")) { - if (id.equals("JNETPRED")) { - id = "Predicted Secondary Structure"; - } - seq_entries.addElement( newseq.toString() ); - ids.addElement(id); - Symscores.put((Object) id, (Object) new Integer(ids.size()-1)); + id = "Predicted Secondary Structure"; } + + seq_entries.addElement(newseq.toString()); + ids.addElement(id); + Symscores.put((Object) id, (Object) new Integer(ids.size() - 1)); + } } } + /* + * leave it to the parser user to actually check this. if (noSeqs < 1) { + * throw new IOException( "JpredFile Parser: No sequence in the + * prediction!"); } + */ - - if (noSeqs < 1) - { - throw new IOException( - "JpredFile Parser: No sequence in the prediction!"); - } maxLength = seq_entries.elementAt(0).toString().length(); + for (int i = 0; i < ids.size(); i++) { // Add all sequence like objects - Sequence newSeq = new Sequence(ids.elementAt(i).toString(), - seq_entries.elementAt(i).toString(), 1, - seq_entries.elementAt(i).toString(). - length()); - if (!Symscores.containsKey(ids.elementAt(i)) - && !isValidProteinSequence(newSeq.getSequence())) - { - throw new IOException( - "JPredConcise: Not a valid protein sequence - (" - + ids.elementAt(i).toString() + ")"); - } + seq_entries.elementAt(i).toString(), 1, seq_entries + .elementAt(i).toString().length()); if (maxLength != seq_entries.elementAt(i).toString().length()) { - throw new IOException("JPredConcise: Entry (" + - ids.elementAt(i).toString() - + ") has an unexpected number of columns"); + throw new IOException("JPredConcise: Entry (" + + ids.elementAt(i).toString() + + ") has an unexpected number of columns"); } - if (newSeq.getName().startsWith("QUERY") && QuerySeqPosition==-1) { + + if ((newSeq.getName().startsWith("QUERY") || newSeq.getName() + .startsWith("align;")) && (QuerySeqPosition == -1)) + { QuerySeqPosition = seqs.size(); } seqs.addElement(newSeq); - + } + if (seqs.size() > 0 && QuerySeqPosition > -1) + { + // try to make annotation for a prediction only input (default if no + // alignment is given and prediction contains a QUERY or align;sequence_id + // line) + Alignment tal = new Alignment(this.getSeqsAsArray()); + try + { + JnetAnnotationMaker.add_annotation(this, tal, QuerySeqPosition, + true); + } catch (Exception e) + { + tal = null; + IOException ex = new IOException( + "Couldn't parse concise annotation for prediction profile.\n" + + e); + e.printStackTrace(); // java 1.1 does not have : + // ex.setStackTrace(e.getStackTrace()); + throw ex; + } + this.annotations = new Vector(); + AlignmentAnnotation[] aan = tal.getAlignmentAnnotation(); + for (int aai = 0; aan != null && aai < aan.length; aai++) + { + annotations.addElement(aan[aai]); + } } } /** * print - * + * * @return String - */ + */ + public String print() + { + return "Not Supported"; + } - public String print() + /** + * DOCUMENT ME! + * + * @param args + * DOCUMENT ME! + */ + public static void main(String[] args) + { + try { - return "Not Supported"; - } + JPredFile blc = new JPredFile(args[0], "File"); - public static void main(String[] args) - { - try - { - JPredFile blc = new JPredFile(args[0], "File"); - for (int i = 0; i < blc.seqs.size(); i++) - { - System.out.println( ( (Sequence) blc.seqs.elementAt(i)).getName() - + "\n" + - ( (Sequence) blc.seqs.elementAt(i)).getSequence() - + "\n"); - } - } - catch (java.io.IOException e) + for (int i = 0; i < blc.seqs.size(); i++) { - System.out.println("Exception " + e); - e.printStackTrace(); + System.out.println(((Sequence) blc.seqs.elementAt(i)).getName() + + "\n" + + ((Sequence) blc.seqs.elementAt(i)).getSequenceAsString() + + "\n"); } + } catch (java.io.IOException e) + { + System.err.println("Exception " + e); + // e.printStackTrace(); not java 1.1 compatible! } } - /* - StringBuffer out = new StringBuffer(); - - out.append("START PRED\n"); - for (int i = 0; i < s[0].sequence.length(); i++) - { - out.append(s[0].sequence.substring(i, i + 1) + " "); - out.append(s[1].sequence.substring(i, i + 1) + " "); - out.append(s[1].score[0].elementAt(i) + " "); - out.append(s[1].score[1].elementAt(i) + " "); - out.append(s[1].score[2].elementAt(i) + " "); - out.append(s[1].score[3].elementAt(i) + " "); - - out.append("\n"); - } - out.append("END PRED\n"); - return out.toString(); - } - - public static void main(String[] args) - { - try + Vector annotSeqs = null; + + /** + * removeNonSequences + */ + public void removeNonSequences() + { + if (annotSeqs != null) { - BLCFile blc = new BLCFile(args[0], "File"); - DrawableSequence[] s = new DrawableSequence[blc.seqs.size()]; - for (int i = 0; i < blc.seqs.size(); i++) + return; + } + annotSeqs = new Vector(); + Vector newseqs = new Vector(); + int i = 0; + int j = seqs.size(); + for (; i < QuerySeqPosition; i++) + { + annotSeqs.addElement(seqs.elementAt(i)); + } + // check that no stray annotations have been added at the end. + { + SequenceI sq = (SequenceI) seqs.elementAt(j - 1); + if (sq.getName().toUpperCase().startsWith("JPRED")) { - s[i] = new DrawableSequence( (Sequence) blc.seqs.elementAt(i)); + annotSeqs.addElement(sq); + seqs.removeElementAt(--j); } - String out = BLCFile.print(s); - - AlignFrame af = new AlignFrame(null, s); - af.resize(700, 500); - af.show(); - System.out.println(out); } - catch (java.io.IOException e) + for (; i < j; i++) { - System.out.println("Exception " + e); + newseqs.addElement(seqs.elementAt(i)); } - } - } - */ + seqs.removeAllElements(); + seqs = newseqs; + } +} + +/* + * StringBuffer out = new StringBuffer(); + * + * out.append("START PRED\n"); for (int i = 0; i < s[0].sequence.length(); i++) + * { out.append(s[0].sequence.substring(i, i + 1) + " "); + * out.append(s[1].sequence.substring(i, i + 1) + " "); + * out.append(s[1].score[0].elementAt(i) + " "); + * out.append(s[1].score[1].elementAt(i) + " "); + * out.append(s[1].score[2].elementAt(i) + " "); + * out.append(s[1].score[3].elementAt(i) + " "); + * + * out.append("\n"); } out.append("END PRED\n"); return out.toString(); } + * + * public static void main(String[] args) { try { BLCFile blc = new + * BLCFile(args[0], "File"); DrawableSequence[] s = new + * DrawableSequence[blc.seqs.size()]; for (int i = 0; i < blc.seqs.size(); i++) + * { s[i] = new DrawableSequence( (Sequence) blc.seqs.elementAt(i)); } String + * out = BLCFile.print(s); + * + * AlignFrame af = new AlignFrame(null, s); af.resize(700, 500); af.show(); + * System.out.println(out); } catch (java.io.IOException e) { + * System.out.println("Exception " + e); } } } + */