X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FJPredFile.java;h=dabd47aa83ea04bb904a2f4299850331c03c0c44;hb=3ffe37a03a353140f7bf2b34e4617860aca28f22;hp=cc8fb56c7a5daa65790475a561648e731d24af45;hpb=a5424909b2df92129d47fe452c3185040fcc234e;p=jalview.git diff --git a/src/jalview/io/JPredFile.java b/src/jalview/io/JPredFile.java index cc8fb56..dabd47a 100755 --- a/src/jalview/io/JPredFile.java +++ b/src/jalview/io/JPredFile.java @@ -1,3 +1,22 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer + * Copyright (C) 2007 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + /** * PredFile.java * JalviewX / Vamsas Project @@ -5,177 +24,268 @@ */ package jalview.io; -import jalview.datamodel.*; -import jalview.util.*; - import java.io.*; import java.util.*; +import jalview.datamodel.*; + +/** + * DOCUMENT ME! + * + * @author $author$ + * @version $Revision$ + */ public class JPredFile extends AlignFile { Vector ids; Vector conf; Hashtable Scores; // Hash of names and score vectors + Hashtable Symscores; // indexes of symbol annotation properties in sequenceI vector + private int QuerySeqPosition; - public JPredFile(String inStr) + /** + * Creates a new JPredFile object. + * + * @param inFile DOCUMENT ME! + * @param type DOCUMENT ME! + * + * @throws IOException DOCUMENT ME! + */ + public JPredFile(String inFile, String type) + throws IOException { - super(inStr); + super(inFile, type); } - public void initData() + /** + * DOCUMENT ME! + * + * @param QuerySeqPosition DOCUMENT ME! + */ + public void setQuerySeqPosition(int QuerySeqPosition) { + this.QuerySeqPosition = QuerySeqPosition; + } - super.initData(); - Scores = new Hashtable(); + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public int getQuerySeqPosition() + { + return QuerySeqPosition; } - public JPredFile(String inFile, String type) - throws IOException + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public Hashtable getScores() { + return Scores; + } - super(inFile, type); + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public Hashtable getSymscores() + { + return Symscores; } /** - * parse a JPred concise file into a sequence-alignment like object. + * DOCUMENT ME! */ + public void initData() + { + super.initData(); + Scores = new Hashtable(); + ids = null; + conf = null; + } + /** + * parse a JPred concise file into a sequence-alignment like object. + */ public void parse() throws IOException { - + // JBPNote log.System.out.println("all read in "); String line; - + QuerySeqPosition = -1; noSeqs = 0; + Vector seq_entries = new Vector(); Vector ids = new Vector(); + Hashtable Symscores = new Hashtable(); while ( (line = nextLine()) != null) { // Concise format allows no comments or non comma-formatted data StringTokenizer str = new StringTokenizer(line, ":"); String id = ""; - String seq = ""; - if (str.hasMoreTokens()) + + if (!str.hasMoreTokens()) { - id = str.nextToken(); - String seqsym = str.nextToken(); - StringTokenizer symbols = new StringTokenizer(seqsym, ","); - // decide if we have more than just alphanumeric symbols - int numSymbols = symbols.countTokens(); - if (seq.length() != (2 * numSymbols)) + continue; + } + + id = str.nextToken(); + + String seqsym = str.nextToken(); + StringTokenizer symbols = new StringTokenizer(seqsym, ","); + + // decide if we have more than just alphanumeric symbols + int numSymbols = symbols.countTokens(); + + if (numSymbols == 0) + { + continue; + } + + if (seqsym.length() != (2 * numSymbols)) + { + // Set of scalars for some property + if (Scores.containsKey(id)) { - // Set of scalars for some property - if (Scores.containsKey(id)) + int i = 1; + + while (Scores.containsKey(id + "_" + i)) { - int i = 1; - while (Scores.containsKey(id + "_" + i)) - { - i++; - } - id = id + "_" + i; + i++; } - Vector scores = new Vector(numSymbols); - // Typecheck from first entry - int i = 0; - String ascore = symbols.nextToken(); - try + + id = id + "_" + i; + } + + Vector scores = new Vector(); + + // Typecheck from first entry + int i = 0; + String ascore = "dead"; + + try + { + // store elements as floats... + while (symbols.hasMoreTokens()) { - // store elements as floats... - do - { - Float score = new Float(ascore); - scores.set(i, (Object) score); - ascore = symbols.nextToken(); - } - while (++i < numSymbols); + ascore = symbols.nextToken(); + + Float score = new Float(ascore); + scores.addElement( (Object) score); } - catch (Exception e) + + Scores.put(id, scores); + } + catch (Exception e) + { + // or just keep them as strings + i = scores.size(); + + for (int j = 0; j < i; j++) { - // or just keep them as strings - for (int j = 0; j < i; j++) - { - scores.set(j, - (Object) - ( (Float) scores.get(j)).toString()); - } - do - { - scores.set(i, ascore); - ascore = symbols.nextToken(); - } - while (++i < numSymbols); + scores.setElementAt( + (Object) ( (Float) scores.elementAt(j)).toString(), j); } + + scores.addElement( (Object) ascore); + + while (symbols.hasMoreTokens()) + { + ascore = symbols.nextToken(); + scores.addElement( (Object) ascore); + } + Scores.put(id, scores); } - else + } + else if (id.equals("jnetconf")) + { + // log.debug System.out.println("here"); + id = "Prediction Confidence"; + this.conf = new Vector(numSymbols); + + for (int i = 0; i < numSymbols; i++) { - if (id.equals("jnetconf")) + conf.setElementAt(symbols.nextToken(), i); + } + } + else + { + // Sequence or a prediction string (rendered as sequence) + StringBuffer newseq = new StringBuffer(); + + for (int i = 0; i < numSymbols; i++) + { + newseq.append(symbols.nextToken()); + } + + if (id.indexOf(";") > -1) + { + seq_entries.addElement(newseq); + + int i = 1; + String name = id.substring(id.indexOf(";") + 1); + + while (ids.lastIndexOf(name) > -1) { - id = "Prediction Confidence"; - this.conf = new Vector(numSymbols); - for (int i = 0; i < numSymbols; i++) - { - conf.set(i, (Object) symbols.nextToken()); - } + name = id.substring(id.indexOf(";") + 1) + "_" + ++i; } - else + + if (QuerySeqPosition==-1) + QuerySeqPosition = ids.size(); + ids.addElement(name); + noSeqs++; + } + else + { + if (id.equals("JNETPRED")) { - // Sequence or a prediction string (rendered as sequence) - - StringBuffer newseq = new StringBuffer(); - for (int i = 0; i < numSymbols; i++) - { - newseq.append(symbols.nextToken()); - } - if (id.indexOf(";") > -1) - { - seq_entries.addElement(newseq); - ids.addElement(id.substring(id.indexOf(";"))); - noSeqs++; - } - else - { - if (id.equals("JNETPRED")) - { - id = "Predicted Secondary Structure"; - } - seq_entries.addElement(newseq); - ids.addElement(id); - } + id = "Predicted Secondary Structure"; } - } + seq_entries.addElement(newseq.toString()); + ids.addElement(id); + Symscores.put( (Object) id, + (Object)new Integer(ids.size() - 1)); + } } } + /* leave it to the parser user to actually check this. + if (noSeqs < 1) + { + throw new IOException( + "JpredFile Parser: No sequence in the prediction!"); + }*/ - if (noSeqs < 1) - { - throw new IOException("JpredFile Parser: No sequence in the prediction!"); - } maxLength = seq_entries.elementAt(0).toString().length(); + for (int i = 0; i < ids.size(); i++) { // Add all sequence like objects - Sequence newSeq = new Sequence(ids.elementAt(i).toString(), seq_entries.elementAt(i).toString(), 1, seq_entries.elementAt(i).toString().length()); - if (!isValidProteinSequence(newSeq.getSequence())) - { - throw new IOException("JPredConcise: Not a valid protein sequence - (" - + ids.elementAt(i).toString() + ")"); - } if (maxLength != seq_entries.elementAt(i).toString().length()) { throw new IOException("JPredConcise: Entry (" + - ids.elementAt(i).toString() - + ") has an unexpected number of columns"); + ids.elementAt(i).toString() + + ") has an unexpected number of columns"); } - seqs.addElement(newSeq); + if ((newSeq.getName().startsWith("QUERY") || newSeq.getName().startsWith("align;"))&& + (QuerySeqPosition == -1)) + { + QuerySeqPosition = seqs.size(); + } + + seqs.addElement(newSeq); } } @@ -184,18 +294,80 @@ public class JPredFile * * @return String */ - public String print() { return "Not Supported"; } + + /** + * DOCUMENT ME! + * + * @param args DOCUMENT ME! + */ + public static void main(String[] args) + { + try + { + JPredFile blc = new JPredFile(args[0], "File"); + + for (int i = 0; i < blc.seqs.size(); i++) + { + System.out.println( ( (Sequence) blc.seqs.elementAt(i)).getName() + + "\n" + + ( (Sequence) blc.seqs.elementAt(i)).getSequenceAsString() + + "\n"); + } + } + catch (java.io.IOException e) + { + System.err.println("Exception " + e); + e.printStackTrace(); + } + } + + Vector annotSeqs = null; + /** + * removeNonSequences + */ + public void removeNonSequences() + { + if (annotSeqs != null) + { + return; + } + annotSeqs = new Vector(); + Vector newseqs = new Vector(); + int i = 0; + int j = seqs.size(); + for (; i < QuerySeqPosition; i++) + { + annotSeqs.addElement(seqs.elementAt(i)); + } + // check that no stray annotations have been added at the end. + { + SequenceI sq = (SequenceI) seqs.elementAt(j - 1); + if (sq.getName().toUpperCase().startsWith("JPRED")) + { + annotSeqs.addElement(sq); + seqs.removeElementAt(--j); + } + } + for (; i < j; i++) + { + newseqs.addElement(seqs.elementAt(i)); + } + + seqs.removeAllElements(); + seqs = newseqs; + } } + /* -StringBuffer out = new StringBuffer(); + StringBuffer out = new StringBuffer(); -out.append("START PRED\n"); -for (int i = 0; i < s[0].sequence.length(); i++) -{ + out.append("START PRED\n"); + for (int i = 0; i < s[0].sequence.length(); i++) + { out.append(s[0].sequence.substring(i, i + 1) + " "); out.append(s[1].sequence.substring(i, i + 1) + " "); out.append(s[1].score[0].elementAt(i) + " "); @@ -204,13 +376,13 @@ for (int i = 0; i < s[0].sequence.length(); i++) out.append(s[1].score[3].elementAt(i) + " "); out.append("\n"); -} -out.append("END PRED\n"); -return out.toString(); -} + } + out.append("END PRED\n"); + return out.toString(); + } public static void main(String[] args) -{ + { try { BLCFile blc = new BLCFile(args[0], "File"); @@ -230,7 +402,7 @@ return out.toString(); { System.out.println("Exception " + e); } -} + } -} -*/ + } + */