From a5424909b2df92129d47fe452c3185040fcc234e Mon Sep 17 00:00:00 2001 From: jprocter Date: Wed, 23 Feb 2005 15:57:04 +0000 Subject: [PATCH] basic jpred concise prediction reader. --- src/jalview/io/JPredFile.java | 236 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 236 insertions(+) create mode 100755 src/jalview/io/JPredFile.java diff --git a/src/jalview/io/JPredFile.java b/src/jalview/io/JPredFile.java new file mode 100755 index 0000000..cc8fb56 --- /dev/null +++ b/src/jalview/io/JPredFile.java @@ -0,0 +1,236 @@ +/** + * PredFile.java + * JalviewX / Vamsas Project + * JPred.seq.concise reader + */ +package jalview.io; + +import jalview.datamodel.*; +import jalview.util.*; + +import java.io.*; +import java.util.*; + +public class JPredFile + extends AlignFile +{ + Vector ids; + Vector conf; + Hashtable Scores; // Hash of names and score vectors + + public JPredFile(String inStr) + { + super(inStr); + } + + public void initData() + { + + super.initData(); + Scores = new Hashtable(); + } + + public JPredFile(String inFile, String type) + throws IOException + { + + super(inFile, type); + } + + /** + * parse a JPred concise file into a sequence-alignment like object. + */ + + public void parse() + throws IOException + { + + String line; + + noSeqs = 0; + Vector seq_entries = new Vector(); + Vector ids = new Vector(); + + while ( (line = nextLine()) != null) + { + // Concise format allows no comments or non comma-formatted data + StringTokenizer str = new StringTokenizer(line, ":"); + String id = ""; + String seq = ""; + if (str.hasMoreTokens()) + { + id = str.nextToken(); + String seqsym = str.nextToken(); + StringTokenizer symbols = new StringTokenizer(seqsym, ","); + // decide if we have more than just alphanumeric symbols + int numSymbols = symbols.countTokens(); + if (seq.length() != (2 * numSymbols)) + { + // Set of scalars for some property + if (Scores.containsKey(id)) + { + int i = 1; + while (Scores.containsKey(id + "_" + i)) + { + i++; + } + id = id + "_" + i; + } + Vector scores = new Vector(numSymbols); + // Typecheck from first entry + int i = 0; + String ascore = symbols.nextToken(); + try + { + // store elements as floats... + do + { + Float score = new Float(ascore); + scores.set(i, (Object) score); + ascore = symbols.nextToken(); + } + while (++i < numSymbols); + } + catch (Exception e) + { + // or just keep them as strings + for (int j = 0; j < i; j++) + { + scores.set(j, + (Object) + ( (Float) scores.get(j)).toString()); + } + do + { + scores.set(i, ascore); + ascore = symbols.nextToken(); + } + while (++i < numSymbols); + } + Scores.put(id, scores); + } + else + { + if (id.equals("jnetconf")) + { + id = "Prediction Confidence"; + this.conf = new Vector(numSymbols); + for (int i = 0; i < numSymbols; i++) + { + conf.set(i, (Object) symbols.nextToken()); + } + } + else + { + // Sequence or a prediction string (rendered as sequence) + + StringBuffer newseq = new StringBuffer(); + for (int i = 0; i < numSymbols; i++) + { + newseq.append(symbols.nextToken()); + } + if (id.indexOf(";") > -1) + { + seq_entries.addElement(newseq); + ids.addElement(id.substring(id.indexOf(";"))); + noSeqs++; + } + else + { + if (id.equals("JNETPRED")) + { + id = "Predicted Secondary Structure"; + } + seq_entries.addElement(newseq); + ids.addElement(id); + } + } + } + + } + } + + if (noSeqs < 1) + { + throw new IOException("JpredFile Parser: No sequence in the prediction!"); + } + maxLength = seq_entries.elementAt(0).toString().length(); + for (int i = 0; i < ids.size(); i++) + { + // Add all sequence like objects + + Sequence newSeq = new Sequence(ids.elementAt(i).toString(), + seq_entries.elementAt(i).toString(), 1, + seq_entries.elementAt(i).toString().length()); + if (!isValidProteinSequence(newSeq.getSequence())) + { + throw new IOException("JPredConcise: Not a valid protein sequence - (" + + ids.elementAt(i).toString() + ")"); + } + + if (maxLength != seq_entries.elementAt(i).toString().length()) + { + throw new IOException("JPredConcise: Entry (" + + ids.elementAt(i).toString() + + ") has an unexpected number of columns"); + } + seqs.addElement(newSeq); + + } + } + + /** + * print + * + * @return String + */ + + public String print() + { + return "Not Supported"; + } +} +/* +StringBuffer out = new StringBuffer(); + +out.append("START PRED\n"); +for (int i = 0; i < s[0].sequence.length(); i++) +{ + out.append(s[0].sequence.substring(i, i + 1) + " "); + out.append(s[1].sequence.substring(i, i + 1) + " "); + out.append(s[1].score[0].elementAt(i) + " "); + out.append(s[1].score[1].elementAt(i) + " "); + out.append(s[1].score[2].elementAt(i) + " "); + out.append(s[1].score[3].elementAt(i) + " "); + + out.append("\n"); +} +out.append("END PRED\n"); +return out.toString(); +} + + public static void main(String[] args) +{ + try + { + BLCFile blc = new BLCFile(args[0], "File"); + DrawableSequence[] s = new DrawableSequence[blc.seqs.size()]; + for (int i = 0; i < blc.seqs.size(); i++) + { + s[i] = new DrawableSequence( (Sequence) blc.seqs.elementAt(i)); + } + String out = BLCFile.print(s); + + AlignFrame af = new AlignFrame(null, s); + af.resize(700, 500); + af.show(); + System.out.println(out); + } + catch (java.io.IOException e) + { + System.out.println("Exception " + e); + } +} + +} +*/ -- 1.7.10.2