- */\r
- public void parse() throws IOException {\r
- // Reads the concise-format input one line at a time through nextLine().\r
- // Each usable line has the form "id:v1,v2,...". An entry ends up either\r
- // as a score vector in Scores, as the confidence vector conf, or as a\r
- // sequence-like row that is turned into a Sequence and appended to seqs.\r
- // Throws IOException when no sequence row was counted, when a row that\r
- // is not recorded in Symscores fails isValidProteinSequence, or when the\r
- // rows do not all have the same length.\r
- // JBPNote log.System.out.println("all read in ");\r
- String line;\r
- QuerySeqPosition = -1;\r
- noSeqs = 0;\r
-\r
- // seq_entries and ids are filled in step: entry k of one belongs to\r
- // entry k of the other. Symscores maps the id of each non-sequence row\r
- // (one without ';' in its id) to its index in ids.\r
- Vector seq_entries = new Vector();\r
- Vector ids = new Vector();\r
- Hashtable Symscores = new Hashtable();\r
-\r
- while ((line = nextLine()) != null) {\r
- // Concise format allows no comments or non comma-formatted data\r
- StringTokenizer str = new StringTokenizer(line, ":");\r
- String id = "";\r
-\r
- if (!str.hasMoreTokens()) {\r
- continue;\r
- }\r
-\r
- id = str.nextToken();\r
-\r
- // NOTE(review): no hasMoreTokens() check here, so a line that holds an\r
- // id but no ':'-separated data makes nextToken() throw\r
- // NoSuchElementException.\r
- String seqsym = str.nextToken();\r
- StringTokenizer symbols = new StringTokenizer(seqsym, ",");\r
-\r
- // decide if we have more than just alphanumeric symbols\r
- int numSymbols = symbols.countTokens();\r
-\r
- if (numSymbols == 0) {\r
- continue;\r
- }\r
-\r
- // The data is taken to be a row of single symbols only when its length\r
- // is exactly two characters (symbol plus comma) per token.\r
- if (seqsym.length() != (2 * numSymbols)) {\r
- // Set of scalars for some property\r
- if (Scores.containsKey(id)) {\r
- // Key already used: append the first free "_n" suffix.\r
- int i = 1;\r
-\r
- while (Scores.containsKey(id + "_" + i)) {\r
- i++;\r
- }\r
-\r
- id = id + "_" + i;\r
- }\r
-\r
- Vector scores = new Vector();\r
-\r
- // Typecheck from first entry\r
- int i = 0;\r
- String ascore = "dead";\r
-\r
- try {\r
- // store elements as floats...\r
- while (symbols.hasMoreTokens()) {\r
- ascore = symbols.nextToken();\r
-\r
- Float score = new Float(ascore);\r
- scores.addElement((Object) score);\r
- }\r
-\r
- Scores.put(id, scores);\r
- } catch (Exception e) {\r
- // or just keep them as strings\r
- // The Floats parsed so far are converted back to strings, then the\r
- // token that failed and all remaining tokens are appended as-is.\r
- i = scores.size();\r
-\r
- for (int j = 0; j < i; j++) {\r
- scores.set(j,\r
- (Object) ((Float) scores.get(j)).toString());\r
- }\r
-\r
- scores.addElement((Object) ascore);\r
-\r
- while (symbols.hasMoreTokens()) {\r
- ascore = symbols.nextToken();\r
- scores.addElement((Object) ascore);\r
- }\r
-\r
- Scores.put(id, scores);\r
- }\r
- } else if (id.equals("jnetconf")) {\r
- // log.debug System.out.println("here");\r
- id = "Prediction Confidence";\r
- // NOTE(review): Vector(int) sets the initial capacity only; the size\r
- // is still 0, so conf.set(i, ...) below throws\r
- // ArrayIndexOutOfBoundsException on the first symbol.\r
- this.conf = new Vector(numSymbols);\r
-\r
- for (int i = 0; i < numSymbols; i++) {\r
- conf.set(i, (Object) symbols.nextToken());\r
- }\r
- } else {\r
- // Sequence or a prediction string (rendered as sequence)\r
- StringBuffer newseq = new StringBuffer();\r
-\r
- for (int i = 0; i < numSymbols; i++) {\r
- newseq.append(symbols.nextToken());\r
- }\r
-\r
- if (id.indexOf(";") > -1) {\r
- // Ids containing ';' are counted as sequences; the text after the\r
- // first ';' is used as the name.\r
- seq_entries.addElement(newseq);\r
-\r
- int i = 1;\r
- String name = id.substring(id.indexOf(";") + 1);\r
-\r
- // NOTE(review): the suffix is the literal 1 and i is never used, so\r
- // name cannot change after the first pass; if both the plain name and\r
- // name + "_1" are already in ids this loop does not terminate.\r
- while (ids.lastIndexOf(name) > -1) {\r
- name = id.substring(id.indexOf(";") + 1) + "_" + 1;\r
- }\r
-\r
- ids.addElement(name);\r
-\r
- noSeqs++;\r
- } else {\r
- if (id.equals("JNETPRED")) {\r
- id = "Predicted Secondary Structure";\r
- }\r
-\r
- seq_entries.addElement(newseq.toString());\r
- ids.addElement(id);\r
- Symscores.put((Object) id,\r
- (Object) new Integer(ids.size() - 1));\r
- }\r
- }\r
- }\r
-\r
- if (noSeqs < 1) {\r
- throw new IOException(\r
- "JpredFile Parser: No sequence in the prediction!");\r
- }\r
-\r
- // The first stored row fixes the length every other row must match.\r
- maxLength = seq_entries.elementAt(0).toString().length();\r
-\r
- for (int i = 0; i < ids.size(); i++) {\r
- // Add all sequence like objects\r
- Sequence newSeq = new Sequence(ids.elementAt(i).toString(),\r
- seq_entries.elementAt(i).toString(), 1,\r
- seq_entries.elementAt(i).toString().length());\r
-\r
- // Only rows that were not recorded in Symscores are validated as\r
- // protein sequences.\r
- if (!Symscores.containsKey(ids.elementAt(i)) &&\r
- !isValidProteinSequence(newSeq.getSequence())) {\r
- throw new IOException(\r
- "JPredConcise: Not a valid protein sequence - (" +\r
- ids.elementAt(i).toString() + ")");\r
- }\r
-\r
- if (maxLength != seq_entries.elementAt(i).toString().length()) {\r
- throw new IOException("JPredConcise: Entry (" +\r
- ids.elementAt(i).toString() +\r
- ") has an unexpected number of columns");\r
- }\r
-\r
- // Remember the position of the first sequence whose name starts with\r
- // "QUERY".\r
- if (newSeq.getName().startsWith("QUERY") &&\r
- (QuerySeqPosition == -1)) {\r
- QuerySeqPosition = seqs.size();\r
- }\r
-\r
- seqs.addElement(newSeq);\r
- }\r
- }\r
-\r
+ */
+ public void parse() throws IOException
+ {
+   // Reads the concise-format input one line at a time through nextLine().
+   // Each usable line has the form "id:v1,v2,...". Depending on the data an
+   // entry is stored as:
+   //  - a score vector in Scores (data that is not a row of single symbols),
+   //  - the confidence vector conf (id "jnetconf"),
+   //  - a sequence-like row, later turned into a Sequence appended to seqs.
+   // Lines without an id, without data, or without symbols are skipped.
+   // Throws IOException if the sequence-like rows differ in length.
+   String line;
+   QuerySeqPosition = -1;
+   noSeqs = 0;
+
+   // Filled in step: seq_entries.elementAt(k) is the row for ids.elementAt(k).
+   Vector seq_entries = new Vector();
+   Vector ids = new Vector();
+
+   while ((line = nextLine()) != null)
+   {
+     // Concise format allows no comments or non comma-formatted data
+     StringTokenizer str = new StringTokenizer(line, ":");
+
+     if (!str.hasMoreTokens())
+     {
+       continue;
+     }
+
+     String id = str.nextToken();
+
+     if (!str.hasMoreTokens())
+     {
+       // An id with no data after it. Skip it, like an entry with no
+       // symbols, instead of letting nextToken() throw an unchecked
+       // NoSuchElementException out of the parser.
+       continue;
+     }
+
+     String seqsym = str.nextToken();
+     StringTokenizer symbols = new StringTokenizer(seqsym, ",");
+     int numSymbols = symbols.countTokens();
+
+     if (numSymbols == 0)
+     {
+       continue;
+     }
+
+     if (seqsym.length() != (2 * numSymbols))
+     {
+       // Not exactly one character plus one comma per token, so this is a
+       // set of scalars for some property rather than a row of symbols.
+       if (Scores.containsKey(id))
+       {
+         // Key already used: append the first free "_n" suffix.
+         int suffix = 1;
+
+         while (Scores.containsKey(id + "_" + suffix))
+         {
+           suffix++;
+         }
+
+         id = id + "_" + suffix;
+       }
+
+       Vector scores = new Vector();
+       String ascore = "";
+
+       try
+       {
+         // store elements as floats...
+         while (symbols.hasMoreTokens())
+         {
+           ascore = symbols.nextToken();
+           scores.addElement(Float.valueOf(ascore));
+         }
+       }
+       catch (NumberFormatException e)
+       {
+         // ...or, when a value is not numeric, keep everything as strings:
+         // turn the values parsed so far back into text, then append the
+         // token that failed and whatever is left unparsed.
+         for (int j = 0; j < scores.size(); j++)
+         {
+           scores.setElementAt(scores.elementAt(j).toString(), j);
+         }
+
+         scores.addElement(ascore);
+
+         while (symbols.hasMoreTokens())
+         {
+           scores.addElement(symbols.nextToken());
+         }
+       }
+
+       Scores.put(id, scores);
+     }
+     else if (id.equals("jnetconf"))
+     {
+       // Prediction confidence symbols. Vector(int) only reserves capacity,
+       // the size stays 0, so the values have to be appended: setElementAt
+       // on the empty vector throws ArrayIndexOutOfBoundsException.
+       this.conf = new Vector(numSymbols);
+
+       for (int i = 0; i < numSymbols; i++)
+       {
+         conf.addElement(symbols.nextToken());
+       }
+     }
+     else
+     {
+       // Sequence or a prediction string (rendered as sequence)
+       StringBuffer newseq = new StringBuffer();
+
+       for (int i = 0; i < numSymbols; i++)
+       {
+         newseq.append(symbols.nextToken());
+       }
+
+       int separator = id.indexOf(";");
+
+       if (separator > -1)
+       {
+         // Ids containing ';' are counted as sequences. The text after the
+         // first ';' is the name; duplicates get "_2", "_3", ... appended.
+         String baseName = id.substring(separator + 1);
+         String name = baseName;
+         int i = 1;
+
+         while (ids.contains(name))
+         {
+           name = baseName + "_" + ++i;
+         }
+
+         seq_entries.addElement(newseq.toString());
+         ids.addElement(name);
+         noSeqs++;
+       }
+       else
+       {
+         if (id.equals("JNETPRED"))
+         {
+           id = "Predicted Secondary Structure";
+         }
+
+         seq_entries.addElement(newseq.toString());
+         ids.addElement(id);
+       }
+     }
+   }
+
+   // noSeqs is deliberately not checked here: it is left to the parser user
+   // to decide whether a prediction without sequences is an error.
+   if (seq_entries.isEmpty())
+   {
+     // Nothing sequence-like was read, so there is no first row to measure
+     // (elementAt(0) would throw) and nothing to add to seqs.
+     maxLength = 0;
+     return;
+   }
+
+   // The first stored row fixes the length every other row must match.
+   maxLength = seq_entries.elementAt(0).toString().length();
+
+   for (int i = 0; i < ids.size(); i++)
+   {
+     String entryName = ids.elementAt(i).toString();
+     String entryData = seq_entries.elementAt(i).toString();
+
+     // Add all sequence like objects
+     Sequence newSeq = new Sequence(entryName, entryData, 1,
+             entryData.length());
+
+     if (maxLength != entryData.length())
+     {
+       throw new IOException("JPredConcise: Entry (" +
+               entryName +
+               ") has an unexpected number of columns");
+     }
+
+     // Remember the position of the first sequence whose name starts with
+     // "QUERY".
+     if (newSeq.getName().startsWith("QUERY") &&
+             (QuerySeqPosition == -1))
+     {
+       QuerySeqPosition = seqs.size();
+     }
+
+     seqs.addElement(newSeq);
+   }
+ }
+