* \r
* TODO complete!\r
* \r
+ * >Sequence name\r
+ * \r
* RESIDUE COILS REM465 HOTLOOPS\r
* \r
* M 0.86010 0.88512 0.37094\r
* \r
* T 0.79983 0.85864 0.44331 ....\r
- * \r
+\r
+ * >Next Sequence name \r
* RESIDUE COILS REM465 HOTLOOPS\r
* \r
* M 0.86010 0.88512 0.37094\r
* @throws IOException\r
* @throws UnknownFileFormatException\r
*/\r
- static Map<FastaSequence, Set<Score>> readDisembl(final InputStream input)\r
+ public static Map<FastaSequence, Set<Score>> readDisembl(final InputStream input)\r
throws IOException, UnknownFileFormatException {\r
Scanner scan = new Scanner(input);\r
- scan.useDelimiter("# RESIDUE COILS REM465 HOTLOOPS\n");\r
+ scan.useDelimiter(">");\r
if (!scan.hasNext()) {\r
throw new UnknownFileFormatException(\r
- "In Disembl score format each seqeunce score is expected to start from the line: "\r
- + "'# RESIDUE COILS REM465 HOTLOOPS\\n'."\r
+ "In Disembl score format each sequence score is expected " +\r
+ "to start from the line: >Sequence name "\r
+ " No such line was found!");\r
}\r
\r
while (scan.hasNext()) {\r
seqCounter++;\r
String singleSeq = scan.next();\r
- Scanner scansingle = new Scanner(singleSeq);\r
+ Scanner scansingle = new Scanner(singleSeq);\r
+ // A record with no lines at all is a format error: report it with the\r
+ // checked exception this method already declares for malformed input.\r
+ if (!scansingle.hasNextLine()) {\r
+ throw new UnknownFileFormatException(\r
+ "The input looks like an incomplete disembl file - cannot parse!");\r
+ }\r
+ \r
StringBuffer seqbuffer = new StringBuffer();\r
ArrayList<Float> coils = new ArrayList<Float>();\r
ArrayList<Float> rem = new ArrayList<Float>();\r
ArrayList<Float> hotloops = new ArrayList<Float>();\r
- FastaSequence fs = new FastaSequence(Integer.toString(seqCounter),\r
- singleSeq);\r
- while (scansingle.hasNextLine()) {\r
- String valueLine = scansingle.nextLine();\r
- Scanner values = new Scanner(valueLine);\r
- seqbuffer.append(values.next());\r
- coils.add(values.nextFloat());\r
- rem.add(values.nextFloat());\r
- hotloops.add(values.nextFloat());\r
- values.close();\r
+\r
+ // First line of the record is the sequence name (the leading '>' was\r
+ // consumed as the delimiter).\r
+ String sequenceName = scansingle.nextLine().trim();\r
+ // The column title line is mandatory; check for it explicitly instead of\r
+ // letting nextLine() fail with an unchecked NoSuchElementException.\r
+ if (!scansingle.hasNextLine()) {\r
+ throw new UnknownFileFormatException(\r
+ "In Disembl score format the '>Sequence name' line is expected to be " +\r
+ "followed by the column title: '# RESIDUE COILS REM465 HOTLOOPS'." +\r
+ " No such line was found after sequence: " + sequenceName);\r
+ }\r
+ String title = scansingle.nextLine();\r
+ // Input validation must not rely on assert: assertions are skipped\r
+ // unless the JVM is started with -ea.\r
+ if (!title.startsWith("# RESIDUE COILS REM465 HOTLOOPS")) {\r
+ throw new UnknownFileFormatException(\r
+ "In Disembl score format the '>Sequence name' line is expected to be " +\r
+ "followed by the column title: '# RESIDUE COILS REM465 HOTLOOPS'," +\r
+ " but found: " + title);\r
+ }\r
+ \r
+ // Scores are written with '.' as the decimal separator; pin the locale so\r
+ // nextFloat() does not depend on the platform default locale.\r
+ scansingle.useLocale(java.util.Locale.US);\r
+ // Each remaining row is: residue letter, then COILS, REM465 and HOTLOOPS scores.\r
+ while (scansingle.hasNext()) {\r
+ seqbuffer.append(scansingle.next());\r
+ coils.add(scansingle.nextFloat());\r
+ rem.add(scansingle.nextFloat());\r
+ hotloops.add(scansingle.nextFloat());\r
}\r
+ FastaSequence fs = new FastaSequence(sequenceName,seqbuffer.toString());\r
Set<Score> scores = new HashSet<Score>();\r
scores.add(new Score(DisemblResultAnnot.COILS, coils));\r
scores.add(new Score(DisemblResultAnnot.HOTLOOPS, hotloops));\r
input.close();\r
return results;\r
}\r
+ \r
+ /**\r
+  * Re-keys Disembl results by sequence identifier, dropping the sequence objects.\r
+  * <p>\r
+  * Every entry of the input map is copied into a newly created HashMap under the\r
+  * value returned by getId() of its FastaSequence key. The score sets are not\r
+  * copied: the returned map refers to the same Set instances as the input. The\r
+  * input map itself is not modified. If several keys report equal identifiers,\r
+  * the entry iterated last replaces the earlier ones.\r
+  *\r
+  * @param disemblResults scores keyed by sequence; must not be null\r
+  * @return a new map from sequence identifier to the scores of that sequence\r
+  */\r
+ public static Map<String, Set<Score>> removeSequences(Map<FastaSequence, Set<Score>> disemblResults) {\r
+     final Map<String, Set<Score>> scoresById = new HashMap<String, Set<Score>>();\r
+     for (final Map.Entry<FastaSequence, Set<Score>> entry : disemblResults.entrySet()) {\r
+         final String sequenceId = entry.getKey().getId();\r
+         final Set<Score> sequenceScores = entry.getValue();\r
+         scoresById.put(sequenceId, sequenceScores);\r
+     }\r
+     return scoresById;\r
+ }\r
+ \r
/**\r
* Read AACon result with no alignment files. This method leaves incoming\r
* the InputStream results open!\r