- public void parse() throws IOException {\r
- String id = "";\r
- StringBuffer seq = new StringBuffer();\r
- int count = 0;\r
- boolean flag = false;\r
-\r
- int sstart = 0;\r
- int send = 0;\r
-\r
- String line;\r
-\r
- while ((line = nextLine()) != null) {\r
- if (line.length() > 0) {\r
- // Do we have an id line?\r
- // JBPNote - this code needs to be standardised to EBI/whatever for the\r
- // >dbref/dbref/dbref|refid1|refid2|refid3 'human-readable' style of naming (should it really exist)\r
-\r
- if (line.substring(0, 1).equals(">")) {\r
- if (count != 0) {\r
- if (sstart != 0) {\r
- seqs.addElement(new Sequence(id,\r
- seq.toString().toUpperCase(), sstart, send));\r
- } else {\r
- seqs.addElement(new Sequence(id,\r
- seq.toString().toUpperCase(), 1,\r
- seq.length()));\r
- }\r
- }\r
-\r
- count++;\r
-\r
- StringTokenizer str = new StringTokenizer(line, " ");\r
-\r
- id = str.nextToken();\r
- id = id.substring(1);\r
-\r
- com.stevesoft.pat.Regex dbId = new com.stevesoft.pat.Regex(\r
- "[A-Za-z-]+/?[A-Za-z-]+\\|(\\w+)\\|(.+)");\r
- // JBPNote At the moment - we don't get rid of the friendly names but this\r
- // behaviour is probably wrong in the long run.\r
- if (dbId.search(id)) {\r
- String dbid = dbId.stringMatched(1);\r
- String idname = dbId.stringMatched(2);\r
- if ( (idname.length() > 0) &&\r
- (idname.indexOf("_") > -1)) {\r
- id = idname; // use the friendly name - apparently no dbid\r
- } else\r
- if (dbid.length()>1) {\r
- id = dbid; // ignore the friendly name - we lose uniprot accession ID otherwise\r
- }\r
- }\r
-\r
- if (id.indexOf("/") > 0) {\r
- StringTokenizer st = new StringTokenizer(id, "/");\r
-\r
- if (st.countTokens() == 2) {\r
- id = st.nextToken();\r
-\r
- String tmp = st.nextToken();\r
-\r
- st = new StringTokenizer(tmp, "-");\r
-\r
- if (st.countTokens() == 2) {\r
- sstart = Integer.valueOf(st.nextToken())\r
- .intValue();\r
- send = Integer.valueOf(st.nextToken()).intValue();\r
- }\r
- }\r
- }\r
-\r
- seq = new StringBuffer();\r
- } else {\r
- seq = seq.append(line);\r
- }\r
- }\r