remove imports
[jalview.git] / src / jalview / io / SequenceFeatureFetcher.java
1 /*\r
2 * Jalview - A Sequence Alignment Editor and Viewer\r
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle\r
4 *\r
5 * This program is free software; you can redistribute it and/or\r
6 * modify it under the terms of the GNU General Public License\r
7 * as published by the Free Software Foundation; either version 2\r
8 * of the License, or (at your option) any later version.\r
9 *\r
10 * This program is distributed in the hope that it will be useful,\r
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of\r
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\r
13 * GNU General Public License for more details.\r
14 *\r
15 * You should have received a copy of the GNU General Public License\r
16 * along with this program; if not, write to the Free Software\r
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA\r
18 */\r
19 package jalview.io;\r
20 \r
21 import jalview.datamodel.*;\r
22 \r
23 import jalview.gui.*;\r
24 \r
25 import jalview.io.*;\r
26 \r
27 import java.io.*;\r
28 \r
29 import java.util.*;\r
30 \r
31 \r
32 public class SequenceFeatureFetcher implements Runnable {\r
33     AlignmentI align;\r
34     AlignmentPanel ap;\r
35     ArrayList unknownSequences;\r
36     CutAndPasteTransfer output = new CutAndPasteTransfer();\r
37     StringBuffer sbuffer = new StringBuffer();\r
38 \r
39     public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap) {\r
40         unknownSequences = new ArrayList();\r
41         this.align = align;\r
42         this.ap = ap;\r
43 \r
44         Thread thread = new Thread(this);\r
45         thread.start();\r
46     }\r
47 \r
48     public void run() {\r
49         String cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");\r
50 \r
51         RandomAccessFile out = null;\r
52 \r
53         try {\r
54             if (cache == null) {\r
55                 jalview.bin.Cache.setProperty("UNIPROT_CACHE",\r
56                     System.getProperty("user.home") + "/uniprot.xml");\r
57                 cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");\r
58             }\r
59 \r
60             File test = new File(cache);\r
61 \r
62             if (!test.exists()) {\r
63                 out = new RandomAccessFile(cache, "rw");\r
64                 out.writeBytes("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");\r
65                 out.writeBytes("<UNIPROT_CACHE>\n");\r
66             } else {\r
67                 out = new RandomAccessFile(cache, "rw");\r
68 \r
69                 // open exisiting cache and remove </UNIPROT_CACHE> from the end\r
70                 long lastLine = 0;\r
71                 String data;\r
72 \r
73                 while ((data = out.readLine()) != null) {\r
74                     if (data.indexOf("</entry>") > -1) {\r
75                         lastLine = out.getFilePointer();\r
76                     }\r
77                 }\r
78 \r
79                 out.seek(lastLine);\r
80             }\r
81 \r
82             int seqIndex = 0;\r
83             Vector sequences = align.getSequences();\r
84 \r
85             while (seqIndex < sequences.size()) {\r
86                 ArrayList ids = new ArrayList();\r
87 \r
88                 for (int i = 0; (seqIndex < sequences.size()) && (i < 50);\r
89                         seqIndex++, i++) {\r
90                     SequenceI sequence = (SequenceI) sequences.get(seqIndex);\r
91                     ids.add(sequence.getName());\r
92                 }\r
93 \r
94                 tryLocalCacheFirst(ids, align);\r
95 \r
96                 if (ids.size() > 0) {\r
97                     StringBuffer remainingIds = new StringBuffer("uniprot:");\r
98 \r
99                     for (int i = 0; i < ids.size(); i++)\r
100                         remainingIds.append(ids.get(i) + ";");\r
101 \r
102                     EBIFetchClient ebi = new EBIFetchClient();\r
103                     String[] result = ebi.fetchData(remainingIds.toString(),\r
104                             "xml", null);\r
105 \r
106                     if (result != null) {\r
107                         ReadUniprotFile(result, out, align);\r
108                     }\r
109                 }\r
110             }\r
111 \r
112             if (out != null) {\r
113                 out.writeBytes("</UNIPROT_CACHE>\n");\r
114                 out.close();\r
115             }\r
116         } catch (Exception ex) {\r
117             ex.printStackTrace();\r
118         }\r
119 \r
120         ap.repaint();\r
121         findMissingIds(align);\r
122 \r
123         if (sbuffer.length() > 0) {\r
124             output.setText(\r
125                 "Your sequences have been matched to Uniprot. Some of the ids have been\n" +\r
126                 "altered, most likely the start/end residue will have been updated.\n" +\r
127                 "Save your alignment to maintain the updated id.\n\n" +\r
128                 sbuffer.toString());\r
129             Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300);\r
130         }\r
131 \r
132         if (unknownSequences.size() > 0) {\r
133             //ignore for now!!!!!!!!!!\r
134             //  WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences);\r
135         }\r
136     }\r
137 \r
138     void ReadUniprotFile(String[] result, RandomAccessFile out, AlignmentI align) {\r
139         SequenceI sequence = null;\r
140         Vector features = null;\r
141         String type;\r
142         String description;\r
143         String status;\r
144         String start;\r
145         String end;\r
146         String pdb = null;\r
147 \r
148         for (int r = 0; r < result.length; r++) {\r
149             if ((sequence == null) && (result[r].indexOf("<name>") > -1)) {\r
150                 long filePointer = 0;\r
151 \r
152                 if (out != null) {\r
153                     try {\r
154                         filePointer = out.getFilePointer();\r
155                         out.writeBytes("<entry>\n");\r
156                     } catch (Exception ex) {\r
157                     }\r
158                 }\r
159 \r
160                 String seqName = parseElement(result[r], "<name>", out);\r
161                 sequence = align.findName(seqName);\r
162 \r
163                 if (sequence == null) {\r
164                     sequence = align.findName(seqName.substring(0,\r
165                                 seqName.indexOf('_')));\r
166 \r
167                     if (sequence != null) {\r
168                         sbuffer.append("changing " + sequence.getName() +\r
169                             " to " + seqName + "\n");\r
170                         sequence.setName(seqName);\r
171                     }\r
172                 }\r
173 \r
174                 if (sequence == null) {\r
175                     sbuffer.append("UNIPROT updated suggestion is " +\r
176                         result[r] + "\n");\r
177                     sequence = align.findName(result[r]);\r
178 \r
179                     // this entry has been suggested by ebi.\r
180                     // doesn't match id in alignment file\r
181                     try {\r
182                         out.setLength(filePointer);\r
183                     } catch (Exception ex) {\r
184                     }\r
185 \r
186                     // now skip to next entry\r
187                     while (result[r].indexOf("</entry>") == -1)\r
188                         r++;\r
189                 }\r
190 \r
191                 features = new Vector();\r
192                 type = "";\r
193                 start = "0";\r
194                 end = "0";\r
195                 description = "";\r
196                 status = "";\r
197                 pdb = "";\r
198             }\r
199 \r
200             if (sequence == null) {\r
201                 continue;\r
202             }\r
203 \r
204             if (result[r].indexOf("<property type=\"pdb accession\"") > -1) {\r
205                 pdb = parseValue(result[r], "value=", out);\r
206                 sequence.setPDBId(pdb);\r
207             }\r
208 \r
209             if (result[r].indexOf("feature type") > -1) {\r
210                 type = parseValue(result[r], "type=", out);\r
211                 description = parseValue(result[r], "description=", null);\r
212                 status = parseValue(result[r], "status=", null);\r
213 \r
214                 while (result[r].indexOf("position") == -1) {\r
215                     r++; //<location>\r
216                 }\r
217 \r
218                 // r++;\r
219                 if (result[r].indexOf("begin") > -1) {\r
220                     start = parseValue(result[r], "position=", out);\r
221                     end = parseValue(result[++r], "position=", out);\r
222                 } else {\r
223                     start = parseValue(result[r], "position=", out);\r
224                     end = parseValue(result[r], "position=", null);\r
225                 }\r
226 \r
227                 int sstart = Integer.parseInt(start);\r
228                 int eend = Integer.parseInt(end);\r
229 \r
230                 if (out != null) {\r
231                     try {\r
232                         out.writeBytes("</feature>\n");\r
233                     } catch (Exception ex) {\r
234                     }\r
235                 }\r
236 \r
237                 SequenceFeature sf = new SequenceFeature(type, sstart, eend,\r
238                         description, status);\r
239                 features.add(sf);\r
240             }\r
241 \r
242             if (result[r].indexOf("<sequence") > -1) {\r
243                 StringBuffer seqString = new StringBuffer();\r
244 \r
245                 if (out != null) {\r
246                     try {\r
247                         out.writeBytes(result[r] + "\n");\r
248                     } catch (Exception ex) {\r
249                     }\r
250                 }\r
251 \r
252                 while (result[++r].indexOf("</sequence>") == -1) {\r
253                     seqString.append(result[r]);\r
254 \r
255                     if (out != null) {\r
256                         try {\r
257                             out.writeBytes(result[r] + "\n");\r
258                         } catch (Exception ex) {\r
259                         }\r
260                     }\r
261                 }\r
262 \r
263                 if (out != null) {\r
264                     try {\r
265                         out.writeBytes(result[r] + "\n");\r
266                     } catch (Exception ex) {\r
267                     }\r
268                 }\r
269 \r
270                 StringBuffer nonGapped = new StringBuffer();\r
271 \r
272                 for (int i = 0; i < sequence.getSequence().length(); i++) {\r
273                     if (!jalview.util.Comparison.isGap(sequence.getCharAt(i))) {\r
274                         nonGapped.append(sequence.getCharAt(i));\r
275                     }\r
276                 }\r
277 \r
278                 int absStart = seqString.toString().indexOf(nonGapped.toString());\r
279 \r
280                 if (absStart == -1) {\r
281                     unknownSequences.add(sequence.getName());\r
282                     features = null;\r
283                     sbuffer.append(sequence.getName() +\r
284                         " SEQUENCE NOT %100 MATCH \n");\r
285 \r
286                     continue;\r
287                 }\r
288 \r
289                 int absEnd = absStart + nonGapped.toString().length();\r
290                 absStart += 1;\r
291 \r
292                 if ((absStart != sequence.getStart()) ||\r
293                         (absEnd != sequence.getEnd())) {\r
294                     sbuffer.append("Updated: " + sequence.getName() + " " +\r
295                         sequence.getStart() + "/" + sequence.getEnd() +\r
296                         "  to  " + absStart + "/" + absEnd + "\n");\r
297                 }\r
298 \r
299                 sequence.setStart(absStart);\r
300                 sequence.setEnd(absEnd);\r
301             }\r
302 \r
303             if (result[r].indexOf("</entry>") > -1) {\r
304                 if (features != null) {\r
305                     sequence.setSequenceFeatures(features);\r
306                 }\r
307 \r
308                 features = null;\r
309                 sequence = null;\r
310 \r
311                 if (out != null) {\r
312                     try {\r
313                         out.writeBytes("</entry>\n");\r
314                     } catch (Exception ex) {\r
315                     }\r
316                 }\r
317             }\r
318         }\r
319     }\r
320 \r
321     void findMissingIds(AlignmentI align) {\r
322         String data;\r
323         ArrayList cachedIds = new ArrayList();\r
324 \r
325         try {\r
326             BufferedReader in = new BufferedReader(new FileReader(\r
327                         jalview.bin.Cache.getProperty("UNIPROT_CACHE")));\r
328 \r
329             while ((data = in.readLine()) != null) {\r
330                 if (data.indexOf("name") > -1) {\r
331                     String name = parseElement(data, "<name>", null);\r
332                     cachedIds.add(name);\r
333                 }\r
334             }\r
335         } catch (Exception ex) {\r
336             ex.printStackTrace();\r
337         }\r
338 \r
339         for (int i = 0; i < align.getHeight(); i++)\r
340             if (!cachedIds.contains(align.getSequenceAt(i).getName())) {\r
341                 unknownSequences.add(align.getSequenceAt(i).getName());\r
342             }\r
343     }\r
344 \r
345     void tryLocalCacheFirst(ArrayList ids, AlignmentI align) {\r
346         ArrayList cacheData = new ArrayList();\r
347 \r
348         try {\r
349             BufferedReader in = new BufferedReader(new FileReader(\r
350                         jalview.bin.Cache.getProperty("UNIPROT_CACHE")));\r
351 \r
352             // read through cache file, if the cache has sequences we're looking for\r
353             // add the lines to a new String array, Readthis new array and\r
354             // make sure we remove the ids from the list to retrieve from EBI\r
355             String data;\r
356 \r
357             while ((data = in.readLine()) != null) {\r
358                 if (data.indexOf("name") > -1) {\r
359                     String name = parseElement(data, "<name>", null);\r
360 \r
361                     if (ids.contains(name)) {\r
362                         cacheData.add("<entry>");\r
363                         cacheData.add(data);\r
364 \r
365                         while (data.indexOf("</entry>") == -1) {\r
366                             data = in.readLine();\r
367                             cacheData.add(data);\r
368                         }\r
369 \r
370                         cacheData.add(data);\r
371 \r
372                         ids.remove(name);\r
373                     }\r
374                 }\r
375             }\r
376         } catch (Exception ex) {\r
377             ex.printStackTrace();\r
378         }\r
379 \r
380         String[] localData = new String[cacheData.size()];\r
381         cacheData.toArray(localData);\r
382 \r
383         if ((localData != null) && (localData.length > 0)) {\r
384             ReadUniprotFile(localData, null, align);\r
385         }\r
386     }\r
387 \r
388     String parseValue(String line, String tag, RandomAccessFile out) {\r
389         if (out != null) {\r
390             try {\r
391                 out.writeBytes(line + "\n");\r
392             } catch (Exception ex) {\r
393             }\r
394         }\r
395 \r
396         int index = line.indexOf(tag) + tag.length() + 1;\r
397 \r
398         if (index == tag.length()) {\r
399             return "";\r
400         }\r
401 \r
402         return line.substring(index, line.indexOf("\"", index + 1));\r
403     }\r
404 \r
405     String parseElement(String line, String tag, RandomAccessFile out) {\r
406         if (out != null) {\r
407             try {\r
408                 out.writeBytes(line + "\n");\r
409             } catch (Exception ex) {\r
410             }\r
411         }\r
412 \r
413         int index = line.indexOf(tag) + tag.length();\r
414 \r
415         return line.substring(index, line.indexOf("</"));\r
416     }\r
417 }\r