2 * Jalview - A Sequence Alignment Editor and Viewer
\r
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
\r
5 * This program is free software; you can redistribute it and/or
\r
6 * modify it under the terms of the GNU General Public License
\r
7 * as published by the Free Software Foundation; either version 2
\r
8 * of the License, or (at your option) any later version.
\r
10 * This program is distributed in the hope that it will be useful,
\r
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
13 * GNU General Public License for more details.
\r
15 * You should have received a copy of the GNU General Public License
\r
16 * along with this program; if not, write to the Free Software
\r
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
\r
21 import jalview.datamodel.*;
\r
23 import jalview.gui.*;
\r
34 * @version $Revision$
\r
36 public class SequenceFeatureFetcher implements Runnable
\r
40 ArrayList unknownSequences;
\r
41 CutAndPasteTransfer output = new CutAndPasteTransfer();
\r
42 StringBuffer sbuffer = new StringBuffer();
\r
45 * Creates a new SequenceFeatureFetcher object.
\r
47 * @param align DOCUMENT ME!
\r
48 * @param ap DOCUMENT ME!
\r
50 public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap)
\r
52 unknownSequences = new ArrayList();
\r
56 Thread thread = new Thread(this);
\r
65 RandomAccessFile out = null;
\r
69 String cache = System.getProperty("user.home") +
\r
70 "/.jalview.uniprot.xml";
\r
72 File test = new File(cache);
\r
76 out = new RandomAccessFile(cache, "rw");
\r
77 out.writeBytes("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
\r
78 out.writeBytes("<UNIPROT_CACHE>\n");
\r
82 out = new RandomAccessFile(cache, "rw");
\r
84 // open existing cache and remove </UNIPROT_CACHE> from the end
\r
88 while ((data = out.readLine()) != null)
\r
90 if (data.indexOf("</entry>") > -1)
\r
92 lastLine = out.getFilePointer();
\r
100 Vector sequences = align.getSequences();
\r
102 while (seqIndex < sequences.size())
\r
104 ArrayList ids = new ArrayList();
\r
106 for (int i = 0; (seqIndex < sequences.size()) && (i < 50);
\r
109 SequenceI sequence = (SequenceI) sequences.get(seqIndex);
\r
110 ids.add(sequence.getName());
\r
113 tryLocalCacheFirst(ids, align);
\r
115 if (ids.size() > 0)
\r
117 StringBuffer remainingIds = new StringBuffer("uniprot:");
\r
119 for (int i = 0; i < ids.size(); i++)
\r
120 remainingIds.append(ids.get(i) + ";");
\r
122 EBIFetchClient ebi = new EBIFetchClient();
\r
123 String[] result = ebi.fetchData(remainingIds.toString(),
\r
126 if (result != null)
\r
128 ReadUniprotFile(result, out, align);
\r
135 out.writeBytes("</UNIPROT_CACHE>\n");
\r
139 catch (Exception ex)
\r
141 ex.printStackTrace();
\r
144 findMissingIds(align);
\r
146 if (sbuffer.length() > 0)
\r
149 "Your sequences have been matched to Uniprot. Some of the ids have been\n" +
\r
150 "altered, most likely the start/end residue will have been updated.\n" +
\r
151 "Save your alignment to maintain the updated id.\n\n" +
\r
152 sbuffer.toString());
\r
153 Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300);
\r
156 if (unknownSequences.size() > 0)
\r
158 //ignore for now!!!!!!!!!!
\r
159 // WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences);
\r
162 jalview.gui.PaintRefresher.Refresh(null, align);
\r
168 * @param result lines of Uniprot XML text to scan, one array element per line
\r
169 * @param out cache file to which entry lines are written; null is passed when re-reading cached data
\r
170 * @param align alignment whose sequences are looked up by name (findName) and updated
\r
172 void ReadUniprotFile(String[] result, RandomAccessFile out, AlignmentI align)
\r
174 SequenceI sequence = null;
\r
175 Vector features = null;
\r
177 String description;
\r
183 for (int r = 0; r < result.length; r++)
\r
185 if ((sequence == null) && (result[r].indexOf("<name>") > -1))
\r
187 long filePointer = 0;
\r
193 filePointer = out.getFilePointer();
\r
194 out.writeBytes("<entry>\n");
\r
196 catch (Exception ex)
\r
201 String seqName = parseElement(result[r], "<name>", out);
\r
202 sequence = align.findName(seqName);
\r
204 if (sequence == null)
\r
206 sequence = align.findName(seqName.substring(0,
\r
207 seqName.indexOf('_')));
\r
209 if (sequence != null)
\r
211 sbuffer.append("changing " + sequence.getName() +
\r
212 " to " + seqName + "\n");
\r
213 sequence.setName(seqName);
\r
217 if (sequence == null)
\r
219 sbuffer.append("UNIPROT updated suggestion is " +
\r
221 sequence = align.findName(result[r]);
\r
223 // this entry has been suggested by ebi.
\r
224 // doesn't match id in alignment file
\r
227 out.setLength(filePointer);
\r
229 catch (Exception ex)
\r
233 // now skip to next entry
\r
234 while (result[r].indexOf("</entry>") == -1)
\r
238 features = new Vector();
\r
247 if (sequence == null)
\r
252 if (result[r].indexOf("<property type=\"pdb accession\"") > -1)
\r
254 pdb = parseValue(result[r], "value=", out);
\r
255 sequence.setPDBId(pdb);
\r
258 if (result[r].indexOf("feature type") > -1)
\r
260 type = parseValue(result[r], "type=", out);
\r
261 description = parseValue(result[r], "description=", null);
\r
262 status = parseValue(result[r], "status=", null);
\r
264 while (result[r].indexOf("position") == -1)
\r
270 if (result[r].indexOf("begin") > -1)
\r
272 start = parseValue(result[r], "position=", out);
\r
273 end = parseValue(result[++r], "position=", out);
\r
277 start = parseValue(result[r], "position=", out);
\r
278 end = parseValue(result[r], "position=", null);
\r
281 int sstart = Integer.parseInt(start);
\r
282 int eend = Integer.parseInt(end);
\r
288 out.writeBytes("</feature>\n");
\r
290 catch (Exception ex)
\r
295 SequenceFeature sf = new SequenceFeature(type, sstart, eend,
\r
296 description, status);
\r
300 if (result[r].indexOf("<sequence length=") > -1)
\r
302 StringBuffer seqString = new StringBuffer();
\r
308 out.writeBytes(result[r] + "\n");
\r
310 catch (Exception ex)
\r
315 while (result[++r].indexOf("</sequence>") == -1)
\r
317 seqString.append(result[r]);
\r
323 out.writeBytes(result[r] + "\n");
\r
325 catch (Exception ex)
\r
335 out.writeBytes(result[r] + "\n");
\r
337 catch (Exception ex)
\r
342 StringBuffer nonGapped = new StringBuffer();
\r
344 for (int i = 0; i < sequence.getSequence().length(); i++)
\r
346 if (!jalview.util.Comparison.isGap(sequence.getCharAt(i)))
\r
348 nonGapped.append(sequence.getCharAt(i));
\r
352 int absStart = seqString.toString().indexOf(nonGapped.toString());
\r
354 if (absStart == -1)
\r
356 unknownSequences.add(sequence.getName());
\r
358 sbuffer.append(sequence.getName() +
\r
359 " SEQUENCE NOT %100 MATCH \n");
\r
364 int absEnd = absStart + nonGapped.toString().length();
\r
367 if ((absStart != sequence.getStart()) ||
\r
368 (absEnd != sequence.getEnd()))
\r
370 sbuffer.append("Updated: " + sequence.getName() + " " +
\r
371 sequence.getStart() + "/" + sequence.getEnd() +
\r
372 " to " + absStart + "/" + absEnd + "\n");
\r
375 sequence.setStart(absStart);
\r
376 sequence.setEnd(absEnd);
\r
379 if (result[r].indexOf("</entry>") > -1)
\r
381 if (features != null)
\r
383 sequence.setSequenceFeatures(features);
\r
393 out.writeBytes("</entry>\n");
\r
395 catch (Exception ex)
\r
406 * @param align alignment whose sequence names are checked against the names read from the cache file
\r
408 void findMissingIds(AlignmentI align)
\r
411 ArrayList cachedIds = new ArrayList();
\r
415 if(jalview.bin.Cache.getProperty("UNIPROT_CACHE")==null)
\r
418 BufferedReader in = new BufferedReader(new FileReader(
\r
419 jalview.bin.Cache.getProperty("UNIPROT_CACHE")));
\r
421 while ((data = in.readLine()) != null)
\r
423 if (data.indexOf("name") > -1)
\r
425 String name = parseElement(data, "<name>", null);
\r
426 cachedIds.add(name);
\r
430 catch (Exception ex)
\r
432 ex.printStackTrace();
\r
435 for (int i = 0; i < align.getHeight(); i++)
\r
436 if (!cachedIds.contains(align.getSequenceAt(i).getName()))
\r
438 unknownSequences.add(align.getSequenceAt(i).getName());
\r
445 * @param ids names of the sequences to search for in the local cache file
\r
446 * @param align alignment passed on to ReadUniprotFile together with the cached lines
\r
448 void tryLocalCacheFirst(ArrayList ids, AlignmentI align)
\r
450 ArrayList cacheData = new ArrayList();
\r
454 if(jalview.bin.Cache.getProperty("UNIPROT_CACHE")==null)
\r
457 BufferedReader in = new BufferedReader(new FileReader(
\r
458 jalview.bin.Cache.getProperty("UNIPROT_CACHE")));
\r
460 // read through cache file, if the cache has sequences we're looking for
\r
461 // add the lines to a new String array, read this new array and
\r
462 // make sure we remove the ids from the list to retrieve from EBI
\r
465 while ((data = in.readLine()) != null)
\r
467 if (data.indexOf("name") > -1)
\r
469 String name = parseElement(data, "<name>", null);
\r
471 if (ids.contains(name))
\r
473 cacheData.add("<entry>");
\r
474 cacheData.add(data);
\r
476 while (data.indexOf("</entry>") == -1)
\r
478 data = in.readLine();
\r
479 cacheData.add(data);
\r
482 cacheData.add(data);
\r
489 catch (Exception ex)
\r
491 ex.printStackTrace();
\r
494 String[] localData = new String[cacheData.size()];
\r
495 cacheData.toArray(localData);
\r
497 if ((localData != null) && (localData.length > 0))
\r
499 ReadUniprotFile(localData, null, align);
\r
506 * @param line line of text containing the attribute
\r
507 * @param tag attribute name including the trailing '=' (e.g. "type=")
\r
508 * @param out file to which the line is written (writeBytes); some callers pass null
\r
510 * @return for a line containing the tag, the text between the quote that follows the tag and the next double quote
\r
512 String parseValue(String line, String tag, RandomAccessFile out)
\r
518 out.writeBytes(line + "\n");
\r
520 catch (Exception ex)
\r
525 int index = line.indexOf(tag) + tag.length() + 1;
\r
527 if (index == tag.length())
\r
532 return line.substring(index, line.indexOf("\"", index + 1));
\r
538 * @param line line of text containing the element
\r
539 * @param tag opening tag of the element (e.g. "<name>")
\r
540 * @param out file to which the line is written (writeBytes); some callers pass null
\r
542 * @return the text between the end of the tag and the first "</" in the line
\r
544 String parseElement(String line, String tag, RandomAccessFile out)
\r
550 out.writeBytes(line + "\n");
\r
552 catch (Exception ex)
\r
557 int index = line.indexOf(tag) + tag.length();
\r
559 return line.substring(index, line.indexOf("</"));
\r