2 * Jalview - A Sequence Alignment Editor and Viewer
\r
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
\r
5 * This program is free software; you can redistribute it and/or
\r
6 * modify it under the terms of the GNU General Public License
\r
7 * as published by the Free Software Foundation; either version 2
\r
8 * of the License, or (at your option) any later version.
\r
10 * This program is distributed in the hope that it will be useful,
\r
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
13 * GNU General Public License for more details.
\r
15 * You should have received a copy of the GNU General Public License
\r
16 * along with this program; if not, write to the Free Software
\r
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
\r
24 import javax.swing.*;
\r
25 import jalview.io.*;
\r
26 import jalview.gui.*;
\r
27 import jalview.datamodel.*;
\r
// Matches the sequences of an alignment against Uniprot entries. Entries are
// taken from a local XML cache file when available and otherwise fetched
// through EBIFetchClient; matched sequences receive features, a PDB id and
// updated name/start/end values (see ReadUniprotFile).
// Implements Runnable: the constructor wraps this object in a Thread.
// NOTE(review): original lines 30-32 are missing from this listing; an
// alignment field named align is used later without a visible declaration,
// so it is presumably declared there - confirm against the full file.
29 public class SequenceFeatureFetcher implements Runnable
\r
// Names of alignment sequences that could not be matched; filled by
// ReadUniprotFile and findMissingIds.
33 ArrayList unknownSequences;
\r
// Text window used to show the change report once processing has finished.
34 CutAndPasteTransfer output = new CutAndPasteTransfer();
\r
// Accumulates the human readable report of renamed or re-ranged sequences.
35 StringBuffer sbuffer = new StringBuffer();
\r
// Creates a fetcher for the given alignment and panel.
// align: alignment whose sequences are to be matched against Uniprot.
// ap: alignment panel; not referenced in any line visible in this listing.
// NOTE(review): original lines 38, 40-41 and 43 onwards are missing here;
// presumably they store the arguments and start the thread - confirm.
37 public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap)
\r
// Start with an empty list of unmatched sequence names.
39 unknownSequences = new ArrayList();
\r
// Wrap this Runnable in a new thread (the start call is not visible here).
42 Thread thread = new Thread(this);
\r
// Worker body: opens (or creates) the Uniprot cache file, resolves the
// alignment sequences in batches against the cache and the EBI service,
// then reports the results.
// NOTE(review): the method header is missing from this listing; this is
// presumably the body of run() - confirm against the full file.
// Location of the cache file comes from the UNIPROT_CACHE property.
49 String cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");
\r
51 RandomAccessFile out = null;
\r
// Default the cache location to uniprot.xml in the user home directory.
// NOTE(review): the guard around this default is not visible; presumably it
// only applies when the property is unset - confirm.
56 jalview.bin.Cache.setProperty("UNIPROT_CACHE", System.getProperty("user.home")+"/uniprot.xml");
\r
57 cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");
\r
62 File test = new File(cache);
\r
// No cache file yet: create it and write the XML declaration and root tag.
63 if( !test.exists() )
\r
65 out = new RandomAccessFile(cache, "rw");
\r
66 out.writeBytes("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
\r
67 out.writeBytes("<UNIPROT_CACHE>\n");
\r
// Cache file already exists: reopen it for reading and writing.
71 out = new RandomAccessFile(cache, "rw");
\r
72 // open existing cache and remove </UNIPROT_CACHE> from the end
\r
// Scan every line, remembering the file offset just after the last line that
// contains a closing entry tag.
// NOTE(review): the step that repositions or truncates the file at lastLine
// is not visible in this listing - confirm.
75 while ( (data = out.readLine()) != null)
\r
77 if (data.indexOf("</entry>") > -1)
\r
78 lastLine = out.getFilePointer();
\r
85 Vector sequences = align.getSequences();
\r
// Work through the sequences in batches of at most 50 ids.
87 while (seqIndex < sequences.size())
\r
89 ArrayList ids = new ArrayList();
\r
90 for (int i = 0; seqIndex < sequences.size() && i < 50; seqIndex++, i++)
\r
92 SequenceI sequence = (SequenceI) sequences.get(seqIndex);
\r
93 ids.add(sequence.getName());
\r
// Resolve what we can from the local cache; ids found there are removed
// from the list by tryLocalCacheFirst.
96 tryLocalCacheFirst(ids, align);
\r
// Build the query string: the uniprot: prefix followed by each remaining id
// terminated with a semicolon.
100 StringBuffer remainingIds = new StringBuffer("uniprot:");
\r
101 for (int i = 0; i < ids.size(); i++)
\r
102 remainingIds.append(ids.get(i) + ";");
\r
// Ask the EBI fetch service for the remaining ids in xml format.
104 EBIFetchClient ebi = new EBIFetchClient();
\r
105 String[] result = ebi.fetchData(remainingIds.toString(), "xml", null);
\r
// Parse the returned lines; passing out makes the parser copy them into
// the cache file as it goes.
108 ReadUniprotFile(result, out, align);
\r
// Terminate the cache document with the closing root tag.
115 out.writeBytes("</UNIPROT_CACHE>\n");
\r
// Any failure in the block above is only reported on stderr.
118 }catch(Exception ex){ex.printStackTrace();}
\r
// Record alignment sequences whose names are absent from the cache.
121 findMissingIds(align);
\r
// If anything was logged, show the report in a desktop frame.
122 if(sbuffer.length()>0)
\r
124 output.setText("Your sequences have been matched to Uniprot. Some of the ids have been\n"
\r
125 +"altered, most likely the start/end residue will have been updated.\n"
\r
126 +"Save your alignment to maintain the updated id.\n\n"+sbuffer.toString());
\r
127 Desktop.addInternalFrame(output, "Sequence names updated ", 600,300);
\r
// Follow-up for unmatched sequences is currently disabled (the blast client
// call below is commented out).
131 if(unknownSequences.size()>0)
\r
133 //ignore for now!!!!!!!!!!
\r
134 // WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences);
\r
// Parses Uniprot XML supplied as an array of lines and applies each entry to
// the matching sequence of the alignment.
// result: lines of Uniprot XML (from the EBI service or the local cache).
// out: cache file that parsed lines are copied into; callers pass null when
//      replaying cached data, and write failures are silently ignored.
// align: alignment whose sequences are looked up by name and updated.
// Side effects: may rename sequences, set PDB id, start, end and features,
// append to sbuffer and add names to unknownSequences.
// NOTE(review): many guard and brace lines are missing from this listing, so
// the exact nesting of the statements below must be checked in the full file.
139 void ReadUniprotFile(String [] result, RandomAccessFile out, AlignmentI align)
\r
// Sequence of the entry currently being parsed; null between entries.
141 SequenceI sequence = null;
\r
142 Vector features = null;
\r
143 String type, description, status, start, end, pdb = null;
\r
146 for (int r = 0; r < result.length; r++)
\r
// A name element while no entry is open marks the start of a new entry.
148 if(sequence==null && result[r].indexOf("<name>")>-1)
\r
150 long filePointer = 0;
\r
// Remember where this entry starts in the cache so it can be rolled back
// if the entry turns out not to match any sequence.
154 filePointer=out.getFilePointer();
\r
155 out.writeBytes("<entry>\n");
\r
156 }catch(Exception ex){}
\r
// Extract the Uniprot name and look it up in the alignment.
158 String seqName = parseElement( result[r], "<name>" , out);
\r
159 sequence = align.findName( seqName ) ;
\r
// Retry using only the part of the name before the first underscore.
// NOTE(review): presumably only when the first lookup failed - the
// condition is not visible here.
162 sequence = align.findName( seqName.substring(0, seqName.indexOf('_')));
\r
// Log the rename and give the sequence the full Uniprot name.
165 sbuffer.append("changing "+sequence.getName()+" to "+seqName+"\n");
\r
166 sequence.setName(seqName);
\r
// Further fallback: log the raw line and look the sequence up by it.
// NOTE(review): the condition selecting this branch is not visible.
171 sbuffer.append("UNIPROT updated suggestion is "+result[r]+"\n");
\r
172 sequence = align.findName( result[r] ) ;
\r
174 // this entry has been suggested by ebi.
\r
175 // doesn't match id in alignment file
\r
// Roll the cache file back to the offset recorded before this entry.
176 try { out.setLength(filePointer); } catch (Exception ex) {}
\r
177 // now skip to next entry
\r
178 while( result[r].indexOf("</entry>")==-1)
\r
// Reset the per-entry state before reading properties and features.
182 features = new Vector();
\r
183 type=""; start="0"; end="0"; description=""; status=""; pdb="";
\r
// PDB cross reference: store the accession on the sequence.
190 if( result[r].indexOf("<property type=\"pdb accession\"")>-1)
\r
192 pdb = parseValue( result[r], "value=" , out);
\r
193 sequence.setPDBId(pdb);
\r
// Feature element: read its attributes from the opening line. Only the
// first parseValue call is given out, so the line is cached once.
196 if(result[r].indexOf("feature type")>-1)
\r
198 type = parseValue( result[r], "type=" , out);
\r
199 description = parseValue( result[r], "description=" , null );
\r
200 status = parseValue ( result[r], "status=", null);
\r
// Advance to the line carrying the position attribute.
// NOTE(review): the loop body is not visible here; presumably it
// increments r - confirm.
202 while( result[r].indexOf("position")==-1)
\r
// A line containing begin is followed by a second line holding the end
// position, so start and end are read from consecutive lines.
207 if(result[r].indexOf("begin")>-1)
\r
209 start = parseValue( result[r], "position=" , out);
\r
210 end = parseValue( result[++r], "position=" , out);
\r
// Otherwise a single position is used for both start and end.
214 start = parseValue( result[r], "position=" , out);
\r
215 end = parseValue( result[r], "position=" , null);
\r
217 int sstart = Integer.parseInt(start);
\r
218 int eend = Integer.parseInt(end);
\r
// Close the feature element in the cache.
220 try{ out.writeBytes("</feature>\n"); }catch(Exception ex){}
\r
222 SequenceFeature sf = new SequenceFeature(type,
\r
// Sequence element: gather the residue lines up to the closing tag,
// copying every line to the cache on the way.
230 if(result[r].indexOf("<sequence")>-1)
\r
232 StringBuffer seqString = new StringBuffer();
\r
235 try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){}
\r
237 while(result[++r].indexOf("</sequence>")==-1)
\r
239 seqString.append(result[r]);
\r
241 try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){}
\r
245 try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){}
\r
// Strip gap characters from the aligned sequence so it can be located
// inside the Uniprot sequence.
247 StringBuffer nonGapped = new StringBuffer();
\r
248 for (int i = 0; i < sequence.getSequence().length(); i++)
\r
250 if (!jalview.util.Comparison.isGap(sequence.getCharAt(i)))
\r
251 nonGapped.append(sequence.getCharAt(i));
\r
// Offset of the ungapped residues within the Uniprot sequence; indexOf
// yields -1 when they do not occur in it.
254 int absStart = seqString.toString().indexOf(nonGapped.toString());
\r
// NOTE(review): presumably reached only when absStart is -1 - the
// condition is not visible here.
257 unknownSequences.add(sequence.getName());
\r
259 sbuffer.append(sequence.getName()+ " SEQUENCE NOT %100 MATCH \n");
\r
263 int absEnd = absStart + nonGapped.toString().length();
\r
// Log the change when the computed range differs from the stored one.
266 if(absStart!=sequence.getStart() || absEnd!=sequence.getEnd())
\r
267 sbuffer.append("Updated: "+sequence.getName()+" "+
\r
268 sequence.getStart()+"/"+sequence.getEnd()+" to "+ absStart+"/"+absEnd+"\n");
\r
271 sequence.setStart(absStart);
\r
272 sequence.setEnd(absEnd);
\r
// End of entry: attach the collected features and close the entry in the
// cache.
276 if(result[r].indexOf("</entry>")>-1)
\r
279 sequence.setSequenceFeatures( features );
\r
283 try{ out.writeBytes("</entry>\n"); }catch(Exception ex){}
\r
// Adds to unknownSequences the name of every alignment sequence that does
// not appear as an entry name in the local Uniprot cache file.
// align: alignment whose sequence names are checked.
// A failure while reading the cache is only printed; the comparison below
// then runs against whatever names were collected up to that point.
289 void findMissingIds(AlignmentI align)
\r
292 ArrayList cachedIds = new ArrayList();
\r
// Read the cache file named by the UNIPROT_CACHE property line by line.
296 BufferedReader in = new BufferedReader(
\r
297 new FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE")));
\r
299 while ( (data = in.readLine()) != null)
\r
// NOTE(review): this tests for the bare word name, while the parse below
// expects the full name tag; other lines containing name would be parsed
// incorrectly - confirm against real cache contents.
301 if (data.indexOf("name") > -1)
\r
303 String name = parseElement(data, "<name>", null);
\r
304 cachedIds.add(name);
\r
308 catch (Exception ex)
\r
309 { ex.printStackTrace(); }
\r
// Every sequence whose name is not in the cache counts as unknown.
311 for(int i=0; i<align.getHeight(); i++)
\r
312 if( !cachedIds.contains( align.getSequenceAt(i).getName() ) )
\r
313 unknownSequences.add( align.getSequenceAt(i).getName() );
\r
// Resolves as many of the requested ids as possible from the local cache
// file before the remote service is queried.
// ids: names still to be resolved; each name found in the cache is removed
//      from this list, so the caller only fetches what is left.
// align: alignment passed on to ReadUniprotFile for the cached entries.
318 void tryLocalCacheFirst(ArrayList ids, AlignmentI align)
\r
// Lines of the cached entries that match one of the requested ids.
320 ArrayList cacheData = new ArrayList();
\r
322 BufferedReader in = new BufferedReader(
\r
323 new FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE")));
\r
325 // read through cache file, if the cache has sequences we're looking for
\r
326 // add the lines to a new String array, Readthis new array and
\r
327 // make sure we remove the ids from the list to retrieve from EBI
\r
329 while( ( data=in.readLine())!=null)
\r
331 if(data.indexOf("name")>-1)
\r
333 String name = parseElement( data, "<name>" , null) ;
\r
334 if(ids.contains( name ) )
\r
// Re-create the opening entry tag, then copy the name line and the lines
// that follow up to the one containing the closing entry tag.
// NOTE(review): brace lines are missing from this listing, so whether the
// add after the loop duplicates the closing line cannot be told from here.
336 cacheData.add("<entry>");
\r
337 cacheData.add(data);
\r
338 while( data.indexOf("</entry>")==-1)
\r
340 data = in.readLine();
\r
341 cacheData.add(data);
\r
343 cacheData.add(data);
\r
// This id is satisfied locally, so drop it from the remote request.
345 ids.remove( name );
\r
350 catch(Exception ex){ex.printStackTrace();}
\r
// Feed the collected lines to the parser; out is null so nothing is written
// back to the cache. localData itself is never null here, so the length
// test is the effective guard.
352 String [] localData = new String[cacheData.size()];
\r
353 cacheData.toArray( localData );
\r
354 if(localData!=null && localData.length>0)
\r
355 ReadUniprotFile(localData, null, align);
\r
// Extracts a quoted attribute value from one line of XML.
// line: the XML line to read.
// tag: attribute name including the equals sign, for example value= .
// out: cache file the line is copied to; callers may pass null, and any
//      failure while writing is ignored.
// Returns the text between the quote that follows the tag and the next
// double quote.
359 String parseValue(String line, String tag, RandomAccessFile out)
\r
// Copy the raw line into the cache; errors are deliberately swallowed.
362 try{ out.writeBytes(line+"\n"); }catch(Exception ex){}
\r
// Position just past the tag and the opening quote character.
365 int index = line.indexOf(tag)+tag.length()+1;
\r
// index equals tag.length() exactly when indexOf returned -1, that is when
// the tag is absent from the line.
// NOTE(review): the statement executed in that case is missing from this
// listing - confirm what is returned.
366 if(index==tag.length())
\r
// The closing quote is searched from index+1, so a quote located exactly at
// index (an empty value) would be skipped.
369 return line.substring( index, line.indexOf("\"", index+1) );
\r
// Extracts the text content of a single-line XML element.
// line: the XML line to read.
// tag: opening tag of the element, for example the name tag.
// out: cache file the line is copied to; callers may pass null.
// Returns the text between the end of the opening tag and the first
// occurrence of the closing-tag marker in the line.
// NOTE(review): there is no check that tag or the closing marker is present;
// a line without them gives a wrong substring or an index exception.
373 String parseElement(String line, String tag, RandomAccessFile out)
\r
// Copy the raw line into the cache; the catch below belongs to this write.
// NOTE(review): the guard lines before this write are missing from this
// listing; presumably a null check on out - confirm.
378 out.writeBytes(line + "\n");
\r
380 catch (Exception ex)
\r
// Content starts right after the opening tag.
383 int index = line.indexOf(tag) + tag.length();
\r
384 return line.substring(index, line.indexOf("</"));
\r