1 package jalview.analysis;
\r
6 import jalview.gui.*;
\r
7 import jalview.datamodel.*;
\r
// Runnable whose visible fragments read a local UniProt XML cache file, query
// EBI for the sequence names not found there, and attach the parsed features to
// the sequences of an alignment.
// NOTE(review): this excerpt omits many original lines (braces, fields, method
// headers); comments describe only what the visible lines show.
9 public class SequenceFeatureFetcher implements Runnable
\r
// Constructor taking the alignment to annotate and an alignment panel.
// NOTE(review): storing of align/ap is not visible here — presumably they are
// assigned to fields, since later fragments use both names; confirm.
14 public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap)
\r
// Creates a thread with this object as its Runnable target.
// NOTE(review): the call that starts the thread is not visible in this excerpt.
18 Thread thread = new Thread(this);
\r
// NOTE(review): the header of the enclosing method is not visible in this
// excerpt — presumably run(), given the class implements Runnable; confirm.
// Look up the configured location of the local UniProt cache file.
25 String cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");
\r
// Handle on the cache file; assigned below in both the "new file" and the
// "existing file" cases.
27 RandomAccessFile out = null;
\r
// Set the cache location to <user.home>/uniprot.xml and read the property back.
// NOTE(review): the condition guarding this default is not visible here —
// presumably it applies only when the property is unset.
32 jalview.bin.Cache.setProperty("UNIPROT_CACHE", System.getProperty("user.home")+"/uniprot.xml");
\r
33 cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");
\r
38 File test = new File(cache);
\r
// No cache file yet: create it and write the XML declaration followed by the
// opening <UNIPROT_CACHE> root element.
39 if( !test.exists() )
\r
41 out = new RandomAccessFile(cache, "rw");
\r
42 out.writeBytes("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
\r
43 out.writeBytes("<UNIPROT_CACHE>\n");
\r
// NOTE(review): presumably the else branch (cache file already exists).
47 out = new RandomAccessFile(cache, "rw");
\r
48 // open existing cache and remove </UNIPROT_CACHE> from the end
\r
// Scan the whole file, remembering the file position just after the most
// recent line that contains "</entry>".
// NOTE(review): how lastLine is used afterwards (seek/truncate) is not visible.
51 while ( (data = out.readLine()) != null)
\r
53 if (data.indexOf("</entry>") > -1)
\r
54 lastLine = out.getFilePointer();
\r
61 Vector sequences = align.getSequences();
\r
// Work through all sequences; seqIndex is advanced by the inner for loop.
63 while (seqIndex < sequences.size())
\r
65 ArrayList ids = new ArrayList();
\r
// Collect the names of at most 50 sequences for this batch.
66 for (int i = 0; seqIndex < sequences.size() && i < 50; seqIndex++, i++)
\r
68 SequenceI sequence = (SequenceI) sequences.get(seqIndex);
\r
69 ids.add(sequence.getName());
\r
// Serve what we can from the local cache; tryLocalCacheFirst removes from ids
// the names it found there.
72 tryLocalCacheFirst(ids, align);
\r
// Build the query "uniprot:" followed by each remaining id and a ';'.
76 StringBuffer remainingIds = new StringBuffer("uniprot:");
\r
77 for (int i = 0; i < ids.size(); i++)
\r
78 remainingIds.append(ids.get(i) + ";");
\r
// Request the remaining ids from EBI in "xml" format.
80 EBIFetchClient ebi = new EBIFetchClient();
\r
81 String[] result = ebi.fetchData(remainingIds.toString(), "xml", null);
\r
// Parse the returned lines; passing out lets the parser append them to the cache.
84 ReadUniprotFile(result, out, align);
\r
// Write the closing root element to the cache file.
91 out.writeBytes("</UNIPROT_CACHE>\n");
\r
// Any exception raised in the (not visible) try block is only printed.
94 }catch(Exception ex){ex.printStackTrace();}
\r
// Walks the given lines of UniProt XML, looks up the alignment sequence named
// by each entry, and collects that entry's features for it. Lines handed to
// parseValue/parseElement together with out are also written to the cache file.
//   result - lines of UniProt XML, one element per array slot
//   out    - cache file to append to; tryLocalCacheFirst passes null here
//   align  - alignment whose sequences are looked up by name
// NOTE(review): this excerpt omits many original lines (braces, guards, loop
// bodies); comments describe only what the visible lines show.
99 void ReadUniprotFile(String [] result, RandomAccessFile out, AlignmentI align)
\r
101 SequenceI sequence = null;
\r
102 Vector features = null;
\r
103 String type, description, status, start, end, pdb = null;
\r
106 for (int r = 0; r < result.length; r++)
\r
// No entry is being processed and this line holds a <name> element: a new
// entry starts here.
108 if(sequence==null && result[r].indexOf("<name>")>-1)
\r
110 long filePointer = 0;
\r
// Remember where the entry begins in the cache file, then write its opening
// tag. Exceptions raised here are ignored by the empty catch.
114 filePointer=out.getFilePointer();
\r
115 out.writeBytes("<entry>\n");
\r
116 }catch(Exception ex){}
\r
// Look up the alignment sequence by the text of the <name> element.
118 sequence = align.findName( parseElement( result[r], "<name>" , out)) ;
\r
// NOTE(review): the condition guarding the next lines is not visible here —
// presumably sequence == null (no sequence with that name in the alignment).
121 System.out.println("Couldnt find sequence id. Suggestion is "+result[r]);
\r
123 // this entry has been suggested by ebi.
\r
124 // doesn't match id in alignment file
\r
// Cut the cache file back to the position recorded before "<entry>" was
// written, discarding what was written for this entry.
125 try { out.setLength(filePointer); } catch (Exception ex) {}
\r
126 // now skip to next entry
\r
127 while( result[r].indexOf("</entry>")==-1)
\r
// Start a fresh feature list and reset the per-feature fields to defaults.
131 features = new Vector();
\r
132 type=""; start="0"; end="0"; description=""; status=""; pdb="";
\r
// A PDB accession property: read its value attribute and set it on the sequence.
139 if( result[r].indexOf("<property type=\"pdb accession\"")>-1)
\r
141 pdb = parseValue( result[r], "value=" , out);
\r
142 sequence.setPDBId(pdb);
\r
// A feature line: read its type, description and status attributes. Only the
// first call is given out; the other two pass null — presumably so the line
// is written to the cache only once.
145 if(result[r].indexOf("feature type")>-1)
\r
147 type = parseValue( result[r], "type=" , out);
\r
148 description = parseValue( result[r], "description=" , null );
\r
149 status = parseValue ( result[r], "status=", null);
\r
// Move on to the next line that contains "position".
// NOTE(review): the loop body is not visible here — presumably it advances r.
151 while( result[r].indexOf("position")==-1)
\r
// A "begin" line: start comes from this line and end from the following line
// (r is advanced by the ++r).
156 if(result[r].indexOf("begin")>-1)
\r
158 start = parseValue( result[r], "position=" , out);
\r
159 end = parseValue( result[++r], "position=" , out);
\r
// NOTE(review): presumably the else branch — start and end are both read from
// the same line.
163 start = parseValue( result[r], "position=" , out);
\r
164 end = parseValue( result[r], "position=" , null);
\r
166 int sstart = Integer.parseInt(start);
\r
167 int eend = Integer.parseInt(end);
\r
// Close the feature element in the cache file; write errors are ignored.
169 try{ out.writeBytes("</feature>\n"); }catch(Exception ex){}
\r
// Only a feature lying within the sequence's start..end range gets a
// SequenceFeature object (constructor arguments not visible here).
172 if(sstart>=sequence.getStart() && eend<=sequence.getEnd())
\r
174 SequenceFeature sf = new SequenceFeature(type,
\r
// End of the entry: hand the collected features to the sequence and close the
// entry element in the cache file.
183 if(result[r].indexOf("</entry>")>-1)
\r
186 sequence.setSequenceFeatures( features );
\r
190 try{ out.writeBytes("</entry>\n"); }catch(Exception ex){}
\r
// NOTE(review): presumably asks the alignment panel to redraw; confirm what
// RefreshPanels does.
195 ap.RefreshPanels();
\r
199 void tryLocalCacheFirst(ArrayList ids, AlignmentI align)
\r
201 ArrayList cacheData = new ArrayList();
\r
203 BufferedReader in = new BufferedReader(
\r
204 new FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE")));
\r
206 // read through cache file, if the cache has sequences we're looking for
\r
207 // add the lines to a new String array, Readthis new array and
\r
208 // make sure we remove the ids from the list to retrieve from EBI
\r
210 while( ( data=in.readLine())!=null)
\r
212 if(data.indexOf("name")>-1)
\r
214 String name = parseElement( data, "<name>" , null) ;
\r
215 if(ids.contains( name ) )
\r
217 cacheData.add("<entry>");
\r
218 cacheData.add(data);
\r
219 while( data.indexOf("</entry>")==-1)
\r
221 data = in.readLine();
\r
222 cacheData.add(data);
\r
224 cacheData.add(data);
\r
226 ids.remove( name );
\r
231 catch(Exception ex){ex.printStackTrace();}
\r
233 String [] localData = new String[cacheData.size()];
\r
234 cacheData.toArray( localData );
\r
235 if(localData!=null && localData.length>0)
\r
236 ReadUniprotFile(localData, null, align);
\r
240 String parseValue(String line, String tag, RandomAccessFile out)
\r
243 try{ out.writeBytes(line+"\n"); }catch(Exception ex){}
\r
246 int index = line.indexOf(tag)+tag.length()+1;
\r
247 if(index==tag.length())
\r
250 return line.substring( index, line.indexOf("\"", index+1) );
\r
254 String parseElement(String line, String tag, RandomAccessFile out)
\r
259 out.writeBytes(line + "\n");
\r
261 catch (Exception ex)
\r
264 int index = line.indexOf(tag) + tag.length();
\r
265 return line.substring(index, line.indexOf("</"));
\r