// SequenceFeatureFetcher
//
// A Runnable whose constructor stores the alignment and alignment panel it is
// given and immediately starts a new Thread on itself, so run() executes in the
// background.
//
// run():
//   - Reads the "UNIPROT_CACHE" property through jalview.bin.Cache; when it is
//     unset, sets it to <user.home>/uniprot.xml and reads it back.
//   - Opens that path as a RandomAccessFile in "rw" mode. A file that does not
//     exist yet gets two header lines written; an existing file is scanned line
//     by line and the file pointer is moved to just after the last line that
//     matched the marker, so new output continues from there.
//   - Walks align.getSequences() in batches of at most 50 sequence names. Each
//     batch is first offered to tryLocalCacheFirst(), which removes the ids it
//     could serve from the cache. Any ids left are joined as
//     "uniprot:id1;id2;..." and requested from EBIFetchClient.fetchData(query,
//     "xml", null); a non-null reply is handed to ReadUniprotFile().
//   - Finally writes one closing line and closes the cache file. Any exception
//     is only printed via printStackTrace().
//
// ReadUniprotFile(result, out, align):
//   - Scans the reply lines. At the start of an entry it remembers the current
//     file pointer of out (when out is non-null) and resolves the sequence with
//     align.findName(parseElement(...)). If no sequence matches, it prints
//     "Couldnt find sequence id. ...", truncates out back to the remembered
//     pointer and skips ahead in result.
//   - For feature lines it reads type, description, status and the begin/end
//     positions, and keeps a SequenceFeature only when
//     sstart >= sequence.getStart() and eend <= sequence.getEnd().
//   - At the end of an entry the collected features are stored with
//     sequence.setSequenceFeatures(); after the loop ap.RefreshPanels() is called.
//
// tryLocalCacheFirst(ids, align):
//   - Reads the cache file named by "UNIPROT_CACHE" with a BufferedReader. For
//     each line containing "name" whose parsed name is in ids, it collects the
//     entry's lines and removes that name from ids. The collected lines are then
//     passed to ReadUniprotFile() with out == null, so nothing is re-written.
//
// parseValue(line, tag, out) / parseElement(line, tag, out):
//   - Both first echo the line to out when out is non-null (write errors are
//     silently ignored). parseValue returns "" when tag is absent; otherwise it
//     returns the text that starts one character after the tag (presumably
//     skipping an opening quote - confirm) up to the next '"'.
//
// NOTE(review): this copy of the file looks damaged and should be checked
// against a pristine revision before any code change:
//   - Several string literals appear to have lost their markup: the markers
//     passed to indexOf()/parseElement() are empty ("") and the header/footer
//     writes contain only "\n". An empty marker makes indexOf() match every line.
//   - One literal is broken outright ( indexOf("-1) ), and the visible text ends
//     in the middle of parseElement().
//   - The source is collapsed onto very long lines, so each "//" comment, as
//     laid out here, also covers the code that follows it on the same line.
// NOTE(review): hazards visible in the code itself - verify when restoring:
//   - Empty catch blocks hide I/O failures while writing the cache.
//   - out is closed only on the success path of run(); the BufferedReader in
//     tryLocalCacheFirst() is never closed.
//   - The "skip to next entry" while-loops advance r (or read further lines)
//     without a bounds/null check, so a missing end marker would run past the
//     end of the data.
package jalview.analysis; import java.io.*; import java.util.*; import jalview.io.*; import jalview.gui.*; import jalview.datamodel.*; public class SequenceFeatureFetcher implements Runnable { AlignmentI align; AlignmentPanel ap; public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap) { this.align = align; this.ap = ap; Thread thread = new Thread(this); thread.start(); } public void run() { String cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE"); RandomAccessFile out = null; try{ if (cache == null) { jalview.bin.Cache.setProperty("UNIPROT_CACHE", System.getProperty("user.home")+"/uniprot.xml"); cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE"); } File test = new File(cache); if( !test.exists() ) { out = new RandomAccessFile(cache, "rw"); out.writeBytes("\n"); out.writeBytes("\n"); } else { out = new RandomAccessFile(cache, "rw"); // open exisiting cache and remove from the end long lastLine = 0; String data; while ( (data = out.readLine()) != null) { if (data.indexOf("") > -1) lastLine = out.getFilePointer(); } out.seek(lastLine); } int seqIndex = 0; Vector sequences = align.getSequences(); while (seqIndex < sequences.size()) { ArrayList ids = new ArrayList(); for (int i = 0; seqIndex < sequences.size() && i < 50; seqIndex++, i++) { SequenceI sequence = (SequenceI) sequences.get(seqIndex); ids.add(sequence.getName()); } tryLocalCacheFirst(ids, align); if (ids.size() > 0) { StringBuffer remainingIds = new StringBuffer("uniprot:"); for (int i = 0; i < ids.size(); i++) remainingIds.append(ids.get(i) + ";"); EBIFetchClient ebi = new EBIFetchClient(); String[] result = ebi.fetchData(remainingIds.toString(), "xml", null); if(result!=null) ReadUniprotFile(result, out, align); } } if (out != null) { out.writeBytes("\n"); out.close(); } }catch(Exception ex){ex.printStackTrace();} } void ReadUniprotFile(String [] result, RandomAccessFile out, AlignmentI align) { SequenceI sequence = null; Vector features = null; String type, description, status, start, 
end, pdb = null; for (int r = 0; r < result.length; r++) { if(sequence==null && result[r].indexOf("")>-1) { long filePointer = 0; if(out!=null) try{ filePointer=out.getFilePointer(); out.writeBytes("\n"); }catch(Exception ex){} sequence = align.findName( parseElement( result[r], "" , out)) ; if(sequence==null) { System.out.println("Couldnt find sequence id. Suggestion is "+result[r]); // this entry has been suggested by ebi. // doesn't match id in alignment file try { out.setLength(filePointer); } catch (Exception ex) {} // now skip to next entry while( result[r].indexOf("")==-1) r++; } features = new Vector(); type=""; start="0"; end="0"; description=""; status=""; pdb=""; } if(sequence==null) continue; if( result[r].indexOf("-1) { pdb = parseValue( result[r], "value=" , out); sequence.setPDBId(pdb); } if(result[r].indexOf("feature type")>-1) { type = parseValue( result[r], "type=" , out); description = parseValue( result[r], "description=" , null ); status = parseValue ( result[r], "status=", null); while( result[r].indexOf("position")==-1) { r++; // } // r++; if(result[r].indexOf("begin")>-1) { start = parseValue( result[r], "position=" , out); end = parseValue( result[++r], "position=" , out); } else { start = parseValue( result[r], "position=" , out); end = parseValue( result[r], "position=" , null); } int sstart = Integer.parseInt(start); int eend = Integer.parseInt(end); if(out!=null) try{ out.writeBytes("\n"); }catch(Exception ex){} if(sstart>=sequence.getStart() && eend<=sequence.getEnd()) { SequenceFeature sf = new SequenceFeature(type, sstart, eend, description, status); features.add(sf); } } if(result[r].indexOf("")>-1) { if(features!=null) sequence.setSequenceFeatures( features ); features = null; sequence = null; if(out!=null) try{ out.writeBytes("\n"); }catch(Exception ex){} } } ap.RefreshPanels(); } void tryLocalCacheFirst(ArrayList ids, AlignmentI align) { ArrayList cacheData = new ArrayList(); try{ BufferedReader in = new BufferedReader( new 
FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE"))); // read through cache file, if the cache has sequences we're looking for // add the lines to a new String array, Readthis new array and // make sure we remove the ids from the list to retrieve from EBI String data; while( ( data=in.readLine())!=null) { if(data.indexOf("name")>-1) { String name = parseElement( data, "" , null) ; if(ids.contains( name ) ) { cacheData.add(""); cacheData.add(data); while( data.indexOf("")==-1) { data = in.readLine(); cacheData.add(data); } cacheData.add(data); ids.remove( name ); } } } } catch(Exception ex){ex.printStackTrace();} String [] localData = new String[cacheData.size()]; cacheData.toArray( localData ); if(localData!=null && localData.length>0) ReadUniprotFile(localData, null, align); } String parseValue(String line, String tag, RandomAccessFile out) { if(out!=null) try{ out.writeBytes(line+"\n"); }catch(Exception ex){} int index = line.indexOf(tag)+tag.length()+1; if(index==tag.length()) return ""; return line.substring( index, line.indexOf("\"", index+1) ); } String parseElement(String line, String tag, RandomAccessFile out) { if (out != null) try { out.writeBytes(line + "\n"); } catch (Exception ex) {} int index = line.indexOf(tag) + tag.length(); return line.substring(index, line.indexOf("