package jalview.analysis; import java.io.*; import java.util.*; import javax.swing.*; import jalview.io.*; import jalview.gui.*; import jalview.datamodel.*; public class SequenceFeatureFetcher implements Runnable { AlignmentI align; AlignmentPanel ap; ArrayList unknownSequences; JInternalFrame outputFrame = new JInternalFrame(); CutAndPasteTransfer output = new CutAndPasteTransfer(false); StringBuffer sbuffer = new StringBuffer(); public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap) { unknownSequences = new ArrayList(); this.align = align; this.ap = ap; Thread thread = new Thread(this); thread.start(); } public void run() { String cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE"); RandomAccessFile out = null; try{ if (cache == null) { jalview.bin.Cache.setProperty("UNIPROT_CACHE", System.getProperty("user.home")+"/uniprot.xml"); cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE"); } File test = new File(cache); if( !test.exists() ) { out = new RandomAccessFile(cache, "rw"); out.writeBytes("\n"); out.writeBytes("\n"); } else { out = new RandomAccessFile(cache, "rw"); // open exisiting cache and remove from the end long lastLine = 0; String data; while ( (data = out.readLine()) != null) { if (data.indexOf("") > -1) lastLine = out.getFilePointer(); } out.seek(lastLine); } int seqIndex = 0; Vector sequences = align.getSequences(); while (seqIndex < sequences.size()) { ArrayList ids = new ArrayList(); for (int i = 0; seqIndex < sequences.size() && i < 50; seqIndex++, i++) { SequenceI sequence = (SequenceI) sequences.get(seqIndex); ids.add(sequence.getName()); } tryLocalCacheFirst(ids, align); if (ids.size() > 0) { StringBuffer remainingIds = new StringBuffer("uniprot:"); for (int i = 0; i < ids.size(); i++) remainingIds.append(ids.get(i) + ";"); EBIFetchClient ebi = new EBIFetchClient(); String[] result = ebi.fetchData(remainingIds.toString(), "xml", null); if(result!=null) ReadUniprotFile(result, out, align); } } if (out != null) { out.writeBytes("\n"); out.close(); } }catch(Exception ex){ex.printStackTrace();} ap.RefreshPanels(); findMissingIds(align); if(sbuffer.length()>0) { output.formatForOutput(); outputFrame.setContentPane(output); output.setText("Your sequences have been matched to Uniprot. Some of the ids have been\n" +"altered, most likely the start/end residue will have been updated.\n" +"Save your alignment to maintain the updated id.\n\n"+sbuffer.toString()); Desktop.addInternalFrame(outputFrame, "Sequence names updated ", 600,300); } if(unknownSequences.size()>0) { //ignore for now!!!!!!!!!! // WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences); } } void ReadUniprotFile(String [] result, RandomAccessFile out, AlignmentI align) { SequenceI sequence = null; Vector features = null; String type, description, status, start, end, pdb = null; for (int r = 0; r < result.length; r++) { if(sequence==null && result[r].indexOf("")>-1) { long filePointer = 0; if(out!=null) try{ filePointer=out.getFilePointer(); out.writeBytes("\n"); }catch(Exception ex){} String seqName = parseElement( result[r], "" , out); sequence = align.findName( seqName ) ; if(sequence==null) { sequence = align.findName( seqName.substring(0, seqName.indexOf('_'))); if(sequence!=null) { sbuffer.append("changing "+sequence.getName()+" to "+seqName+"\n"); sequence.setName(seqName); } } if(sequence==null) { sbuffer.append("UNIPROT updated suggestion is "+result[r]+"\n"); sequence = align.findName( result[r] ) ; // this entry has been suggested by ebi. // doesn't match id in alignment file try { out.setLength(filePointer); } catch (Exception ex) {} // now skip to next entry while( result[r].indexOf("")==-1) r++; } features = new Vector(); type=""; start="0"; end="0"; description=""; status=""; pdb=""; } if(sequence==null) continue; if( result[r].indexOf("-1) { pdb = parseValue( result[r], "value=" , out); sequence.setPDBId(pdb); } if(result[r].indexOf("feature type")>-1) { type = parseValue( result[r], "type=" , out); description = parseValue( result[r], "description=" , null ); status = parseValue ( result[r], "status=", null); while( result[r].indexOf("position")==-1) { r++; // } // r++; if(result[r].indexOf("begin")>-1) { start = parseValue( result[r], "position=" , out); end = parseValue( result[++r], "position=" , out); } else { start = parseValue( result[r], "position=" , out); end = parseValue( result[r], "position=" , null); } int sstart = Integer.parseInt(start); int eend = Integer.parseInt(end); if(out!=null) try{ out.writeBytes("\n"); }catch(Exception ex){} SequenceFeature sf = new SequenceFeature(type, sstart, eend, description, status); features.add(sf); } if(result[r].indexOf("-1) { StringBuffer seqString = new StringBuffer(); if(out!=null) try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){} while(result[++r].indexOf("")==-1) { seqString.append(result[r]); if(out!=null) try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){} } if(out!=null) try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){} StringBuffer nonGapped = new StringBuffer(); for (int i = 0; i < sequence.getSequence().length(); i++) { if (!jalview.util.Comparison.isGap(sequence.getCharAt(i))) nonGapped.append(sequence.getCharAt(i)); } int absStart = seqString.toString().indexOf(nonGapped.toString()); if(absStart==-1) { unknownSequences.add(sequence.getName()); features = null; sbuffer.append(sequence.getName()+ " SEQUENCE NOT %100 MATCH \n"); continue; } int absEnd = absStart + nonGapped.toString().length(); absStart+=1; if(absStart!=sequence.getStart() || absEnd!=sequence.getEnd()) sbuffer.append("Updated: "+sequence.getName()+" "+ sequence.getStart()+"/"+sequence.getEnd()+" to "+ absStart+"/"+absEnd+"\n"); sequence.setStart(absStart); sequence.setEnd(absEnd); } if(result[r].indexOf("")>-1) { if(features!=null) sequence.setSequenceFeatures( features ); features = null; sequence = null; if(out!=null) try{ out.writeBytes("\n"); }catch(Exception ex){} } } } void findMissingIds(AlignmentI align) { String data; ArrayList cachedIds = new ArrayList(); try { BufferedReader in = new BufferedReader( new FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE"))); while ( (data = in.readLine()) != null) { if (data.indexOf("name") > -1) { String name = parseElement(data, "", null); cachedIds.add(name); } } } catch (Exception ex) { ex.printStackTrace(); } for(int i=0; i-1) { String name = parseElement( data, "" , null) ; if(ids.contains( name ) ) { cacheData.add(""); cacheData.add(data); while( data.indexOf("")==-1) { data = in.readLine(); cacheData.add(data); } cacheData.add(data); ids.remove( name ); } } } } catch(Exception ex){ex.printStackTrace();} String [] localData = new String[cacheData.size()]; cacheData.toArray( localData ); if(localData!=null && localData.length>0) ReadUniprotFile(localData, null, align); } String parseValue(String line, String tag, RandomAccessFile out) { if(out!=null) try{ out.writeBytes(line+"\n"); }catch(Exception ex){} int index = line.indexOf(tag)+tag.length()+1; if(index==tag.length()) return ""; return line.substring( index, line.indexOf("\"", index+1) ); } String parseElement(String line, String tag, RandomAccessFile out) { if (out != null) try { out.writeBytes(line + "\n"); } catch (Exception ex) {} int index = line.indexOf(tag) + tag.length(); return line.substring(index, line.indexOf("