From 9270f5adbff6622400c8fc933ebdb652cbb760f5 Mon Sep 17 00:00:00 2001 From: amwaterhouse Date: Thu, 5 May 2005 10:35:44 +0000 Subject: [PATCH] new place for sequencefeaturefetcher --- src/jalview/io/SequenceFeatureFetcher.java | 371 ++++++++++++++++++++++++++++ 1 file changed, 371 insertions(+) create mode 100755 src/jalview/io/SequenceFeatureFetcher.java diff --git a/src/jalview/io/SequenceFeatureFetcher.java b/src/jalview/io/SequenceFeatureFetcher.java new file mode 100755 index 0000000..7770ced --- /dev/null +++ b/src/jalview/io/SequenceFeatureFetcher.java @@ -0,0 +1,371 @@ +package jalview.io; + +import java.io.*; +import java.util.*; +import javax.swing.*; +import jalview.io.*; +import jalview.gui.*; +import jalview.datamodel.*; + +public class SequenceFeatureFetcher implements Runnable +{ + AlignmentI align; + AlignmentPanel ap; + ArrayList unknownSequences; + JInternalFrame outputFrame = new JInternalFrame(); + CutAndPasteTransfer output = new CutAndPasteTransfer(false); + StringBuffer sbuffer = new StringBuffer(); + + public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap) + { + unknownSequences = new ArrayList(); + this.align = align; + this.ap = ap; + Thread thread = new Thread(this); + thread.start(); + } + + public void run() +{ + + String cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE"); + + RandomAccessFile out = null; + + try{ + if (cache == null) + { + jalview.bin.Cache.setProperty("UNIPROT_CACHE", System.getProperty("user.home")+"/uniprot.xml"); + cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE"); + } + + + + File test = new File(cache); + if( !test.exists() ) + { + out = new RandomAccessFile(cache, "rw"); + out.writeBytes("\n"); + out.writeBytes("\n"); + } + else + { + out = new RandomAccessFile(cache, "rw"); + // open exisiting cache and remove from the end + long lastLine = 0; + String data; + while ( (data = out.readLine()) != null) + { + if (data.indexOf("") > -1) + lastLine = out.getFilePointer(); + + } + out.seek(lastLine); + } + + int seqIndex = 0; + Vector sequences = align.getSequences(); + + while (seqIndex < sequences.size()) + { + ArrayList ids = new ArrayList(); + for (int i = 0; seqIndex < sequences.size() && i < 50; seqIndex++, i++) + { + SequenceI sequence = (SequenceI) sequences.get(seqIndex); + ids.add(sequence.getName()); + } + + tryLocalCacheFirst(ids, align); + + if (ids.size() > 0) + { + StringBuffer remainingIds = new StringBuffer("uniprot:"); + for (int i = 0; i < ids.size(); i++) + remainingIds.append(ids.get(i) + ";"); + + EBIFetchClient ebi = new EBIFetchClient(); + String[] result = ebi.fetchData(remainingIds.toString(), "xml", null); + + if(result!=null) + ReadUniprotFile(result, out, align); + } + + } + + if (out != null) + { + out.writeBytes("\n"); + out.close(); + } + }catch(Exception ex){ex.printStackTrace();} + + ap.repaint(); + findMissingIds(align); + if(sbuffer.length()>0) + { + output.formatForOutput(); + outputFrame.setContentPane(output); + output.setText("Your sequences have been matched to Uniprot. Some of the ids have been\n" + +"altered, most likely the start/end residue will have been updated.\n" + +"Save your alignment to maintain the updated id.\n\n"+sbuffer.toString()); + Desktop.addInternalFrame(outputFrame, "Sequence names updated ", 600,300); + + } + + if(unknownSequences.size()>0) + { + //ignore for now!!!!!!!!!! + // WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences); + } + +} + +void ReadUniprotFile(String [] result, RandomAccessFile out, AlignmentI align) +{ + SequenceI sequence = null; + Vector features = null; + String type, description, status, start, end, pdb = null; + + + for (int r = 0; r < result.length; r++) + { + if(sequence==null && result[r].indexOf("")>-1) + { + long filePointer = 0; + + if(out!=null) + try{ + filePointer=out.getFilePointer(); + out.writeBytes("\n"); + }catch(Exception ex){} + + String seqName = parseElement( result[r], "" , out); + sequence = align.findName( seqName ) ; + if(sequence==null) + { + sequence = align.findName( seqName.substring(0, seqName.indexOf('_'))); + if(sequence!=null) + { + sbuffer.append("changing "+sequence.getName()+" to "+seqName+"\n"); + sequence.setName(seqName); + } + } + if(sequence==null) + { + sbuffer.append("UNIPROT updated suggestion is "+result[r]+"\n"); + sequence = align.findName( result[r] ) ; + + // this entry has been suggested by ebi. + // doesn't match id in alignment file + try { out.setLength(filePointer); } catch (Exception ex) {} + // now skip to next entry + while( result[r].indexOf("")==-1) + r++; + } + + features = new Vector(); + type=""; start="0"; end="0"; description=""; status=""; pdb=""; + + } + + if(sequence==null) + continue; + + if( result[r].indexOf("-1) + { + pdb = parseValue( result[r], "value=" , out); + sequence.setPDBId(pdb); + } + + if(result[r].indexOf("feature type")>-1) + { + type = parseValue( result[r], "type=" , out); + description = parseValue( result[r], "description=" , null ); + status = parseValue ( result[r], "status=", null); + + while( result[r].indexOf("position")==-1) + { + r++; // + } + // r++; + if(result[r].indexOf("begin")>-1) + { + start = parseValue( result[r], "position=" , out); + end = parseValue( result[++r], "position=" , out); + } + else + { + start = parseValue( result[r], "position=" , out); + end = parseValue( result[r], "position=" , null); + } + int sstart = Integer.parseInt(start); + int eend = Integer.parseInt(end); + if(out!=null) + try{ out.writeBytes("\n"); }catch(Exception ex){} + + SequenceFeature sf = new SequenceFeature(type, + sstart, + eend, + description, + status); + features.add(sf); + } + + if(result[r].indexOf("-1) + { + StringBuffer seqString = new StringBuffer(); + + if(out!=null) + try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){} + + while(result[++r].indexOf("")==-1) + { + seqString.append(result[r]); + if(out!=null) + try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){} + } + + if(out!=null) + try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){} + + StringBuffer nonGapped = new StringBuffer(); + for (int i = 0; i < sequence.getSequence().length(); i++) + { + if (!jalview.util.Comparison.isGap(sequence.getCharAt(i))) + nonGapped.append(sequence.getCharAt(i)); + } + + int absStart = seqString.toString().indexOf(nonGapped.toString()); + if(absStart==-1) + { + unknownSequences.add(sequence.getName()); + features = null; + sbuffer.append(sequence.getName()+ " SEQUENCE NOT %100 MATCH \n"); + continue; + } + + int absEnd = absStart + nonGapped.toString().length(); + absStart+=1; + + if(absStart!=sequence.getStart() || absEnd!=sequence.getEnd()) + sbuffer.append("Updated: "+sequence.getName()+" "+ + sequence.getStart()+"/"+sequence.getEnd()+" to "+ absStart+"/"+absEnd+"\n"); + + + sequence.setStart(absStart); + sequence.setEnd(absEnd); + + } + + if(result[r].indexOf("")>-1) + { + if(features!=null) + sequence.setSequenceFeatures( features ); + features = null; + sequence = null; + if(out!=null) + try{ out.writeBytes("\n"); }catch(Exception ex){} + + } + } +} + +void findMissingIds(AlignmentI align) +{ + String data; + ArrayList cachedIds = new ArrayList(); + + try + { + BufferedReader in = new BufferedReader( + new FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE"))); + + while ( (data = in.readLine()) != null) + { + if (data.indexOf("name") > -1) + { + String name = parseElement(data, "", null); + cachedIds.add(name); + } + } + } + catch (Exception ex) + { ex.printStackTrace(); } + + for(int i=0; i-1) + { + String name = parseElement( data, "" , null) ; + if(ids.contains( name ) ) + { + cacheData.add(""); + cacheData.add(data); + while( data.indexOf("")==-1) + { + data = in.readLine(); + cacheData.add(data); + } + cacheData.add(data); + + ids.remove( name ); + } + } + } + } + catch(Exception ex){ex.printStackTrace();} + + String [] localData = new String[cacheData.size()]; + cacheData.toArray( localData ); + if(localData!=null && localData.length>0) + ReadUniprotFile(localData, null, align); +} + + +String parseValue(String line, String tag, RandomAccessFile out) +{ + if(out!=null) + try{ out.writeBytes(line+"\n"); }catch(Exception ex){} + + + int index = line.indexOf(tag)+tag.length()+1; + if(index==tag.length()) + return ""; + + return line.substring( index, line.indexOf("\"", index+1) ); +} + + +String parseElement(String line, String tag, RandomAccessFile out) +{ + if (out != null) + try + { + out.writeBytes(line + "\n"); + } + catch (Exception ex) + {} + + int index = line.indexOf(tag) + tag.length(); + return line.substring(index, line.indexOf("