From 44b659b39011c69e40b43610c4a9b94501fd1bd5 Mon Sep 17 00:00:00 2001 From: amwaterhouse Date: Mon, 7 Feb 2005 17:05:01 +0000 Subject: [PATCH] New class for fetching sequence features --- src/jalview/analysis/SequenceFeatureFetcher.java | 268 ++++++++++++++++++++++ 1 file changed, 268 insertions(+) create mode 100755 src/jalview/analysis/SequenceFeatureFetcher.java diff --git a/src/jalview/analysis/SequenceFeatureFetcher.java b/src/jalview/analysis/SequenceFeatureFetcher.java new file mode 100755 index 0000000..dc8a3cd --- /dev/null +++ b/src/jalview/analysis/SequenceFeatureFetcher.java @@ -0,0 +1,268 @@ +package jalview.analysis; + +import java.io.*; +import java.util.*; +import jalview.io.*; +import jalview.gui.*; +import jalview.datamodel.*; + +public class SequenceFeatureFetcher implements Runnable +{ + AlignmentI align; + AlignmentPanel ap; + + public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap) + { + this.align = align; + this.ap = ap; + Thread thread = new Thread(this); + thread.start(); + } + + public void run() +{ + + String cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE"); + + RandomAccessFile out = null; + + try{ + if (cache == null) + { + jalview.bin.Cache.setProperty("UNIPROT_CACHE", System.getProperty("user.home/uniprot.xml")); + } + + + + File test = new File(cache); + if( !test.exists() ) + { + out = new RandomAccessFile(cache, "rw"); + out.writeBytes("\n"); + out.writeBytes("\n"); + } + else + { + out = new RandomAccessFile(cache, "rw"); + // open exisiting cache and remove from the end + long lastLine = 0; + String data; + while ( (data = out.readLine()) != null) + { + if (data.indexOf("") > -1) + lastLine = out.getFilePointer(); + + } + out.seek(lastLine); + } + + int seqIndex = 0; + Vector sequences = align.getSequences(); + + while (seqIndex < sequences.size()) + { + ArrayList ids = new ArrayList(); + for (int i = 0; seqIndex < sequences.size() && i < 50; seqIndex++, i++) + { + SequenceI sequence = (SequenceI) sequences.get(seqIndex); + ids.add(sequence.getName()); + } + + tryLocalCacheFirst(ids, align); + + if (ids.size() > 0) + { + StringBuffer remainingIds = new StringBuffer("uniprot:"); + for (int i = 0; i < ids.size(); i++) + remainingIds.append(ids.get(i) + ";"); + + EBIFetchClient ebi = new EBIFetchClient(); + String[] result = ebi.fetchData(remainingIds.toString(), "xml", null); + + if(result!=null) + ReadUniprotFile(result, out, align); + } + + } + + if (out != null) + { + out.writeBytes("\n"); + out.close(); + } + }catch(Exception ex){ex.printStackTrace();} + + +} + +void ReadUniprotFile(String [] result, RandomAccessFile out, AlignmentI align) +{ + SequenceI sequence = null; + Vector features = null; + String type, description, status, start, end, pdb = null; + + + for (int r = 0; r < result.length; r++) + { + if(sequence==null && result[r].indexOf("")>-1) + { + long filePointer = 0; + + if(out!=null) + try{ + filePointer=out.getFilePointer(); + out.writeBytes("\n"); + }catch(Exception ex){} + + sequence = align.findName( parseElement( result[r], "" , out)) ; + if(sequence==null) + { + System.out.println("Couldnt find sequence id. Suggestion is "+result[r]); + + // this entry has been suggested by ebi. + // doesn't match id in alignment file + try { out.setLength(filePointer); } catch (Exception ex) {} + // now skip to next entry + while( result[r].indexOf("")==-1) + r++; + } + + features = new Vector(); + type=""; start="0"; end="0"; description=""; status=""; pdb=""; + + } + + if(sequence==null) + continue; + + if( result[r].indexOf("-1) + { + pdb = parseValue( result[r], "value=" , out); + sequence.setPDBId(pdb); + } + + if(result[r].indexOf("feature type")>-1) + { + type = parseValue( result[r], "type=" , out); + description = parseValue( result[r], "description=" , null ); + status = parseValue ( result[r], "status=", null); + + while( result[r].indexOf("position")==-1) + { + r++; // + } + // r++; + if(result[r].indexOf("begin")>-1) + { + start = parseValue( result[r], "position=" , out); + end = parseValue( result[++r], "position=" , out); + } + else + { + start = parseValue( result[r], "position=" , out); + end = parseValue( result[r], "position=" , null); + } + int sstart = Integer.parseInt(start); + int eend = Integer.parseInt(end); + if(out!=null) + try{ out.writeBytes("\n"); }catch(Exception ex){} + + + if(sstart>=sequence.getStart() && eend<=sequence.getEnd()) + { + SequenceFeature sf = new SequenceFeature(type, + sstart, + eend, + description, + status); + features.add(sf); + } + } + + if(result[r].indexOf("")>-1) + { + if(features!=null) + sequence.setSequenceFeatures( features ); + features = null; + sequence = null; + if(out!=null) + try{ out.writeBytes("\n"); }catch(Exception ex){} + + } + } + + ap.RefreshPanels(); + +} + +void tryLocalCacheFirst(ArrayList ids, AlignmentI align) +{ + ArrayList cacheData = new ArrayList(); + try{ + BufferedReader in = new BufferedReader( + new FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE"))); + + // read through cache file, if the cache has sequences we're looking for + // add the lines to a new String array, Readthis new array and + // make sure we remove the ids from the list to retrieve from EBI + String data; + while( ( data=in.readLine())!=null) + { + if(data.indexOf("name")>-1) + { + String name = parseElement( data, "" , null) ; + if(ids.contains( name ) ) + { + cacheData.add(""); + cacheData.add(data); + while( data.indexOf("")==-1) + { + data = in.readLine(); + cacheData.add(data); + } + cacheData.add(data); + + ids.remove( name ); + } + } + } + } + catch(Exception ex){ex.printStackTrace();} + + String [] localData = new String[cacheData.size()]; + cacheData.toArray( localData ); + if(localData!=null && localData.length>0) + ReadUniprotFile(localData, null, align); +} + + +String parseValue(String line, String tag, RandomAccessFile out) +{ + if(out!=null) + try{ out.writeBytes(line+"\n"); }catch(Exception ex){} + + + int index = line.indexOf(tag)+tag.length()+1; + if(index==tag.length()) + return ""; + + return line.substring( index, line.indexOf("\"", index+1) ); +} + + +String parseElement(String line, String tag, RandomAccessFile out) +{ + if (out != null) + try + { + out.writeBytes(line + "\n"); + } + catch (Exception ex) + {} + + int index = line.indexOf(tag) + tag.length(); + return line.substring(index, line.indexOf("