new place for sequencefeaturefetcher
author: amwaterhouse <Andrew Waterhouse>
Thu, 5 May 2005 10:35:44 +0000 (10:35 +0000)
committer: amwaterhouse <Andrew Waterhouse>
Thu, 5 May 2005 10:35:44 +0000 (10:35 +0000)
src/jalview/io/SequenceFeatureFetcher.java [new file with mode: 0755]

diff --git a/src/jalview/io/SequenceFeatureFetcher.java b/src/jalview/io/SequenceFeatureFetcher.java
new file mode 100755 (executable)
index 0000000..7770ced
--- /dev/null
@@ -0,0 +1,371 @@
+package jalview.io;\r
+\r
+import java.io.*;\r
+import java.util.*;\r
+import javax.swing.*;\r
+import jalview.io.*;\r
+import jalview.gui.*;\r
+import jalview.datamodel.*;\r
+\r
+public class SequenceFeatureFetcher implements Runnable\r
+{\r
+  AlignmentI align;\r
+  AlignmentPanel ap;\r
+  ArrayList unknownSequences;\r
+  JInternalFrame outputFrame = new JInternalFrame();\r
+  CutAndPasteTransfer output = new CutAndPasteTransfer(false);\r
+  StringBuffer sbuffer = new StringBuffer();\r
+\r
+  /**\r
+   * Stores the alignment and its panel, creates an empty list of unknown\r
+   * sequences and immediately starts a new thread that executes run().\r
+   *\r
+   * @param align alignment whose sequences are to be annotated\r
+   * @param ap panel that is repainted once the fetch has finished\r
+   */\r
+  public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap)\r
+  {\r
+    this.ap = ap;\r
+    this.align = align;\r
+    this.unknownSequences = new ArrayList();\r
+\r
+    // Started last, after every field has been assigned.\r
+    new Thread(this).start();\r
+  }\r
+\r
+  public void run()\r
+{\r
+\r
+  String cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");\r
+\r
+  RandomAccessFile out = null;\r
+\r
+  try{\r
+    if (cache == null)\r
+    {\r
+      jalview.bin.Cache.setProperty("UNIPROT_CACHE", System.getProperty("user.home")+"/uniprot.xml");\r
+      cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");\r
+    }\r
+\r
+\r
+\r
+    File test = new File(cache);\r
+    if( !test.exists() )\r
+    {\r
+      out = new RandomAccessFile(cache, "rw");\r
+      out.writeBytes("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");\r
+      out.writeBytes("<UNIPROT_CACHE>\n");\r
+    }\r
+    else\r
+    {\r
+      out = new RandomAccessFile(cache, "rw");\r
+      // open exisiting cache and remove </UNIPROT_CACHE> from the end\r
+      long lastLine = 0;\r
+      String data;\r
+      while ( (data = out.readLine()) != null)\r
+      {\r
+        if (data.indexOf("</entry>") > -1)\r
+          lastLine = out.getFilePointer();\r
+\r
+      }\r
+      out.seek(lastLine);\r
+    }\r
+\r
+    int seqIndex = 0;\r
+    Vector sequences = align.getSequences();\r
+\r
+    while (seqIndex < sequences.size())\r
+    {\r
+      ArrayList ids = new ArrayList();\r
+      for (int i = 0; seqIndex < sequences.size() && i < 50; seqIndex++, i++)\r
+      {\r
+        SequenceI sequence = (SequenceI) sequences.get(seqIndex);\r
+        ids.add(sequence.getName());\r
+      }\r
+\r
+      tryLocalCacheFirst(ids, align);\r
+\r
+      if (ids.size() > 0)\r
+      {\r
+        StringBuffer remainingIds = new StringBuffer("uniprot:");\r
+        for (int i = 0; i < ids.size(); i++)\r
+          remainingIds.append(ids.get(i) + ";");\r
+\r
+         EBIFetchClient ebi = new EBIFetchClient();\r
+         String[] result = ebi.fetchData(remainingIds.toString(), "xml", null);\r
+\r
+        if(result!=null)\r
+          ReadUniprotFile(result, out, align);\r
+      }\r
+\r
+    }\r
+\r
+    if (out != null)\r
+    {\r
+      out.writeBytes("</UNIPROT_CACHE>\n");\r
+      out.close();\r
+    }\r
+  }catch(Exception ex){ex.printStackTrace();}\r
+\r
+  ap.repaint();\r
+  findMissingIds(align);\r
+  if(sbuffer.length()>0)\r
+  {\r
+    output.formatForOutput();\r
+    outputFrame.setContentPane(output);\r
+    output.setText("Your sequences have been matched to Uniprot. Some of the ids have been\n"\r
+                   +"altered, most likely the start/end residue will have been updated.\n"\r
+                   +"Save your alignment to maintain the updated id.\n\n"+sbuffer.toString());\r
+  Desktop.addInternalFrame(outputFrame, "Sequence names updated ", 600,300);\r
+\r
+  }\r
+\r
+  if(unknownSequences.size()>0)\r
+  {\r
+    //ignore for now!!!!!!!!!!\r
+  //  WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences);\r
+  }\r
+\r
+}\r
+\r
+/**\r
+ * Scans Uniprot XML, supplied one line per array element, and applies what\r
+ * it finds to the matching sequences of the alignment: PDB id, sequence\r
+ * features and start/end positions. The lines that were used are echoed to\r
+ * the cache file when one is given.\r
+ *\r
+ * An entry starts at the first line containing &lt;name&gt; and ends at the\r
+ * line containing &lt;/entry&gt;. Entries whose name matches no sequence are\r
+ * skipped and removed from the cache file again. Truncated input ends the\r
+ * scan instead of running past the end of the array.\r
+ *\r
+ * @param result lines of the XML to scan\r
+ * @param out cache file to append to, or null when nothing is to be written\r
+ * @param align alignment whose sequences are looked up by name\r
+ */\r
+void ReadUniprotFile(String [] result, RandomAccessFile out, AlignmentI align)\r
+{\r
+  SequenceI sequence = null;\r
+  Vector features = null;\r
+\r
+  for (int r = 0; r < result.length; r++)\r
+  {\r
+    if (sequence == null && result[r].indexOf("<name>") > -1)\r
+    {\r
+      // Remember where this entry starts in the cache so that it can be\r
+      // cut off again; -1 means the position is unknown.\r
+      long filePointer = -1;\r
+      if (out != null)\r
+      {\r
+        try\r
+        {\r
+          filePointer = out.getFilePointer();\r
+          out.writeBytes("<entry>\n");\r
+        }\r
+        catch (IOException ignored)\r
+        {\r
+          // writing the cache is best effort only\r
+        }\r
+      }\r
+\r
+      String seqName = parseElement(result[r], "<name>", out);\r
+      sequence = align.findName(seqName);\r
+      if (sequence == null)\r
+      {\r
+        // Retry with the part before the first underscore, if there is one.\r
+        int underscore = seqName.indexOf('_');\r
+        if (underscore > -1)\r
+        {\r
+          sequence = align.findName(seqName.substring(0, underscore));\r
+          if (sequence != null)\r
+          {\r
+            sbuffer.append("changing "+sequence.getName()+" to "+seqName+"\n");\r
+            sequence.setName(seqName);\r
+          }\r
+        }\r
+      }\r
+      if (sequence == null)\r
+      {\r
+        sbuffer.append("UNIPROT updated suggestion is "+result[r]+"\n");\r
+        // NOTE(review): this looks up the raw line, markup included - confirm\r
+        // whether findName can ever match it.\r
+        sequence = align.findName(result[r]);\r
+\r
+        // This entry was suggested by EBI but matches no id in the\r
+        // alignment: remove what was just written to the cache.\r
+        if (out != null && filePointer >= 0)\r
+        {\r
+          try\r
+          {\r
+            out.setLength(filePointer);\r
+          }\r
+          catch (IOException ignored)\r
+          {\r
+            // writing the cache is best effort only\r
+          }\r
+        }\r
+\r
+        // Now skip to the end of this entry.\r
+        while (r < result.length && result[r].indexOf("</entry>") == -1)\r
+        {\r
+          r++;\r
+        }\r
+        if (r >= result.length)\r
+        {\r
+          break;\r
+        }\r
+      }\r
+\r
+      features = new Vector();\r
+    }\r
+\r
+    if (sequence == null)\r
+    {\r
+      continue;\r
+    }\r
+\r
+    if (result[r].indexOf("<property type=\"pdb accession\"") > -1)\r
+    {\r
+      sequence.setPDBId(parseValue(result[r], "value=", out));\r
+    }\r
+\r
+    if (result[r].indexOf("feature type") > -1)\r
+    {\r
+      String type = parseValue(result[r], "type=", out);\r
+      String description = parseValue(result[r], "description=", null);\r
+      String status = parseValue(result[r], "status=", null);\r
+\r
+      // Move on to the first line that carries a position attribute.\r
+      while (r < result.length && result[r].indexOf("position") == -1)\r
+      {\r
+        r++;\r
+      }\r
+      if (r >= result.length)\r
+      {\r
+        break;\r
+      }\r
+\r
+      // A line containing "begin" is followed by the line holding the end\r
+      // position; any other position line gives start and end at once.\r
+      String start = parseValue(result[r], "position=", out);\r
+      String end = start;\r
+      if (result[r].indexOf("begin") > -1)\r
+      {\r
+        if (r + 1 >= result.length)\r
+        {\r
+          break;\r
+        }\r
+        end = parseValue(result[++r], "position=", out);\r
+      }\r
+\r
+      writeCacheLine(out, "</feature>");\r
+\r
+      int sstart;\r
+      int eend;\r
+      try\r
+      {\r
+        sstart = Integer.parseInt(start);\r
+        eend = Integer.parseInt(end);\r
+      }\r
+      catch (NumberFormatException ex)\r
+      {\r
+        // One unreadable feature must not abort the whole annotation run.\r
+        System.err.println("Skipping feature with non-numeric position: "\r
+                           + start + "/" + end);\r
+        continue;\r
+      }\r
+\r
+      // features is null once the sequence was found not to match.\r
+      if (features != null)\r
+      {\r
+        features.add(new SequenceFeature(type, sstart, eend, description, status));\r
+      }\r
+    }\r
+\r
+    if (result[r].indexOf("<sequence") > -1)\r
+    {\r
+      StringBuffer seqString = new StringBuffer();\r
+\r
+      // Copy the opening line, the residue lines and the closing line to the\r
+      // cache while concatenating the residue lines.\r
+      writeCacheLine(out, result[r]);\r
+      r++;\r
+      while (r < result.length && result[r].indexOf("</sequence>") == -1)\r
+      {\r
+        seqString.append(result[r]);\r
+        writeCacheLine(out, result[r]);\r
+        r++;\r
+      }\r
+      if (r >= result.length)\r
+      {\r
+        break;\r
+      }\r
+      writeCacheLine(out, result[r]);\r
+\r
+      // The alignment's sequence with all gap characters removed.\r
+      StringBuffer nonGapped = new StringBuffer();\r
+      for (int i = 0; i < sequence.getSequence().length(); i++)\r
+      {\r
+        if (!jalview.util.Comparison.isGap(sequence.getCharAt(i)))\r
+        {\r
+          nonGapped.append(sequence.getCharAt(i));\r
+        }\r
+      }\r
+      String ungapped = nonGapped.toString();\r
+\r
+      int absStart = seqString.toString().indexOf(ungapped);\r
+      if (absStart == -1)\r
+      {\r
+        unknownSequences.add(sequence.getName());\r
+        features = null;\r
+        sbuffer.append(sequence.getName()+ " SEQUENCE NOT %100 MATCH \n");\r
+        continue;\r
+      }\r
+\r
+      // Convert the 0-based match offset into 1-based start/end positions.\r
+      int absEnd = absStart + ungapped.length();\r
+      absStart += 1;\r
+\r
+      if (absStart != sequence.getStart() || absEnd != sequence.getEnd())\r
+      {\r
+        sbuffer.append("Updated: "+sequence.getName()+" "+\r
+                       sequence.getStart()+"/"+sequence.getEnd()+"  to  "+ absStart+"/"+absEnd+"\n");\r
+      }\r
+\r
+      sequence.setStart(absStart);\r
+      sequence.setEnd(absEnd);\r
+    }\r
+\r
+    if (result[r].indexOf("</entry>") > -1)\r
+    {\r
+      if (features != null)\r
+      {\r
+        sequence.setSequenceFeatures(features);\r
+      }\r
+      features = null;\r
+      sequence = null;\r
+      writeCacheLine(out, "</entry>");\r
+    }\r
+  }\r
+}\r
+\r
+/**\r
+ * Appends the line plus a newline to the cache file; does nothing when out\r
+ * is null. Write failures are ignored because the cache is best effort.\r
+ */\r
+private void writeCacheLine(RandomAccessFile out, String line)\r
+{\r
+  if (out == null)\r
+  {\r
+    return;\r
+  }\r
+  try\r
+  {\r
+    out.writeBytes(line + "\n");\r
+  }\r
+  catch (IOException ignored)\r
+  {\r
+    // writing the cache is best effort only\r
+  }\r
+}\r
+\r
+/**\r
+ * Adds to unknownSequences the name of every sequence in the alignment that\r
+ * does not occur as a &lt;name&gt; element in the file named by the\r
+ * UNIPROT_CACHE property.\r
+ *\r
+ * @param align alignment whose sequence names are checked\r
+ */\r
+void findMissingIds(AlignmentI align)\r
+{\r
+  HashSet cachedIds = new HashSet();\r
+  BufferedReader in = null;\r
+\r
+  try\r
+  {\r
+    in = new BufferedReader(\r
+        new FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE")));\r
+\r
+    String data;\r
+    while ( (data = in.readLine()) != null)\r
+    {\r
+      // Only hand complete <name>...</ lines to parseElement. Other lines\r
+      // that merely contain the word "name" would make it throw and end the\r
+      // scan early, reporting every later id as unknown.\r
+      int tagStart = data.indexOf("<name>");\r
+      if (tagStart > -1 && data.indexOf("</") >= tagStart + "<name>".length())\r
+      {\r
+        cachedIds.add(parseElement(data, "<name>", null));\r
+      }\r
+    }\r
+  }\r
+  catch (Exception ex)\r
+  {\r
+    ex.printStackTrace();\r
+  }\r
+  finally\r
+  {\r
+    if (in != null)\r
+    {\r
+      try\r
+      {\r
+        in.close();\r
+      }\r
+      catch (IOException ex)\r
+      {\r
+        ex.printStackTrace();\r
+      }\r
+    }\r
+  }\r
+\r
+  for (int i = 0; i < align.getHeight(); i++)\r
+  {\r
+    String name = align.getSequenceAt(i).getName();\r
+    if (!cachedIds.contains(name))\r
+    {\r
+      unknownSequences.add(name);\r
+    }\r
+  }\r
+}\r
+\r
+/**\r
+ * Serves as many of the requested ids as possible from the file named by the\r
+ * UNIPROT_CACHE property. The lines of every cached entry whose name is in\r
+ * ids are collected and passed to ReadUniprotFile, and that name is removed\r
+ * from ids so that only the remaining ids have to be retrieved from EBI.\r
+ *\r
+ * @param ids names to look for; names found in the cache are removed\r
+ * @param align alignment handed on to ReadUniprotFile\r
+ */\r
+void tryLocalCacheFirst(ArrayList ids, AlignmentI align)\r
+{\r
+  ArrayList cacheData = new ArrayList();\r
+  BufferedReader in = null;\r
+\r
+  try\r
+  {\r
+    in = new BufferedReader(\r
+        new FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE")));\r
+\r
+    String data;\r
+    while ( (data = in.readLine()) != null)\r
+    {\r
+      // Only complete <name>...</ lines identify an entry; other lines that\r
+      // merely contain the word "name" are ignored.\r
+      int tagStart = data.indexOf("<name>");\r
+      if (tagStart == -1 || data.indexOf("</") < tagStart + "<name>".length())\r
+      {\r
+        continue;\r
+      }\r
+\r
+      String name = parseElement(data, "<name>", null);\r
+      if (!ids.contains(name))\r
+      {\r
+        continue;\r
+      }\r
+\r
+      // Gather the entry separately so that an entry cut short by the end of\r
+      // the file is discarded as a whole.\r
+      ArrayList entry = new ArrayList();\r
+      entry.add("<entry>");\r
+      entry.add(data);\r
+      while (data != null && data.indexOf("</entry>") == -1)\r
+      {\r
+        data = in.readLine();\r
+        if (data != null)\r
+        {\r
+          entry.add(data);\r
+        }\r
+      }\r
+      if (data == null)\r
+      {\r
+        // Incomplete entry: leave the id in the list to be fetched again.\r
+        break;\r
+      }\r
+\r
+      cacheData.addAll(entry);\r
+\r
+      // Remove every occurrence, the alignment may contain the name twice.\r
+      while (ids.remove(name))\r
+      {\r
+        // keep removing\r
+      }\r
+    }\r
+  }\r
+  catch (Exception ex)\r
+  {\r
+    ex.printStackTrace();\r
+  }\r
+  finally\r
+  {\r
+    if (in != null)\r
+    {\r
+      try\r
+      {\r
+        in.close();\r
+      }\r
+      catch (IOException ex)\r
+      {\r
+        ex.printStackTrace();\r
+      }\r
+    }\r
+  }\r
+\r
+  if (cacheData.size() > 0)\r
+  {\r
+    String [] localData = new String[cacheData.size()];\r
+    cacheData.toArray( localData );\r
+    ReadUniprotFile(localData, null, align);\r
+  }\r
+}\r
+\r
+\r
+/**\r
+ * Returns the double-quoted value that follows the given attribute prefix in\r
+ * the line, e.g. tag "type=" in <code>type="x"</code> gives "x". When out is\r
+ * not null the whole line is first appended to it, followed by a newline.\r
+ *\r
+ * @param line line to search\r
+ * @param tag attribute name including the equals sign, e.g. "position="\r
+ * @param out cache file to echo the line to, or null\r
+ * @return the value between the quotes, which may be empty; "" when the tag\r
+ *         or the closing quote is missing\r
+ */\r
+String parseValue(String line, String tag, RandomAccessFile out)\r
+{\r
+  if (out != null)\r
+  {\r
+    try\r
+    {\r
+      out.writeBytes(line + "\n");\r
+    }\r
+    catch (IOException ignored)\r
+    {\r
+      // writing the cache is best effort only\r
+    }\r
+  }\r
+\r
+  int tagStart = line.indexOf(tag);\r
+  if (tagStart == -1)\r
+  {\r
+    return "";\r
+  }\r
+\r
+  // Skip the tag and the opening quote that follows it.\r
+  int valueStart = tagStart + tag.length() + 1;\r
+\r
+  // Search from valueStart itself so that an empty value ("") is found too.\r
+  int valueEnd = line.indexOf("\"", valueStart);\r
+  if (valueEnd == -1)\r
+  {\r
+    return "";\r
+  }\r
+\r
+  return line.substring(valueStart, valueEnd);\r
+}\r
+\r
+\r
+/**\r
+ * Returns the text between the given opening tag and the next "&lt;/" in\r
+ * the line, e.g. tag "&lt;name&gt;" in\r
+ * <code>&lt;name&gt;abc&lt;/name&gt;</code> gives "abc". When out is not\r
+ * null the whole line is first appended to it, followed by a newline.\r
+ *\r
+ * @param line line to search\r
+ * @param tag opening tag including the angle brackets\r
+ * @param out cache file to echo the line to, or null\r
+ * @return the element text, or "" when the tag or a following "&lt;/" is\r
+ *         missing\r
+ */\r
+String parseElement(String line, String tag, RandomAccessFile out)\r
+{\r
+  if (out != null)\r
+  {\r
+    try\r
+    {\r
+      out.writeBytes(line + "\n");\r
+    }\r
+    catch (IOException ignored)\r
+    {\r
+      // writing the cache is best effort only\r
+    }\r
+  }\r
+\r
+  int tagStart = line.indexOf(tag);\r
+  if (tagStart == -1)\r
+  {\r
+    return "";\r
+  }\r
+\r
+  // Look for the closing marker only behind the opening tag.\r
+  int contentStart = tagStart + tag.length();\r
+  int contentEnd = line.indexOf("</", contentStart);\r
+  if (contentEnd == -1)\r
+  {\r
+    return "";\r
+  }\r
+\r
+  return line.substring(contentStart, contentEnd);\r
+}\r
+}\r