\r
import java.util.*;\r
\r
+import org.exolab.castor.mapping.Mapping;\r
+\r
+import org.exolab.castor.xml.*;\r
+import jalview.analysis.AlignSeq;\r
+\r
+\r
\r
/**\r
* DOCUMENT ME!\r
*/\r
public class SequenceFeatureFetcher implements Runnable\r
{\r
- AlignmentI align;\r
- AlignmentPanel ap;\r
- ArrayList unknownSequences;\r
- CutAndPasteTransfer output = new CutAndPasteTransfer();\r
- StringBuffer sbuffer = new StringBuffer();\r
-\r
- /**\r
- * Creates a new SequenceFeatureFetcher object.\r
- *\r
- * @param align DOCUMENT ME!\r
- * @param ap DOCUMENT ME!\r
- */\r
- public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap)\r
+\r
+ AlignmentI align;\r
+ AlignmentPanel ap;\r
+ ArrayList unknownSequences;\r
+ CutAndPasteTransfer output = new CutAndPasteTransfer();\r
+ StringBuffer sbuffer = new StringBuffer();\r
+\r
+ Vector localCache = new Vector();\r
+\r
+ Vector getUniprotEntries(File file)\r
+ {\r
+\r
+ UniprotFile uni = new UniprotFile();\r
+ try\r
{\r
- unknownSequences = new ArrayList();\r
- this.align = align;\r
- this.ap = ap;\r
+ // 1. Load the Castor mapping from the /uniprot_mapping.xml resource\r
+ Mapping map = new Mapping(uni.getClass().getClassLoader());\r
+ java.net.URL url = uni.getClass().getResource("/uniprot_mapping.xml");\r
+ map.loadMapping(url);\r
+\r
+ // 2. Unmarshal the data\r
+ Unmarshaller unmar = new Unmarshaller();\r
+ unmar.setIgnoreExtraElements(true);\r
+ unmar.setMapping(map);\r
+\r
+ // Keep a handle on the reader so that it is always closed, even when\r
+ // unmarshalling throws; passing an anonymous FileReader leaked the\r
+ // underlying file handle.\r
+ FileReader reader = new FileReader(file);\r
+ try\r
+ {\r
+   uni = (UniprotFile) unmar.unmarshal(reader);\r
+ }\r
+ finally\r
+ {\r
+   reader.close();\r
+ }\r
+\r
+ // The entry list may be null (the caller checks for that), so only\r
+ // cache it when there is something to add.\r
+ Vector fetchedEntries = uni.getUniprotEntries();\r
+ if (fetchedEntries != null)\r
+ {\r
+   localCache.addAll(fetchedEntries);\r
+ }\r
+\r
+ // 3. (Disabled) Marshal the unmarshalled entries back out to the UNIPROT_CACHE file\r
+ // Marshaller marshaller = new Marshaller(\r
+ // new FileWriter(jalview.bin.Cache.getProperty("UNIPROT_CACHE"))\r
+ // );\r
+ // marshaller.setMapping(map);\r
+ // marshaller.marshal(uni);\r
\r
- Thread thread = new Thread(this);\r
- thread.start();\r
}\r
-\r
- /**\r
- * DOCUMENT ME!\r
- */\r
- public void run()\r
+ catch (Exception e)\r
{\r
- RandomAccessFile out = null;\r
+ System.out.println("Error getUniprotEntries() "+e);\r
+ // e.printStackTrace();\r
+ // if(!updateLocalCache)\r
+ // file.delete();\r
\r
- try\r
- {\r
- String cache = System.getProperty("user.home") +\r
- "/.jalview.uniprot.xml";\r
-\r
- File test = new File(cache);\r
-\r
- if (!test.exists())\r
- {\r
- out = new RandomAccessFile(cache, "rw");\r
- out.writeBytes("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");\r
- out.writeBytes("<UNIPROT_CACHE>\n");\r
- }\r
- else\r
- {\r
- out = new RandomAccessFile(cache, "rw");\r
-\r
- // open exisiting cache and remove </UNIPROT_CACHE> from the end\r
- long lastLine = 0;\r
- String data;\r
-\r
- while ((data = out.readLine()) != null)\r
- {\r
- if (data.indexOf("</entry>") > -1)\r
- {\r
- lastLine = out.getFilePointer();\r
- }\r
- }\r
-\r
- out.seek(lastLine);\r
- }\r
-\r
- int seqIndex = 0;\r
- Vector sequences = align.getSequences();\r
-\r
- while (seqIndex < sequences.size())\r
- {\r
- ArrayList ids = new ArrayList();\r
-\r
- for (int i = 0; (seqIndex < sequences.size()) && (i < 50);\r
- seqIndex++, i++)\r
- {\r
- SequenceI sequence = (SequenceI) sequences.get(seqIndex);\r
- ids.add(sequence.getName());\r
- }\r
-\r
- tryLocalCacheFirst(ids, align);\r
-\r
- if (ids.size() > 0)\r
- {\r
- StringBuffer remainingIds = new StringBuffer("uniprot:");\r
-\r
- for (int i = 0; i < ids.size(); i++)\r
- remainingIds.append(ids.get(i) + ";");\r
-\r
- EBIFetchClient ebi = new EBIFetchClient();\r
- String[] result = ebi.fetchData(remainingIds.toString(),\r
- "xml", null);\r
-\r
- if (result != null)\r
- {\r
- ReadUniprotFile(result, out, align);\r
- }\r
- }\r
- }\r
-\r
- if (out != null)\r
- {\r
- out.writeBytes("</UNIPROT_CACHE>\n");\r
- out.close();\r
- }\r
- }\r
- catch (Exception ex)\r
- {\r
- ex.printStackTrace();\r
- }\r
+ }\r
+ return uni.getUniprotEntries();\r
+ }\r
+\r
+ /**\r
+ * Creates a fetcher for the given alignment and immediately starts a new\r
+ * thread that executes run() on this instance.\r
+ *\r
+ * @param align alignment stored in the align field and processed by run()\r
+ * @param ap alignment panel stored in the ap field\r
+ */\r
+ public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap)\r
+ {\r
+   this.ap = ap;\r
+   this.align = align;\r
+   unknownSequences = new ArrayList();\r
+\r
+   // Started last, after every field above has been assigned.\r
+   new Thread(this).start();\r
+ }\r
+\r
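+ /**\r
+ * Requests Uniprot records for the alignment's sequence names from EBI in\r
+ * batches of at most 50 and passes each returned file to ReadUniprotFile().\r
+ * Afterwards any accumulated update messages are shown in a new frame, and\r
+ * names still listed in unknownSequences are handed to a WSWUBlastClient.\r
+ */\r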
+ public void run()\r
+ {\r
+ try\r
+ {\r
+ int seqIndex = 0;\r
+ Vector sequences = align.getSequences();\r
\r
- findMissingIds(align);\r
+ while (seqIndex < sequences.size())\r
+ {\r
+ Vector ids = new Vector();\r
\r
- if (sbuffer.length() > 0)\r
+ for (int i = 0; (seqIndex < sequences.size()) && (i < 50);\r
+ seqIndex++, i++)\r
{\r
- output.setText(\r
- "Your sequences have been matched to Uniprot. Some of the ids have been\n" +\r
- "altered, most likely the start/end residue will have been updated.\n" +\r
- "Save your alignment to maintain the updated id.\n\n" +\r
- sbuffer.toString());\r
- Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300);\r
+ SequenceI sequence = (SequenceI) sequences.get(seqIndex);\r
+ ids.add(sequence.getName());\r
+ unknownSequences.add(sequence.getName());\r
}\r
\r
- if (unknownSequences.size() > 0)\r
+ ///////////////////////////////////\r
+ ///READ FROM EBI\r
+ if (ids.size() > 0)\r
{\r
- //ignore for now!!!!!!!!!!\r
- // WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences);\r
+ StringBuffer remainingIds = new StringBuffer("uniprot:");\r
+ for (int i = 0; i < ids.size(); i++)\r
+ {\r
+ remainingIds.append(ids.get(i) + ";");\r
+ }\r
+ EBIFetchClient ebi = new EBIFetchClient();\r
+ File file = ebi.fetchDataAsFile(remainingIds.toString(),\r
+ "xml", null);\r
+\r
+\r
+ if (file != null)\r
+ {\r
+ ReadUniprotFile(file, align, ids);\r
+ }\r
}\r
-\r
- jalview.gui.PaintRefresher.Refresh(null, align);\r
+ }\r
}\r
-\r
- /**\r
- * DOCUMENT ME!\r
- *\r
- * @param result DOCUMENT ME!\r
- * @param out DOCUMENT ME!\r
- * @param align DOCUMENT ME!\r
- */\r
- void ReadUniprotFile(String[] result, RandomAccessFile out, AlignmentI align)\r
+ catch (Exception ex)\r
{\r
- SequenceI sequence = null;\r
- Vector features = null;\r
- String type;\r
- String description;\r
- String status;\r
- String start;\r
- String end;\r
- String pdb = null;\r
-\r
- for (int r = 0; r < result.length; r++)\r
- {\r
- if ((sequence == null) && (result[r].indexOf("<name>") > -1))\r
- {\r
- long filePointer = 0;\r
-\r
- if (out != null)\r
- {\r
- try\r
- {\r
- filePointer = out.getFilePointer();\r
- out.writeBytes("<entry>\n");\r
- }\r
- catch (Exception ex)\r
- {\r
- }\r
- }\r
-\r
- String seqName = parseElement(result[r], "<name>", out);\r
- sequence = align.findName(seqName);\r
-\r
- if (sequence == null)\r
- {\r
- sequence = align.findName(seqName.substring(0,\r
- seqName.indexOf('_')));\r
-\r
- if (sequence != null)\r
- {\r
- sbuffer.append("changing " + sequence.getName() +\r
- " to " + seqName + "\n");\r
- sequence.setName(seqName);\r
- }\r
- }\r
-\r
- if (sequence == null)\r
- {\r
- sbuffer.append("UNIPROT updated suggestion is " +\r
- result[r] + "\n");\r
- sequence = align.findName(result[r]);\r
-\r
- // this entry has been suggested by ebi.\r
- // doesn't match id in alignment file\r
- try\r
- {\r
- out.setLength(filePointer);\r
- }\r
- catch (Exception ex)\r
- {\r
- }\r
-\r
- // now skip to next entry\r
- while (result[r].indexOf("</entry>") == -1)\r
- r++;\r
- }\r
-\r
- features = new Vector();\r
- type = "";\r
- start = "0";\r
- end = "0";\r
- description = "";\r
- status = "";\r
- pdb = "";\r
- }\r
-\r
- if (sequence == null)\r
- {\r
- continue;\r
- }\r
-\r
- if (result[r].indexOf("<property type=\"pdb accession\"") > -1)\r
- {\r
- pdb = parseValue(result[r], "value=", out);\r
- sequence.setPDBId(pdb);\r
- }\r
-\r
- if (result[r].indexOf("feature type") > -1)\r
- {\r
- type = parseValue(result[r], "type=", out);\r
- description = parseValue(result[r], "description=", null);\r
- status = parseValue(result[r], "status=", null);\r
-\r
- while (result[r].indexOf("position") == -1)\r
- {\r
- r++; //<location>\r
- }\r
-\r
- // r++;\r
- if (result[r].indexOf("begin") > -1)\r
- {\r
- start = parseValue(result[r], "position=", out);\r
- end = parseValue(result[++r], "position=", out);\r
- }\r
- else\r
- {\r
- start = parseValue(result[r], "position=", out);\r
- end = parseValue(result[r], "position=", null);\r
- }\r
-\r
- int sstart = Integer.parseInt(start);\r
- int eend = Integer.parseInt(end);\r
-\r
- if (out != null)\r
- {\r
- try\r
- {\r
- out.writeBytes("</feature>\n");\r
- }\r
- catch (Exception ex)\r
- {\r
- }\r
- }\r
-\r
- SequenceFeature sf = new SequenceFeature(type, sstart, eend,\r
- description, status);\r
- features.add(sf);\r
- }\r
-\r
- if (result[r].indexOf("<sequence length=") > -1)\r
- {\r
- StringBuffer seqString = new StringBuffer();\r
-\r
- if (out != null)\r
- {\r
- try\r
- {\r
- out.writeBytes(result[r] + "\n");\r
- }\r
- catch (Exception ex)\r
- {\r
- }\r
- }\r
-\r
- while (result[++r].indexOf("</sequence>") == -1)\r
- {\r
- seqString.append(result[r]);\r
-\r
- if (out != null)\r
- {\r
- try\r
- {\r
- out.writeBytes(result[r] + "\n");\r
- }\r
- catch (Exception ex)\r
- {\r
- }\r
- }\r
- }\r
-\r
- if (out != null)\r
- {\r
- try\r
- {\r
- out.writeBytes(result[r] + "\n");\r
- }\r
- catch (Exception ex)\r
- {\r
- }\r
- }\r
-\r
- StringBuffer nonGapped = new StringBuffer();\r
-\r
- for (int i = 0; i < sequence.getSequence().length(); i++)\r
- {\r
- if (!jalview.util.Comparison.isGap(sequence.getCharAt(i)))\r
- {\r
- nonGapped.append(sequence.getCharAt(i));\r
- }\r
- }\r
-\r
- int absStart = seqString.toString().indexOf(nonGapped.toString());\r
-\r
- if (absStart == -1)\r
- {\r
- unknownSequences.add(sequence.getName());\r
- features = null;\r
- sbuffer.append(sequence.getName() +\r
- " SEQUENCE NOT %100 MATCH \n");\r
-\r
- continue;\r
- }\r
-\r
- int absEnd = absStart + nonGapped.toString().length();\r
- absStart += 1;\r
-\r
- if ((absStart != sequence.getStart()) ||\r
- (absEnd != sequence.getEnd()))\r
- {\r
- sbuffer.append("Updated: " + sequence.getName() + " " +\r
- sequence.getStart() + "/" + sequence.getEnd() +\r
- " to " + absStart + "/" + absEnd + "\n");\r
- }\r
-\r
- sequence.setStart(absStart);\r
- sequence.setEnd(absEnd);\r
- }\r
-\r
- if (result[r].indexOf("</entry>") > -1)\r
- {\r
- if (features != null)\r
- {\r
- sequence.setSequenceFeatures(features);\r
- }\r
-\r
- features = null;\r
- sequence = null;\r
-\r
- if (out != null)\r
- {\r
- try\r
- {\r
- out.writeBytes("</entry>\n");\r
- }\r
- catch (Exception ex)\r
- {\r
- }\r
- }\r
- }\r
- }\r
+ ex.printStackTrace();\r
}\r
\r
- /**\r
- * DOCUMENT ME!\r
- *\r
- * @param align DOCUMENT ME!\r
- */\r
- void findMissingIds(AlignmentI align)\r
+ if (sbuffer.length() > 0)\r
{\r
- String data;\r
- ArrayList cachedIds = new ArrayList();\r
-\r
- try\r
- {\r
- if(jalview.bin.Cache.getProperty("UNIPROT_CACHE")==null)\r
- return;\r
-\r
- BufferedReader in = new BufferedReader(new FileReader(\r
- jalview.bin.Cache.getProperty("UNIPROT_CACHE")));\r
-\r
- while ((data = in.readLine()) != null)\r
- {\r
- if (data.indexOf("name") > -1)\r
- {\r
- String name = parseElement(data, "<name>", null);\r
- cachedIds.add(name);\r
- }\r
- }\r
- }\r
- catch (Exception ex)\r
- {\r
- ex.printStackTrace();\r
- }\r
-\r
- for (int i = 0; i < align.getHeight(); i++)\r
- if (!cachedIds.contains(align.getSequenceAt(i).getName()))\r
- {\r
- unknownSequences.add(align.getSequenceAt(i).getName());\r
- }\r
+ output.setText(\r
+ "Your sequences have been matched to Uniprot. Some of the ids have been\n" +\r
+ "altered, most likely the start/end residue will have been updated.\n" +\r
+ "Save your alignment to maintain the updated id.\n\n" +\r
+ sbuffer.toString());\r
+ Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300);\r
}\r
\r
- /**\r
- * DOCUMENT ME!\r
- *\r
- * @param ids DOCUMENT ME!\r
- * @param align DOCUMENT ME!\r
- */\r
- void tryLocalCacheFirst(ArrayList ids, AlignmentI align)\r
+ if (unknownSequences.size() > 0)\r
{\r
- ArrayList cacheData = new ArrayList();\r
-\r
- try\r
- {\r
- if(jalview.bin.Cache.getProperty("UNIPROT_CACHE")==null)\r
- return;\r
-\r
- BufferedReader in = new BufferedReader(new FileReader(\r
- jalview.bin.Cache.getProperty("UNIPROT_CACHE")));\r
-\r
- // read through cache file, if the cache has sequences we're looking for\r
- // add the lines to a new String array, Readthis new array and\r
- // make sure we remove the ids from the list to retrieve from EBI\r
- String data;\r
-\r
- while ((data = in.readLine()) != null)\r
- {\r
- if (data.indexOf("name") > -1)\r
- {\r
- String name = parseElement(data, "<name>", null);\r
-\r
- if (ids.contains(name))\r
- {\r
- cacheData.add("<entry>");\r
- cacheData.add(data);\r
-\r
- while (data.indexOf("</entry>") == -1)\r
- {\r
- data = in.readLine();\r
- cacheData.add(data);\r
- }\r
-\r
- cacheData.add(data);\r
-\r
- ids.remove(name);\r
- }\r
- }\r
- }\r
- }\r
- catch (Exception ex)\r
- {\r
- ex.printStackTrace();\r
- }\r
-\r
- String[] localData = new String[cacheData.size()];\r
- cacheData.toArray(localData);\r
-\r
- if ((localData != null) && (localData.length > 0))\r
- {\r
- ReadUniprotFile(localData, null, align);\r
- }\r
+ new WSWUBlastClient(ap, align, unknownSequences);\r
}\r
+ else\r
+ ((Alignment)align).featuresAdded = true;\r
\r
- /**\r
- * DOCUMENT ME!\r
- *\r
- * @param line DOCUMENT ME!\r
- * @param tag DOCUMENT ME!\r
- * @param out DOCUMENT ME!\r
- *\r
- * @return DOCUMENT ME!\r
- */\r
- String parseValue(String line, String tag, RandomAccessFile out)\r
- {\r
- if (out != null)\r
- {\r
- try\r
- {\r
- out.writeBytes(line + "\n");\r
- }\r
- catch (Exception ex)\r
- {\r
- }\r
- }\r
\r
- int index = line.indexOf(tag) + tag.length() + 1;\r
+ ap.repaint();\r
+ }\r
\r
- if (index == tag.length())\r
- {\r
- return "";\r
- }\r
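+ /**\r
+ * Applies the Uniprot entries read from a fetched file to the alignment.\r
+ * Each entry is looked up in the alignment by accession and then by name.\r
+ * A matched sequence is removed from ids and unknownSequences; if its\r
+ * ungapped residues occur in the entry's sequence it receives the entry's\r
+ * features and an updated start/end, otherwise its name is put back into\r
+ * unknownSequences.\r
+ *\r
+ * @param file file passed to getUniprotEntries(); nothing is done if it does not exist\r
+ * @param align alignment searched with findName()\r
+ * @param ids names requested in the current batch; matched names are removed\r
+ */\r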
+ void ReadUniprotFile(File file, AlignmentI align, Vector ids)\r
+ {\r
+ if(!file.exists())\r
+ return;\r
\r
- return line.substring(index, line.indexOf("\"", index + 1));\r
- }\r
+ SequenceI sequence = null;\r
+ // String pdb = null;\r
\r
- /**\r
- * DOCUMENT ME!\r
- *\r
- * @param line DOCUMENT ME!\r
- * @param tag DOCUMENT ME!\r
- * @param out DOCUMENT ME!\r
- *\r
- * @return DOCUMENT ME!\r
- */\r
- String parseElement(String line, String tag, RandomAccessFile out)\r
- {\r
- if (out != null)\r
- {\r
- try\r
- {\r
- out.writeBytes(line + "\n");\r
- }\r
- catch (Exception ex)\r
- {\r
- }\r
- }\r
-\r
- int index = line.indexOf(tag) + tag.length();\r
+ Vector entries = getUniprotEntries(file);\r
\r
- return line.substring(index, line.indexOf("</"));\r
+ int i, iSize = entries==null?0:entries.size();\r
+ UniprotEntry entry;\r
+ for (i = 0; i < iSize; i++)\r
+ {\r
+ entry = (UniprotEntry) entries.elementAt(i);\r
+ String idmatch = entry.getAccession();\r
+ sequence = align.findName(idmatch);\r
+\r
+ if (sequence == null)\r
+ {\r
+ //Sequence maybe Name, not Accession\r
+ idmatch = entry.getName();\r
+ sequence = align.findName(idmatch);\r
+ }\r
+\r
+ if (sequence == null)\r
+ {\r
+ continue;\r
+ }\r
+\r
+ ids.remove(sequence.getName());\r
+ unknownSequences.remove(sequence.getName());\r
+\r
+ String nonGapped = AlignSeq.extractGaps("-. ", sequence.getSequence());\r
+\r
+ int absStart = entry.getUniprotSequence().getContent().indexOf(\r
+ nonGapped.toString());\r
+\r
+ if (absStart == -1)\r
+ {\r
+ unknownSequences.add(sequence.getName());\r
+ sbuffer.append(sequence.getName() +\r
+ " SEQUENCE NOT %100 MATCH \n");\r
+\r
+ continue;\r
+ }\r
+\r
+ int absEnd = absStart + nonGapped.toString().length();\r
+ absStart += 1;\r
+\r
+ if ( (absStart != sequence.getStart()) ||\r
+ (absEnd != sequence.getEnd()))\r
+ {\r
+ sbuffer.append("Updated: " + sequence.getName() + " " +\r
+ sequence.getStart() + "/" + sequence.getEnd() +\r
+ " to " + absStart + "/" + absEnd + "\n");\r
+ }\r
+\r
+ sequence.setSequenceFeatures(entry.getFeatures());\r
+ sequence.setStart(absStart);\r
+ sequence.setEnd(absEnd);\r
}\r
+ }\r
}\r
+\r
+\r