* along with this program; if not, write to the Free Software\r
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA\r
*/\r
-\r
package jalview.io;\r
\r
+import jalview.datamodel.*;\r
+\r
+import jalview.gui.*;\r
+\r
import java.io.*;\r
+\r
import java.util.*;\r
-import javax.swing.*;\r
-import jalview.io.*;\r
-import jalview.gui.*;\r
-import jalview.datamodel.*;\r
\r
+import org.exolab.castor.mapping.Mapping;\r
+\r
+import org.exolab.castor.xml.*;\r
+import jalview.analysis.AlignSeq;\r
+\r
+\r
+\r
+/**\r
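+ * Fetches Uniprot entries from EBI for the sequences in an alignment's
+ * dataset and applies the returned sequence features, PDB entries and
+ * start/end positions to the matching sequences. The work is done in
+ * run(), which the two-argument constructor starts on a new thread.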
+ *\r
+ * @author $author$\r
+ * @version $Revision$\r
+ */\r
public class SequenceFeatureFetcher implements Runnable\r
{\r
+\r
AlignmentI align;\r
+ AlignmentI dataset;\r
AlignmentPanel ap;\r
ArrayList unknownSequences;\r
CutAndPasteTransfer output = new CutAndPasteTransfer();\r
StringBuffer sbuffer = new StringBuffer();\r
+ boolean uniprotFlag = false;\r
+\r
+ /**
+ * Creates a fetcher with no alignment or panel set. Unlike the
+ * two-argument constructor, this one does not start a thread.
+ */
+ public SequenceFeatureFetcher()
+ {}
\r
+ /**
+  * Unmarshals a Uniprot XML file using the Castor mapping loaded from the
+  * /uniprot_mapping.xml resource.
+  *
+  * @param file the XML file to read
+  * @return getUniprotEntries() of the unmarshalled UniprotFile; if loading
+  *         the mapping or unmarshalling fails, the error is printed and the
+  *         value comes from a freshly constructed UniprotFile instead
+  */
+ public Vector getUniprotEntries(File file)
+ {
+   UniprotFile uni = new UniprotFile();
+   FileReader reader = null;
+   try
+   {
+     // 1. Load the mapping information from the file
+     Mapping map = new Mapping(uni.getClass().getClassLoader());
+     java.net.URL url = getClass().getResource("/uniprot_mapping.xml");
+     map.loadMapping(url);
+
+     // 2. Unmarshal the data
+     Unmarshaller unmar = new Unmarshaller();
+     unmar.setIgnoreExtraElements(true);
+     unmar.setMapping(map);
+     reader = new FileReader(file);
+     uni = (UniprotFile) unmar.unmarshal(reader);
+   }
+   catch (Exception e)
+   {
+     System.out.println("Error getUniprotEntries() "+e);
+   }
+   finally
+   {
+     // Release the file handle whether or not unmarshalling succeeded
+     if (reader != null)
+     {
+       try
+       {
+         reader.close();
+       }
+       catch (IOException ignored)
+       {
+         // nothing useful can be done if close fails
+       }
+     }
+   }
+   return uni.getUniprotEntries();
+ }
+\r
+ /**
+ * Creates a new SequenceFeatureFetcher object and immediately starts a
+ * new thread that executes run() on it.
+ *
+ * @param align alignment to work on; its dataset (align.getDataset()) is also stored
+ * @param ap alignment panel stored alongside the alignment
+ */
public SequenceFeatureFetcher(AlignmentI align, AlignmentPanel ap)
{
unknownSequences = new ArrayList();
this.align = align;
+ this.dataset = align.getDataset();
this.ap = ap;
+
Thread thread = new Thread(this);
thread.start();
}
\r
+ /**
+ * Walks the dataset sequences in batches of up to 50. Sequences without
+ * sequence features are recorded in unknownSequences and their names are
+ * sent to EBI as a single "uniprot:" query; any returned file is handed
+ * to ReadUniprotFile. Afterwards any collected update messages are shown
+ * in an internal frame and promptBeforeBlast() is called.
+ */
public void run()
-{
-
- String cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");
-
- RandomAccessFile out = null;
-
- try{
- if (cache == null)
+ {
+ try
{
- jalview.bin.Cache.setProperty("UNIPROT_CACHE", System.getProperty("user.home")+"/uniprot.xml");
- cache = jalview.bin.Cache.getProperty("UNIPROT_CACHE");
- }
+ int seqIndex = 0;
+ Vector sequences = dataset.getSequences();

+ while (seqIndex < sequences.size())
+ {
+ Vector ids = new Vector();

+ for (int i = 0; (seqIndex < sequences.size()) && (i < 50);
+ seqIndex++, i++)
+ {
+ Sequence sequence = (Sequence) sequences.get(seqIndex);
+ if(sequence.getSequenceFeatures()==null)
+ {
+ if (!ids.contains(sequence.getName()))
+ {
+ ids.add(sequence.getName());
+ unknownSequences.add(sequence);
+ }
+ }
+ }

- File test = new File(cache);
- if( !test.exists() )
+ ///////////////////////////////////
+ ///READ FROM EBI
+ if (ids.size() > 0)
+ {
+ StringBuffer remainingIds = new StringBuffer("uniprot:");
+ for (int i = 0; i < ids.size(); i++)
+ {
+ String id = ids.get(i).toString();
+ if(id.indexOf("|")>-1)
+ {
+ // Name of the form db|accession|name: query only the part
+ // after the last '|'
+ remainingIds.append(id.substring(id.lastIndexOf("|") + 1));
+ uniprotFlag = true;
+ }
+ else
+ {
+ remainingIds.append(id);
+ }
+ // Exactly one token per id, each terminated by ';'
+ remainingIds.append(";");
+ }
+ EBIFetchClient ebi = new EBIFetchClient();
+ File file = ebi.fetchDataAsFile(remainingIds.toString(),
+ "xml", "raw");
+
+ if (file != null)
+ {
+ ReadUniprotFile(file, ids);
+ }
+ }
+ }
+ }
+ catch (Exception ex)
{
- out = new RandomAccessFile(cache, "rw");
- out.writeBytes("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
- out.writeBytes("<UNIPROT_CACHE>\n");
+ ex.printStackTrace();
}
- else
+
+ if (sbuffer.length() > 0)
{
- out = new RandomAccessFile(cache, "rw");
- // open exisiting cache and remove </UNIPROT_CACHE> from the end
- long lastLine = 0;
- String data;
- while ( (data = out.readLine()) != null)
- {
- if (data.indexOf("</entry>") > -1)
- lastLine = out.getFilePointer();
+ output.setText(
+ "Your sequences have been matched to Uniprot. Some of the ids have been\n" +
+ "altered, most likely the start/end residue will have been updated.\n" +
+ "Save your alignment to maintain the updated id.\n\n" +
+ sbuffer.toString());
+ Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300);
+ // The above is the dataset, we must now find out the index
+ // of the viewed sequence

- }
- out.seek(lastLine);
}

- int seqIndex = 0;
- Vector sequences = align.getSequences();
+ promptBeforeBlast();

- while (seqIndex < sequences.size())
- {
- ArrayList ids = new ArrayList();
- for (int i = 0; seqIndex < sequences.size() && i < 50; seqIndex++, i++)
- {
- SequenceI sequence = (SequenceI) sequences.get(seqIndex);
- ids.add(sequence.getName());
- }
+ }
\r
- tryLocalCacheFirst(ids, align);\r
\r
- if (ids.size() > 0)\r
+ void promptBeforeBlast()
+ {
+ // This must be outside the run() body as java 1.5
+ // will not return any value from the OptionPane to the expired thread.
+ int unmatched = unknownSequences.size();
+ if (unmatched > 0)
{
- StringBuffer remainingIds = new StringBuffer("uniprot:");
- for (int i = 0; i < ids.size(); i++)
- remainingIds.append(ids.get(i) + ";");
-
- EBIFetchClient ebi = new EBIFetchClient();
- String[] result = ebi.fetchData(remainingIds.toString(), "xml", null);
-
- if(result!=null)
- ReadUniprotFile(result, out, align);
+ // Ask the user whether the unmatched sequences should be blasted
+ String question = "Couldn't find a match for "+unmatched+" sequences."
+ +"\nPerform blast for unknown sequences?";
+ int reply = javax.swing.JOptionPane.showConfirmDialog(
+ Desktop.desktop, question,
+ "Blast for Unidentified Sequences",
+ javax.swing.JOptionPane.YES_NO_OPTION, javax.swing.JOptionPane.QUESTION_MESSAGE);
+
+ if(reply == javax.swing.JOptionPane.YES_OPTION)
+ {
+ new WSWUBlastClient(ap, align, unknownSequences);
+ }
}

- }
-
- if (out != null)
- {
- out.writeBytes("</UNIPROT_CACHE>\n");
- out.close();
- }
- }catch(Exception ex){ex.printStackTrace();}
-
- ap.repaint();
- findMissingIds(align);
- if(sbuffer.length()>0)
- {
- output.setText("Your sequences have been matched to Uniprot. Some of the ids have been\n"
- +"altered, most likely the start/end residue will have been updated.\n"
- +"Save your alignment to maintain the updated id.\n\n"+sbuffer.toString());
- Desktop.addInternalFrame(output, "Sequence names updated ", 600,300);

+ ap.repaint();
}
\r
- if(unknownSequences.size()>0)\r
+ /**
+ * Applies the Uniprot entries read from a file to the dataset. Each entry
+ * is looked up in the dataset by its first accession, then by its first
+ * name, then (when uniprotFlag is set) by a "UniProt/Swiss-Prot|acc|name"
+ * id. A matched sequence receives the entry's PDB entries and features and
+ * a start/end derived from where its ungapped residues occur in the
+ * Uniprot sequence; alignment sequences named like the matched id then get
+ * their start/end updated, with changes logged to sbuffer.
+ *
+ * @param file Uniprot XML file; nothing is done if it does not exist
+ * @param ids names that were queried; matched names are removed from it
+ */
+ void ReadUniprotFile(File file, Vector ids)
{
- //ignore for now!!!!!!!!!!
- // WSWUBlastClient blastClient = new WSWUBlastClient(align, unknownSequences);
- }
-
-}
+ if(!file.exists())
+ return;

-void ReadUniprotFile(String [] result, RandomAccessFile out, AlignmentI align)
-{
- SequenceI sequence = null;
- Vector features = null;
- String type, description, status, start, end, pdb = null;
+ SequenceI sequence = null;

+ Vector entries = getUniprotEntries(file);

- for (int r = 0; r < result.length; r++)
- {
- if(sequence==null && result[r].indexOf("<name>")>-1)
+ int i, iSize = entries==null?0:entries.size();
+ UniprotEntry entry;
+ for (i = 0; i < iSize; i++)
{
- long filePointer = 0;
+ entry = (UniprotEntry) entries.elementAt(i);
+ String idmatch = entry.getAccession().elementAt(0).toString();
+ sequence = dataset.findName(idmatch);

- if(out!=null)
- try{
- filePointer=out.getFilePointer();
- out.writeBytes("<entry>\n");
- }catch(Exception ex){}
-
- String seqName = parseElement( result[r], "<name>" , out);
- sequence = align.findName( seqName ) ;
- if(sequence==null)
- {
- sequence = align.findName( seqName.substring(0, seqName.indexOf('_')));
- if(sequence!=null)
- {
- sbuffer.append("changing "+sequence.getName()+" to "+seqName+"\n");
- sequence.setName(seqName);
- }
- }
- if(sequence==null)
+ if (sequence == null)
{
- sbuffer.append("UNIPROT updated suggestion is "+result[r]+"\n");
- sequence = align.findName( result[r] ) ;
-
- // this entry has been suggested by ebi.
- // doesn't match id in alignment file
- try { out.setLength(filePointer); } catch (Exception ex) {}
- // now skip to next entry
- while( result[r].indexOf("</entry>")==-1)
- r++;
+ //Sequence maybe Name, not Accession
+ idmatch = entry.getName().elementAt(0).toString();
+ sequence = dataset.findName(idmatch);
}

- features = new Vector();
- type=""; start="0"; end="0"; description=""; status=""; pdb="";
+ if(sequence!=null)
+ ids.remove(sequence.getName());

- }
-
- if(sequence==null)
- continue;
-
- if( result[r].indexOf("<property type=\"pdb accession\"")>-1)
- {
- pdb = parseValue( result[r], "value=" , out);
- sequence.setPDBId(pdb);
- }
-
- if(result[r].indexOf("feature type")>-1)
- {
- type = parseValue( result[r], "type=" , out);
- description = parseValue( result[r], "description=" , null );
- status = parseValue ( result[r], "status=", null);
-
- while( result[r].indexOf("position")==-1)
- {
- r++; //<location>
- }
- // r++;
- if(result[r].indexOf("begin")>-1)
- {
- start = parseValue( result[r], "position=" , out);
- end = parseValue( result[++r], "position=" , out);
- }
- else
- {
- start = parseValue( result[r], "position=" , out);
- end = parseValue( result[r], "position=" , null);
- }
- int sstart = Integer.parseInt(start);
- int eend = Integer.parseInt(end);
- if(out!=null)
- try{ out.writeBytes("</feature>\n"); }catch(Exception ex){}
-
- SequenceFeature sf = new SequenceFeature(type,
- sstart,
- eend,
- description,
- status);
- features.add(sf);
- }
-
- if(result[r].indexOf("<sequence")>-1)
- {
- StringBuffer seqString = new StringBuffer();
-
- if(out!=null)
- try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){}
-
- while(result[++r].indexOf("</sequence>")==-1)
- {
- seqString.append(result[r]);
- if(out!=null)
- try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){}
- }
-
- if(out!=null)
- try { out.writeBytes(result[r]+"\n"); } catch (Exception ex){}
-
- StringBuffer nonGapped = new StringBuffer();
- for (int i = 0; i < sequence.getSequence().length(); i++)
- {
- if (!jalview.util.Comparison.isGap(sequence.getCharAt(i)))
- nonGapped.append(sequence.getCharAt(i));
- }
-
- int absStart = seqString.toString().indexOf(nonGapped.toString());
- if(absStart==-1)
- {
- unknownSequences.add(sequence.getName());
- features = null;
- sbuffer.append(sequence.getName()+ " SEQUENCE NOT %100 MATCH \n");
- continue;
- }
-
- int absEnd = absStart + nonGapped.toString().length();
- absStart+=1;
-
- if(absStart!=sequence.getStart() || absEnd!=sequence.getEnd())
- sbuffer.append("Updated: "+sequence.getName()+" "+
- sequence.getStart()+"/"+sequence.getEnd()+" to "+ absStart+"/"+absEnd+"\n");
-
-
- sequence.setStart(absStart);
- sequence.setEnd(absEnd);
-
- }
-
- if(result[r].indexOf("</entry>")>-1)
- {
- if(features!=null)
- sequence.setSequenceFeatures( features );
- features = null;
- sequence = null;
- if(out!=null)
- try{ out.writeBytes("</entry>\n"); }catch(Exception ex){}
-
- }
- }
-}
-
-void findMissingIds(AlignmentI align)
-{
- String data;
- ArrayList cachedIds = new ArrayList();
-
- try
- {
- BufferedReader in = new BufferedReader(
- new FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE")));
+ else if (sequence == null && uniprotFlag)
+ {
+ sequence = dataset.findName("UniProt/Swiss-Prot|"+entry.getAccession().elementAt(0)+"|"+idmatch);
+ ids.remove(idmatch);
+ }

- while ( (data = in.readLine()) != null)
- {
- if (data.indexOf("name") > -1)
+ if(sequence ==null)
{
- String name = parseElement(data, "<name>", null);
- cachedIds.add(name);
+ System.out.println(idmatch+" not found");
+ continue;
}
- }
- }
- catch (Exception ex)
- { ex.printStackTrace(); }

- for(int i=0; i<align.getHeight(); i++)
- if( !cachedIds.contains( align.getSequenceAt(i).getName() ) )
- unknownSequences.add( align.getSequenceAt(i).getName() );
+ unknownSequences.remove(sequence);

+ String nonGapped = AlignSeq.extractGaps("-. ", sequence.getSequence());

-}
+ int absStart = entry.getUniprotSequence().getContent().indexOf(
+ nonGapped.toString());

-void tryLocalCacheFirst(ArrayList ids, AlignmentI align)
-{
- ArrayList cacheData = new ArrayList();
- try{
- BufferedReader in = new BufferedReader(
- new FileReader(jalview.bin.Cache.getProperty("UNIPROT_CACHE")));
-
- // read through cache file, if the cache has sequences we're looking for
- // add the lines to a new String array, Readthis new array and
- // make sure we remove the ids from the list to retrieve from EBI
- String data;
- while( ( data=in.readLine())!=null)
- {
- if(data.indexOf("name")>-1)
+ if (absStart == -1)
{
- String name = parseElement( data, "<name>" , null) ;
- if(ids.contains( name ) )
+ // Is UniprotSequence contained in dataset sequence?
+ absStart = nonGapped.toString().indexOf(entry.getUniprotSequence().getContent());
+ if(absStart == -1)
{
- cacheData.add("<entry>");
- cacheData.add(data);
- while( data.indexOf("</entry>")==-1)
+ // Put the sequence itself back (it was removed above); the list
+ // holds sequence objects elsewhere, not names
+ unknownSequences.add(sequence);
+ sbuffer.append(sequence.getName() +
+ " SEQUENCE NOT %100 MATCH \n");
+
+ continue;
+ }
+ else
+ {
+ if(entry.getFeature()!=null)
{
- data = in.readLine();
- cacheData.add(data);
+ Enumeration e = entry.getFeature().elements();
+ while (e.hasMoreElements())
+ {
+ SequenceFeature sf = (SequenceFeature) e.nextElement();
+ sf.setBegin(sf.getBegin() + absStart + 1);
+ sf.setEnd(sf.getEnd() + absStart + 1);
+ }
}
- cacheData.add(data);

- ids.remove( name );
+ sbuffer.append(sequence.getName() +
+ " HAS "+absStart+" PREFIXED RESIDUES COMPARED TO UNIPROT - ANY SEQUENCE FEATURES"
+ +" HAVE BEEN ADJUSTED ACCORDINGLY \n");
+ absStart = 0;
}
+
}
- }
- }
- catch(Exception ex){ex.printStackTrace();}

- String [] localData = new String[cacheData.size()];
- cacheData.toArray( localData );
- if(localData!=null && localData.length>0)
- ReadUniprotFile(localData, null, align);
-}
+ int absEnd = absStart + nonGapped.toString().length();
+ absStart += 1;

+ Enumeration e = entry.getDbReference().elements();
+ Vector onlyPdbEntries = new Vector();
+ while(e.hasMoreElements())
+ {
+ PDBEntry pdb = (PDBEntry)e.nextElement();
+ if(!pdb.getType().equals("PDB"))
+ continue;

-String parseValue(String line, String tag, RandomAccessFile out)
-{
- if(out!=null)
- try{ out.writeBytes(line+"\n"); }catch(Exception ex){}
+ onlyPdbEntries.addElement(pdb);
+ }

+ sequence.setPDBId(onlyPdbEntries);
+ sequence.setSequenceFeatures(entry.getFeature());
+ sequence.setStart(absStart);
+ sequence.setEnd(absEnd);

- int index = line.indexOf(tag)+tag.length()+1;
- if(index==tag.length())
- return "";

- return line.substring( index, line.indexOf("\"", index+1) );
-}
+ int n = 0;
+ SequenceI seq2;
+ while (n < align.getHeight())
+ {
+ //This loop enables multiple sequences with the same
+ //id to have features added and seq limits updated
+ seq2 = align.getSequenceAt(n);
+ if (seq2.getName().equals(idmatch))
+ {

+ nonGapped = AlignSeq.extractGaps("-. ", seq2.getSequence());

-String parseElement(String line, String tag, RandomAccessFile out)
-{
- if (out != null)
- try
- {
- out.writeBytes(line + "\n");
- }
- catch (Exception ex)
- {}
+ absStart = sequence.getSequence().indexOf(nonGapped);
+ absEnd = absStart + nonGapped.toString().length() - 1;
+
+ // This is the Viewd alignment sequences
+ // No need to tell the user of the dataset updates
+ if ( (seq2.getStart() != absStart+sequence.getStart())
+ || (seq2.getEnd() != absEnd+sequence.getStart()))
+ {
+ sbuffer.append("Updated: " + seq2.getName() + " " +
+ seq2.getStart() + "/" + seq2.getEnd() +
+ " to " + (absStart + sequence.getStart()) + "/" +
+ (absEnd + sequence.getStart()) + "\n");

- int index = line.indexOf(tag) + tag.length();
- return line.substring(index, line.indexOf("</"));
+ seq2.setStart(absStart + sequence.getStart());
+ seq2.setEnd(absEnd + sequence.getStart());
+ }
+ }

+ n++;
+ }
+ }
}
}\r
+\r
+\r