--- /dev/null
+/*\r
+* Jalview - A Sequence Alignment Editor and Viewer\r
+* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle\r
+*\r
+* This program is free software; you can redistribute it and/or\r
+* modify it under the terms of the GNU General Public License\r
+* as published by the Free Software Foundation; either version 2\r
+* of the License, or (at your option) any later version.\r
+*\r
+* This program is distributed in the hope that it will be useful,\r
+* but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+* GNU General Public License for more details.\r
+*\r
+* You should have received a copy of the GNU General Public License\r
+* along with this program; if not, write to the Free Software\r
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA\r
+*/\r
+package jalview.io;\r
+\r
+import jalview.datamodel.*;\r
+\r
+import jalview.gui.*;\r
+\r
+import java.io.*;\r
+\r
+import java.util.*;\r
+\r
+import org.exolab.castor.mapping.Mapping;\r
+\r
+import org.exolab.castor.xml.*;\r
+import jalview.analysis.AlignSeq;\r
+\r
+\r
+\r
+/**\r
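+ * Looks up the sequences of an alignment's dataset in Uniprot through the\r
+ * EBI fetch client, records the matching database references and PDB ids\r
+ * on them, and updates sequence start/end positions to agree with the\r
+ * retrieved Uniprot sequence. The work is done on a background thread.\r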
+ *\r
+ * @author $author$\r
+ * @version $Revision$\r
+ */\r
+public class DBRefFetcher implements Runnable\r
+{\r
+  /** Maximum number of dataset sequences handled per EBI request. */\r
+  private static final int BATCH_SIZE = 50;\r
+\r
+  /** Viewed alignment; its sequences get their start/end updated. */\r
+  AlignmentI align;\r
+\r
+  /** Result of align.getDataset(); searched by name and annotated. */\r
+  AlignmentI dataset;\r
+\r
+  /** Frame whose progress bar is updated while fetching. */\r
+  AlignFrame af;\r
+\r
+  /** Sequences that were queried but not (yet) matched to a Uniprot entry. */\r
+  ArrayList unknownSequences;\r
+\r
+  /** Window used to display the report collected in sbuffer. */\r
+  CutAndPasteTransfer output = new CutAndPasteTransfer();\r
+\r
+  /** Report of updated limits and mismatches, shown when run() finishes. */\r
+  StringBuffer sbuffer = new StringBuffer();\r
+\r
+  /** Set to true once a queried id containing '|' has been seen. */\r
+  boolean uniprotFlag = false;\r
+\r
+  /**\r
+   * Creates a fetcher with no alignment attached. No fetch thread is started.\r
+   */\r
+  public DBRefFetcher()\r
+  {}\r
+\r
+  /**\r
+   * Unmarshals a Uniprot XML file using the Castor mapping loaded from the\r
+   * classpath resource /uniprot_mapping.xml.\r
+   *\r
+   * Any failure is printed to System.out and not rethrown; in that case the\r
+   * entries of a freshly constructed UniprotFile are returned.\r
+   *\r
+   * @param file Uniprot XML file to read\r
+   * @return the value of UniprotFile.getUniprotEntries(); callers in this\r
+   *         class treat null as "no entries"\r
+   */\r
+  public Vector getUniprotEntries(File file)\r
+  {\r
+    UniprotFile uni = new UniprotFile();\r
+    FileReader reader = null;\r
+    try\r
+    {\r
+      // 1. Load the mapping information from the file\r
+      Mapping map = new Mapping(uni.getClass().getClassLoader());\r
+      java.net.URL url = getClass().getResource("/uniprot_mapping.xml");\r
+      map.loadMapping(url);\r
+\r
+      // 2. Unmarshal the data\r
+      Unmarshaller unmar = new Unmarshaller(uni);\r
+      unmar.setIgnoreExtraElements(true);\r
+      unmar.setMapping(map);\r
+\r
+      reader = new FileReader(file);\r
+      uni = (UniprotFile) unmar.unmarshal(reader);\r
+    }\r
+    catch (Exception e)\r
+    {\r
+      System.out.println("Error getUniprotEntries() "+e);\r
+    }\r
+    finally\r
+    {\r
+      // Always release the file handle, whether or not unmarshalling worked\r
+      if (reader != null)\r
+      {\r
+        try\r
+        {\r
+          reader.close();\r
+        }\r
+        catch (IOException ignored)\r
+        {\r
+          // nothing useful can be done if close fails\r
+        }\r
+      }\r
+    }\r
+\r
+    return uni.getUniprotEntries();\r
+  }\r
+\r
+  /**\r
+   * Creates a new DBRefFetcher and immediately starts a thread that\r
+   * executes run().\r
+   *\r
+   * @param align alignment whose dataset (align.getDataset()) is looked up\r
+   * @param af frame whose progress bar reports the fetch\r
+   */\r
+  public DBRefFetcher(AlignmentI align, AlignFrame af)\r
+  {\r
+    this.af = af;\r
+    unknownSequences = new ArrayList();\r
+    this.align = align;\r
+    this.dataset = align.getDataset();\r
+\r
+    Thread thread = new Thread(this);\r
+    thread.start();\r
+  }\r
+\r
+  /**\r
+   * Walks the dataset sequences in batches of at most BATCH_SIZE, fetches\r
+   * the corresponding Uniprot XML through EBIFetchClient and passes each\r
+   * returned file to ReadUniprotFile. Exceptions are printed and end the\r
+   * search early. Afterwards any collected report text is shown in a new\r
+   * internal frame and the progress bar is updated.\r
+   */\r
+  public void run()\r
+  {\r
+    long startTime = System.currentTimeMillis();\r
+    af.setProgressBar("Fetching db refs", startTime);\r
+\r
+    try\r
+    {\r
+      int seqIndex = 0;\r
+      Vector sequences = dataset.getSequences();\r
+\r
+      while (seqIndex < sequences.size())\r
+      {\r
+        Vector ids = new Vector();\r
+\r
+        for (int i = 0; (seqIndex < sequences.size()) && (i < BATCH_SIZE);\r
+             seqIndex++, i++)\r
+        {\r
+          Sequence sequence = (Sequence) sequences.get(seqIndex);\r
+          Vector uprefs = jalview.util.DBRefUtils.selectRefs(\r
+              sequence.getDBRef(),\r
+              new String[] { jalview.datamodel.DBRefSource.UNIPROT });\r
+\r
+          if (uprefs != null)\r
+          {\r
+            // Uniprot accessions are already recorded, so query by accession.\r
+            // The sequence still goes on the unknown list; ReadUniprotFile\r
+            // takes it off again once a retrieved entry matches it.\r
+            for (int j = 0, k = uprefs.size(); j < k; j++)\r
+            {\r
+              ids.add(((DBRefEntry) uprefs.get(j)).getAccessionId());\r
+            }\r
+            unknownSequences.add(sequence);\r
+          }\r
+          else if (!ids.contains(sequence.getName()))\r
+          {\r
+            // No Uniprot reference yet: query by sequence name, once per name\r
+            ids.add(sequence.getName());\r
+            unknownSequences.add(sequence);\r
+          }\r
+        }\r
+\r
+        ///////////////////////////////////\r
+        ///READ FROM EBI\r
+        if (ids.size() > 0)\r
+        {\r
+          EBIFetchClient ebi = new EBIFetchClient();\r
+          File file = ebi.fetchDataAsFile(buildQuery(ids), "xml", "raw");\r
+\r
+          if (file != null)\r
+          {\r
+            ReadUniprotFile(file, ids);\r
+          }\r
+        }\r
+      }\r
+    }\r
+    catch (Exception ex)\r
+    {\r
+      ex.printStackTrace();\r
+    }\r
+\r
+    if (sbuffer.length() > 0)\r
+    {\r
+      output.setText(\r
+          "Your sequences have been matched to Uniprot. Some of the ids have been\n" +\r
+          "altered, most likely the start/end residue will have been updated.\n" +\r
+          "Save your alignment to maintain the updated id.\n\n" +\r
+          sbuffer.toString());\r
+      Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300);\r
+    }\r
+\r
+    af.setProgressBar("DBRef search completed", startTime);\r
+    // promptBeforeBlast() is currently not invoked from here.\r
+  }\r
+\r
+  /**\r
+   * Builds the "uniprot:id1;id2;..." query string for one batch of ids.\r
+   * For an id containing '|' only the text after the last '|' is sent and\r
+   * uniprotFlag is set; every other id is sent unchanged. Exactly one token,\r
+   * terminated by ';', is emitted per id.\r
+   *\r
+   * @param ids ids (accessions or sequence names) of the current batch\r
+   * @return the query string handed to EBIFetchClient\r
+   */\r
+  private String buildQuery(Vector ids)\r
+  {\r
+    StringBuffer query = new StringBuffer("uniprot:");\r
+    for (int i = 0; i < ids.size(); i++)\r
+    {\r
+      String id = ids.get(i).toString();\r
+      int lastBar = id.lastIndexOf("|");\r
+      if (lastBar > -1)\r
+      {\r
+        query.append(id.substring(lastBar + 1));\r
+        uniprotFlag = true;\r
+      }\r
+      else\r
+      {\r
+        query.append(id);\r
+      }\r
+      query.append(";");\r
+    }\r
+    return query.toString();\r
+  }\r
+\r
+  /**\r
+   * Shows a warning dialog stating how many sequences are still in\r
+   * unknownSequences; does nothing when that list is empty. A follow-up\r
+   * BLAST of those sequences was sketched here but is not implemented.\r
+   */\r
+  void promptBeforeBlast()\r
+  {\r
+    // This must be outside the run() body as java 1.5\r
+    // will not return any value from the OptionPane to the expired thread.\r
+    if (unknownSequences.size() > 0)\r
+    {\r
+      javax.swing.JOptionPane.showMessageDialog(\r
+          Desktop.desktop,\r
+          "Couldn't find a match for "+unknownSequences.size()+" sequences.",\r
+          "Unidentified Sequences",\r
+          javax.swing.JOptionPane.WARNING_MESSAGE);\r
+    }\r
+  }\r
+\r
+  /**\r
+   * Reads the Uniprot entries in a fetched file and applies each one to the\r
+   * dataset sequence it matches (by first accession, then by first entry\r
+   * name, then - if uniprotFlag is set - by the composite\r
+   * "UniProt/Swiss-Prot|accession|name" id).\r
+   *\r
+   * For a matched sequence this adds a UNIPROT DBRefEntry, sets its PDB ids\r
+   * from the entry's "PDB" references, sets its start/end from where it\r
+   * lies in the Uniprot sequence, removes it from unknownSequences, and\r
+   * updates start/end of every viewed alignment sequence of the same name.\r
+   * Differences are reported through sbuffer.\r
+   *\r
+   * @param file file holding Uniprot XML; ignored if it does not exist\r
+   * @param ids ids queried for this batch; matched ids are removed from it\r
+   */\r
+  void ReadUniprotFile(File file, Vector ids)\r
+  {\r
+    if (!file.exists())\r
+    {\r
+      return;\r
+    }\r
+\r
+    Vector entries = getUniprotEntries(file);\r
+    int iSize = (entries == null) ? 0 : entries.size();\r
+\r
+    for (int i = 0; i < iSize; i++)\r
+    {\r
+      UniprotEntry entry = (UniprotEntry) entries.elementAt(i);\r
+      String accession = entry.getAccession().elementAt(0).toString();\r
+\r
+      String idmatch = accession;\r
+      SequenceI sequence = dataset.findName(idmatch);\r
+\r
+      if (sequence == null)\r
+      {\r
+        //Sequence maybe Name, not Accession\r
+        idmatch = entry.getName().elementAt(0).toString();\r
+        sequence = dataset.findName(idmatch);\r
+      }\r
+\r
+      if (sequence != null)\r
+      {\r
+        ids.remove(sequence.getName());\r
+      }\r
+      else if (uniprotFlag)\r
+      {\r
+        sequence = dataset.findName(\r
+            "UniProt/Swiss-Prot|" + accession + "|" + idmatch);\r
+        ids.remove(idmatch);\r
+      }\r
+\r
+      if (sequence == null)\r
+      {\r
+        System.out.println(idmatch+" not found");\r
+        continue;\r
+      }\r
+\r
+      sequence.addDBRef(new DBRefEntry(DBRefSource.UNIPROT, "1.0", accession));\r
+\r
+      System.out.println("Adding dbref to "+sequence.getName()+" : "+\r
+                         accession);\r
+\r
+      String nonGapped = AlignSeq.extractGaps("-. ", sequence.getSequence());\r
+      String uniprotSeq = entry.getUniprotSequence().getContent();\r
+\r
+      // 0-based offset of the dataset sequence inside the Uniprot sequence\r
+      int absStart = uniprotSeq.indexOf(nonGapped);\r
+\r
+      if (absStart == -1)\r
+      {\r
+        // Is UniprotSequence contained in dataset sequence?\r
+        absStart = nonGapped.indexOf(uniprotSeq);\r
+        if (absStart == -1)\r
+        {\r
+          sbuffer.append(sequence.getName() +\r
+                         " SEQUENCE NOT %100 MATCH \n");\r
+\r
+          continue;\r
+        }\r
+\r
+        // The dataset sequence has extra leading residues: shift the\r
+        // entry's feature positions by that prefix.\r
+        if (entry.getFeature() != null)\r
+        {\r
+          Enumeration features = entry.getFeature().elements();\r
+          while (features.hasMoreElements())\r
+          {\r
+            SequenceFeature sf = (SequenceFeature) features.nextElement();\r
+            sf.setBegin(sf.getBegin() + absStart + 1);\r
+            sf.setEnd(sf.getEnd() + absStart + 1);\r
+          }\r
+        }\r
+\r
+        sbuffer.append(sequence.getName() +\r
+                       " HAS "+absStart+" PREFIXED RESIDUES COMPARED TO UNIPROT - ANY SEQUENCE FEATURES"\r
+                       +" HAVE BEEN ADJUSTED ACCORDINGLY \n");\r
+        absStart = 0;\r
+      }\r
+\r
+      unknownSequences.remove(sequence);\r
+\r
+      // Convert the 0-based offset to 1-based inclusive start/end\r
+      int absEnd = absStart + nonGapped.length();\r
+      absStart += 1;\r
+\r
+      // Keep only the database references whose type is "PDB".\r
+      // The reference list is null-checked in the same way as getFeature().\r
+      Vector onlyPdbEntries = new Vector();\r
+      if (entry.getDbReference() != null)\r
+      {\r
+        Enumeration refs = entry.getDbReference().elements();\r
+        while (refs.hasMoreElements())\r
+        {\r
+          PDBEntry pdb = (PDBEntry) refs.nextElement();\r
+          if ("PDB".equals(pdb.getType()))\r
+          {\r
+            onlyPdbEntries.addElement(pdb);\r
+          }\r
+        }\r
+      }\r
+\r
+      sequence.setPDBId(onlyPdbEntries);\r
+\r
+      // Note: the entry's features are not attached to the sequence here.\r
+      sequence.setStart(absStart);\r
+      sequence.setEnd(absEnd);\r
+\r
+      //This loop enables multiple sequences with the same\r
+      //id to have features added and seq limits updated\r
+      for (int n = 0; n < align.getHeight(); n++)\r
+      {\r
+        SequenceI seq2 = align.getSequenceAt(n);\r
+        if (!seq2.getName().equals(idmatch))\r
+        {\r
+          continue;\r
+        }\r
+\r
+        String viewed = AlignSeq.extractGaps("-. ", seq2.getSequence());\r
+        int offset = sequence.getSequence().indexOf(viewed);\r
+        if (offset == -1)\r
+        {\r
+          // Same name but different residues: limits cannot be derived,\r
+          // so leave this sequence's start/end untouched.\r
+          continue;\r
+        }\r
+\r
+        // Compute both limits before changing anything, so the result does\r
+        // not depend on seq2 and sequence being distinct objects.\r
+        int newStart = offset + sequence.getStart();\r
+        int newEnd = offset + viewed.length() - 1 + sequence.getStart();\r
+\r
+        // This is the Viewd alignment sequences\r
+        // No need to tell the user of the dataset updates\r
+        if ((seq2.getStart() != newStart) || (seq2.getEnd() != newEnd))\r
+        {\r
+          sbuffer.append("Updated: " + seq2.getName() + " " +\r
+                         seq2.getStart() + "/" + seq2.getEnd() +\r
+                         " to " + newStart + "/" + newEnd + "\n");\r
+\r
+          seq2.setStart(newStart);\r
+          seq2.setEnd(newEnd);\r
+        }\r
+      }\r
+    }\r
+  }\r
+}\r
+\r
+\r