*/\r
public class DBRefFetcher implements Runnable\r
{\r
- AlignmentI align;\r
AlignmentI dataset;\r
AlignFrame af;\r
- ArrayList unknownSequences;\r
CutAndPasteTransfer output = new CutAndPasteTransfer();\r
StringBuffer sbuffer = new StringBuffer();\r
- boolean uniprotFlag = false;\r
boolean running = false;\r
\r
+ // Maps a key to the Vector of SequenceI references registered under it.\r
+ // The key is the sequence name or accession id of the sequence.\r
+ Hashtable seqRefs;\r
+\r
/**\r
* Creates a fetcher without an alignment or alignment frame;\r
* this constructor assigns no fields.\r
*/\r
public DBRefFetcher()\r
{}\r
\r
Unmarshaller unmar = new Unmarshaller(uni);\r
unmar.setIgnoreExtraElements(true);\r
unmar.setMapping(map);\r
- // unmar.setDebug(true);\r
\r
uni = (UniprotFile) unmar.unmarshal(new FileReader(file));\r
}\r
/**\r
* Creates a fetcher for the given alignment.\r
* Keeps the alignment frame and the dataset returned by\r
* <code>align.getDataset()</code>.\r
*\r
* @param align alignment whose dataset is stored\r
* @param af alignment frame that is stored\r
*/\r
public DBRefFetcher(AlignmentI align, AlignFrame af)\r
{\r
this.af = af;\r
- unknownSequences = new ArrayList();\r
- this.align = align;\r
this.dataset = align.getDataset();\r
}\r
\r
}\r
\r
/**\r
+ * The sequence will be added to a vector of sequences\r
+ * belonging to key which could be either seq name or dbref id\r
+ * @param seq SequenceI\r
+ * @param key String\r
+ */\r
+ void addSeqId(SequenceI seq, String key)\r
+ {\r
+   // Normalise the key to upper case with a fixed locale so the stored key\r
+   // does not depend on the platform default locale.\r
+   key = key.toUpperCase(java.util.Locale.ENGLISH);\r
+\r
+   // Hashtable never stores null values, so null here means "no entry yet".\r
+   Vector seqs = (Vector) seqRefs.get(key);\r
+   if (seqs == null)\r
+   {\r
+     seqs = new Vector();\r
+     seqRefs.put(key, seqs);\r
+   }\r
+\r
+   // Register each sequence at most once per key.\r
+   if (!seqs.contains(seq))\r
+   {\r
+     seqs.addElement(seq);\r
+   }\r
+ }\r
+\r
+\r
+ /**\r
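* Builds "uniprot:" queries for the sequences in batches, fetches each\r
* batch through EBIFetchClient and passes the returned file to\r
* ReadUniprotFile.\r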
*/\r
public void run()\r
af.setProgressBar("Fetching db refs", startTime);\r
running = true;\r
\r
+ seqRefs = new Hashtable();\r
+\r
try\r
{\r
int seqIndex = 0;\r
\r
while (seqIndex < sequences.size())\r
{\r
- Vector ids = new Vector();\r
+ StringBuffer queryString = new StringBuffer("uniprot:");\r
\r
for (int i = 0; (seqIndex < sequences.size()) && (i < 50);\r
seqIndex++, i++)\r
jalview.datamodel.DBRefSource.UNIPROT});\r
if (uprefs!=null)\r
{\r
- // we know the id for this entry, so don't note its ID in the unknownSequences list\r
- for (int j=0,k=uprefs.length; j<k; j++)\r
- ids.add(uprefs[j].getAccessionId());\r
- unknownSequences.add(sequence);\r
- } else {\r
- if (!ids.contains(sequence.getName()))\r
+ if(uprefs.length+i>50)\r
+ break;\r
+\r
+ for(int j=0; j<uprefs.length; j++)\r
+ {\r
+   // Index with j, this loop's counter; the outer counter i counts\r
+   // sequences in the current batch and is not a valid index into uprefs.\r
+   String accessionId = uprefs[j].getAccessionId();\r
+   addSeqId(sequence, accessionId);\r
+   queryString.append(accessionId+";");\r
+ }\r
+ }\r
+ else\r
+ {\r
+ StringTokenizer st = new StringTokenizer(sequence.getName(), "|");\r
+ if(st.countTokens()+i>50)\r
+ {\r
+ //Don't send more than 50 id strings to dbFetch!!\r
+ seqIndex --;\r
+ }\r
+ else\r
{\r
- ids.add(sequence.getName());\r
- unknownSequences.add(sequence);\r
+ while(st.hasMoreTokens())\r
+ {\r
+ String token = st.nextToken();\r
+ addSeqId(sequence, token);\r
+ queryString.append(token+";");\r
+ }\r
}\r
}\r
}\r
\r
///////////////////////////////////\r
///READ FROM EBI\r
- if (ids.size() > 0)\r
+ EBIFetchClient ebi = new EBIFetchClient();\r
+ File file = ebi.fetchDataAsFile(queryString.toString(), "xml", "raw");\r
+ if (file != null)\r
{\r
- StringBuffer remainingIds = new StringBuffer("uniprot:");\r
- for (int i = 0; i < ids.size(); i++)\r
- {\r
- if(ids.get(i).toString().indexOf("|")>-1)\r
- {\r
- remainingIds.append(ids.get(i).toString().substring(\r
- ids.get(i).toString().lastIndexOf("|") + 1));\r
- uniprotFlag = true;\r
- }\r
- else\r
- remainingIds.append(ids.get(i));\r
-\r
- remainingIds.append(";");\r
- }\r
-\r
- EBIFetchClient ebi = new EBIFetchClient();\r
- File file = ebi.fetchDataAsFile(remainingIds.toString(),\r
- "xml", "raw");\r
-\r
-\r
-\r
- if (file != null)\r
- {\r
- ReadUniprotFile(file, ids);\r
- }\r
+ ReadUniprotFile(file);\r
}\r
}\r
}\r
}\r
\r
\r
- void promptBeforeBlast()\r
- {\r
- // This must be outside the run() body as java 1.5\r
- // will not return any value from the OptionPane to the expired thread.\r
- if (unknownSequences.size() > 0)\r
- {\r
- // int reply = javax.swing.JOptionPane.showConfirmDialog(\r
- // Desktop.desktop, "Couldn't find a match for "+unknownSequences.size()+" sequences."\r
- // +"\nPerform blast for unknown sequences?",\r
- // "Blast for Unidentified Sequences",\r
- // javax.swing.JOptionPane.YES_NO_OPTION, javax.swing.JOptionPane.QUESTION_MESSAGE);\r
- javax.swing.JOptionPane.showMessageDialog(\r
- Desktop.desktop, "Couldn't find a match for "+unknownSequences.size()+" sequences.",\r
- "Unidentified Sequences",\r
- javax.swing.JOptionPane.WARNING_MESSAGE);\r
-\r
-\r
- // if(reply == javax.swing.JOptionPane.YES_OPTION)\r
- // new WSWUBlastClient(ap, align, unknownSequences);\r
- }\r
- }\r
-\r
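/**\r
* Reads the UniProt entries contained in the given file and adds\r
* database references to the sequences they match.\r
*\r
* @param file file holding the fetched UniProt data; nothing is done if it does not exist\r
*/\r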
- void ReadUniprotFile(File file, Vector ids)\r
+ void ReadUniprotFile(File file)\r
{\r
- if(!file.exists())\r
+ if (!file.exists())\r
return;\r
\r
- SequenceI [] sequence = null;\r
+ SequenceI sequence = null;\r
\r
Vector entries = getUniprotEntries(file);\r
\r
- int i, iSize = entries==null?0:entries.size();\r
+ int i, iSize = entries == null ? 0 : entries.size();\r
UniprotEntry entry;\r
for (i = 0; i < iSize; i++)\r
{\r
entry = (UniprotEntry) entries.elementAt(i);\r
- String idmatch = entry.getAccession().elementAt(0).toString();\r
- sequence = dataset.findSequenceMatch(idmatch);\r
\r
- if (sequence.length==0)\r
+ //Work out which sequences this Uniprot file has matches to,\r
+ //taking into account all accessionIds and names in the file\r
+ Vector sequenceMatches = new Vector();\r
+ for (int j = 0; j < entry.getAccession().size(); j++)\r
{\r
- //Sequence maybe Name, not Accession\r
- idmatch = entry.getName().elementAt(0).toString();\r
- sequence = dataset.findSequenceMatch(idmatch);\r
- }\r
-\r
- if(sequence.length>0)\r
- ids.remove(sequence[0].getName());\r
-\r
- else if (sequence.length==0 && uniprotFlag)\r
- {\r
- StringBuffer upid = new StringBuffer("UniProt/Swiss-Prot|");\r
- for(int u=0; u<entry.getAccession().size(); u++)\r
- upid.append(entry.getAccession().elementAt(u)+"|");\r
-\r
- sequence = dataset.findSequenceMatch(upid+idmatch);\r
- ids.remove(idmatch);\r
+ String accessionId = entry.getAccession().elementAt(j).toString();\r
+ if (seqRefs.containsKey(accessionId))\r
+ {\r
+ Vector seqs = (Vector) seqRefs.get(accessionId);\r
+ for (int jj = 0; jj < seqs.size(); jj++)\r
+ {\r
+ sequence = (SequenceI) seqs.elementAt(jj);\r
+ if (!sequenceMatches.contains(sequence))\r
+ sequenceMatches.addElement(sequence);\r
+ }\r
+ }\r
}\r
-\r
- if(sequence.length==0)\r
+ for (int j = 0; j < entry.getName().size(); j++)\r
{\r
- System.out.println(idmatch+" not found");\r
- continue;\r
+ String name = entry.getName().elementAt(j).toString();\r
+ if (seqRefs.containsKey(name))\r
+ {\r
+ Vector seqs = (Vector) seqRefs.get(name);\r
+ for (int jj = 0; jj < seqs.size(); jj++)\r
+ {\r
+ sequence = (SequenceI) seqs.elementAt(jj);\r
+ if (!sequenceMatches.contains(sequence))\r
+ sequenceMatches.addElement(sequence);\r
+ }\r
+ }\r
}\r
\r
- for(int m=0; m<sequence.length; m++)\r
+ for (int m = 0; m < sequenceMatches.size(); m++)\r
{\r
+ sequence = (SequenceI) sequenceMatches.elementAt(m);\r
+ sequence.addDBRef(new DBRefEntry(DBRefSource.UNIPROT,\r
+ "0",\r
+ entry.getAccession().elementAt(0).\r
+ toString()));\r
\r
- sequence[m].addDBRef(new DBRefEntry(DBRefSource.UNIPROT,\r
- "0",\r
- entry.getAccession().elementAt(0).toString()));\r
+ System.out.println("Adding dbref to " + sequence.getName() + " : " +\r
+ entry.getAccession().elementAt(0).toString());\r
\r
- System.out.println("Adding dbref to "+sequence[m].getName()+" : "+\r
- entry.getAccession().elementAt(0).toString());\r
+ String nonGapped = AlignSeq.extractGaps("-. ", sequence.getSequence()).\r
+ toUpperCase();\r
\r
- String nonGapped = AlignSeq.extractGaps("-. ", sequence[m].getSequence()).toUpperCase();\r
+ int absStart = entry.getUniprotSequence().getContent().indexOf(\r
+ nonGapped.toString());\r
\r
- int absStart = entry.getUniprotSequence().getContent().indexOf(\r
- nonGapped.toString());\r
-\r
- if (absStart == -1)\r
- {\r
- // Is UniprotSequence contained in dataset sequence?\r
- absStart = nonGapped.toString().indexOf(entry.getUniprotSequence().getContent());\r
- if(absStart == -1)\r
+ if (absStart == -1)\r
{\r
- sbuffer.append(sequence[m].getName() +\r
- " SEQUENCE NOT %100 MATCH \n");\r
-\r
- continue;\r
- }\r
-\r
- if (entry.getFeature() != null)\r
- {\r
- Enumeration e = entry.getFeature().elements();\r
- while (e.hasMoreElements())\r
+ // Is UniprotSequence contained in dataset sequence?\r
+ absStart = nonGapped.toString().indexOf(entry.getUniprotSequence().\r
+ getContent());\r
+ if (absStart == -1)\r
{\r
- SequenceFeature sf = (SequenceFeature) e.nextElement();\r
- sf.setBegin(sf.getBegin() + absStart + 1);\r
- sf.setEnd(sf.getEnd() + absStart + 1);\r
+ sbuffer.append(sequence.getName() + " SEQUENCE NOT %100 MATCH \n");\r
+ continue;\r
}\r
\r
- sbuffer.append(sequence[m].getName() +\r
- " HAS " + absStart +\r
- " PREFIXED RESIDUES COMPARED TO UNIPROT - ANY SEQUENCE FEATURES"\r
- + " HAVE BEEN ADJUSTED ACCORDINGLY \n");\r
- absStart = 0;\r
- }\r
+ if (entry.getFeature() != null)\r
+ {\r
+ Enumeration e = entry.getFeature().elements();\r
+ while (e.hasMoreElements())\r
+ {\r
+ SequenceFeature sf = (SequenceFeature) e.nextElement();\r
+ sf.setBegin(sf.getBegin() + absStart + 1);\r
+ sf.setEnd(sf.getEnd() + absStart + 1);\r
+ }\r
\r
- }\r
+ sbuffer.append(sequence.getName() +\r
+ " HAS " + absStart +\r
+ " PREFIXED RESIDUES COMPARED TO UNIPROT - ANY SEQUENCE FEATURES"\r
+ + " HAVE BEEN ADJUSTED ACCORDINGLY \n");\r
+ absStart = 0;\r
+ }\r
\r
- unknownSequences.remove(sequence);\r
+ }\r
\r
- int absEnd = absStart + nonGapped.toString().length();\r
- absStart += 1;\r
+ //unknownSequences.remove(sequence);\r
\r
- Enumeration e = entry.getDbReference().elements();\r
- Vector onlyPdbEntries = new Vector();\r
- while(e.hasMoreElements())\r
- {\r
- PDBEntry pdb = (PDBEntry)e.nextElement();\r
- if(!pdb.getType().equals(DBRefSource.PDB))\r
- continue;\r
+ int absEnd = absStart + nonGapped.toString().length();\r
+ absStart += 1;\r
\r
- sequence[m].addDBRef(new DBRefEntry(DBRefSource.PDB,\r
- "0",\r
- pdb.getId()));\r
+ Enumeration e = entry.getDbReference().elements();\r
+ Vector onlyPdbEntries = new Vector();\r
+ while (e.hasMoreElements())\r
+ {\r
+ PDBEntry pdb = (PDBEntry) e.nextElement();\r
+ if (!pdb.getType().equals(DBRefSource.PDB))\r
+ continue;\r
\r
- onlyPdbEntries.addElement(pdb);\r
- }\r
+ sequence.addDBRef(new DBRefEntry(DBRefSource.PDB,\r
+ "0",\r
+ pdb.getId()));\r
\r
- sequence[m].setPDBId(onlyPdbEntries);\r
+ onlyPdbEntries.addElement(pdb);\r
+ }\r
\r
- sequence[m].setStart(absStart);\r
- sequence[m].setEnd(absEnd);\r
+ sequence.setPDBId(onlyPdbEntries);\r
\r
+ sequence.setStart(absStart);\r
+ sequence.setEnd(absEnd);\r
\r
}\r
}\r