{\r
\r
AlignmentI align;\r
+ AlignmentI dataset;\r
AlignmentPanel ap;\r
ArrayList unknownSequences;\r
CutAndPasteTransfer output = new CutAndPasteTransfer();\r
StringBuffer sbuffer = new StringBuffer();\r
+ boolean uniprotFlag = false;\r
\r
- Vector localCache = new Vector();\r
+ public SequenceFeatureFetcher()\r
+ {}\r
\r
- Vector getUniprotEntries(File file)\r
+ public Vector getUniprotEntries(File file)\r
{\r
\r
UniprotFile uni = new UniprotFile();\r
{\r
// 1. Load the mapping information from the file\r
Mapping map = new Mapping(uni.getClass().getClassLoader());\r
- java.net.URL url = uni.getClass().getResource("/uniprot_mapping.xml");\r
+ java.net.URL url = getClass().getResource("/uniprot_mapping.xml");\r
map.loadMapping(url);\r
\r
// 2. Unmarshal the data\r
unmar.setIgnoreExtraElements(true);\r
unmar.setMapping(map);\r
uni = (UniprotFile) unmar.unmarshal(new FileReader(file));\r
- localCache.addAll( uni.getUniprotEntries() );\r
-\r
- // 3. marshal the data with the total price back and print the XML in the console\r
- // Marshaller marshaller = new Marshaller(\r
- // new FileWriter(jalview.bin.Cache.getProperty("UNIPROT_CACHE"))\r
- // );\r
- // marshaller.setMapping(map);\r
- // marshaller.marshal(uni);\r
\r
}\r
catch (Exception e)\r
{\r
System.out.println("Error getUniprotEntries() "+e);\r
- // e.printStackTrace();\r
- // if(!updateLocalCache)\r
- // file.delete();\r
-\r
}\r
return uni.getUniprotEntries();\r
}\r
{\r
unknownSequences = new ArrayList();\r
this.align = align;\r
+ this.dataset = align.getDataset();\r
this.ap = ap;\r
\r
Thread thread = new Thread(this);\r
try\r
{\r
int seqIndex = 0;\r
- Vector sequences = align.getSequences();\r
+ Vector sequences = dataset.getSequences();\r
\r
while (seqIndex < sequences.size())\r
{\r
for (int i = 0; (seqIndex < sequences.size()) && (i < 50);\r
seqIndex++, i++)\r
{\r
- SequenceI sequence = (SequenceI) sequences.get(seqIndex);\r
- ids.add(sequence.getName());\r
- unknownSequences.add(sequence.getName());\r
+ Sequence sequence = (Sequence) sequences.get(seqIndex);\r
+ if(sequence.getSequenceFeatures()==null)\r
+ {\r
+ if (!ids.contains(sequence.getName()))\r
+ {\r
+ ids.add(sequence.getName());\r
+ unknownSequences.add(sequence);\r
+ }\r
+ }\r
}\r
\r
///////////////////////////////////\r
StringBuffer remainingIds = new StringBuffer("uniprot:");\r
for (int i = 0; i < ids.size(); i++)\r
{\r
+ // Ids containing '|' (e.g. "db|accession|name") are queried by their\r
+ // final field only. Every id is appended exactly once and is always\r
+ // terminated by ';' so the fetch query stays well-formed.\r
+ if(ids.get(i).toString().indexOf("|")>-1)\r
+ {\r
+ remainingIds.append(ids.get(i).toString().substring(\r
+ ids.get(i).toString().lastIndexOf("|") + 1) + ";");\r
+ uniprotFlag = true;\r
+ }\r
+ else\r
+ {\r
remainingIds.append(ids.get(i) + ";");\r
+ }\r
}\r
EBIFetchClient ebi = new EBIFetchClient();\r
File file = ebi.fetchDataAsFile(remainingIds.toString(),\r
- "xml", null);\r
+ "xml", "raw");\r
+\r
\r
\r
if (file != null)\r
{\r
- ReadUniprotFile(file, align, ids);\r
+ ReadUniprotFile(file, ids);\r
}\r
}\r
}\r
"Save your alignment to maintain the updated id.\n\n" +\r
sbuffer.toString());\r
Desktop.addInternalFrame(output, "Sequence names updated ", 600, 300);\r
- }\r
+ // The above is the dataset, we must now find out the index\r
+ // of the viewed sequence\r
\r
- if (unknownSequences.size() > 0)\r
- {\r
- new WSWUBlastClient(ap, align, unknownSequences);\r
}\r
- else\r
- ((Alignment)align).featuresAdded = true;\r
+\r
+ promptBeforeBlast();\r
+\r
+ }\r
+\r
+\r
+ void promptBeforeBlast()\r
+ {\r
+ // This must be outside the run() body as java 1.5\r
+ // will not return any value from the OptionPane to the expired thread.\r
+ if (unknownSequences.size() > 0)\r
+ {\r
+ int reply = javax.swing.JOptionPane.showConfirmDialog(\r
+ Desktop.desktop, "Couldn't find a match for "+unknownSequences.size()+" sequences."\r
+ +"\nPerform blast for unknown sequences?",\r
+ "Blast for Unidentified Sequences",\r
+ javax.swing.JOptionPane.YES_NO_OPTION, javax.swing.JOptionPane.QUESTION_MESSAGE);\r
+\r
+ if(reply == javax.swing.JOptionPane.YES_OPTION)\r
+ new WSWUBlastClient(ap, align, unknownSequences);\r
+ }\r
\r
\r
ap.repaint();\r
* @param out DOCUMENT ME!\r
* @param align DOCUMENT ME!\r
*/\r
- void ReadUniprotFile(File file, AlignmentI align, Vector ids)\r
+ void ReadUniprotFile(File file, Vector ids)\r
{\r
if(!file.exists())\r
return;\r
\r
SequenceI sequence = null;\r
- // String pdb = null;\r
\r
Vector entries = getUniprotEntries(file);\r
\r
for (i = 0; i < iSize; i++)\r
{\r
entry = (UniprotEntry) entries.elementAt(i);\r
- String idmatch = entry.getAccession();\r
- sequence = align.findName(idmatch);\r
+ String idmatch = entry.getAccession().elementAt(0).toString();\r
+ sequence = dataset.findName(idmatch);\r
\r
if (sequence == null)\r
{\r
//Sequence maybe Name, not Accession\r
- idmatch = entry.getName();\r
- sequence = align.findName(idmatch);\r
+ idmatch = entry.getName().elementAt(0).toString();\r
+ sequence = dataset.findName(idmatch);\r
}\r
\r
- if (sequence == null)\r
+ if(sequence!=null)\r
+ ids.remove(sequence.getName());\r
+\r
+ else if (sequence == null && uniprotFlag)\r
{\r
+ sequence = dataset.findName("UniProt/Swiss-Prot|"+entry.getAccession().elementAt(0)+"|"+idmatch);\r
+ ids.remove(idmatch);\r
+ }\r
+\r
+ if(sequence ==null)\r
+ {\r
+ System.out.println(idmatch+" not found");\r
continue;\r
}\r
\r
- ids.remove(sequence.getName());\r
- unknownSequences.remove(sequence.getName());\r
+ unknownSequences.remove(sequence);\r
\r
String nonGapped = AlignSeq.extractGaps("-. ", sequence.getSequence());\r
\r
\r
if (absStart == -1)\r
{\r
- unknownSequences.add(sequence.getName());\r
- sbuffer.append(sequence.getName() +\r
- " SEQUENCE NOT %100 MATCH \n");\r
+ // Is UniprotSequence contained in dataset sequence?\r
+ absStart = nonGapped.toString().indexOf(entry.getUniprotSequence().getContent());\r
+ if(absStart == -1)\r
+ {\r
+ // No match in either direction: put the sequence object itself back on\r
+ // the unknown list. The list holds sequence objects (not names)\r
+ // everywhere else in this class, so adding the name would mix types.\r
+ unknownSequences.add(sequence);\r
+ sbuffer.append(sequence.getName() +\r
+ " SEQUENCE NOT %100 MATCH \n");\r
+\r
+ continue;\r
+ }\r
+ else\r
+ {\r
+ if(entry.getFeature()!=null)\r
+ {\r
+ Enumeration e = entry.getFeature().elements();\r
+ while (e.hasMoreElements())\r
+ {\r
+ SequenceFeature sf = (SequenceFeature) e.nextElement();\r
+ sf.setBegin(sf.getBegin() + absStart + 1);\r
+ sf.setEnd(sf.getEnd() + absStart + 1);\r
+ }\r
+ }\r
+\r
+ sbuffer.append(sequence.getName() +\r
+ " HAS "+absStart+" PREFIXED RESIDUES COMPARED TO UNIPROT - ANY SEQUENCE FEATURES"\r
+ +" HAVE BEEN ADJUSTED ACCORDINGLY \n");\r
+ absStart = 0;\r
+ }\r
\r
- continue;\r
}\r
\r
int absEnd = absStart + nonGapped.toString().length();\r
absStart += 1;\r
\r
- if ( (absStart != sequence.getStart()) ||\r
- (absEnd != sequence.getEnd()))\r
+ Enumeration e = entry.getDbReference().elements();\r
+ Vector onlyPdbEntries = new Vector();\r
+ while(e.hasMoreElements())\r
{\r
- sbuffer.append("Updated: " + sequence.getName() + " " +\r
- sequence.getStart() + "/" + sequence.getEnd() +\r
- " to " + absStart + "/" + absEnd + "\n");\r
+ PDBEntry pdb = (PDBEntry)e.nextElement();\r
+ if(!pdb.getType().equals("PDB"))\r
+ continue;\r
+\r
+ onlyPdbEntries.addElement(pdb);\r
}\r
\r
- sequence.setSequenceFeatures(entry.getFeatures());\r
+ sequence.setPDBId(onlyPdbEntries);\r
+ sequence.setSequenceFeatures(entry.getFeature());\r
sequence.setStart(absStart);\r
sequence.setEnd(absEnd);\r
+\r
+\r
+ int n = 0;\r
+ SequenceI seq2;\r
+ while (n < align.getHeight())\r
+ {\r
+ //This loop enables multiple sequences with the same\r
+ //id to have features added and seq limits updated\r
+ seq2 = align.getSequenceAt(n);\r
+ if (seq2.getName().equals(idmatch))\r
+ {\r
+\r
+ nonGapped = AlignSeq.extractGaps("-. ", seq2.getSequence());\r
+\r
+ absStart = sequence.getSequence().indexOf(nonGapped);\r
+ absEnd = absStart + nonGapped.toString().length() - 1;\r
+\r
+ // These are the viewed alignment's sequences\r
+ // No need to tell the user of the dataset updates\r
+ if ( (seq2.getStart() != absStart+sequence.getStart())\r
+ || (seq2.getEnd() != absEnd+sequence.getStart()))\r
+ {\r
+ sbuffer.append("Updated: " + seq2.getName() + " " +\r
+ seq2.getStart() + "/" + seq2.getEnd() +\r
+ " to " + (absStart + sequence.getStart()) + "/" +\r
+ (absEnd + sequence.getStart()) + "\n");\r
+\r
+ seq2.setStart(absStart + sequence.getStart());\r
+ seq2.setEnd(absEnd + sequence.getStart());\r
+ }\r
+ }\r
+\r
+ n++;\r
+ }\r
}\r
}\r
}\r