4 package jalview.ws.dbsources;
7 import java.io.FileReader;
8 import java.io.IOException;
9 import java.util.Enumeration;
10 import java.util.Hashtable;
11 import java.util.Vector;
13 import org.exolab.castor.xml.Unmarshaller;
15 import com.stevesoft.pat.Regex;
17 import jalview.datamodel.Alignment;
18 import jalview.datamodel.AlignmentI;
19 import jalview.datamodel.DBRefEntry;
20 import jalview.datamodel.DBRefSource;
21 import jalview.datamodel.PDBEntry;
22 import jalview.datamodel.SequenceFeature;
23 import jalview.datamodel.SequenceI;
24 import jalview.datamodel.UniprotEntry;
25 import jalview.datamodel.UniprotFile;
26 import jalview.io.FormatAdapter;
27 import jalview.io.IdentifyFile;
28 import jalview.ws.DBRefFetcher;
29 import jalview.ws.ebi.EBIFetchClient;
30 import jalview.ws.seqfetcher.DbSourceProxy;
31 import jalview.ws.seqfetcher.DbSourceProxyImpl;
37 public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
41 addDbSourceProperty(DBRefSource.SEQDB, DBRefSource.SEQDB);
42 addDbSourceProperty(DBRefSource.PROTSEQDB);
43 // addDbSourceProperty(DBRefSource.MULTIACC, new Integer(50));
49 * @see jalview.ws.DbSourceProxy#getAccessionSeparator()
51 public String getAccessionSeparator()
59 * @see jalview.ws.DbSourceProxy#getAccessionValidator()
61 public Regex getAccessionValidator()
69 * @see jalview.ws.DbSourceProxy#getDbSource()
71 public String getDbSource()
73 return DBRefSource.UNIPROT;
79 * @see jalview.ws.DbSourceProxy#getDbVersion()
81 public String getDbVersion()
83 return "0"; // we really don't know what version we're on.
86 private EBIFetchClient ebi = null;
88 public Vector getUniprotEntries(File file)
90 UniprotFile uni = new UniprotFile();
93 // 1. Load the mapping information from the file
94 org.exolab.castor.mapping.Mapping map = new org.exolab.castor.mapping.Mapping(uni.getClass().getClassLoader());
95 java.net.URL url = getClass().getResource("/uniprot_mapping.xml");
98 // 2. Unmarshal the data
99 Unmarshaller unmar = new Unmarshaller(uni);
100 unmar.setIgnoreExtraElements(true);
101 unmar.setMapping(map);
103 uni = (UniprotFile) unmar.unmarshal(new FileReader(file));
107 System.out.println("Error getUniprotEntries() " + e);
110 return uni.getUniprotEntries();
116 * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
118 public AlignmentI getSequenceRecords(String queries) throws Exception
124 ebi = new EBIFetchClient();
125 StringBuffer result=new StringBuffer();
126 // uniprotxml parameter required since December 2007
127 File file = ebi.fetchDataAsFile("uniprot:" + queries, "uniprotxml", null);
128 Vector entries = getUniprotEntries(file);
132 // First, make the new sequences
133 Enumeration en = entries.elements();
134 while (en.hasMoreElements())
136 UniprotEntry entry = (UniprotEntry) en.nextElement();
138 StringBuffer name = new StringBuffer(">UniProt/Swiss-Prot");
139 Enumeration en2 = entry.getAccession().elements();
140 while (en2.hasMoreElements())
143 name.append(en2.nextElement());
145 en2 = entry.getName().elements();
146 while (en2.hasMoreElements())
149 name.append(en2.nextElement());
152 if (entry.getProtein()!=null && entry.getProtein().getName()!=null)
154 for (int nm=0,nmSize=entry.getProtein().getName().size(); nm<nmSize;nm++)
156 name.append(" " + entry.getProtein().getName().elementAt(nm));
160 result.append(name + "\n"
161 + entry.getUniprotSequence().getContent() + "\n");
165 // Then read in the features and apply them to the dataset
166 al = parseResult(result.toString());
169 // Decorate the alignment with database entries.
170 addUniprotXrefs(al, entries);
177 } catch (Exception e)
185 * add an ordered set of UniprotEntry objects to an ordered set of sequences.
188 * a sequence of n sequences
190 * a sequence of n uniprot entries to be analysed.
192 public void addUniprotXrefs(Alignment al, Vector entries)
194 for (int i = 0; i < entries.size(); i++)
196 UniprotEntry entry = (UniprotEntry) entries.elementAt(i);
197 Enumeration e = entry.getDbReference().elements();
198 Vector onlyPdbEntries = new Vector();
199 Vector dbxrefs = new Vector();
200 while (e.hasMoreElements())
202 PDBEntry pdb = (PDBEntry) e.nextElement();
203 DBRefEntry dbr = new DBRefEntry();
204 dbr.setSource(pdb.getType());
205 dbr.setAccessionId(pdb.getId());
206 dbr.setVersion(DBRefSource.UNIPROT+":"+getDbVersion());
207 dbxrefs.addElement(dbr);
208 if (!pdb.getType().equals("PDB"))
213 onlyPdbEntries.addElement(pdb);
215 SequenceI sq = al.getSequenceAt(i);
216 while (sq.getDatasetSequence()!=null)
218 sq = sq.getDatasetSequence();
221 Enumeration en2 = entry.getAccession().elements();
222 while (en2.hasMoreElements())
224 // we always add as uniprot if we retrieved from uniprot or uniprot name
226 new DBRefEntry(DBRefSource.UNIPROT, getDbVersion(), en2.nextElement()
229 en2 = dbxrefs.elements();
230 while (en2.hasMoreElements())
232 // then add the cross-references built from this entry's dbReference elements
233 sq.addDBRef((DBRefEntry) en2.nextElement());
236 sq.setPDBId(onlyPdbEntries);
237 if (entry.getFeature() != null)
239 e = entry.getFeature().elements();
240 while (e.hasMoreElements())
242 SequenceFeature sf = (SequenceFeature) e.nextElement();
243 sf.setFeatureGroup("Uniprot");
244 sq.addSequenceFeature(sf);
253 * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
255 public boolean isValidReference(String accession)
260 * return LDHA_CHICK uniprot entry
262 public String getTestQuery()
266 public String getDbName()
268 return "Uniprot"; // getDbSource();