2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
3 * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
18 package jalview.ws.dbsources;
21 import java.io.FileReader;
22 import java.io.IOException;
23 import java.util.Enumeration;
24 import java.util.Hashtable;
25 import java.util.Vector;
27 import org.exolab.castor.xml.Unmarshaller;
29 import com.stevesoft.pat.Regex;
31 import jalview.datamodel.Alignment;
32 import jalview.datamodel.AlignmentI;
33 import jalview.datamodel.DBRefEntry;
34 import jalview.datamodel.DBRefSource;
35 import jalview.datamodel.PDBEntry;
36 import jalview.datamodel.SequenceFeature;
37 import jalview.datamodel.SequenceI;
38 import jalview.datamodel.UniprotEntry;
39 import jalview.datamodel.UniprotFile;
40 import jalview.io.FormatAdapter;
41 import jalview.io.IdentifyFile;
42 import jalview.ws.DBRefFetcher;
43 import jalview.ws.ebi.EBIFetchClient;
44 import jalview.ws.seqfetcher.DbSourceProxy;
45 import jalview.ws.seqfetcher.DbSourceProxyImpl;
// Database source for UniProt: extends DbSourceProxyImpl and implements DbSourceProxy.
51 public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
// Registers the SEQDB and PROTSEQDB source properties.
// NOTE(review): the signature of the constructor/initialiser enclosing these calls is not
// visible here - confirm where they run.
56 addDbSourceProperty(DBRefSource.SEQDB, DBRefSource.SEQDB);
57 addDbSourceProperty(DBRefSource.PROTSEQDB);
// The MULTIACC property below is commented out, so it is not registered.
58 // addDbSourceProperty(DBRefSource.MULTIACC, new Integer(50));
64 * @see jalview.ws.DbSourceProxy#getAccessionSeparator()
66 public String getAccessionSeparator()
74 * @see jalview.ws.DbSourceProxy#getAccessionValidator()
// Builds the Regex that isValidReference() applies to candidate accessions.
// The pattern accepts either of two upper-case forms:
//   - one or more letters, then one or more digits, then one or more letters/digits, or
//   - two letter/digit runs joined by an underscore.
// The pattern has no ^ or $ anchors, and a new Regex object is created on every call.
76 public Regex getAccessionValidator()
78 return new Regex("([A-Z]+[0-9]+[A-Z0-9]+|[A-Z0-9]+_[A-Z0-9]+)");
84 * @see jalview.ws.DbSourceProxy#getDbSource()
// Returns the source identifier for this proxy: always DBRefSource.UNIPROT.
86 public String getDbSource()
88 return DBRefSource.UNIPROT;
94 * @see jalview.ws.DbSourceProxy#getDbVersion()
// Returns the database version string. This is a fixed placeholder ("0") because the
// version actually being served is not known; addUniprotXrefs() embeds it in the
// database references it creates.
96 public String getDbVersion()
98 return "0"; // we really don't know what version we're on.
// Fetch client; a new EBIFetchClient is assigned in getSequenceRecords().
101 private EBIFetchClient ebi = null;
// Castor mapping assigned in getUniprotEntries(). It is static, so one mapping object is
// shared by every Uniprot instance.
103 private static org.exolab.castor.mapping.Mapping map;
// Unmarshals a UniProt XML file into UniprotEntry objects using Castor.
//
// file    - the file to read; it is opened with a FileReader.
// returns - uni.getUniprotEntries() of the UniprotFile held in 'uni' when the method ends.
//
// Any exception raised while loading the mapping or unmarshalling is caught and only
// printed to System.out, so callers are not told about a failure.
// NOTE(review): after a failure 'uni' is still the empty UniprotFile created on the first
// line - confirm what its getUniprotEntries() returns in that case (possibly null).
105 public Vector getUniprotEntries(File file)
107 UniprotFile uni = new UniprotFile();
112 // 1. Load the mapping information from the file
// The mapping definition is read from the classpath resource /uniprot_mapping.xml.
// NOTE(review): 'map' is a static field written here; any guard around this load
// (for example a null check) is not visible - confirm it is loaded once and safely.
113 map = new org.exolab.castor.mapping.Mapping(uni.getClass()
115 java.net.URL url = getClass().getResource("/uniprot_mapping.xml");
116 map.loadMapping(url);
119 // 2. Unmarshal the data
120 Unmarshaller unmar = new Unmarshaller(uni);
// Elements in the XML that the mapping does not describe are ignored.
121 unmar.setIgnoreExtraElements(true);
122 unmar.setMapping(map);
// NOTE(review): the FileReader is created inline and no close() call is visible -
// possible file-handle leak; it also decodes with the platform default charset.
125 uni = (UniprotFile) unmar.unmarshal(new FileReader(file));
127 } catch (Exception e)
129 System.out.println("Error getUniprotEntries() " + e);
132 return uni.getUniprotEntries();
138 * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[])
// Retrieves UniProt records for a query and turns them into an alignment.
//
// Visible steps:
//   1. normalise the query string,
//   2. fetch the records as a file through EBIFetchClient ("uniprotkb:" + query, "uniprotxml"),
//   3. unmarshal the file with getUniprotEntries(),
//   4. write one '>'-prefixed header line plus one sequence line per entry into a buffer,
//   5. parse that text with parseResult() and decorate the result with addUniprotXrefs().
//
// queries - the accession/ID query text.
// NOTE(review): the declaration of 'al', the return statement and the body of the catch
// handler are not visible here - confirm what is returned and how failures are reported.
140 public AlignmentI getSequenceRecords(String queries) throws Exception
// Upper-case the query, then strip any UNIPROT / UNIPROT| / UNIPROT_ / UNIREFnn /
// UNIREFnn_ / UNIREFnn| prefix text.
// NOTE(review): toUpperCase() without a Locale uses the default locale.
145 queries = queries.toUpperCase().replaceAll(
146 "(UNIPROT\\|?|UNIPROT_|UNIREF\\d+_|UNIREF\\d+\\|?)", "");
148 ebi = new EBIFetchClient();
149 StringBuffer result = new StringBuffer();
150 // uniprotxml parameter required since december 2007
151 // uniprotkb dbname changed introduced december 2008
// NOTE(review): the remaining argument(s) of this call are not visible here.
152 File file = ebi.fetchDataAsFile("uniprotkb:" + queries, "uniprotxml",
154 Vector entries = getUniprotEntries(file);
158 // First, make the new sequences
159 Enumeration en = entries.elements();
160 while (en.hasMoreElements())
162 UniprotEntry entry = (UniprotEntry) en.nextElement();
// Header line: the fixed prefix, then every accession, then every entry name, then each
// protein name preceded by a space.
// NOTE(review): any separator appended between accessions and names is not visible here.
164 StringBuffer name = new StringBuffer(">UniProt/Swiss-Prot");
165 Enumeration en2 = entry.getAccession().elements();
166 while (en2.hasMoreElements())
169 name.append(en2.nextElement());
171 en2 = entry.getName().elements();
172 while (en2.hasMoreElements())
175 name.append(en2.nextElement());
// Protein names are optional; both the protein and its name list are null-checked.
178 if (entry.getProtein() != null
179 && entry.getProtein().getName() != null)
181 for (int nm = 0, nmSize = entry.getProtein().getName().size(); nm < nmSize; nm++)
183 name.append(" " + entry.getProtein().getName().elementAt(nm));
// One record per entry: the header line followed by the sequence content.
187 result.append(name + "\n"
188 + entry.getUniprotSequence().getContent() + "\n");
192 // Then read in the features and apply them to the dataset
193 al = parseResult(result.toString());
196 // Decorate the alignment with database entries.
// Sequences and entries are paired by index inside addUniprotXrefs().
197 addUniprotXrefs(al, entries);
206 } catch (Exception e)
214 * add an ordered set of UniprotEntry objects to an ordered set of sequences.
217 * - a sequence of n sequences
219 * a sequence of n uniprot entries to be analysed.
// Copies database cross-references, PDB ids and sequence features from each UniprotEntry
// onto the sequence at the same index in the alignment.
//
// al      - alignment whose i-th sequence corresponds to the i-th entry.
// entries - Vector of UniprotEntry objects, in the same order as the sequences.
//
// NOTE(review): al.getSequenceAt(i) is called for every entry with no visible size check -
// confirm the alignment always holds at least entries.size() sequences.
221 public void addUniprotXrefs(Alignment al, Vector entries)
223 for (int i = 0; i < entries.size(); i++)
225 UniprotEntry entry = (UniprotEntry) entries.elementAt(i);
226 Enumeration e = entry.getDbReference().elements();
227 Vector onlyPdbEntries = new Vector();
228 Vector dbxrefs = new Vector();
229 while (e.hasMoreElements())
// Each database reference is cast to PDBEntry and mirrored as a DBRefEntry whose source
// is the reference type, whose accession is the reference id, and whose version is
// DBRefSource.UNIPROT + ":" + getDbVersion().
231 PDBEntry pdb = (PDBEntry) e.nextElement();
232 DBRefEntry dbr = new DBRefEntry();
233 dbr.setSource(pdb.getType());
234 dbr.setAccessionId(pdb.getId());
235 dbr.setVersion(DBRefSource.UNIPROT + ":" + getDbVersion());
236 dbxrefs.addElement(dbr);
// NOTE(review): the statement guarded by this test is not visible here; presumably
// non-PDB references are skipped so that only PDB ones reach onlyPdbEntries - confirm.
237 if (!pdb.getType().equals("PDB"))
242 onlyPdbEntries.addElement(pdb);
// Follow the dataset-sequence chain to its end so that the annotations are attached to
// the underlying dataset sequence.
244 SequenceI sq = al.getSequenceAt(i);
245 while (sq.getDatasetSequence() != null)
247 sq = sq.getDatasetSequence();
// One UNIPROT reference is added for each accession of the entry.
250 Enumeration en2 = entry.getAccession().elements();
251 while (en2.hasMoreElements())
253 // we always add as uniprot if we retrieved from uniprot or uniprot name
254 sq.addDBRef(new DBRefEntry(DBRefSource.UNIPROT, getDbVersion(), en2
255 .nextElement().toString()));
// This loop adds the cross-references collected in dbxrefs above.
// NOTE(review): the comment inside this loop duplicates the one in the previous loop and
// does not describe this step.
257 en2 = dbxrefs.elements();
258 while (en2.hasMoreElements())
260 // we always add as uniprot if we retrieved from uniprot or uniprot name
261 sq.addDBRef((DBRefEntry) en2.nextElement());
264 sq.setPDBId(onlyPdbEntries);
// Features are optional; each one is put in the "Uniprot" feature group before being
// added to the sequence.
265 if (entry.getFeature() != null)
267 e = entry.getFeature().elements();
268 while (e.hasMoreElements())
270 SequenceFeature sf = (SequenceFeature) e.nextElement();
271 sf.setFeatureGroup("Uniprot");
272 sq.addSequenceFeature(sf);
281 * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
// Tests whether a string looks like a reference this source can handle.
//
// accession - candidate string; may be null.
// returns   - false when accession is null or shorter than two characters; otherwise the
//             result of getAccessionValidator().search(accession).
// NOTE(review): the validator pattern has no anchors, so search() presumably succeeds when
// the pattern occurs anywhere in the string - confirm against the Regex API.
283 public boolean isValidReference(String accession)
285 // TODO: make the following a standard validator
286 return (accession == null || accession.length() < 2) ? false
287 : getAccessionValidator().search(accession);
291 * return LDHA_CHICK uniprot entry
293 public String getTestQuery()
// Returns the name of this source: the literal "Uniprot". The trailing comment shows
// getDbSource() as an alternative that is not used.
298 public String getDbName()
300 return "Uniprot"; // getDbSource();