2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.4.0.b2)
3 * Copyright (C) 2009 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19 package jalview.ws.dbsources;
22 import java.io.FileReader;
23 import java.io.IOException;
24 import java.util.Enumeration;
25 import java.util.Hashtable;
26 import java.util.Vector;
28 import org.exolab.castor.xml.Unmarshaller;
30 import com.stevesoft.pat.Regex;
32 import jalview.datamodel.Alignment;
33 import jalview.datamodel.AlignmentI;
34 import jalview.datamodel.DBRefEntry;
35 import jalview.datamodel.DBRefSource;
36 import jalview.datamodel.PDBEntry;
37 import jalview.datamodel.SequenceFeature;
38 import jalview.datamodel.SequenceI;
39 import jalview.datamodel.UniprotEntry;
40 import jalview.datamodel.UniprotFile;
41 import jalview.io.FormatAdapter;
42 import jalview.io.IdentifyFile;
43 import jalview.ws.DBRefFetcher;
44 import jalview.ws.ebi.EBIFetchClient;
45 import jalview.ws.seqfetcher.DbSourceProxy;
46 import jalview.ws.seqfetcher.DbSourceProxyImpl;
// Sequence database source proxy for UniProt: extends DbSourceProxyImpl and
// implements the DbSourceProxy interface.
52 public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
// Source properties registered for this proxy: SEQDB (passed for both
// arguments) and PROTSEQDB.
57 addDbSourceProperty(DBRefSource.SEQDB, DBRefSource.SEQDB);
58 addDbSourceProperty(DBRefSource.PROTSEQDB);
// The MULTIACC property (with value 50) is deliberately left disabled.
59 // addDbSourceProperty(DBRefSource.MULTIACC, new Integer(50));
65 * @see jalview.ws.seqfetcher.DbSourceProxy#getAccessionSeparator()
67 public String getAccessionSeparator()
75 * @see jalview.ws.seqfetcher.DbSourceProxy#getAccessionValidator()
77 public Regex getAccessionValidator()
85 * @see jalview.ws.seqfetcher.DbSourceProxy#getDbSource()
// Returns the database source identifier of this proxy: the
// DBRefSource.UNIPROT constant.
87 public String getDbSource()
89 return DBRefSource.UNIPROT;
95 * @see jalview.ws.seqfetcher.DbSourceProxy#getDbVersion()
// Returns the database version string. The constant "0" is a placeholder
// because the actual release is not known here. addUniprotXrefs embeds this
// value in the version of every cross-reference it creates.
97 public String getDbVersion()
99 return "0"; // we really don't know what version we're on.
// EBI fetch client; getSequenceRecords assigns a fresh instance before fetching.
102 private EBIFetchClient ebi = null;
// Unmarshals the given UniProt XML file into a UniprotFile with Castor, using
// the mapping stored in the classpath resource /uniprot_mapping.xml, and
// returns that object's entry list.
//
// @param file the file to read
// @return the Vector obtained from UniprotFile.getUniprotEntries()
104 public Vector getUniprotEntries(File file)
// Starts as an empty UniprotFile; replaced by the unmarshalled result below.
106 UniprotFile uni = new UniprotFile();
109 // 1. Load the mapping information from the file
// The mapping is created with the class loader that loaded UniprotFile.
110 org.exolab.castor.mapping.Mapping map = new org.exolab.castor.mapping.Mapping(
111 uni.getClass().getClassLoader());
112 java.net.URL url = getClass().getResource("/uniprot_mapping.xml");
113 map.loadMapping(url);
115 // 2. Unmarshal the data
116 Unmarshaller unmar = new Unmarshaller(uni);
// Presumably lets XML elements that have no mapping be ignored instead of
// failing the unmarshal - confirm against the Castor documentation.
117 unmar.setIgnoreExtraElements(true);
118 unmar.setMapping(map);
// NOTE(review): the FileReader is created inline and no reference is kept, so
// the visible code never closes it; it also decodes the file with the platform
// default charset.
120 uni = (UniprotFile) unmar.unmarshal(new FileReader(file));
121 } catch (Exception e)
// The visible handler only reports the failure on stdout; no rethrow is visible.
123 System.out.println("Error getUniprotEntries() " + e);
// NOTE(review): if unmarshalling failed, `uni` is presumably still the empty
// instance created at the top, so callers get whatever an empty UniprotFile
// returns here - confirm.
126 return uni.getUniprotEntries();
132 * @see jalview.ws.seqfetcher.DbSourceProxy#getSequenceRecords(java.lang.String)
// Fetches the UniProt record(s) for the given query through EBIFetchClient,
// converts every returned entry into a FASTA-style record, parses the
// accumulated text with parseResult and decorates the resulting alignment via
// addUniprotXrefs.
//
// NOTE(review): several statements of this method (the declaration of `al`,
// the final argument of fetchDataAsFile, the return statement and the body of
// the catch clause) are not visible in this listing - verify the notes below
// against the complete file.
//
// @param queries appended to the "uniprot:" prefix to form the fetch query
// @throws Exception as declared by the signature
134 public AlignmentI getSequenceRecords(String queries) throws Exception
140 ebi = new EBIFetchClient();
// Accumulates the text of all records before it is handed to parseResult.
141 StringBuffer result = new StringBuffer();
142 // uniprotxml parameter required since december 2007
143 File file = ebi.fetchDataAsFile("uniprot:" + queries, "uniprotxml",
145 Vector entries = getUniprotEntries(file);
149 // First, make the new sequences
150 Enumeration en = entries.elements();
151 while (en.hasMoreElements())
153 UniprotEntry entry = (UniprotEntry) en.nextElement();
// Header line of the record; always starts with ">UniProt/Swiss-Prot".
155 StringBuffer name = new StringBuffer(">UniProt/Swiss-Prot");
// Every accession of the entry is appended to the header ...
156 Enumeration en2 = entry.getAccession().elements();
157 while (en2.hasMoreElements())
160 name.append(en2.nextElement());
// ... followed by every entry name.
162 en2 = entry.getName().elements();
163 while (en2.hasMoreElements())
166 name.append(en2.nextElement());
// Protein names are optional; when present each one is appended after a space.
169 if (entry.getProtein() != null
170 && entry.getProtein().getName() != null)
172 for (int nm = 0, nmSize = entry.getProtein().getName().size(); nm < nmSize; nm++)
174 name.append(" " + entry.getProtein().getName().elementAt(nm));
// One record = header line, newline, sequence content, newline.
178 result.append(name + "\n"
179 + entry.getUniprotSequence().getContent() + "\n");
183 // Then read in the features and apply them to the dataset
184 al = parseResult(result.toString());
187 // Decorate the alignment with database entries.
// addUniprotXrefs pairs entry i with alignment sequence i, so this relies on
// parseResult keeping the record order.
188 addUniprotXrefs(al, entries);
197 } catch (Exception e)
205 * add an ordered set of UniprotEntry objects to an ordered set of sequences.
208 * a sequence of n sequences
210 * a sequence of n uniprot entries to be analysed.
// Annotates the sequences of an alignment with data from UniProt entries.
// Entry i is applied to the sequence at alignment position i: its accessions
// and database cross-references become DBRefEntry objects, its PDB entries
// are stored with setPDBId and its features are added under the "Uniprot"
// feature group. All of this is attached to the root dataset sequence.
//
// @param al      alignment whose sequence i corresponds to entries[i]
// @param entries Vector of UniprotEntry objects, in the same order as al
212 public void addUniprotXrefs(Alignment al, Vector entries)
214 for (int i = 0; i < entries.size(); i++)
216 UniprotEntry entry = (UniprotEntry) entries.elementAt(i);
217 Enumeration e = entry.getDbReference().elements();
// Fresh collections per entry: PDB entries for setPDBId, and every
// cross-reference converted to a DBRefEntry.
218 Vector onlyPdbEntries = new Vector();
219 Vector dbxrefs = new Vector();
220 while (e.hasMoreElements())
// Each database reference of the entry is handled as a PDBEntry object.
222 PDBEntry pdb = (PDBEntry) e.nextElement();
223 DBRefEntry dbr = new DBRefEntry();
224 dbr.setSource(pdb.getType());
225 dbr.setAccessionId(pdb.getId());
// The version records where the reference came from: "<UNIPROT>:<db version>".
226 dbr.setVersion(DBRefSource.UNIPROT + ":" + getDbVersion());
227 dbxrefs.addElement(dbr);
// NOTE(review): the body of this condition is not visible in this listing;
// presumably non-PDB references skip the addElement below so that only PDB
// entries are collected - confirm.
228 if (!pdb.getType().equals("PDB"))
233 onlyPdbEntries.addElement(pdb);
// Follow the dataset-sequence chain so annotations land on the root sequence.
235 SequenceI sq = al.getSequenceAt(i);
236 while (sq.getDatasetSequence() != null)
238 sq = sq.getDatasetSequence();
241 Enumeration en2 = entry.getAccession().elements();
242 while (en2.hasMoreElements())
244 // we always add as uniprot if we retrieved from uniprot or uniprot name
245 sq.addDBRef(new DBRefEntry(DBRefSource.UNIPROT, getDbVersion(), en2
246 .nextElement().toString()));
// Next, the cross-references collected above are added, each keeping the
// source taken from its own type.
248 en2 = dbxrefs.elements();
249 while (en2.hasMoreElements())
251 // we always add as uniprot if we retrieved from uniprot or uniprot name
252 sq.addDBRef((DBRefEntry) en2.nextElement());
255 sq.setPDBId(onlyPdbEntries);
// Features are optional; each is tagged with the "Uniprot" group (the feature
// object itself is modified) before being added to the sequence.
256 if (entry.getFeature() != null)
258 e = entry.getFeature().elements();
259 while (e.hasMoreElements())
261 SequenceFeature sf = (SequenceFeature) e.nextElement();
262 sf.setFeatureGroup("Uniprot");
263 sq.addSequenceFeature(sf);
272 * @see jalview.ws.seqfetcher.DbSourceProxy#isValidReference(java.lang.String)
274 public boolean isValidReference(String accession)
280 * return LDHA_CHICK uniprot entry
282 public String getTestQuery()
// Returns the name of this source as the literal "Uniprot"; returning the
// getDbSource() value instead is left commented out.
287 public String getDbName()
289 return "Uniprot"; // getDbSource();