2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ws.dbsources;
24 import java.io.FileReader;
25 import java.io.Reader;
26 import java.util.Vector;
28 import org.exolab.castor.xml.Unmarshaller;
30 import com.stevesoft.pat.Regex;
32 import jalview.datamodel.Alignment;
33 import jalview.datamodel.AlignmentI;
34 import jalview.datamodel.DBRefEntry;
35 import jalview.datamodel.DBRefSource;
36 import jalview.datamodel.PDBEntry;
37 import jalview.datamodel.SequenceFeature;
38 import jalview.datamodel.SequenceI;
39 import jalview.datamodel.UniprotEntry;
40 import jalview.datamodel.UniprotFile;
41 import jalview.ws.ebi.EBIFetchClient;
42 import jalview.ws.seqfetcher.DbSourceProxy;
43 import jalview.ws.seqfetcher.DbSourceProxyImpl;
49 public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
// Separator written before each accession id and each entry name in the
// FASTA-style header assembled by constructSequenceFastaHeader.
52 private static final String BAR_DELIMITER = "|";
// Line terminator appended after the header when getSequenceRecords builds
// its text buffer.
54 private static final String NEWLINE = "\n";
// Castor mapping; a static field, assigned inside getUniprotEntries.
56 private static org.exolab.castor.mapping.Mapping map;
// NOTE(review): the declaration enclosing these statements is not visible in
// this extract - presumably the class constructor; confirm against the full file.
// Registers two source properties for this proxy: SEQDB (key and value are
// the same constant) and PROTSEQDB.
64 addDbSourceProperty(DBRefSource.SEQDB, DBRefSource.SEQDB);
65 addDbSourceProperty(DBRefSource.PROTSEQDB);
// The MULTIACC registration below is commented out, so it is not applied.
66 // addDbSourceProperty(DBRefSource.MULTIACC, new Integer(50));
72 * @see jalview.ws.seqfetcher.DbSourceProxy#getAccessionSeparator()
// Reports the separator used between accessions in a query for this source.
// NOTE(review): only the signature survives in this extract; the body is not
// visible, so the value actually returned cannot be stated here.
74 public String getAccessionSeparator()
82 * @see jalview.ws.seqfetcher.DbSourceProxy#getAccessionValidator()
84 public Regex getAccessionValidator()
86 return new Regex("([A-Z]+[0-9]+[A-Z0-9]+|[A-Z0-9]+_[A-Z0-9]+)");
92 * @see jalview.ws.seqfetcher.DbSourceProxy#getDbSource()
94 public String getDbSource()
96 return DBRefSource.UNIPROT;
102 * @see jalview.ws.seqfetcher.DbSourceProxy#getDbVersion()
104 public String getDbVersion()
106 return "0"; // we really don't know what version we're on.
110 * Reads a file containing the reply to the EBI Fetch Uniprot data query,
111 * unmarshals it to a UniprotFile object, and returns the list of UniprotEntry
112 * data models (mapped from <entry> elements)
// Unmarshals the XML available from fileReader into a UniprotFile with Castor
// and returns that object's entry list.
//
// fileReader - source of the XML reply; when it is null no unmarshalling is
//              attempted and the entries of the newly constructed UniprotFile
//              are returned instead.
// Returns whatever uni.getUniprotEntries() yields for the resulting object.
//
// NOTE(review): several lines of this method are absent from this extract
// (for example the opening of the try block that pairs with the catch below,
// and the remainder of the Mapping constructor call), so any guard around the
// mapping load cannot be confirmed from here.
117 public Vector<UniprotEntry> getUniprotEntries(Reader fileReader)
119 UniprotFile uni = new UniprotFile();
124 // 1. Load the mapping information from the file
// 'map' is the class's static field, so this assignment is shared by all instances.
125 map = new org.exolab.castor.mapping.Mapping(uni.getClass()
// The mapping definition is looked up as the classpath resource /uniprot_mapping.xml.
127 java.net.URL url = getClass().getResource("/uniprot_mapping.xml");
128 map.loadMapping(url);
131 // 2. Unmarshal the data
132 Unmarshaller unmar = new Unmarshaller(uni);
133 unmar.setIgnoreExtraElements(true);
134 unmar.setMapping(map);
135 if (fileReader != null)
// The unmarshalled object replaces the empty UniprotFile created at the top.
137 uni = (UniprotFile) unmar.unmarshal(fileReader);
139 } catch (Exception e)
// The failure is only printed to standard output; nothing is rethrown in the
// visible lines.
141 System.out.println("Error getUniprotEntries() " + e);
144 return uni.getUniprotEntries();
150 * @see jalview.ws.seqfetcher.DbSourceProxy#getSequenceRecords(java.lang.String)
// Fetches the UniProt XML for the given query through EBIFetchClient, turns
// every returned entry into a FASTA-style record, parses that text with
// parseResult and then attaches cross-references via addUniprotXrefs.
//
// queries - the accession text to fetch; it is upper-cased and any
//           UNIPROT / UNIREF<digits> prefix is removed before the request.
//
// NOTE(review): this extract is missing lines of the method (among them the
// declaration of 'al', the last argument of fetchDataAsFile, the return
// statement and the body of the catch), so the error path and the returned
// value are not documented here.
152 public AlignmentI getSequenceRecords(String queries) throws Exception
// Normalise the query: upper-case it and strip source prefixes such as
// "UNIPROT|", "UNIPROT_", "UNIREF<digits>_" or "UNIREF<digits>|".
157 queries = queries.toUpperCase().replaceAll(
158 "(UNIPROT\\|?|UNIPROT_|UNIREF\\d+_|UNIREF\\d+\\|?)", "");
160 EBIFetchClient ebi = new EBIFetchClient();
161 // uniprotxml parameter required since december 2007
162 // uniprotkb dbname changed introduced december 2008
163 File file = ebi.fetchDataAsFile("uniprotkb:" + queries, "uniprotxml",
// NOTE(review): the FileReader created here is not closed in any visible
// line - confirm against the full file and close it if it is leaked.
165 Vector<UniprotEntry> entries = getUniprotEntries(new FileReader(file));
170 * If Castor binding included sequence@length, we could guesstimate the
171 * size of buffer to hold the alignment
173 StringBuffer result = new StringBuffer(128);
174 // First, make the new sequences
175 for (UniprotEntry entry : entries)
177 StringBuilder name = constructSequenceFastaHeader(entry);
// One record per entry: header line, line break, then the sequence content.
179 result.append(name).append(NEWLINE)
180 .append(entry.getUniprotSequence().getContent())
184 // Then read in the features and apply them to the dataset
185 al = parseResult(result.toString());
188 // Decorate the alignment with database entries.
189 addUniprotXrefs(al, entries);
198 } catch (Exception e)
206 * Construct a Fasta-format sequence header by concatenating the source,
207 * accession id(s) and name(s), delimited by '|', plus any protein names, now
208 * with space rather than bar delimiter
// Assembles the FASTA header text for one entry in a StringBuilder: the
// literal ">UniProt/Swiss-Prot", then each accession id preceded by the bar
// delimiter, then a bar delimiter per entry name, then each protein name
// preceded by a single space.
//
// NOTE(review): the parameter list, the statement that appends each entry
// name 'n', and the return statement are not visible in this extract;
// presumably the method takes the entry and returns 'name' - confirm against
// the full file.
213 public static StringBuilder constructSequenceFastaHeader(
216 StringBuilder name = new StringBuilder(32);
217 name.append(">UniProt/Swiss-Prot");
218 for (String accessionId : entry.getAccession())
220 name.append(BAR_DELIMITER);
221 name.append(accessionId);
223 for (String n : entry.getName())
225 name.append(BAR_DELIMITER);
// Protein names are optional: both the protein object and its name list are
// null-checked before use.
229 if (entry.getProtein() != null
230 && entry.getProtein().getName() != null)
232 for (String nm : entry.getProtein().getName())
234 name.append(" ").append(nm);
241 * Add an ordered set of UniprotEntry objects to an ordered set of sequences.
244 * - al: an alignment holding n sequences
246 * - entries: a list of n uniprot entries to be analysed.
// Annotates the sequences of an alignment with data taken from the UniProt
// entries: entry i is applied to the sequence at index i of 'al'.
//
// al      - alignment whose sequences are decorated; sequences are looked up
//           by the same index as the entry.
// entries - the entries to read accessions, database references and features
//           from.
//
// NOTE(review): some statements are missing from this extract (the final
// argument of the DBRefEntry constructor call and the body of the loop over
// 'dbxrefs'); presumably they pass the accession id and add each dbRef to the
// sequence - confirm against the full file.
248 public void addUniprotXrefs(Alignment al, Vector<UniprotEntry> entries)
250 final String dbVersion = getDbVersion();
252 for (int i = 0; i < entries.size(); i++)
254 UniprotEntry entry = entries.elementAt(i);
255 Vector<PDBEntry> onlyPdbEntries = new Vector<PDBEntry>();
256 Vector<DBRefEntry> dbxrefs = new Vector<DBRefEntry>();
// Every database reference of the entry becomes a DBRefEntry whose version
// is "<UNIPROT source constant>:<dbVersion>"; those of type "PDB" are also
// collected separately.
// NOTE(review): getDbReference() is iterated without a null check, unlike
// getFeature() further down - confirm it can never be null.
258 for (PDBEntry pdb : entry.getDbReference())
260 DBRefEntry dbr = new DBRefEntry();
261 dbr.setSource(pdb.getType());
262 dbr.setAccessionId(pdb.getId());
263 dbr.setVersion(DBRefSource.UNIPROT + ":" + dbVersion);
264 dbxrefs.addElement(dbr);
265 if ("PDB".equals(pdb.getType()))
267 onlyPdbEntries.addElement(pdb);
// Follow the dataset-sequence chain until a sequence without a dataset
// sequence is reached; the annotations are applied to that one.
271 SequenceI sq = al.getSequenceAt(i);
272 while (sq.getDatasetSequence() != null)
274 sq = sq.getDatasetSequence();
277 for (String accessionId : entry.getAccession())
280 * add as uniprot whether retrieved from uniprot or uniprot_name
282 sq.addDBRef(new DBRefEntry(DBRefSource.UNIPROT, dbVersion,
286 for (DBRefEntry dbRef : dbxrefs)
290 sq.setPDBId(onlyPdbEntries);
// Features are optional; each one is tagged with the group "Uniprot" before
// it is added to the sequence.
291 if (entry.getFeature() != null)
293 for (SequenceFeature sf : entry.getFeature())
295 sf.setFeatureGroup("Uniprot");
296 sq.addSequenceFeature(sf);
305 * @see jalview.ws.seqfetcher.DbSourceProxy#isValidReference(java.lang.String)
307 public boolean isValidReference(String accession)
309 // TODO: make the following a standard validator
310 return (accession == null || accession.length() < 2) ? false
311 : getAccessionValidator().search(accession);
315 * return LDHA_CHICK uniprot entry
// Supplies a query string for exercising this source.
// NOTE(review): only the signature survives in this extract; the body is not
// visible, so the literal returned cannot be stated here.
317 public String getTestQuery()
322 public String getDbName()
324 return "Uniprot"; // getDbSource();