2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.ws.dbsources;
23 import jalview.datamodel.Alignment;
24 import jalview.datamodel.AlignmentI;
25 import jalview.datamodel.DBRefEntry;
26 import jalview.datamodel.DBRefSource;
27 import jalview.datamodel.PDBEntry;
28 import jalview.datamodel.Sequence;
29 import jalview.datamodel.SequenceFeature;
30 import jalview.datamodel.SequenceI;
31 import jalview.datamodel.UniprotEntry;
32 import jalview.datamodel.UniprotFile;
33 import jalview.ws.ebi.EBIFetchClient;
34 import jalview.ws.seqfetcher.DbSourceProxy;
35 import jalview.ws.seqfetcher.DbSourceProxyImpl;
38 import java.io.FileReader;
39 import java.io.Reader;
40 import java.util.ArrayList;
41 import java.util.Vector;
43 import org.exolab.castor.xml.Unmarshaller;
45 import com.stevesoft.pat.Regex;
// Database source proxy for UniProt: extends DbSourceProxyImpl and implements
// the DbSourceProxy interface.
51 public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
// Separator written before each accession id and entry name when
// getUniprotEntryId builds a sequence id.
54 private static final String BAR_DELIMITER = "|";
// Castor mapping, held statically and therefore shared by all instances; it is
// loaded from /uniprot_mapping.xml and handed to the Unmarshaller in
// getUniprotEntries.
56 private static org.exolab.castor.mapping.Mapping map;
// Registers the SEQDB and PROTSEQDB properties for this source.
// NOTE(review): the declaration enclosing these two calls (presumably the
// constructor) is not visible in this listing - confirm against the full file.
64 addDbSourceProperty(DBRefSource.SEQDB, DBRefSource.SEQDB);
65 addDbSourceProperty(DBRefSource.PROTSEQDB);
71 * @see jalview.ws.seqfetcher.DbSourceProxy#getAccessionSeparator()
// NOTE(review): the body of this method is not visible in this listing, so the
// separator it returns cannot be stated here - confirm against the full file.
74 public String getAccessionSeparator()
82 * @see jalview.ws.DbSourceProxy#getAccessionValidator()
85 public Regex getAccessionValidator()
87 return new Regex("([A-Z]+[0-9]+[A-Z0-9]+|[A-Z0-9]+_[A-Z0-9]+)");
93 * @see jalview.ws.DbSourceProxy#getDbSource()
96 public String getDbSource()
98 return DBRefSource.UNIPROT;
104 * @see jalview.ws.DbSourceProxy#getDbVersion()
107 public String getDbVersion()
109 return "0"; // we really don't know what version we're on.
113 * Reads a file containing the reply to the EBI Fetch Uniprot data query,
114 * unmarshals it to a UniprotFile object, and returns the list of UniprotEntry
115 * data models (mapped from <entry> elements)
// Unmarshals UniProt XML read from fileReader into a UniprotFile, using the
// Castor mapping loaded from the /uniprot_mapping.xml resource, and returns
// that file's UniprotEntry list.
// NOTE(review): short lines are absent from this listing (braces, the 'try'
// that pairs with the catch below, and the tail of the Mapping constructor
// call) - confirm the exact control flow against the full file.
120 public Vector<UniprotEntry> getUniprotEntries(Reader fileReader)
122 UniprotFile uni = new UniprotFile();
127 // 1. Load the mapping information from the file
128 map = new org.exolab.castor.mapping.Mapping(uni.getClass()
130 java.net.URL url = getClass().getResource("/uniprot_mapping.xml");
131 map.loadMapping(url);
134 // 2. Unmarshal the data
135 Unmarshaller unmar = new Unmarshaller(uni);
// Elements in the XML that have no mapping are skipped instead of rejected.
136 unmar.setIgnoreExtraElements(true);
137 unmar.setMapping(map);
// A null reader skips unmarshalling, leaving 'uni' as the freshly constructed
// UniprotFile.
138 if (fileReader != null)
140 uni = (UniprotFile) unmar.unmarshal(fileReader);
// Failures are only reported on standard output; nothing is rethrown here.
142 } catch (Exception e)
144 System.out.println("Error getUniprotEntries() " + e);
// Returns whatever 'uni' holds at this point.
// NOTE(review): after a failure this is presumably the unpopulated
// UniprotFile - confirm what getUniprotEntries() yields in that case.
147 return uni.getUniprotEntries();
153 * @see jalview.ws.seqfetcher.DbSourceProxy#getSequenceRecords(java.lang.String)
// Fetches the UniProt XML for the given query string via EBIFetchClient, parses
// it with getUniprotEntries, converts each entry with uniprotEntryToSequenceI
// and wraps the resulting sequences in an Alignment.
// NOTE(review): several short lines are absent from this listing (braces, the
// 'try' matching the catch below, the last argument of fetchDataAsFile, the
// return statement and the catch body) - confirm against the full file.
156 public AlignmentI getSequenceRecords(String queries) throws Exception
// Upper-cases the query and strips any "UNIPROT" / "UNIREF<digits>" prefix
// together with a following '|' or '_' separator.
161 queries = queries.toUpperCase().replaceAll(
162 "(UNIPROT\\|?|UNIPROT_|UNIREF\\d+_|UNIREF\\d+\\|?)", "");
163 AlignmentI al = null;
164 EBIFetchClient ebi = new EBIFetchClient();
165 // uniprotxml parameter required since december 2007
166 // uniprotkb dbname changed introduced december 2008
167 File file = ebi.fetchDataAsFile("uniprotkb:" + queries, "uniprotxml",
// NOTE(review): the FileReader created below is not closed in any visible line
// and decodes with the platform default charset - confirm whether it is
// released elsewhere.
169 Vector<UniprotEntry> entries = getUniprotEntries(new FileReader(file));
173 ArrayList<SequenceI> seqs = new ArrayList<SequenceI>();
174 for (UniprotEntry entry : entries)
176 seqs.add(uniprotEntryToSequenceI(entry));
178 al = new Alignment(seqs.toArray(new SequenceI[0]));
183 } catch (Exception e)
194 * @return SequenceI instance created from the UniprotEntry instance
// Converts one UniprotEntry into a Sequence: id from getUniprotEntryId,
// description from getUniprotEntryDescription, one UNIPROT DBRefEntry per
// accession, one DBRefEntry per database reference, the PDB-typed references
// as PDB ids, and the entry's features under the "Uniprot" feature group.
// dbRefs is constructed inside this method, so the 'dbRefs != null' half of the
// source-reference test below can never be false; only the size check matters.
// NOTE(review): short lines are absent from this listing (braces, call
// continuations, the statements that add to dbRefs, the return) - confirm
// against the full file.
196 public SequenceI uniprotEntryToSequenceI(UniprotEntry entry){
197 String id = getUniprotEntryId(entry);
198 SequenceI sequence = new Sequence(id, entry.getUniprotSequence()
200 sequence.setDescription(getUniprotEntryDescription(entry));
// The same version string is used for every reference created below.
202 final String dbVersion = getDbVersion();
203 ArrayList<DBRefEntry> dbRefs = new ArrayList<DBRefEntry>();
204 for (String accessionId : entry.getAccession())
206 DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT, dbVersion,
210 sequence.setSourceDBRef((dbRefs != null && dbRefs.size() > 0) ? dbRefs
213 Vector<PDBEntry> onlyPdbEntries = new Vector<PDBEntry>();
// Every database reference becomes a DBRefEntry whose version is
// "<UNIPROT source>:<dbVersion>"; those of type "PDB" are also collected
// separately.
214 for (PDBEntry pdb : entry.getDbReference())
216 DBRefEntry dbr = new DBRefEntry();
217 dbr.setSource(pdb.getType());
218 dbr.setAccessionId(pdb.getId());
219 dbr.setVersion(DBRefSource.UNIPROT + ":" + dbVersion);
221 if ("PDB".equals(pdb.getType()))
223 onlyPdbEntries.addElement(pdb);
227 sequence.setPDBId(onlyPdbEntries);
// Features are optional on the entry; each one is tagged with the "Uniprot"
// group before being attached to the sequence.
228 if (entry.getFeature() != null)
230 for (SequenceFeature sf : entry.getFeature())
232 sf.setFeatureGroup("Uniprot");
233 sequence.addSequenceFeature(sf);
236 sequence.setDBRefs(dbRefs.toArray(new DBRefEntry[0]));
244 * @return protein name(s) delimited by a white space character
246 public static String getUniprotEntryDescription(UniprotEntry entry)
248 StringBuilder desc = new StringBuilder(32);
249 if (entry.getProtein() != null && entry.getProtein().getName() != null)
251 for (String nm : entry.getProtein().getName())
253 desc.append(nm).append(" ");
256 return desc.toString();
263 * @return The accession id(s) and name(s) delimited by '|'.
265 public static String getUniprotEntryId(UniprotEntry entry)
267 StringBuilder name = new StringBuilder(32);
268 name.append("UniProt/Swiss-Prot");
269 for (String accessionId : entry.getAccession())
271 name.append(BAR_DELIMITER);
272 name.append(accessionId);
274 for (String n : entry.getName())
276 name.append(BAR_DELIMITER);
279 return name.toString();
285 * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
288 public boolean isValidReference(String accession)
290 // TODO: make the following a standard validator
291 return (accession == null || accession.length() < 2) ? false
292 : getAccessionValidator().search(accession);
296 * return LDHA_CHICK uniprot entry
// NOTE(review): the return statement is not visible in this listing; going by
// the comment above, the query is expected to identify the LDHA_CHICK UniProt
// entry - confirm against the full file.
299 public String getTestQuery()
305 public String getDbName()
307 return "Uniprot"; // getDbSource();