- queries = queries.toUpperCase().replaceAll(
- "(UNIPROT\\|?|UNIPROT_|UNIREF\\d+_|UNIREF\\d+\\|?)", "");
- Alignment al = null;
- ebi = new EBIFetchClient();
- StringBuffer result = new StringBuffer();
- // uniprotxml parameter required since december 2007
- // uniprotkb dbname changed introduced december 2008
- File file = ebi.fetchDataAsFile("uniprotkb:" + queries, "uniprotxml",
- null);
- Vector entries = getUniprotEntries(file);
-
- if (entries != null)
+ DBRefEntry dbRef = new DBRefEntry(DBRefSource.UNIPROT,
+ uniprotRecordVersion, accessionId, null, canonical);
+ canonical = false;
+ dbRefs.add(dbRef);
+ }
+
+ /*
+ * add a DBRefEntry for each dbReference element in the XML;
+ * also add a PDBEntry if type="PDB";
+ * also add an EMBLCDS dbref if protein sequence id is given
+ * also add an Ensembl dbref if protein sequence id is given
+ */
+ Vector<PDBEntry> pdbRefs = new Vector<>();
+ for (DbReferenceType dbref : entry.getDbReference())
+ {
+ String type = dbref.getType();
+ DBRefEntry dbr = new DBRefEntry(type,
+ DBRefSource.UNIPROT + ":" + dbVersion, dbref.getId());
+ dbRefs.add(dbr);
+ if ("PDB".equals(type))
+ {
+ pdbRefs.add(new PDBEntry(dbr));
+ }
+ if ("EMBL".equals(type))
+ {
+ /*
+ * e.g. Uniprot accession Q9BXM7 has
+ * <dbReference type="EMBL" id="M19359">
+ * <property type="protein sequence ID" value="AAA40981.1"/>
+ * <property type="molecule type" value="Genomic_DNA"/>
+ * </dbReference>
+ */
+ String cdsId = getProperty(dbref.getProperty(),
+ "protein sequence ID");
+ if (cdsId != null && cdsId.trim().length() > 0)
+ {
+ // remove version
+ String[] vrs = cdsId.split("\\.");
+ String version = vrs.length > 1 ? vrs[1]
+ : DBRefSource.UNIPROT + ":" + uniprotRecordVersion;
+ dbr = new DBRefEntry(DBRefSource.EMBLCDS, version, vrs[0]);
+ // TODO: process VARIANT features to allow EMBLCDS record's product to
+ // match Uniprot
+ dbr.setCanonical(true);
+ dbRefs.add(dbr);
+ }
+ }
+ if (type != null
+ && type.toLowerCase(Locale.ROOT).startsWith("ensembl"))