From 5d88ae6bbf1ea113cefcb43b7918b5baf560a76e Mon Sep 17 00:00:00 2001 From: jprocter Date: Fri, 13 Jul 2007 15:07:20 +0000 Subject: [PATCH] new DBRef management and generalised source and entry retrieval mechanism for references and cross references --- src/jalview/ws/ASequenceFetcher.java | 189 +++++++++++++++++ src/jalview/{io => ws}/DBRefFetcher.java | 8 +- .../{io => ws}/DasSequenceFeatureFetcher.java | 4 +- src/jalview/ws/DbSourceProxy.java | 74 +++++++ src/jalview/ws/DbSourceProxyImpl.java | 96 +++++++++ src/jalview/{io => ws}/EBIFetchClient.java | 2 +- src/jalview/ws/SequenceFetcher.java | 167 +++++++++++++++ .../ws/dbsources/EbiFileRetrievedProxy.java | 46 +++++ src/jalview/ws/dbsources/EmblCdsSouce.java | 62 ++++++ src/jalview/ws/dbsources/EmblSource.java | 92 +++++++++ src/jalview/ws/dbsources/EmblXmlSource.java | 97 +++++++++ src/jalview/ws/dbsources/Pdb.java | 180 ++++++++++++++++ src/jalview/ws/dbsources/Pfam.java | 108 ++++++++++ src/jalview/ws/dbsources/Uniprot.java | 217 ++++++++++++++++++++ 14 files changed, 1336 insertions(+), 6 deletions(-) create mode 100644 src/jalview/ws/ASequenceFetcher.java rename src/jalview/{io => ws}/DBRefFetcher.java (94%) rename src/jalview/{io => ws}/DasSequenceFeatureFetcher.java (96%) mode change 100755 => 100644 create mode 100644 src/jalview/ws/DbSourceProxy.java create mode 100644 src/jalview/ws/DbSourceProxyImpl.java rename src/jalview/{io => ws}/EBIFetchClient.java (99%) mode change 100755 => 100644 create mode 100644 src/jalview/ws/SequenceFetcher.java create mode 100644 src/jalview/ws/dbsources/EbiFileRetrievedProxy.java create mode 100644 src/jalview/ws/dbsources/EmblCdsSouce.java create mode 100644 src/jalview/ws/dbsources/EmblSource.java create mode 100644 src/jalview/ws/dbsources/EmblXmlSource.java create mode 100644 src/jalview/ws/dbsources/Pdb.java create mode 100644 src/jalview/ws/dbsources/Pfam.java create mode 100644 src/jalview/ws/dbsources/Uniprot.java diff --git 
a/src/jalview/ws/ASequenceFetcher.java b/src/jalview/ws/ASequenceFetcher.java new file mode 100644 index 0000000..c6e167b --- /dev/null +++ b/src/jalview/ws/ASequenceFetcher.java @@ -0,0 +1,189 @@ +package jalview.ws; + +import jalview.datamodel.AlignmentI; +import jalview.datamodel.SequenceI; + +import java.util.Enumeration; +import java.util.Hashtable; +import java.util.Vector; + +public class ASequenceFetcher +{ + + /** + * set of databases we can retrieve entries from + */ + protected Hashtable FETCHABLEDBS; + + public ASequenceFetcher() + { + super(); + } + + /** + * get list of supported Databases + * + * @return database source string for each database - only the latest version + * of a source db is bound to each source. + */ + public String[] getSupportedDb() + { + if (FETCHABLEDBS == null) + return null; + String[] sf = new String[FETCHABLEDBS.size()]; + Enumeration e = FETCHABLEDBS.keys(); + int i = 0; + while (e.hasMoreElements()) + { + sf[i++] = (String) e.nextElement(); + } + ; + return sf; + } + + public boolean isFetchable(String source) + { + Enumeration e = FETCHABLEDBS.keys(); + while (e.hasMoreElements()) + { + String db = (String) e.nextElement(); + if (source.compareToIgnoreCase(db) == 0) + return true; + } + jalview.bin.Cache.log.warn("isFetchable doesn't know about '" + source + + "'"); + return false; + } + + public SequenceI[] getSequences(jalview.datamodel.DBRefEntry[] refs) + { + SequenceI[] ret = null; + Vector rseqs = new Vector(); + Hashtable queries = new Hashtable(); + for (int r = 0; r < refs.length; r++) + { + if (!queries.containsKey(refs[r].getSource())) + { + queries.put(refs[r].getSource(), new Vector()); + } + Vector qset = (Vector) queries.get(refs[r].getSource()); + if (!qset.contains(refs[r].getAccessionId())) + { + qset.addElement(refs[r].getAccessionId()); + } + } + Enumeration e = queries.keys(); + while (e.hasMoreElements()) + { + Vector query = null; + String db = null; + try + { + db = (String) e.nextElement(); + 
query = (Vector) queries.get(db); + if (!isFetchable(db)) + throw new Exception( + "Don't know how to fetch from this database :" + db); + DbSourceProxy fetcher = getSourceProxy(db); + boolean doMultiple = fetcher.getAccessionSeparator() != null; // No + // separator + // - no + // Multiple + // Queries + Enumeration qs = query.elements(); + while (qs.hasMoreElements()) + { + StringBuffer qsb = new StringBuffer(); + do + { + qsb.append((String) qs.nextElement()); + if (qs.hasMoreElements() && doMultiple) // and not reached limit for + // multiple queries at one + // time for this source + { + qsb.append(fetcher.getAccessionSeparator()); + } + } while (doMultiple && qs.hasMoreElements()); + + // create a fetcher and go to it + AlignmentI seqset = fetcher.getSequenceRecords(qsb.toString()); + // TODO: Merge alignment together - perhaps + if (seqset != null) + { + SequenceI seqs[] = seqset.getSequencesArray(); + if (seqs != null) + { + for (int is = 0; is < seqs.length; is++) + { + rseqs.addElement(seqs[is]); + seqs[is] = null; + } + } + else + { + if (fetcher.getRawRecords() != null) + { + System.out.println("# Retrieved from " + db + ":" + + qs.toString()); + StringBuffer rrb = fetcher.getRawRecords(); + /* + * for (int rr = 0; rr 10) + { + System.err.println(); + n = 0; + } + } + System.err.println(); + ex.printStackTrace(); + } + } + if (rseqs.size() > 0) + { + ret = new SequenceI[rseqs.size()]; + rseqs.copyInto(ret); + } + return ret; + } + + /** + * Retrieve an instance of the proxy for the given source + * + * @param db + * database source string TODO: add version string/wildcard for + * retrieval of specific DB source/version combinations. + * @return an instance of DbSourceProxy for that db. 
+ */ + public DbSourceProxy getSourceProxy(String db) + { + DbSourceProxy dbs = (DbSourceProxy) FETCHABLEDBS.get(db); + return dbs; + } + +} \ No newline at end of file diff --git a/src/jalview/io/DBRefFetcher.java b/src/jalview/ws/DBRefFetcher.java similarity index 94% rename from src/jalview/io/DBRefFetcher.java rename to src/jalview/ws/DBRefFetcher.java index 6d589f9..fb7eac9 100644 --- a/src/jalview/io/DBRefFetcher.java +++ b/src/jalview/ws/DBRefFetcher.java @@ -16,7 +16,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -package jalview.io; +package jalview.ws; import java.io.*; import java.util.*; @@ -310,7 +310,7 @@ public class DBRefFetcher { sequence = (SequenceI) sequenceMatches.elementAt(m); sequence.addDBRef(new DBRefEntry(DBRefSource.UNIPROT, - "0", + "0", // TODO: VERSION FROM UNIPROT entry.getAccession().elementAt(0). toString())); @@ -366,9 +366,11 @@ public class DBRefFetcher PDBEntry pdb = (PDBEntry) e.nextElement(); if (!pdb.getType().equals(DBRefSource.PDB)) { + DBRefEntry xref = new DBRefEntry(pdb.getType(), DBRefSource.UNIPROT, pdb.getId()); + sequence.addDBRef(xref); continue; } - + sequence.addDBRef(new DBRefEntry(DBRefSource.PDB, "0", pdb.getId())); diff --git a/src/jalview/io/DasSequenceFeatureFetcher.java b/src/jalview/ws/DasSequenceFeatureFetcher.java old mode 100755 new mode 100644 similarity index 96% rename from src/jalview/io/DasSequenceFeatureFetcher.java rename to src/jalview/ws/DasSequenceFeatureFetcher.java index 2f94d64..2d4e73f --- a/src/jalview/io/DasSequenceFeatureFetcher.java +++ b/src/jalview/ws/DasSequenceFeatureFetcher.java @@ -16,7 +16,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -package jalview.io; +package jalview.ws; import java.net.*; import java.util.*; @@ -384,7 +384,7 @@ public class DasSequenceFeatureFetcher 
Cache.log.info("Error in 'experimental' mapping of features. Please try to reproduce and then report info to help@jalview.org."); Cache.log.info("Mapping feature from "+f.getBegin()+" to "+f.getEnd()+" in dbref "+dbref.getAccessionId()+" in "+dbref.getSource()); Cache.log.info("using das Source "+ds.getUrl()); - Cache.log.info(ex); + Cache.log.info("Exception", ex); } if (vf!=null) { diff --git a/src/jalview/ws/DbSourceProxy.java b/src/jalview/ws/DbSourceProxy.java new file mode 100644 index 0000000..8db6116 --- /dev/null +++ b/src/jalview/ws/DbSourceProxy.java @@ -0,0 +1,74 @@ +package jalview.ws; + +import jalview.datamodel.AlignmentI; + +import java.util.Hashtable; + +import com.stevesoft.pat.Regex; +/** + * generic Reference Retrieval interface for a particular database source/version as cited in DBRefEntry. + * TODO: add/define property to describe max number of queries that this source can cope with at once. + * TODO: add/define mechanism for retrieval of Trees and distance matrices from a database (unify with io) + * @author JimP + * + */ +public interface DbSourceProxy +{ + /** + * + * @return source string constant used for this DB source + */ + public String getDbSource(); + /** + * + * @return version string for this database. + */ + public String getDbVersion(); + /** + * Separator between individual accession queries for a database that allows multiple IDs + * to be fetched in a single query. Null implies that only a single ID can be fetched at a time. + * @return string for separating concatenated queries (as individually validated by the accession validator) + */ + public String getAccessionSeparator(); + /** + * Regular expression for checking form of query string understood by this source. + * @return null or a validation regex + */ + public Regex getAccessionValidator(); + /** + * DbSource properties hash - define the capabilities of this source + * Property hash methods defined in DbSourceProxyImpl. 
+ * See constants in jalview.datamodel.DBRefSource for definition of properties. + * @return + */ + public Hashtable getDbSourceProperties(); + /** + * + * @return a test/example query that can be used to validate retrieval and parsing mechanisms + */ + public String getTestQuery(); + /** + * optionally implemented + * @param accession + * @return + */ + public boolean isValidReference(String accession); + /** + * make one or more queries to the database + * and attempt to parse the response into an alignment + * @param queries + * @return null if queries were successful but result was not parsable + * @throws Exception TODO + */ + public AlignmentI getSequenceRecords(String queries) throws Exception; + /** + * + * @return true if a query is currently being made + */ + public boolean queryInProgress(); + /** + * get the raw response from the last set of queries + * @return one or more string buffers for each individual query + */ + public StringBuffer getRawRecords(); +} diff --git a/src/jalview/ws/DbSourceProxyImpl.java b/src/jalview/ws/DbSourceProxyImpl.java new file mode 100644 index 0000000..17040d4 --- /dev/null +++ b/src/jalview/ws/DbSourceProxyImpl.java @@ -0,0 +1,96 @@ +package jalview.ws; + +import jalview.datamodel.Alignment; +import jalview.datamodel.DBRefSource; +import jalview.io.FormatAdapter; +import jalview.io.IdentifyFile; + +import java.util.Hashtable; + +/** + * common methods for implementations of the DbSourceProxy interface. + * + * @author JimP + * + */ +public abstract class DbSourceProxyImpl implements DbSourceProxy +{ + public DbSourceProxyImpl() + { + // default constructor - do nothing probably.
+ } + private Hashtable props=null; + /* (non-Javadoc) + * @see jalview.ws.DbSourceProxy#getDbSourceProperties() + */ + public Hashtable getDbSourceProperties() + { + return props; + } + protected void addDbSourceProperty(Object propname) + { + addDbSourceProperty(propname, propname); + } + + protected void addDbSourceProperty(Object propname, Object propvalue) + { + if (props==null) + { + props = new Hashtable(); + } + props.put(propname, propvalue); + } + boolean queryInProgress=false; + protected StringBuffer results = null; + /* + * (non-Javadoc) + * + * @see jalview.ws.DbSourceProxy#getRawRecords() + */ + public StringBuffer getRawRecords() + { + return results; + } + + /* (non-Javadoc) + * @see jalview.ws.DbSourceProxy#queryInProgress() + */ + public boolean queryInProgress() + { + return queryInProgress; + } + /** + * call to set the queryInProgress flag + * + */ + protected void startQuery() + { + queryInProgress=true; + } + /** + * call to clear the queryInProgress flag + * + */ + protected void stopQuery() + { + queryInProgress=false; + } + + /** + * create an alignment from raw text file... 
+ * @param result + * @return null or a valid alignment + * @throws Exception + */ + protected Alignment parseResult(String result) throws Exception { + Alignment sequences = null; + String format = new IdentifyFile().Identify(result, "Paste"); + if (FormatAdapter.isValidFormat(format)) + { + sequences = new FormatAdapter().readFile(result.toString(), "Paste", + format); + } + return sequences; + } + +} diff --git a/src/jalview/io/EBIFetchClient.java b/src/jalview/ws/EBIFetchClient.java old mode 100755 new mode 100644 similarity index 99% rename from src/jalview/io/EBIFetchClient.java rename to src/jalview/ws/EBIFetchClient.java index 292247f..f691d5c --- a/src/jalview/io/EBIFetchClient.java +++ b/src/jalview/ws/EBIFetchClient.java @@ -16,7 +16,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -package jalview.io; +package jalview.ws; import java.io.*; import java.util.*; diff --git a/src/jalview/ws/SequenceFetcher.java b/src/jalview/ws/SequenceFetcher.java new file mode 100644 index 0000000..e6fc385 --- /dev/null +++ b/src/jalview/ws/SequenceFetcher.java @@ -0,0 +1,167 @@ +package jalview.ws; + +import java.util.Enumeration; +import java.util.Hashtable; +import java.util.Vector; + +import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.DBRefSource; +import jalview.datamodel.SequenceI; + +/** + * prototype of abstract sequence retrieval interface + * + */ +public class SequenceFetcher extends ASequenceFetcher +{ + /** + * Thread safe construction of database proxies TODO: extend to a configurable + * database plugin mechanism where classes are instantiated by reflection and + * queried for their DbRefSource and version association. 
+ * + */ + public SequenceFetcher() + { + FETCHABLEDBS = new Hashtable(); + FETCHABLEDBS.put(DBRefSource.EMBL, + new jalview.ws.dbsources.EmblSource()); + FETCHABLEDBS.put(DBRefSource.EMBLCDS, + new jalview.ws.dbsources.EmblCdsSouce()); + FETCHABLEDBS.put(DBRefSource.UNIPROT, + new jalview.ws.dbsources.Uniprot()); + FETCHABLEDBS.put(DBRefSource.UP_NAME, + new jalview.ws.dbsources.Uniprot()); + FETCHABLEDBS.put(DBRefSource.PDB, new jalview.ws.dbsources.Pdb()); + FETCHABLEDBS.put(DBRefSource.PFAM, new jalview.ws.dbsources.Pfam()); + }; + + public static void main(String[] argv) + { + AlignmentI ds = null; + Vector noProds = new Vector(); + if (argv != null && argv.length > 0) + { + System.out + .println("Ignoring arguments. Future Usage = dbname:query1;query2;..."); + } + SequenceFetcher sfetcher = new SequenceFetcher(); + Enumeration e = sfetcher.FETCHABLEDBS.keys(); + while (e.hasMoreElements()) + { + String db = (String) e.nextElement(); + // skip me + if (db.equals(DBRefSource.PDB)) + continue; + DbSourceProxy sp = sfetcher.getSourceProxy(db); + System.out + .println("" + db + ": retrieving test:" + sp.getTestQuery()); + AlignmentI al = null; + try + { + al = sp.getSequenceRecords(sp.getTestQuery()); + if (al != null && al.getHeight() > 0) + { + boolean dna = sp.getDbSourceProperties().containsKey( + DBRefSource.DNACODINGSEQDB) + || sp.getDbSourceProperties().containsKey( + DBRefSource.DNASEQDB) + || sp.getDbSourceProperties().containsKey( + DBRefSource.CODINGSEQDB); + // try and find products + String types[] = jalview.analysis.CrossRef.findSequenceXrefTypes( + dna, al.getSequencesArray()); + if (types != null) + { + System.out.println("Xref Types for: "+(dna ? "dna" : "prot")); + for (int t = 0; t < types.length; t++) + { + System.out.println("Type: " + types[t]); + SequenceI[] prod = jalview.analysis.CrossRef.findXrefSequences(al + .getSequencesArray(), dna, types[t]).getSequencesArray(); + System.out.println("Found " + + ((prod == null) ? 
"no" : "" + prod.length) + + " products"); + if (prod!=null) + { + for (int p=0; p0) + { + Enumeration ts = noProds.elements(); + while (ts.hasMoreElements()) + + { + Object[] typeSq = (Object[]) ts.nextElement(); + boolean dna = (typeSq.length>1); + AlignmentI al = (AlignmentI) typeSq[0]; + System.out.println("Trying getProducts for "+al.getSequenceAt(0).getDisplayId(true)); + System.out.println("Search DS Xref for: "+(dna ? "dna" : "prot")); + // have a bash at finding the products amongst all the retrieved sequences. + SequenceI[] prod = jalview.analysis.CrossRef.findXrefSequences(al + .getSequencesArray(), dna, null, ds).getSequencesArray(); // note should test rather than throw away codon mapping (if present) + System.out.println("Found " + + ((prod == null) ? "no" : "" + prod.length) + + " products"); + if (prod!=null) + { + for (int p=0; p-1) + { + queries = queries.substring(0, queries.indexOf(".")); + } + return getEmblSequenceRecords(DBRefSource.EMBLCDS, queries); + } + + public boolean isValidReference(String accession) + { + // TODO Auto-generated method stub + return true; + } + /** + * cDNA for LDHA_CHICK swissprot sequence + */ + public String getTestQuery() + { + return "CAA37824"; + } + +} diff --git a/src/jalview/ws/dbsources/EmblSource.java b/src/jalview/ws/dbsources/EmblSource.java new file mode 100644 index 0000000..57818a0 --- /dev/null +++ b/src/jalview/ws/dbsources/EmblSource.java @@ -0,0 +1,92 @@ +/** + * + */ +package jalview.ws.dbsources; + +import java.io.File; +import java.util.Hashtable; +import java.util.Iterator; +import java.util.StringTokenizer; + +import com.stevesoft.pat.Regex; + +import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.DBRefSource; +import jalview.datamodel.SequenceI; +import jalview.datamodel.xdb.embl.EmblEntry; +import jalview.ws.DbSourceProxy; +import jalview.ws.DbSourceProxyImpl; +import jalview.ws.EBIFetchClient; + +/** + * @author JimP + * + */ +public class 
EmblSource extends EmblXmlSource implements DbSourceProxy +{ + + public EmblSource() { + addDbSourceProperty(DBRefSource.DNASEQDB); + addDbSourceProperty(DBRefSource.CODINGSEQDB); + } + + /* (non-Javadoc) + * @see jalview.ws.DbSourceProxy#getAccessionSeparator() + */ + public String getAccessionSeparator() + { + // TODO Auto-generated method stub + return null; + } + + /* (non-Javadoc) + * @see jalview.ws.DbSourceProxy#getAccessionValidator() + */ + public Regex getAccessionValidator() + { + // TODO Auto-generated method stub + return null; + } + + /* (non-Javadoc) + * @see jalview.ws.DbSourceProxy#getDbSource() + */ + public String getDbSource() + { + return DBRefSource.EMBL; + } + /* (non-Javadoc) + * @see jalview.ws.DbSourceProxy#getDbVersion() + */ + public String getDbVersion() + { + // TODO Auto-generated method stub + return "0"; + } + + /* (non-Javadoc) + * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[]) + */ + public AlignmentI getSequenceRecords(String queries) throws Exception + { + return getEmblSequenceRecords(DBRefSource.EMBL, queries); + } + /* (non-Javadoc) + * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String) + */ + public boolean isValidReference(String accession) + { + // TODO Auto-generated method stub + return false; + } + + /** + * return LDHA_CHICK coding gene + */ + public String getTestQuery() + { + return "X53828"; + } + +} diff --git a/src/jalview/ws/dbsources/EmblXmlSource.java b/src/jalview/ws/dbsources/EmblXmlSource.java new file mode 100644 index 0000000..1300c65 --- /dev/null +++ b/src/jalview/ws/dbsources/EmblXmlSource.java @@ -0,0 +1,97 @@ +package jalview.ws.dbsources; + +import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.SequenceI; +import jalview.datamodel.xdb.embl.EmblEntry; +import jalview.ws.EBIFetchClient; + +import java.io.File; +import java.util.Iterator; +import java.util.Vector; + +public abstract class EmblXmlSource extends
EbiFileRetrievedProxy +{ + + /** + * Last properly parsed embl file. + */ + public jalview.datamodel.xdb.embl.EmblFile efile = null; + + public EmblXmlSource() + { + super(); + } + /** + * set this to false to *not* add protein products to alignment dataset. + */ + public boolean getProteinProducts=false; + /** + * retrieve and parse an emblxml file + * @param emprefx either EMBL or EMBLCDS strings are allowed - anything else will not retrieve emblxml + * @param query + * @return + * @throws Exception + */ + public AlignmentI getEmblSequenceRecords(String emprefx, String query) throws Exception + { + startQuery(); + SequenceI seqs[] = null; + Vector alseq = new Vector(); // the sequences that will actually be presented in the alignment + StringBuffer result = new StringBuffer(); + EBIFetchClient dbFetch = new EBIFetchClient(); + File reply; + try { + reply = dbFetch.fetchDataAsFile( + emprefx.toLowerCase() + ":" + query.trim(), + "emblxml",null); + } + catch (Exception e) + { + stopQuery(); + throw new Exception("EBI EMBL XML retrieval failed on "+emprefx.toLowerCase()+":"+query.trim(),e); + } + if (reply != null && reply.exists()) + { + file = reply.getAbsolutePath(); + efile = jalview.datamodel.xdb.embl.EmblFile.getEmblFile(reply); + } + if (efile!=null) { + for (Iterator i=efile.getEntries().iterator(); i.hasNext(); ) { + EmblEntry entry = (EmblEntry) i.next(); + SequenceI[] seqparts = entry.getSequences(false,!getProteinProducts, emprefx); + if (seqparts!=null) { + SequenceI[] newseqs = null; + int si=0; + if (seqs==null) { + newseqs = new SequenceI[seqparts.length]; + } else { + newseqs = new SequenceI[seqs.length+seqparts.length]; + + for (;si0) + { + al = new Alignment(seqs); + result.append("# Successfully parsed the "+emprefx+" queries into an Alignment"); + results = result; + } + stopQuery(); + return al; + } + +} \ No newline at end of file diff --git a/src/jalview/ws/dbsources/Pdb.java b/src/jalview/ws/dbsources/Pdb.java new file mode 100644 index 
0000000..bcf26be
--- /dev/null
+++ b/src/jalview/ws/dbsources/Pdb.java
@@ -0,0 +1,180 @@
+/**
+ * 
+ */
+package jalview.ws.dbsources;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.SequenceI;
+
+import java.io.BufferedInputStream;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Hashtable;
+import java.util.Vector;
+
+import MCview.PDBChain;
+import MCview.PDBfile;
+
+import com.stevesoft.pat.Regex;
+
+import jalview.datamodel.AlignmentI;
+import jalview.io.FileParse;
+import jalview.ws.DbSourceProxy;
+import jalview.ws.DbSourceProxyImpl;
+import jalview.ws.EBIFetchClient;
+
+/**
+ * DbSourceProxy for the PDB: fetches an entry through EBIFetchClient and returns its chain sequences.
+ * @author JimP
+ */
+public class Pdb extends EbiFileRetrievedProxy implements DbSourceProxy
+{
+  public Pdb() {
+    super();
+    addDbSourceProperty(DBRefSource.PROTSEQDB);
+  }
+
+  /* (non-Javadoc)
+   * @see jalview.ws.DbSourceProxy#getAccessionSeparator()
+   */
+  public String getAccessionSeparator()
+  {
+    // null separator: only a single PDB id is fetched per query
+    return null;
+  }
+
+  /* (non-Javadoc)
+   * @see jalview.ws.DbSourceProxy#getAccessionValidator()
+   */
+  public Regex getAccessionValidator()
+  {
+    return new Regex("[1-9][0-9A-Za-z]{3}[ _A-Za-z0-9]?");
+  }
+
+  /* (non-Javadoc)
+   * @see jalview.ws.DbSourceProxy#getDbSource()
+   */
+  public String getDbSource()
+  {
+    return DBRefSource.PDB;
+  }
+
+  /* (non-Javadoc)
+   * @see jalview.ws.DbSourceProxy#getDbVersion()
+   */
+  public String getDbVersion()
+  {
+    return "0";
+  }
+  /** Fetch one PDB entry ("1abc", "1abcA" or "1abc:A"; chain matched case-insensitively) and return its chain sequences, or null if no file was retrieved; throws Exception if retrieval/parsing fails or no chain matches.
+   * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String)
+   */
+  public AlignmentI getSequenceRecords(String queries) throws Exception
+  {
+    startQuery();
+    Vector result = new Vector();
+    String chain = null;
+    String id = null;
+    if (queries.indexOf(":") > -1)
+    {
+      chain = queries.substring(queries.indexOf(":") + 1).toUpperCase();
+      id = queries.substring(0, queries.indexOf(":"));
+    }
+    else
+    {
+      id = queries;
+    }
+    if (queries.length() > 4 && chain == null)
+    {
+      chain = queries.substring(4).toUpperCase();
+      id = queries.substring(0, 4);
+    }
+    EBIFetchClient ebi = new EBIFetchClient();
+    try
+    {
+      java.io.File pdbFile = ebi.fetchDataAsFile("pdb:" + id, "pdb", "raw");
+      if (pdbFile == null)
+      {
+        stopQuery();
+        return null;
+      }
+      file = pdbFile.getAbsolutePath();
+      // parse the retrieved file and collect the requested chain (or every chain when none was given)
+      PDBfile pdbfile = new PDBfile(file,
+              jalview.io.AppletFormatAdapter.FILE);
+      for (int i = 0; i < pdbfile.chains.size(); i++)
+      {
+        if (chain == null
+                || ((PDBChain) pdbfile.chains.elementAt(i)).id
+                        .toUpperCase().equals(chain))
+        {
+          PDBChain pdbchain = (PDBChain) pdbfile.chains.elementAt(i);
+          // Get the Chain's Sequence - whose dataset includes any special features added from the PDB file
+          SequenceI sq = pdbchain.sequence;
+          // Specially formatted name for the PDB chain sequences retrieved from the PDB
+          sq.setName("PDB|" + id + "|" + sq.getName());
+          // Might need to add more metadata to the PDBEntry object
+          // like below
+          /*
+           * PDBEntry entry = new PDBEntry();
+          // Construct the PDBEntry
+          entry.setId(id);
+          if (entry.getProperty() == null)
+              entry.setProperty(new Hashtable());
+          entry.getProperty().put("chains",
+                  pdbchain.id
+                  + "=" + sq.getStart()
+                  + "-" + sq.getEnd());
+          sq.getDatasetSequence().addPDBId(entry);
+          */
+          // Add PDB DB Refs
+          // We make a DBRefEntry because we have obtained the PDB file from a verifiable source
+          // JBPNote - PDB DBRefEntry should also carry the chain and mapping information
+          DBRefEntry dbentry = new DBRefEntry(getDbSource(),
+                  getDbVersion(), id + pdbchain.id);
+          sq.addDBRef(dbentry);
+          // and add sequence to the retrieved set
+          result.addElement(sq.deriveSequence());
+        }
+      }
+
+      if (result.size() < 1)
+      {
+        throw new Exception("No PDB Records for " + id + " chain "
+                + ((chain == null) ? " " : chain));
+      }
+    } catch (Exception ex) // Problem retrieving or parsing PDB file
+    {
+      stopQuery();
+      throw (ex);
+    }
+    stopQuery();
+    SequenceI[] results = new SequenceI[result.size()];
+    for (int i = 0, j = result.size(); i < j; i++)
+    {
+      results[i] = (SequenceI) result.elementAt(i);
+      result.setElementAt(null, i);
+    }
+    return new Alignment(results);
+  }
+
+  /* (non-Javadoc)
+   * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
+   */
+  public boolean isValidReference(String accession)
+  {
+    Regex r = getAccessionValidator();
+    return r.search(accession.trim());
+  }
+
+  /**
+   * obtain human glyoxalase chain A sequence
+   */
+  public String getTestQuery()
+  {
+    return "1QIPA";
+  }
+
+}
diff --git a/src/jalview/ws/dbsources/Pfam.java b/src/jalview/ws/dbsources/Pfam.java
new file mode 100644
index 0000000..d1f5ea2
--- /dev/null
+++ b/src/jalview/ws/dbsources/Pfam.java
@@ -0,0 +1,108 @@
+/**
+ * 
+ */
+package jalview.ws.dbsources;
+
+import java.util.Hashtable;
+
+import com.stevesoft.pat.Regex;
+
+import jalview.datamodel.AlignmentI;
+import jalview.ws.DbSourceProxy;
+import jalview.ws.DbSourceProxyImpl;
+/**
+ * TODO: later PFAM is a complex datasource - it currently returns a seed alignment, but could optionally return a full alignment.
+ * TODO: later PFAM is a complex datasource - it could return a tree in addition to an alignment
+ * TODO: HP: Incorporate jalview.gui.SequenceFetcher retrieval code here.
+ * @author JimP
+ * 
+ */
+public class Pfam extends DbSourceProxyImpl implements DbSourceProxy
+{
+
+  /* (non-Javadoc)
+   * @see jalview.ws.DbSourceProxy#getAccessionSeparator()
+   */
+  public String getAccessionSeparator()
+  {
+    // null separator: no multi-accession queries for this source
+    return null;
+  }
+
+  /* (non-Javadoc)
+   * @see jalview.ws.DbSourceProxy#getAccessionValidator()
+   */
+  public Regex getAccessionValidator()
+  {
+    // TODO: no validation regex defined for PFAM accessions yet
+    return null;
+  }
+
+  /* (non-Javadoc)
+   * @see jalview.ws.DbSourceProxy#getDbSource()
+   */
+  public String getDbSource()
+  {
+    // source constant this proxy is registered under in jalview.ws.SequenceFetcher
+    return jalview.datamodel.DBRefSource.PFAM;
+  }
+
+  /* (non-Javadoc)
+   * @see jalview.ws.DbSourceProxy#getDbSourceProperties()
+   */
+  public Hashtable getDbSourceProperties()
+  {
+    // use the shared property table kept by DbSourceProxyImpl (still null until a property is added)
+    return super.getDbSourceProperties();
+  }
+
+  /* (non-Javadoc)
+   * @see jalview.ws.DbSourceProxy#getDbVersion()
+   */
+  public String getDbVersion()
+  {
+    // version unknown - same "0" placeholder as the other sources
+    return "0";
+  }
+
+  /* (non-Javadoc)
+   * @see jalview.ws.DbSourceProxy#getRawRecords()
+   */
+  public StringBuffer getRawRecords()
+  {
+    // raw results buffer is maintained by DbSourceProxyImpl
+    return super.getRawRecords();
+  }
+
+  /* (non-Javadoc)
+   * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String)
+   */
+  public AlignmentI getSequenceRecords(String queries) throws Exception
+  {
+    throw new Exception("PFAM Retrieval not yet implemented - see jalview.gui.SequenceFetcher for current implementation");
+  }
+
+  /* (non-Javadoc)
+   * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
+   */
+  public boolean isValidReference(String accession)
+  {
+    // TODO: reference validation not implemented - every accession is currently rejected
+    return false;
+  }
+
+  /* (non-Javadoc)
+   * @see jalview.ws.DbSourceProxy#queryInProgress()
+   */
+  public boolean queryInProgress()
+  {
+    // query flag is maintained by DbSourceProxyImpl (startQuery/stopQuery)
+    return super.queryInProgress();
+  }
+
+  public String getTestQuery()
+  {
+    return "PF00535";
+  }
+
+}
diff --git a/src/jalview/ws/dbsources/Uniprot.java b/src/jalview/ws/dbsources/Uniprot.java
new file mode 100644
index 0000000..789688c
--- /dev/null
+++ b/src/jalview/ws/dbsources/Uniprot.java
@@ -0,0 +1,217 @@
+/**
+ * 
+ */
+package jalview.ws.dbsources;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Enumeration;
+import java.util.Hashtable;
+import java.util.Vector;
+
+import com.stevesoft.pat.Regex;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.PDBEntry;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.datamodel.UniprotEntry;
+import jalview.io.FormatAdapter;
+import jalview.io.IdentifyFile;
+import jalview.ws.DBRefFetcher;
+import jalview.ws.DbSourceProxy;
+import jalview.ws.DbSourceProxyImpl;
+import jalview.ws.EBIFetchClient;
+
+/**
+ * DbSourceProxy for UniProt: fetches entry XML through EBIFetchClient and builds annotated sequences from it.
+ * @author JimP
+ */
+public class Uniprot extends DbSourceProxyImpl implements DbSourceProxy
+{
+  public Uniprot() {
+    super();
+    addDbSourceProperty(DBRefSource.SEQDB, DBRefSource.SEQDB);
+    addDbSourceProperty(DBRefSource.PROTSEQDB);
+    addDbSourceProperty(DBRefSource.MULTIACC);
+  }
+
+  /*
+   * (non-Javadoc)
+   * 
+   * @see jalview.ws.DbSourceProxy#getAccessionSeparator()
+   */
+  public String getAccessionSeparator()
+  {
+    return ";";
+  }
+
+  /*
+   * (non-Javadoc)
+   * 
+   * @see jalview.ws.DbSourceProxy#getAccessionValidator()
+   */
+  public Regex getAccessionValidator()
+  {
+    return null;
+  }
+
+  /*
+   * (non-Javadoc)
+   * 
+   * @see jalview.ws.DbSourceProxy#getDbSource()
+   */
+  public String getDbSource()
+  {
+    return DBRefSource.UNIPROT;
+  }
+
+  /*
+   * (non-Javadoc)
+   * 
+   * @see jalview.ws.DbSourceProxy#getDbVersion()
+   */
+  public String getDbVersion()
+  {
+    return "0"; // we really don't know what version we're on.
+  }
+
+  private EBIFetchClient ebi = null;
+
+  /**
+   * Fetch UniProt XML for the query through EBIFetchClient and turn the parsed entries into an alignment decorated by addUniprotXrefs.
+   * @return the alignment, or null when no entries were obtained or the generated sequence text could not be parsed (that text is then kept as the raw records)
+   * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String)
+   */
+  public AlignmentI getSequenceRecords(String queries) throws Exception
+  {
+    startQuery();
+    try
+    {
+      Alignment al=null;
+      ebi = new EBIFetchClient();
+      StringBuffer result=new StringBuffer();
+      File file = ebi.fetchDataAsFile("uniprot:" + queries, "xml", null);
+      DBRefFetcher dbref = new DBRefFetcher();
+      Vector entries = dbref.getUniprotEntries(file);
+
+      if (entries != null)
+      {
+        // First, make the new sequences
+        Enumeration en = entries.elements();
+        while (en.hasMoreElements())
+        {
+          UniprotEntry entry = (UniprotEntry) en.nextElement();
+
+          StringBuffer name = new StringBuffer(">UniProt/Swiss-Prot");
+          Enumeration en2 = entry.getAccession().elements();
+          while (en2.hasMoreElements())
+          {
+            name.append("|");
+            name.append(en2.nextElement());
+          }
+          en2 = entry.getName().elements();
+          while (en2.hasMoreElements())
+          {
+            name.append("|");
+            name.append(en2.nextElement());
+          }
+
+          if (entry.getProtein() != null)
+          {
+            name.append(" " + entry.getProtein().getName().elementAt(0));
+          }
+
+          result.append(name + "\n"
+                  + entry.getUniprotSequence().getContent() + "\n");
+
+        }
+
+        // Then read in the features and apply them to the dataset
+        al = parseResult(result.toString());
+        if (al!=null)
+        {
+          // Decorate the alignment with database entries.
+          addUniprotXrefs(al, entries);
+        } else {
+          results = result;
+        }
+      }
+      stopQuery();
+      return al;
+    } catch (Exception e)
+    {
+      stopQuery();
+      throw(e);
+    }
+  }
+
+  /**
+   * add an ordered set of UniprotEntry objects to an ordered set of sequences.
+   * 
+   * @param al -
+   *          a sequence of n sequences
+   * @param entries
+   *          a sequence of n uniprot entries to be analysed.
+   */
+  public void addUniprotXrefs(Alignment al, Vector entries)
+  {
+    for (int i = 0; i < entries.size(); i++)
+    {
+      UniprotEntry entry = (UniprotEntry) entries.elementAt(i);
+      Enumeration e = entry.getDbReference().elements();
+      Vector onlyPdbEntries = new Vector();
+      while (e.hasMoreElements())
+      {
+        PDBEntry pdb = (PDBEntry) e.nextElement();
+        if (!pdb.getType().equals(DBRefSource.PDB))
+        {
+          continue;
+        }
+
+        onlyPdbEntries.addElement(pdb);
+      }
+      SequenceI sq = al.getSequenceAt(i);
+      sq = (sq.getDatasetSequence()==null) ? sq : sq.getDatasetSequence();
+
+      Enumeration en2 = entry.getAccession().elements();
+      while (en2.hasMoreElements())
+      {
+        sq.addDBRef(
+                new DBRefEntry(getDbSource(), getDbVersion(), en2.nextElement()
+                        .toString()));
+      }
+      sq.setPDBId(onlyPdbEntries);
+      if (entry.getFeature() != null)
+      {
+        e = entry.getFeature().elements();
+        while (e.hasMoreElements())
+        {
+          SequenceFeature sf = (SequenceFeature) e.nextElement();
+          sf.setFeatureGroup("Uniprot");
+          sq.addSequenceFeature(sf);
+        }
+      }
+    }
+  }
+
+  /*
+   * (non-Javadoc)
+   * 
+   * @see jalview.ws.DbSourceProxy#isValidReference(java.lang.String)
+   */
+  public boolean isValidReference(String accession)
+  {
+    return true;
+  }
+  /**
+   * return LDHA_CHICK uniprot entry
+   */
+  public String getTestQuery()
+  {
+    return "P00340";
+  }
+}
-- 
1.7.10.2