1ab6125578086144beb29c4b41fb05c2d5c341b3
[jalview.git] / src / jalview / ws / dbsources / PDBRestClient.java
1 package jalview.ws.dbsources;
2
3 import jalview.ws.uimodel.PDBRestRequest;
4 import jalview.ws.uimodel.PDBRestResponse;
5 import jalview.ws.uimodel.PDBRestResponse.PDBResponseSummary;
6
7 import java.util.ArrayList;
8 import java.util.Collection;
9 import java.util.Iterator;
10 import java.util.List;
11
12 import javax.ws.rs.core.MediaType;
13
14 import org.json.simple.JSONArray;
15 import org.json.simple.JSONObject;
16 import org.json.simple.parser.JSONParser;
17 import org.json.simple.parser.ParseException;
18
19 import com.sun.jersey.api.client.Client;
20 import com.sun.jersey.api.client.ClientResponse;
21 import com.sun.jersey.api.client.WebResource;
22 import com.sun.jersey.api.client.config.ClientConfig;
23 import com.sun.jersey.api.client.config.DefaultClientConfig;
24 import com.sun.jersey.api.json.JSONConfiguration;
25
26 /**
27  * A rest client for querying the Search endpoint of the PDB REST API
28  * 
29  * @author tcnofoegbu
30  *
31  */
32 public class PDBRestClient
33 {
34   private static String PDB_SEARCH_ENDPOINT = "http://wwwdev.ebi.ac.uk/pdbe/search/pdb/select?";
35
36   private static int DEFAULT_RESPONSE_SIZE = 200;
37
38   /**
39    * Takes a PDBRestRequest object and returns a response upon execution
40    * 
41    * @param pdbRestRequest
42    *          the PDBRestRequest instance to be processed
43    * @return the pdbResponse object for the given request
44    */
45   public PDBRestResponse executeRequest(PDBRestRequest pdbRestRequest)
46   {
47     ClientConfig clientConfig = new DefaultClientConfig();
48     clientConfig.getFeatures().put(JSONConfiguration.FEATURE_POJO_MAPPING,
49             Boolean.TRUE);
50     Client client = Client.create(clientConfig);
51
52     String wantedFields = getPDBDocFieldsAsCommaDelimitedString(pdbRestRequest
53             .getWantedFields());
54     int responseSize = (pdbRestRequest.getResponseSize() == 0) ? DEFAULT_RESPONSE_SIZE
55             : pdbRestRequest.getResponseSize();
56     String sortParam = (pdbRestRequest.getFieldToSortBy() == null || pdbRestRequest
57             .getFieldToSortBy().trim().isEmpty()) ? "" : (pdbRestRequest
58             .getFieldToSortBy() + (pdbRestRequest.isAscending() ? " asc"
59             : " desc"));
60
61     // Build request parameters for the REST Request
62     WebResource webResource = client.resource(PDB_SEARCH_ENDPOINT)
63             .queryParam("wt", "json").queryParam("fl", wantedFields)
64             .queryParam("rows", String.valueOf(responseSize))
65             .queryParam("q", pdbRestRequest.getQuery())
66             .queryParam("sort", sortParam);
67
68     // Execute the REST request
69     ClientResponse clientResponse = webResource.accept(
70             MediaType.APPLICATION_JSON).get(ClientResponse.class);
71
72     // Get the JSON string from the response object
73     String responseString = clientResponse.getEntity(String.class);
74
75     // Check the response status and report exception if one occurs
76     if (clientResponse.getStatus() != 200)
77     {
78       String errorMessage = "";
79       if (clientResponse.getStatus() == 400)
80       {
81         errorMessage = parseJsonExceptionString(responseString);
82         throw new RuntimeException(errorMessage);
83       }
84       else
85       {
86         errorMessage = "Failed : HTTP error code : "
87                 + clientResponse.getStatus();
88         throw new RuntimeException(errorMessage);
89       }
90     }
91
92     // Make redundant objects eligible for garbage collection to conserve
93     // memory
94     clientResponse = null;
95     client = null;
96
97     // Process the response and return the result to the caller.
98     return parsePDBJsonResponse(responseString, pdbRestRequest);
99   }
100
101   /**
102    * Process error response from PDB server if/when one occurs.
103    * 
104    * @param jsonResponse
105    *          the JSON string containing error message from the server
106    * @return the processed error message from the JSON string
107    */
108   public static String parseJsonExceptionString(String jsonErrorResponse)
109   {
110     String errorMessage = "RunTime error";
111     try
112     {
113       JSONParser jsonParser = new JSONParser();
114       JSONObject jsonObj = (JSONObject) jsonParser.parse(jsonErrorResponse);
115       JSONObject errorResponse = (JSONObject) jsonObj.get("error");
116       errorMessage = errorResponse.get("msg").toString();
117
118       JSONObject responseHeader = (JSONObject) jsonObj
119               .get("responseHeader");
120       errorMessage += responseHeader.get("params").toString();
121     } catch (ParseException e)
122     {
123       e.printStackTrace();
124     }
125     return errorMessage;
126   }
127
128   /**
129    * Parses the JSON response string from PDB REST API. The response is dynamic
130    * hence, only fields specifically requested for in the 'wantedFields'
131    * parameter is fetched/processed
132    * 
133    * @param pdbJsonResponseString
134    *          the JSON string to be parsed
135    * @param pdbRestRequest
136    *          the request object which contains parameters used to process the
137    *          JSON string
138    * @return
139    */
140   @SuppressWarnings("unchecked")
141   public static PDBRestResponse parsePDBJsonResponse(
142           String pdbJsonResponseString, PDBRestRequest pdbRestRequest)
143   {
144     PDBRestResponse searchResult = new PDBRestResponse();
145     List<PDBResponseSummary> result = null;
146     try
147     {
148       JSONParser jsonParser = new JSONParser();
149       JSONObject jsonObj = (JSONObject) jsonParser
150               .parse(pdbJsonResponseString);
151
152       JSONObject pdbResponse = (JSONObject) jsonObj.get("response");
153       String queryTime = ((JSONObject) jsonObj.get("responseHeader")).get(
154               "QTime").toString();
155       int numFound = Integer
156               .valueOf(pdbResponse.get("numFound").toString());
157       if (numFound > 0)
158       {
159         result = new ArrayList<PDBResponseSummary>();
160         JSONArray docs = (JSONArray) pdbResponse.get("docs");
161         for (Iterator<JSONObject> docIter = docs.iterator(); docIter
162                 .hasNext();)
163         {
164           JSONObject doc = docIter.next();
165           result.add(searchResult.new PDBResponseSummary(doc,
166                   pdbRestRequest));
167         }
168         searchResult.setNumberOfItemsFound(numFound);
169         searchResult.setResponseTime(queryTime);
170         searchResult.setSearchSummary(result);
171       }
172     } catch (ParseException e)
173     {
174       e.printStackTrace();
175     }
176     return searchResult;
177   }
178
179   /**
180    * Takes a collection of PDBDocField and converts its 'code' Field values into
181    * a comma delimited string.
182    * 
183    * @param pdbDocfields
184    *          the collection of PDBDocField to process
185    * @return the comma delimited string from the pdbDocFields collection
186    */
187   public static String getPDBDocFieldsAsCommaDelimitedString(
188           Collection<PDBDocField> pdbDocfields)
189   {
190     String result = "";
191     if (pdbDocfields != null && !pdbDocfields.isEmpty())
192     {
193       StringBuilder returnedFields = new StringBuilder();
194       for (PDBDocField field : pdbDocfields)
195       {
196         returnedFields.append(",").append(field.getCode());
197       }
198       returnedFields.deleteCharAt(0);
199       result = returnedFields.toString();
200     }
201     return result;
202   }
203
204   /**
205    * Determines the column index for 'PDB Id' Fields in the dynamic summary
206    * table. The PDB Id serves as a unique identifier for a given row in the
207    * summary table
208    * 
209    * @param wantedFeilds
210    *          the available table columns in no particular order
211    * @return the pdb id field column index
212    */
213   public static int getPDBIdColumIndex(
214           Collection<PDBDocField> wantedFeilds, boolean hasRefSeq)
215   {
216
217     // If a reference sequence is attached then start counting from 1 else
218     // start from zero
219     int pdbFeildIndexCounter = hasRefSeq ? 1 : 0;
220
221     for (PDBDocField feild : wantedFeilds)
222     {
223       if (feild.equals(PDBDocField.PDB_ID))
224       {
225         break; // Once PDB Id index is determined exit iteration
226       }
227       ++pdbFeildIndexCounter;
228     }
229     return pdbFeildIndexCounter;
230   }
231
232   /**
233    * This enum represents the fields available in the PDB JSON response
234    *
235    */
236   public enum PDBDocField
237   {
238     PDB_ID("PDB Id", "pdb_id"), TITLE("Title", "title"), MOLECULE_NAME(
239             "Molecule", "molecule_name"), MOLECULE_TYPE("Molecule Type",
240             "molecule_type"), MOLECULE_SEQUENCE("Sequence",
241             "molecule_sequence"), PFAM_ACCESSION("PFAM Accession",
242             "pfam_accession"), PFAM_NAME("PFAM Name", "pfam_name"), INTERPRO_NAME(
243             "InterPro Name", "interpro_name"), INTERPRO_ACCESSION(
244             "InterPro Accession", "interpro_accession"), UNIPROT_ID(
245             "UniProt Id", "uniprot_id"), UNIPROT_ACCESSION(
246             "UniProt Accession", "uniprot_accession"), UNIPROT_COVERAGE(
247             "UniProt Coverage", "uniprot_coverage"), UNIPROT_FEATURES(
248             "Uniprot Features", "uniprot_features"), R_FACTOR("R Factor",
249             "r_factor"), RESOLUTION("Resolution", "resolution"), DATA_QUALITY(
250             "Data Quality", "data_quality"), OVERALL_QUALITY(
251             "Overall Quality", "overall_quality"), POLYMER_COUNT(
252             "Number of Polymers", "number_of_polymers"), PROTEIN_CHAIN_COUNT(
253             "Number of Protein Chains", "number_of_protein_chains"), BOUND_MOLECULE_COUNT(
254             "Number of Bound Molecule", "number_of_bound_molecules"), POLYMER_RESIDUE_COUNT(
255             "Number of Polymer Residue", "number_of_polymer_residues"), GENUS(
256             "GENUS", "genus"), GENE_NAME("Gene Name", "gene_name"), EXPERIMENTAL_METHOD(
257             "Experimental Method", "experimental_method"), GO_ID("GO Id",
258             "go_id"), ASSEMBLY_ID("Assembly Id", "assembly_form"), ASSEMBLY_FORM(
259             "Assembly Form", "assembly_id"), ASSEMBLY_TYPE("Assembly Type",
260             "assembly_type"), SPACE_GROUP("Space Group", "spacegroup"), CATH_CODE(
261             "Cath Code", "cath_code"), TAX_ID("Tax Id", "tax_id"), TAX_QUERY(
262             "Tax Query", "tax_query"), INTERACTING_ENTRY_ID(
263             "Interacting Entry Id", "interacting_entry_id"), INTERACTING_ENTITY_ID(
264             "Interacting Entity Id", "interacting_entity_id"), INTERACTING_MOLECULES(
265             "Interacting Molecules", "interacting_molecules"), PUBMED_ID(
266             "Pubmed Id", "pubmed_id"), STATUS("Status", "status"), MODEL_QUALITY(
267             "Model Quality", "model_quality"), PIVOT_RESOLUTION(
268             "Pivot Resolution", "pivot_resolution"), DATA_REDUCTION_SOFTWARE(
269             "Data reduction software", "data_reduction_software"), MAX_OBSERVED_RES(
270             "Max observed residues", "max_observed_residues"), ORG_SCI_NAME(
271             "Organism scientific name", "organism_scientific_name"), SUPER_KINGDOM(
272             "Super kingdom", "superkingdom"), RANK("Rank", "rank"), CRYSTALLISATION_PH(
273             "Crystallisation Ph", "crystallisation_ph"), BIOLOGICAL_FUNCTION(
274             "Biological Function", "biological_function"), BIOLOGICAL_PROCESS(
275             "Biological Process", "biological_process"), BIOLOGICAL_CELL_COMPONENT(
276             "Biological Cell Component", "biological_cell_component"), COMPOUND_NAME(
277             "Compound Name", "compound_name"), COMPOUND_ID("Compound Id",
278             "compound_id"), COMPOUND_WEIGHT("Compound Weight",
279             "compound_weight"), COMPOUND_SYSTEMATIC_NAME(
280             "Compound Systematic Name", "compound_systematic_name"), INTERACTING_LIG(
281             "Interacting Ligands", "interacting_ligands"), JOURNAL(
282             "Journal", "journal"), ALL_AUTHORS("All Authors", "all_authors"), EXPERIMENTAL_DATA_AVAILABLE(
283             "Experiment Data Available", "experiment_data_available"), DIFFRACTION_PROTOCOL(
284             "Diffraction Protocol", "diffraction_protocol"), REFINEMENT_SOFTWARE(
285             "Refinement Software", "refinement_software"), STRUCTURE_DETERMINATION_METHOD(
286             "Structure Determination Method",
287             "structure_determination_method"), SYNCHROTON_SITE(
288             "Synchrotron Site", "synchrotron_site"), SAMPLE_PREP_METHOD(
289             "Sample Preparation Method", "sample_preparation_method"), ENTRY_AUTHORS(
290             "Entry Authors", "entry_authors"), CITATION_TITLE(
291             "Citation Title", "citation_title"), STRUCTURE_SOLUTION_SOFTWARE(
292             "Structure Solution Software", "structure_solution_software"), ENTRY_ENTITY(
293             "Entry Entity", "entry_entity"), R_FREE("R Free", "r_free"), NO_OF_POLYMER_ENTITIES(
294             "Number of Polymer Entities", "number_of_polymer_entities"), NO_OF_BOUND_ENTITIES(
295             "Number of Bound Entities", "number_of_bound_entities"), CRYSTALLISATION_RESERVOIR(
296             "Crystallisation Reservoir", "crystallisation_reservoir"), DATA_SCALING_SW(
297             "Data Scalling Software", "data_scaling_software"), DETECTOR(
298             "Detector", "detector"), DETECTOR_TYPE("Detector Type",
299             "detector_type"), MODIFIED_RESIDUE_FLAG(
300             "Modified Residue Flag", "modified_residue_flag"), NUMBER_OF_COPIES(
301             "Number of Copies", "number_of_copies"), STRUCT_ASYM_ID(
302             "Struc Asym Id", "struct_asym_id"), HOMOLOGUS_PDB_ENTITY_ID(
303             "Homologus PDB Entity Id", "homologus_pdb_entity_id"), MOLECULE_SYNONYM(
304             "Molecule Synonym", "molecule_synonym"), DEPOSITION_SITE(
305             "Deposition Site", "deposition_site"), SYNCHROTRON_BEAMLINE(
306             "Synchrotron Beamline", "synchrotron_beamline"), ENTITY_ID(
307             "Entity Id", "entity_id"), BEAM_SOURCE_NAME("Beam Source Name",
308             "beam_source_name"), PROCESSING_SITE("Processing Site",
309             "processing_site"), ENTITY_WEIGHT("Entity Weight",
310             "entity_weight"), VERSION("Version", "_version_"), ALL("ALL",
311             "text");
312
313     private String name;
314
315     private String code;
316
317     PDBDocField(String name, String code)
318     {
319       this.name = name;
320       this.code = code;
321     }
322
323     public String getName()
324     {
325       return name;
326     }
327
328     public String getCode()
329     {
330       return code;
331     }
332
333     public String toString()
334     {
335       return name;
336     }
337   }
338 }