develop merge
[jalview.git] / src / jalview / ws / dbsources / PDBRestClient.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.ws.dbsources;
22
23 import jalview.util.MessageManager;
24 import jalview.ws.uimodel.PDBRestRequest;
25 import jalview.ws.uimodel.PDBRestResponse;
26 import jalview.ws.uimodel.PDBRestResponse.PDBResponseSummary;
27
28 import java.util.ArrayList;
29 import java.util.Collection;
30 import java.util.Iterator;
31 import java.util.List;
32
33 import javax.ws.rs.core.MediaType;
34
35 import org.json.simple.JSONArray;
36 import org.json.simple.JSONObject;
37 import org.json.simple.parser.JSONParser;
38 import org.json.simple.parser.ParseException;
39
40 import com.sun.jersey.api.client.Client;
41 import com.sun.jersey.api.client.ClientResponse;
42 import com.sun.jersey.api.client.WebResource;
43 import com.sun.jersey.api.client.config.ClientConfig;
44 import com.sun.jersey.api.client.config.DefaultClientConfig;
45
46 /**
 * A REST client for querying the Search endpoint of the PDB REST API
48  * 
49  * @author tcnofoegbu
50  *
51  */
52 public class PDBRestClient
53 {
54   public static final String PDB_SEARCH_ENDPOINT = "http://www.ebi.ac.uk/pdbe/search/pdb/select?";
55
56   private static int DEFAULT_RESPONSE_SIZE = 200;
57
58   /**
59    * Takes a PDBRestRequest object and returns a response upon execution
60    * 
61    * @param pdbRestRequest
62    *          the PDBRestRequest instance to be processed
63    * @return the pdbResponse object for the given request
64    * @throws Exception
65    */
66   public PDBRestResponse executeRequest(PDBRestRequest pdbRestRequest)
67           throws Exception
68   {
69     try
70     {
71       ClientConfig clientConfig = new DefaultClientConfig();
72       Client client = Client.create(clientConfig);
73
74       String wantedFields = getPDBDocFieldsAsCommaDelimitedString(pdbRestRequest
75               .getWantedFields());
76       int responseSize = (pdbRestRequest.getResponseSize() == 0) ? DEFAULT_RESPONSE_SIZE
77               : pdbRestRequest.getResponseSize();
78       String sortParam = null;
79       if (pdbRestRequest.getFieldToSortBy() == null
80               || pdbRestRequest.getFieldToSortBy().trim().isEmpty())
81       {
82         sortParam = "";
83       }
84       else
85       {
86         if (pdbRestRequest.getFieldToSortBy()
87                 .equalsIgnoreCase("Resolution"))
88         {
89           sortParam = pdbRestRequest.getFieldToSortBy()
90                   + (pdbRestRequest.isAscending() ? " asc" : " desc");
91         }
92         else
93         {
94           sortParam = pdbRestRequest.getFieldToSortBy()
95                   + (pdbRestRequest.isAscending() ? " desc" : " asc");
96         }
97       }
98
99       String facetPivot = (pdbRestRequest.getFacetPivot() == null || pdbRestRequest
100               .getFacetPivot().isEmpty()) ? "" : pdbRestRequest
101               .getFacetPivot();
102       String facetPivotMinCount = String.valueOf(pdbRestRequest
103               .getFacetPivotMinCount());
104       
105       // Build request parameters for the REST Request
106       WebResource webResource = null;
107       if (pdbRestRequest.isFacet())
108       {
109         webResource = client.resource(PDB_SEARCH_ENDPOINT)
110                 .queryParam("wt", "json").queryParam("fl", wantedFields)
111                 .queryParam("rows", String.valueOf(responseSize))
112                 .queryParam("q", pdbRestRequest.getQuery())
113                 .queryParam("sort", sortParam).queryParam("facet", "true")
114                 .queryParam("facet.pivot", facetPivot)
115                 .queryParam("facet.pivot.mincount", facetPivotMinCount);
116       }
117       else
118       {
119         webResource = client.resource(PDB_SEARCH_ENDPOINT)
120                 .queryParam("wt", "json").queryParam("fl", wantedFields)
121                 .queryParam("rows", String.valueOf(responseSize))
122                 .queryParam("q", pdbRestRequest.getQuery())
123                 .queryParam("sort", sortParam);
124       }
125       // Execute the REST request
126       ClientResponse clientResponse = webResource.accept(
127               MediaType.APPLICATION_JSON).get(ClientResponse.class);
128
129       // Get the JSON string from the response object
130       String responseString = clientResponse.getEntity(String.class);
131       // System.out.println("query >>>>>>> " + pdbRestRequest.toString());
132
133       // Check the response status and report exception if one occurs
134       if (clientResponse.getStatus() != 200)
135       {
136         String errorMessage = "";
137         if (clientResponse.getStatus() == 400)
138         {
139           errorMessage = parseJsonExceptionString(responseString);
140           throw new Exception(errorMessage);
141         }
142         else
143         {
144           errorMessage = getMessageByHTTPStatusCode(clientResponse
145                   .getStatus());
146           throw new Exception(errorMessage);
147         }
148       }
149
150       // Make redundant objects eligible for garbage collection to conserve
151       // memory
152       clientResponse = null;
153       client = null;
154
155       // Process the response and return the result to the caller.
156       return parsePDBJsonResponse(responseString, pdbRestRequest);
157     } catch (Exception e)
158     {
159       String exceptionMsg = e.getMessage();
160       if (exceptionMsg.contains("SocketException"))
161       {
162         // No internet connection
163         throw new Exception(
164                 MessageManager
165                         .getString("exception.unable_to_detect_internet_connection"));
166       }
167       else if (exceptionMsg.contains("UnknownHostException"))
168       {
169         // The server 'www.ebi.ac.uk' is unreachable
170         throw new Exception(
171                 MessageManager
172                         .getString("exception.pdb_server_unreachable"));
173       }
174       else
175       {
176         throw e;
177       }
178     }
179   }
180
181   public String getMessageByHTTPStatusCode(int code)
182   {
183     String message = "";
184     switch (code)
185     {
186     case 410:
187       message = MessageManager
188               .getString("exception.pdb_rest_service_no_longer_available");
189       break;
190     case 403:
191     case 404:
192       message = MessageManager.getString("exception.resource_not_be_found");
193       break;
194     case 408:
195     case 409:
196     case 500:
197     case 501:
198     case 502:
199     case 503:
200     case 504:
201     case 505:
202       message = MessageManager.getString("exception.pdb_server_error");
203       break;
204
205     default:
206       break;
207     }
208     return message;
209   }
210
211   /**
212    * Process error response from PDB server if/when one occurs.
213    * 
214    * @param jsonResponse
215    *          the JSON string containing error message from the server
216    * @return the processed error message from the JSON string
217    */
218   public static String parseJsonExceptionString(String jsonErrorResponse)
219   {
220     StringBuilder errorMessage = new StringBuilder(
221             "\n============= PDB Rest Client RunTime error =============\n");
222
223     try
224     {
225       JSONParser jsonParser = new JSONParser();
226       JSONObject jsonObj = (JSONObject) jsonParser.parse(jsonErrorResponse);
227       JSONObject errorResponse = (JSONObject) jsonObj.get("error");
228
229       JSONObject responseHeader = (JSONObject) jsonObj
230               .get("responseHeader");
231       JSONObject paramsObj = (JSONObject) responseHeader.get("params");
232       String status = responseHeader.get("status").toString();
233       String message = errorResponse.get("msg").toString();
234       String query = paramsObj.get("q").toString();
235       String fl = paramsObj.get("fl").toString();
236
237       errorMessage.append("Status: ").append(status).append("\n");
238       errorMessage.append("Message: ").append(message).append("\n");
239       errorMessage.append("query: ").append(query).append("\n");
240       errorMessage.append("fl: ").append(fl).append("\n");
241
242     } catch (ParseException e)
243     {
244       e.printStackTrace();
245     }
246     return errorMessage.toString();
247   }
248
249   /**
250    * Parses the JSON response string from PDB REST API. The response is dynamic
251    * hence, only fields specifically requested for in the 'wantedFields'
252    * parameter is fetched/processed
253    * 
254    * @param pdbJsonResponseString
255    *          the JSON string to be parsed
256    * @param pdbRestRequest
257    *          the request object which contains parameters used to process the
258    *          JSON string
259    * @return
260    */
261   @SuppressWarnings("unchecked")
262   public static PDBRestResponse parsePDBJsonResponse(
263           String pdbJsonResponseString, PDBRestRequest pdbRestRequest)
264   {
265     PDBRestResponse searchResult = new PDBRestResponse();
266     List<PDBResponseSummary> result = null;
267     try
268     {
269       JSONParser jsonParser = new JSONParser();
270       JSONObject jsonObj = (JSONObject) jsonParser
271               .parse(pdbJsonResponseString);
272
273       JSONObject pdbResponse = (JSONObject) jsonObj.get("response");
274       String queryTime = ((JSONObject) jsonObj.get("responseHeader")).get(
275               "QTime").toString();
276       int numFound = Integer
277               .valueOf(pdbResponse.get("numFound").toString());
278       if (numFound > 0)
279       {
280         result = new ArrayList<PDBResponseSummary>();
281         JSONArray docs = (JSONArray) pdbResponse.get("docs");
282         for (Iterator<JSONObject> docIter = docs.iterator(); docIter
283                 .hasNext();)
284         {
285           JSONObject doc = docIter.next();
286           result.add(searchResult.new PDBResponseSummary(doc,
287                   pdbRestRequest));
288         }
289         searchResult.setNumberOfItemsFound(numFound);
290         searchResult.setResponseTime(queryTime);
291         searchResult.setSearchSummary(result);
292       }
293     } catch (ParseException e)
294     {
295       e.printStackTrace();
296     }
297     return searchResult;
298   }
299
300   /**
301    * Takes a collection of PDBDocField and converts its 'code' Field values into
302    * a comma delimited string.
303    * 
304    * @param pdbDocfields
305    *          the collection of PDBDocField to process
306    * @return the comma delimited string from the pdbDocFields collection
307    */
308   public static String getPDBDocFieldsAsCommaDelimitedString(
309           Collection<PDBDocField> pdbDocfields)
310   {
311     String result = "";
312     if (pdbDocfields != null && !pdbDocfields.isEmpty())
313     {
314       StringBuilder returnedFields = new StringBuilder();
315       for (PDBDocField field : pdbDocfields)
316       {
317         returnedFields.append(",").append(field.getCode());
318       }
319       returnedFields.deleteCharAt(0);
320       result = returnedFields.toString();
321     }
322     return result;
323   }
324
325   /**
326    * Determines the column index for 'PDB Id' Fields in the dynamic summary
327    * table. The PDB Id serves as a unique identifier for a given row in the
328    * summary table
329    * 
330    * @param wantedFields
331    *          the available table columns in no particular order
332    * @return the pdb id field column index
333    */
334   public static int getPDBIdColumIndex(
335           Collection<PDBDocField> wantedFields, boolean hasRefSeq)
336   {
337
338     // If a reference sequence is attached then start counting from 1 else
339     // start from zero
340     int pdbFieldIndexCounter = hasRefSeq ? 1 : 0;
341
342     for (PDBDocField field : wantedFields)
343     {
344       if (field.equals(PDBDocField.PDB_ID))
345       {
346         break; // Once PDB Id index is determined exit iteration
347       }
348       ++pdbFieldIndexCounter;
349     }
350     return pdbFieldIndexCounter;
351   }
352
353   public static PDBDocField getPDBDocFieldByCode(String fieldCode)
354           throws Exception
355   {
356     for (PDBDocField curPDBDocField : PDBDocField.values())
357     {
358       if (curPDBDocField.getCode().equalsIgnoreCase(fieldCode))
359       {
360         return curPDBDocField;
361       }
362     }
363     throw new Exception("PDB doc Field not found!");
364   }
365
366   /**
367    * This enum represents the fields available in the PDB JSON response
368    *
369    */
370   public enum PDBDocField
371   {
372     PDB_ID("PDB Id", "pdb_id", Group.CROSS_REFS), TITLE(
373             "Title",
374             "title", Group.MISCELLANEOUS), MOLECULE_NAME("Molecule",
375             "molecule_name",
376             Group.NAMES_AND_TAXONOMY), MOLECULE_TYPE(
377             "Molecule Type", "molecule_type", Group.NAMES_AND_TAXONOMY), MOLECULE_SEQUENCE(
378             "Sequence", "molecule_sequence", Group.MISCELLANEOUS), PFAM_ACCESSION(
379             "PFAM Accession", "pfam_accession",
380             Group.CROSS_REFS), PFAM_NAME(
381             "PFAM Name", "pfam_name", Group.NAMES_AND_TAXONOMY), INTERPRO_NAME(
382             "InterPro Name", "interpro_name", Group.NAMES_AND_TAXONOMY), INTERPRO_ACCESSION(
383             "InterPro Accession", "interpro_accession",
384             Group.CROSS_REFS), UNIPROT_ID("UniProt Id",
385             "uniprot_id", Group.CROSS_REFS), UNIPROT_ACCESSION(
386             "UniProt Accession", "uniprot_accession",
387             Group.CROSS_REFS),
388
389     UNIPROT_COVERAGE(
390             "UniProt Coverage", "uniprot_coverage", Group.MISCELLANEOUS), UNIPROT_FEATURES(
391             "Uniprot Features", "uniprot_features", Group.MISCELLANEOUS), R_FACTOR(
392 "R Factor",
393             "r_factor", Group.QUALITY_MEASURES), RESOLUTION("Resolution",
394             "resolution", Group.QUALITY_MEASURES), DATA_QUALITY(
395             "Data Quality", "data_quality", Group.QUALITY_MEASURES), OVERALL_QUALITY(
396             "Overall Quality", "overall_quality", Group.QUALITY_MEASURES), POLYMER_COUNT(
397             "Number of Polymers", "number_of_polymers", Group.MISCELLANEOUS), PROTEIN_CHAIN_COUNT(
398             "Number of Protein Chains", "number_of_protein_chains",
399             Group.MISCELLANEOUS), BOUND_MOLECULE_COUNT(
400             "Number of Bound Molecule", "number_of_bound_molecules",
401             Group.MISCELLANEOUS), POLYMER_RESIDUE_COUNT(
402             "Number of Polymer Residue", "number_of_polymer_residues",
403             Group.MISCELLANEOUS), GENUS("GENUS", "genus",
404             Group.NAMES_AND_TAXONOMY), GENE_NAME("Gene Name", "gene_name",
405             Group.NAMES_AND_TAXONOMY), EXPERIMENTAL_METHOD(
406             "Experimental Method", "experimental_method",
407             Group.PROCEDURE_AND_SOFTWARE), GO_ID("GO Id", "go_id",
408             Group.CROSS_REFS), ASSEMBLY_ID("Assembly Id",
409             "assembly_id", Group.CROSS_REFS), ASSEMBLY_FORM(
410             "Assembly Form", "assembly_form", Group.MISCELLANEOUS), ASSEMBLY_TYPE(
411             "Assembly Type", "assembly_type", Group.MISCELLANEOUS), SPACE_GROUP(
412             "Space Group", "spacegroup", Group.MISCELLANEOUS), CATH_CODE(
413             "Cath Code", "cath_code", Group.CROSS_REFS), TAX_ID(
414             "Tax Id", "tax_id", Group.CROSS_REFS), TAX_QUERY(
415             "Tax Query", "tax_query", Group.CROSS_REFS), INTERACTING_ENTITY_ID(
416             "Interacting Entity Id", "interacting_entity_id",
417             Group.CROSS_REFS), INTERACTING_MOLECULES(
418             "Interacting Molecules", "interacting_molecules",
419             Group.MISCELLANEOUS), PUBMED_ID("Pubmed Id", "pubmed_id",
420             Group.CROSS_REFS), STATUS("Status", "status",
421             Group.MISCELLANEOUS), MODEL_QUALITY("Model Quality",
422             "model_quality", Group.QUALITY_MEASURES), PIVOT_RESOLUTION(
423             "Pivot Resolution", "pivot_resolution", Group.QUALITY_MEASURES), DATA_REDUCTION_SOFTWARE(
424             "Data reduction software", "data_reduction_software",
425             Group.PROCEDURE_AND_SOFTWARE), MAX_OBSERVED_RES(
426             "Max observed residues",
427             "max_observed_residues", Group.MISCELLANEOUS), ORG_SCI_NAME(
428             "Organism scientific name", "organism_scientific_name",
429             Group.NAMES_AND_TAXONOMY), SUPER_KINGDOM("Super kingdom",
430             "superkingdom", Group.NAMES_AND_TAXONOMY), RANK("Rank", "rank",
431             Group.NAMES_AND_TAXONOMY), CRYSTALLISATION_PH(
432             "Crystallisation Ph",
433             "crystallisation_ph", Group.MISCELLANEOUS), BIOLOGICAL_FUNCTION(
434             "Biological Function", "biological_function",
435             Group.MISCELLANEOUS), BIOLOGICAL_PROCESS("Biological Process",
436             "biological_process", Group.MISCELLANEOUS), BIOLOGICAL_CELL_COMPONENT(
437             "Biological Cell Component", "biological_cell_component",
438             Group.MISCELLANEOUS), COMPOUND_NAME("Compound Name",
439             "compound_name", Group.NAMES_AND_TAXONOMY), COMPOUND_ID(
440             "Compound Id", "compound_id", Group.CROSS_REFS), COMPOUND_WEIGHT(
441             "Compound Weight", "compound_weight", Group.MISCELLANEOUS), COMPOUND_SYSTEMATIC_NAME(
442             "Compound Systematic Name", "compound_systematic_name",
443             Group.NAMES_AND_TAXONOMY), INTERACTING_LIG(
444             "Interacting Ligands",
445             "interacting_ligands", Group.MISCELLANEOUS), JOURNAL("Journal",
446             "journal", Group.MISCELLANEOUS), ALL_AUTHORS("All Authors",
447             "all_authors", Group.MISCELLANEOUS), EXPERIMENTAL_DATA_AVAILABLE(
448             "Experiment Data Available", "experiment_data_available",
449             Group.MISCELLANEOUS), DIFFRACTION_PROTOCOL(
450             "Diffraction Protocol", "diffraction_protocol",
451             Group.PROCEDURE_AND_SOFTWARE), REFINEMENT_SOFTWARE(
452             "Refinement Software", "refinement_software",
453             Group.PROCEDURE_AND_SOFTWARE), STRUCTURE_DETERMINATION_METHOD(
454             "Structure Determination Method",
455             "structure_determination_method", Group.PROCEDURE_AND_SOFTWARE), SYNCHROTON_SITE(
456             "Synchrotron Site", "synchrotron_site", Group.MISCELLANEOUS), SAMPLE_PREP_METHOD(
457             "Sample Preparation Method", "sample_preparation_method",
458             Group.PROCEDURE_AND_SOFTWARE), ENTRY_AUTHORS("Entry Authors",
459             "entry_authors", Group.MISCELLANEOUS), CITATION_TITLE(
460             "Citation Title", "citation_title", Group.MISCELLANEOUS), STRUCTURE_SOLUTION_SOFTWARE(
461             "Structure Solution Software", "structure_solution_software",
462             Group.PROCEDURE_AND_SOFTWARE), ENTRY_ENTITY("Entry Entity",
463             "entry_entity", Group.MISCELLANEOUS), R_FREE("R Free", "r_free",
464             Group.QUALITY_MEASURES), NO_OF_POLYMER_ENTITIES(
465             "Number of Polymer Entities", "number_of_polymer_entities",
466             Group.MISCELLANEOUS), NO_OF_BOUND_ENTITIES(
467             "Number of Bound Entities", "number_of_bound_entities",
468             Group.MISCELLANEOUS), CRYSTALLISATION_RESERVOIR(
469             "Crystallisation Reservoir", "crystallisation_reservoir",
470             Group.MISCELLANEOUS), DATA_SCALING_SW("Data Scalling Software",
471             "data_scaling_software", Group.PROCEDURE_AND_SOFTWARE), DETECTOR(
472             "Detector", "detector", Group.MISCELLANEOUS), DETECTOR_TYPE(
473             "Detector Type", "detector_type", Group.MISCELLANEOUS), MODIFIED_RESIDUE_FLAG(
474             "Modified Residue Flag", "modified_residue_flag",
475             Group.MISCELLANEOUS), NUMBER_OF_COPIES("Number of Copies",
476             "number_of_copies", Group.MISCELLANEOUS), STRUCT_ASYM_ID(
477             "Struc Asym Id", "struct_asym_id",
478             Group.CROSS_REFS), HOMOLOGUS_PDB_ENTITY_ID(
479             "Homologus PDB Entity Id", "homologus_pdb_entity_id",
480             Group.CROSS_REFS), MOLECULE_SYNONYM(
481             "Molecule Synonym",
482             "molecule_synonym", Group.MISCELLANEOUS), DEPOSITION_SITE(
483             "Deposition Site", "deposition_site", Group.MISCELLANEOUS), SYNCHROTRON_BEAMLINE(
484             "Synchrotron Beamline", "synchrotron_beamline",
485             Group.MISCELLANEOUS), ENTITY_ID("Entity Id", "entity_id",
486             Group.CROSS_REFS), BEAM_SOURCE_NAME(
487             "Beam Source Name",
488  "beam_source_name",
489             Group.NAMES_AND_TAXONOMY), PROCESSING_SITE(
490             "Processing Site", "processing_site", Group.MISCELLANEOUS), ENTITY_WEIGHT(
491             "Entity Weight", "entity_weight", Group.MISCELLANEOUS), VERSION(
492             "Version", "_version_", Group.MISCELLANEOUS), ALL("ALL", "text",
493             Group.MISCELLANEOUS);
494
495     private enum Group
496     {
497       DATE_OF("Date Of"), NAMES_AND_TAXONOMY("Names & Taxonomy"),
498       MISCELLANEOUS("Miscellaneous"), QUALITY_MEASURES("Quality Measures"),
499       CROSS_REFS("Cross References"), PROCEDURE_AND_SOFTWARE(
500               "Procedures & Softwares");
501
502       Group(String name)
503       {
504         this.name = name;
505       }
506
507       private String name;
508
509       public String getName()
510       {
511         return this.name;
512       }
513     };
514     private String name;
515
516     private String code;
517
518     private Group group;
519
520     PDBDocField(String name, String code, Group group)
521     {
522       this.name = name;
523       this.code = code;
524       this.group = group;
525     }
526
527     public String getName()
528     {
529       return name;
530     }
531
532     public String getCode()
533     {
534       return code;
535     }
536
537     public String getGroup()
538     {
539       return group.getName();
540     }
541
542     @Override
543     public String toString()
544     {
545       return name;
546     }
547   }
548 }