JAL-1668 house keeping
[jalview.git] / src / jalview / ws / dbsources / PDBRestClient.java
index e00d9ac..039d23f 100644 (file)
@@ -1,10 +1,11 @@
 package jalview.ws.dbsources;
 
-import jalview.ws.uimodel.PDBSearchRequest;
-import jalview.ws.uimodel.PDBSearchResponse;
-import jalview.ws.uimodel.PDBSearchResponse.PDBResponseSummary;
+import jalview.ws.uimodel.PDBRestRequest;
+import jalview.ws.uimodel.PDBRestResponse;
+import jalview.ws.uimodel.PDBRestResponse.PDBResponseSummary;
 
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Iterator;
 import java.util.List;
 
@@ -26,44 +27,35 @@ public class PDBRestClient
 {
   private String pdbSearchEndpoint = "http://wwwdev.ebi.ac.uk/pdbe/search/pdb/select?";
 
-  public static void main(String[] args)
-  {
-    PDBSearchRequest request = new PDBSearchRequest();
-    request.setAllowEmptySeq(false);
-    request.setResponseSize(100);
-    request.setFieldToSearchBy("pfam_name");
-    request.setSearchTerm("Lipoc*");
-    List<PDBDocField> wantedFields = new ArrayList<PDBDocField>();
-    wantedFields.add(PDBDocField.MOLECULE_TYPE);
-    wantedFields.add(PDBDocField.PDB_ID);
-    wantedFields.add(PDBDocField.GENUS);
-    wantedFields.add(PDBDocField.GENE_NAME);
-    wantedFields.add(PDBDocField.TITLE);
-    request.setWantedFields(wantedFields);
-    new PDBRestClient().executeRequest(request);
-  }
-
-  public PDBSearchResponse executeRequest(PDBSearchRequest request)
+  /**
+   * Takes a PDBRestRequest object and returns a response upon execution
+   * 
+   * @param pdbRestRequest
+   *          the pdbRequest to be sent
+   * @return the pdbResponse object for the given pdbRequest
+   */
+  public PDBRestResponse executeRequest(PDBRestRequest pdbRestRequest)
   {
     ClientConfig clientConfig = new DefaultClientConfig();
     clientConfig.getFeatures().put(JSONConfiguration.FEATURE_POJO_MAPPING,
             Boolean.TRUE);
     Client client = Client.create(clientConfig);
 
-    String query = request.getFieldToSearchBy()
-            + request.getSearchTerm()
-            + ((request.isAllowEmptySeq()) ? ""
+    String query = pdbRestRequest.getFieldToSearchBy()
+            + pdbRestRequest.getSearchTerm()
+            + ((pdbRestRequest.isAllowEmptySeq()) ? ""
                     : " AND molecule_sequence:['' TO *]");
 
-    String wantedFields = getFieldsAsCommaDelimitedString(request
+    String wantedFields = getPDBDocFieldsAsCommaDelimitedString(pdbRestRequest
             .getWantedFields());
 
-    String responseSize = (request.getResponseSize() == 0) ? "200" : String
-            .valueOf(request.getResponseSize());
-    String sortParam = (request.getFieldToSortBy() == null || request
+    String responseSize = (pdbRestRequest.getResponseSize() == 0) ? "200"
+            : String.valueOf(pdbRestRequest.getResponseSize());
+    String sortParam = (pdbRestRequest.getFieldToSortBy() == null || pdbRestRequest
             .getFieldToSortBy().trim().isEmpty()) ? ""
-            : (request
-            .getFieldToSortBy() + (request.isAscending() ? " asc" : " desc"));
+ : (pdbRestRequest
+            .getFieldToSortBy() + (pdbRestRequest.isAscending() ? " asc"
+            : " desc"));
 
     WebResource webResource = client.resource(pdbSearchEndpoint)
             .queryParam("wt", "json").queryParam("fl", wantedFields)
@@ -78,7 +70,7 @@ public class PDBRestClient
     {
       if (clientResponse.getStatus() == 400)
       {
-        throw new RuntimeException(parseException(responseString));
+        throw new RuntimeException(parseJsonExceptionString(responseString));
       }
       else
       {
@@ -86,18 +78,25 @@ public class PDBRestClient
               + clientResponse.getStatus());
       }
     }
-    // System.out.println("--------------> " + responseString);
-    return parseResponse(responseString, request.getWantedFields(),
-            request.getAssociatedSequence());
+    clientResponse = null;
+    client = null;
+    return parsePDBJsonResponse(responseString, pdbRestRequest);
   }
 
-  private String parseException(String jsonResponse)
+  /**
+   * Process error response from PDB server if/when one occurs.
+   * 
+   * @param jsonErrorResponse
+   *          the json string containing error message from the server
+   * @return the processed error message from the json string
+   */
+  public static String parseJsonExceptionString(String jsonErrorResponse)
   {
     String errorMessage = "RunTime error";
     try
     {
       JSONParser jsonParser = new JSONParser();
-      JSONObject jsonObj = (JSONObject) jsonParser.parse(jsonResponse);
+      JSONObject jsonObj = (JSONObject) jsonParser.parse(jsonErrorResponse);
       JSONObject errorResponse = (JSONObject) jsonObj.get("error");
       errorMessage = errorResponse.get("msg").toString();
 
@@ -111,17 +110,30 @@ public class PDBRestClient
     return errorMessage;
   }
 
+  /**
+   * Parses json response string from PDB REST API to a PDBRestResponse
+   * instance. The parsed response is dynamic and based upon some of the request
+   * parameters.
+   * 
+   * @param pdbJsonResponseString
+   *          the json string to be parsed
+   * @param pdbRestRequest
+   *          the request object which contains parameters used to process the
+   *          json string
+   * @return the PDBRestResponse populated from the json string
+   */
   @SuppressWarnings("unchecked")
-  private PDBSearchResponse parseResponse(String jsonResponse,
-          List<PDBDocField> wantedFields, String associatedSequence)
+  public static PDBRestResponse parsePDBJsonResponse(
+          String pdbJsonResponseString,
+          PDBRestRequest pdbRestRequest)
   {
-    PDBSearchResponse searchResult = new PDBSearchResponse();
+    PDBRestResponse searchResult = new PDBRestResponse();
     List<PDBResponseSummary> result = null;
     try
     {
       JSONParser jsonParser = new JSONParser();
       JSONObject jsonObj = (JSONObject) jsonParser
-.parse(jsonResponse);
+              .parse(pdbJsonResponseString);
 
       JSONObject pdbResponse = (JSONObject) jsonObj.get("response");
       String queryTime = ((JSONObject) jsonObj.get("responseHeader")).get(
@@ -136,13 +148,9 @@ public class PDBRestClient
                 .hasNext();)
         {
           JSONObject doc = docIter.next();
-          // if (doc.get("molecule_sequence") != null)
-          // {
-          result.add(searchResult.new PDBResponseSummary(doc, wantedFields,
-                  associatedSequence));
-          // }
+          result.add(searchResult.new PDBResponseSummary(doc, pdbRestRequest));
         }
-        searchResult.setItemsFound(numFound);
+        searchResult.setNumberOfItemsFound(numFound);
         searchResult.setResponseTime(queryTime);
         searchResult.setSearchSummary(result);
       }
@@ -150,46 +158,139 @@ public class PDBRestClient
     {
       e.printStackTrace();
     }
-
     return searchResult;
   }
 
-  private String getFieldsAsCommaDelimitedString(List<PDBDocField> fields)
+  /**
+   * Joins the code value of each given PDBDocField into one comma delimited
+   * string, in the collection's iteration order.
+   * 
+   * @param pdbDocfields the fields to join; may be null or empty
+   * @return the comma delimited codes, or "" for null or empty input
+   */
+  public static String getPDBDocFieldsAsCommaDelimitedString(
+          Collection<PDBDocField> pdbDocfields)
   {
     String result = "";
-    if (fields != null && !fields.isEmpty())
+    if (pdbDocfields != null && !pdbDocfields.isEmpty())
     {
       StringBuilder returnedFields = new StringBuilder();
-      for (PDBDocField field : fields)
+      for (PDBDocField field : pdbDocfields)
       {
         returnedFields.append(",").append(field.getCode());
       }
-      returnedFields.deleteCharAt(0);
+      returnedFields.deleteCharAt(0); // drop the comma added before the first code
       result = returnedFields.toString();
     }
     return result;
   }
 
+  /**
+   * Determines the column index of the 'PDB Id' field in the dynamic summary
+   * table. The PDB Id serves as the unique identifier for a row in that table.
+   * 
+   * @param wantedFeilds the table columns, counted in iteration order
+   * @param hasRefSeq true to start counting at 1 rather than 0
+   * @return the PDB Id index; when PDB_ID is absent, the start offset plus the
+   *         number of fields (NOTE(review): no "not found" marker - confirm)
+   */
+  public static int getPDBIdColumIndex(
+          Collection<PDBDocField> wantedFeilds, boolean hasRefSeq)
+  {
+    int pdbFeildIndexCounter = hasRefSeq ? 1 : 0; // If a reference sequence is
+                                           // attached then start counting from
+                                           // 1 else start from zero
+    for (PDBDocField feild : wantedFeilds)
+    {
+      if (feild.equals(PDBDocField.PDB_ID))
+      {
+        break; // PDB Id found: the counter now holds its column index
+      }
+      ++pdbFeildIndexCounter;
+    }
+    return pdbFeildIndexCounter;
+  }
 
+  /**
+   * Represents the fields retrievable from a PDB Document response
+   *
+   */
   public enum PDBDocField
   {
     PDB_ID("PDB Id", "pdb_id"), TITLE("Title", "title"), MOLECULE_NAME(
             "Molecule", "molecule_name"), MOLECULE_TYPE("Molecule Type",
             "molecule_type"), MOLECULE_SEQUENCE("Sequence",
-            "molecule_sequence"), UNIPROT_FEATURES("Uniprot Features",
-            "uniprot_features"), PFAM_ACCESSION("PFAM Accession",
-            "pfam_accession"), INTERPRO_ACCESSION("InterPro Accession",
-            "interpro_accession"), UNIPROT_ACCESSION("UniProt Accession",
-            "uniprot_accession"), R_FACTOR("R Factor", "r_factor"), RESOLUTION(
-            "Resolution", "resolution"), DATA_QUALITY("Data Quality",
-            "data_quality"), OVERALL_QUALITY("Overall Quality",
-            "overall_quality"), POLYMER_COUNT("Polymer Count",
-            "number_of_polymers"), PROTEIN_CHAIN_COUNT(
-            "Protein Chain Count", "number_of_protein_chains"), BOUND_MOLECULE_COUNT(
-            "Bound Molecule Count", "number_of_bound_molecules"), POLYMER_RESIDUE_COUNT(
-            "Polymer Residue Count", "number_of_polymer_residues"), UNIPROT_COVERAGE(
-            "UniProt Coverage", "uniprot_coverage"), GENUS("GENUS", "genus"), GENE_NAME(
-            "Gene Name", "gene_name"), ALL("ALL", "text");
+            "molecule_sequence"), PFAM_ACCESSION("PFAM Accession",
+            "pfam_accession"), PFAM_NAME("PFAM Name", "pfam_name"), INTERPRO_NAME(
+            "InterPro Name", "interpro_name"), INTERPRO_ACCESSION(
+            "InterPro Accession", "interpro_accession"), UNIPROT_ID(
+            "UniProt Id", "uniprot_id"), UNIPROT_ACCESSION(
+            "UniProt Accession", "uniprot_accession"), UNIPROT_COVERAGE(
+            "UniProt Coverage", "uniprot_coverage"), UNIPROT_FEATURES(
+            "Uniprot Features", "uniprot_features"), R_FACTOR("R Factor",
+            "r_factor"), RESOLUTION("Resolution", "resolution"), DATA_QUALITY(
+            "Data Quality", "data_quality"), OVERALL_QUALITY(
+            "Overall Quality", "overall_quality"), POLYMER_COUNT(
+            "Number of Polymers", "number_of_polymers"), PROTEIN_CHAIN_COUNT(
+            "Number of Protein Chains", "number_of_protein_chains"), BOUND_MOLECULE_COUNT(
+            "Number of Bound Molecule", "number_of_bound_molecules"), POLYMER_RESIDUE_COUNT(
+            "Number of Polymer Residue", "number_of_polymer_residues"), GENUS(
+            "GENUS", "genus"), GENE_NAME("Gene Name", "gene_name"), EXPERIMENTAL_METHOD(
+            "Experimental Method", "experimental_method"), GO_ID("GO Id",
+            "go_id"), ASSEMBLY_ID("Assembly Id", "assembly_form"), ASSEMBLY_FORM(
+            "Assembly Form", "assembly_id"), ASSEMBLY_TYPE("Assembly Type",
+            "assembly_type"), SPACE_GROUP("Space Group", "spacegroup"), CATH_CODE(
+            "Cath Code", "cath_code"), TAX_ID("Tax Id", "tax_id"), TAX_QUERY(
+            "Tax Query", "tax_query"), INTERACTING_ENTRY_ID(
+            "Interacting Entry Id", "interacting_entry_id"), INTERACTING_ENTITY_ID(
+            "Interacting Entity Id", "interacting_entity_id"), INTERACTING_MOLECULES(
+            "Interacting Molecules", "interacting_molecules"), PUBMED_ID(
+            "Pubmed Id", "pubmed_id"), STATUS("Status", "status"), MODEL_QUALITY(
+            "Model Quality", "model_quality"), PIVOT_RESOLUTION(
+            "Pivot Resolution", "pivot_resolution"), DATA_REDUCTION_SOFTWARE(
+            "Data reduction software", "data_reduction_software"), MAX_OBSERVED_RES(
+            "Max observed residues", "max_observed_residues"), ORG_SCI_NAME(
+            "Organism scientific name", "organism_scientific_name"), SUPER_KINGDOM(
+            "Super kingdom", "superkingdom"), RANK("Rank", "rank"), CRYSTALLISATION_PH(
+            "Crystallisation Ph", "crystallisation_ph"), BIO_FUNCTION(
+            "Biological Function", "biological_function"), BIO_PROCESS(
+            "Biological Process", "biological_process"), BIO_CELL_COMP(
+            "Biological Cell Component", "biological_cell_component"), COMPOUND_NAME(
+            "Compound Name", "compound_name"), COMPOUND_ID("Compound Id",
+            "compound_id"), COMPOUND_WEIGHT("Compound Weight",
+            "compound_weight"), COMP_SYS_NAME("Compound Systematic Name",
+            "compound_systematic_name"), INTERACTING_LIG(
+            "Interacting Ligands", "interacting_ligands"), JOURNAL(
+            "Journal", "journal"), ALL_AUTHORS("All Authors", "all_authors"), EXPERIMENTAL_DATA_AVAILABLE(
+            "Experiment Data Available", "experiment_data_available"), DIFFRACTION_PROTOCOL(
+            "Diffraction Protocol", "diffraction_protocol"), REFINEMENT_SOFTWARE(
+            "Refinement Software", "refinement_software"), STRUCTURE_DETERMINATION_METHOD(
+            "Structure Determination Method",
+            "structure_determination_method"), SYNCHROTON_SITE(
+            "Synchrotron Site", "synchrotron_site"), SAMPLE_PREP_METHOD(
+            "Sample Preparation Method", "sample_preparation_method"), ENTRY_AUTHORS(
+            "Entry Authors", "entry_authors"), CITATION_TITLE(
+            "Citation Title", "citation_title"), STRUCTURE_SOLUTION_SOFTWARE(
+            "Structure Solution Software", "structure_solution_software"), ENTRY_ENTITY(
+            "Entry Entity", "entry_entity"), R_FREE("R Free", "r_free"), NO_OF_POLYMER_ENTITIES(
+            "Number of Polymer Entities", "number_of_polymer_entities"), NO_OF_BOUND_ENTITIES(
+            "Number of Bound Entities", "number_of_bound_entities"), CRYSTALLISATION_RESERVOIR(
+            "Crystallisation Reservoir", "crystallisation_reservoir"), DATA_SCALING_SW(
+            "Data Scalling Software", "data_scaling_software"), DETECTOR(
+            "Detector", "detector"), DETECTOR_TYPE("Detector Type",
+            "detector_type"), MODIFIED_RESIDUE_FLAG(
+            "Modified Residue Flag", "modified_residue_flag"), NUMBER_OF_COPIES(
+            "Number of Copies", "number_of_copies"), STRUCT_ASYM_ID(
+            "Struc Asym Id", "struct_asym_id"), HOMOLOGUS_PDB_ENTITY_ID(
+            "Homologus PDB Entity Id", "homologus_pdb_entity_id"), MOLECULE_SYNONYM(
+            "Molecule Synonym", "molecule_synonym"), DEPOSITION_SITE(
+            "Deposition Site", "deposition_site"), SYNCHROTRON_BEAMLINE(
+            "Synchrotron Beamline", "synchrotron_beamline"), ENTITY_ID(
+            "Entity Id", "entity_id"), BEAM_SOURCE_NAME("Beam Source Name",
+            "beam_source_name"), PROCESSING_SITE("Processing Site",
+            "processing_site"), ENTITY_WEIGHT("Entity Weight",
+            "entity_weight"), VERSION("Version", "_version_"), ALL("ALL",
+            "text");
 
     private String name;