JAL-1919 code improvement to make PDB sequence fetcher file format configurable....
[jalview.git] / src / jalview / gui / StructureChooser.java
index cf58322..89ba84b 100644 (file)
@@ -43,6 +43,8 @@ import java.util.Collection;
 import java.util.HashSet;
 import java.util.LinkedHashSet;
 import java.util.List;
+import java.util.Objects;
+import java.util.Vector;
 
 import javax.swing.JCheckBox;
 import javax.swing.JComboBox;
@@ -301,6 +303,7 @@ public class StructureChooser extends GStructureChooser implements
     if (!isPDBRefsFound && !isUniProtRefsFound)
     {
       String seqName = seq.getName();
+      seqName = sanitizeSeqName(seqName);
       String[] names = seqName.toLowerCase().split("\\|");
       for (String name : names)
       {
@@ -328,6 +331,21 @@ public class StructureChooser extends GStructureChooser implements
   }
 
   /**
+   * Remove the following special characters from input string +, -, &, |, !, (,
+   * ), {, }, [, ], ^, ", ~, *, ?, :, \
+   * 
+   * @param seqName
+   * @return
+   */
+  private static String sanitizeSeqName(String seqName)
+  {
+    Objects.requireNonNull(seqName);
+    return seqName.replaceAll("\\[\\d*\\]", "")
+            .replaceAll("[^\\dA-Za-z ]", "").replaceAll("\\s+", "+");
+  }
+
+
+  /**
    * Ensures sequence ref names are not less than 3 characters and does not
    * contain a database name
    * 
@@ -382,17 +400,34 @@ public class StructureChooser extends GStructureChooser implements
                 .getStructureSummaryFields();
         Collection<PDBResponseSummary> filteredResponse = new HashSet<PDBResponseSummary>();
         HashSet<String> errors = new HashSet<String>();
+
         for (SequenceI seq : selectedSequences)
         {
           PDBRestRequest pdbRequest = new PDBRestRequest();
-          pdbRequest.setAllowEmptySeq(false);
-          pdbRequest.setResponseSize(1);
-          pdbRequest.setFieldToSearchBy("(");
-          pdbRequest.setFieldToSortBy(fieldToFilterBy,
-                  !chk_invertFilter.isSelected());
-          pdbRequest.setSearchTerm(buildQuery(seq) + ")");
-          pdbRequest.setWantedFields(wantedFields);
-          pdbRequest.setAssociatedSequence(seq);
+          if (fieldToFilterBy.equalsIgnoreCase("uniprot_coverage"))
+          {
+            System.out.println(">>>>>> Filtering with uniprot coverate");
+            pdbRequest.setAllowEmptySeq(false);
+            pdbRequest.setResponseSize(1);
+            pdbRequest.setFieldToSearchBy("(");
+            pdbRequest.setSearchTerm(buildQuery(seq) + ")");
+            pdbRequest.setWantedFields(wantedFields);
+            pdbRequest.setAssociatedSequence(seq);
+            pdbRequest.setFacet(true);
+            pdbRequest.setFacetPivot(fieldToFilterBy + ",entry_entity");
+            pdbRequest.setFacetPivotMinCount(1);
+          }
+          else
+          {
+            pdbRequest.setAllowEmptySeq(false);
+            pdbRequest.setResponseSize(1);
+            pdbRequest.setFieldToSearchBy("(");
+            pdbRequest.setFieldToSortBy(fieldToFilterBy,
+                    !chk_invertFilter.isSelected());
+            pdbRequest.setSearchTerm(buildQuery(seq) + ")");
+            pdbRequest.setWantedFields(wantedFields);
+            pdbRequest.setAssociatedSequence(seq);
+          }
           pdbRestCleint = new PDBRestClient();
           PDBRestResponse resultList;
           try
@@ -491,15 +526,15 @@ public class StructureChooser extends GStructureChooser implements
     {
       cmb_filterOption.addItem(new FilterOption("Best Quality",
               PDBDocField.OVERALL_QUALITY.getCode(), VIEWS_FILTER));
-      cmb_filterOption.addItem(new FilterOption("Best UniProt Coverage",
+      cmb_filterOption.addItem(new FilterOption("Most UniProt Coverage",
               PDBDocField.UNIPROT_COVERAGE.getCode(), VIEWS_FILTER));
-      cmb_filterOption.addItem(new FilterOption("Highest Resolution",
+      cmb_filterOption.addItem(new FilterOption("Best Resolution",
               PDBDocField.RESOLUTION.getCode(), VIEWS_FILTER));
-      cmb_filterOption.addItem(new FilterOption("Highest Protein Chain",
+      cmb_filterOption.addItem(new FilterOption("Most Protein Chain",
               PDBDocField.PROTEIN_CHAIN_COUNT.getCode(), VIEWS_FILTER));
-      cmb_filterOption.addItem(new FilterOption("Highest Bound Molecules",
+      cmb_filterOption.addItem(new FilterOption("Most Bound Molecules",
               PDBDocField.BOUND_MOLECULE_COUNT.getCode(), VIEWS_FILTER));
-      cmb_filterOption.addItem(new FilterOption("Highest Polymer Residues",
+      cmb_filterOption.addItem(new FilterOption("Most Polymer Residues",
               PDBDocField.POLYMER_RESIDUE_COUNT.getCode(), VIEWS_FILTER));
     }
     cmb_filterOption.addItem(new FilterOption("Enter PDB Id", "-",
@@ -706,7 +741,12 @@ public class StructureChooser extends GStructureChooser implements
         SequenceI selectedSeq = (SequenceI) tbl_summary.getValueAt(row,
                 refSeqColIndex);
         selectedSeqsToView.add(selectedSeq);
-        PDBEntry pdbEntry = selectedSeq.getPDBEntry(pdbIdStr);
+            PDBEntry pdbEntry = selectedSeq.getPDBEntry(pdbIdStr);
+            if (pdbEntry == null)
+            {
+              pdbEntry = getFindEntry(pdbIdStr,
+                      selectedSeq.getAllPDBEntries());
+            }
         if (pdbEntry == null)
         {
           pdbEntry = new PDBEntry();
@@ -787,6 +827,21 @@ public class StructureChooser extends GStructureChooser implements
     }).start();
   }
 
+  private PDBEntry getFindEntry(String id, Vector<PDBEntry> pdbEntries)
+  {
+    Objects.requireNonNull(id);
+    Objects.requireNonNull(pdbEntries);
+    PDBEntry foundEntry = null;
+    for (PDBEntry entry : pdbEntries)
+    {
+      if (entry.getId().equalsIgnoreCase(id))
+      {
+        return entry;
+      }
+    }
+    return foundEntry;
+  }
+
   private void launchStructureViewer(StructureSelectionManager ssm,
           final PDBEntry[] pdbEntriesToView,
           final AlignmentPanel alignPanel, SequenceI[] sequences)
@@ -892,7 +947,8 @@ public class StructureChooser extends GStructureChooser implements
           pdbRequest.setResponseSize(1);
           pdbRequest.setFieldToSearchBy("(pdb_id:");
           pdbRequest.setWantedFields(wantedFields);
-          pdbRequest.setSearchTerm(txt_search.getText() + ")");
+          pdbRequest
+                  .setSearchTerm(txt_search.getText().toLowerCase() + ")");
           pdbRequest.setAssociatedSequence(selectedSequence);
           pdbRestCleint = new PDBRestClient();
           PDBRestResponse resultList;