JAL-3829 pushed the PDBFTSClient specific code to new ‘StructureChooserQuerySource...
authorJim Procter <j.procter@dundee.ac.uk>
Mon, 23 Aug 2021 16:45:04 +0000 (17:45 +0100)
committerJim Procter <j.procter@dundee.ac.uk>
Mon, 30 Aug 2021 11:17:32 +0000 (12:17 +0100)
src/jalview/gui/StructureChooser.java
src/jalview/gui/StructureChooserQuerySource.java [new file with mode: 0644]
test/jalview/gui/StructureChooserTest.java

index 33d8c33..99f71dd 100644 (file)
@@ -36,6 +36,7 @@ import jalview.fts.core.FTSRestResponse;
 import jalview.fts.service.pdb.PDBFTSRestClient;
 import jalview.io.DataSourceType;
 import jalview.jbgui.GStructureChooser;
+import jalview.jbgui.GStructureChooser.FilterOption;
 import jalview.structure.StructureMapping;
 import jalview.structure.StructureSelectionManager;
 import jalview.util.MessageManager;
@@ -71,8 +72,6 @@ public class StructureChooser extends GStructureChooser
 {
   private static final String AUTOSUPERIMPOSE = "AUTOSUPERIMPOSE";
 
-  private static int MAX_QLENGTH = 7820;
-
   private SequenceI selectedSequence;
 
   private SequenceI[] selectedSequences;
@@ -81,9 +80,7 @@ public class StructureChooser extends GStructureChooser
 
   private Collection<FTSData> discoveredStructuresSet;
 
-  private FTSRestRequest lastPdbRequest;
-
-  private FTSRestClientI pdbRestClient;
+  private StructureChooserQuerySource data = StructureChooserQuerySource.getPDBfts();
 
   private String selectedPdbFileName;
 
@@ -217,36 +214,28 @@ public class StructureChooser extends GStructureChooser
   void fetchStructuresMetaData()
   {
     long startTime = System.currentTimeMillis();
-    pdbRestClient = PDBFTSRestClient.getInstance();
     Collection<FTSDataColumnI> wantedFields = pdbDocFieldPrefs
             .getStructureSummaryFields();
 
     discoveredStructuresSet = new LinkedHashSet<>();
     HashSet<String> errors = new HashSet<>();
+
+    FilterOption selectedFilterOpt = ((FilterOption) cmb_filterOption
+            .getSelectedItem());
+    
     for (SequenceI seq : selectedSequences)
     {
-      FTSRestRequest pdbRequest = new FTSRestRequest();
-      pdbRequest.setAllowEmptySeq(false);
-      pdbRequest.setResponseSize(500);
-      pdbRequest.setFieldToSearchBy("(");
-      FilterOption selectedFilterOpt = ((FilterOption) cmb_filterOption
-              .getSelectedItem());
-      pdbRequest.setFieldToSortBy(selectedFilterOpt.getValue(),
-              !chk_invertFilter.isSelected());
-      pdbRequest.setWantedFields(wantedFields);
-      pdbRequest.setSearchTerm(buildQuery(seq) + ")");
-      pdbRequest.setAssociatedSequence(seq);
+
       FTSRestResponse resultList;
       try
       {
-        resultList = pdbRestClient.executeRequest(pdbRequest);
+        resultList = data.fetchStructuresMetaData(seq, wantedFields, selectedFilterOpt, !chk_invertFilter.isSelected());
       } catch (Exception e)
       {
         e.printStackTrace();
         errors.add(e.getMessage());
         continue;
       }
-      lastPdbRequest = pdbRequest;
       if (resultList.getSearchSummary() != null
               && !resultList.getSearchSummary().isEmpty())
       {
@@ -260,8 +249,7 @@ public class StructureChooser extends GStructureChooser
     if (discoveredStructuresSet != null
             && !discoveredStructuresSet.isEmpty())
     {
-      getResultTable().setModel(FTSRestResponse
-              .getTableModel(lastPdbRequest, discoveredStructuresSet));
+      getResultTable().setModel(data.getTableModel(discoveredStructuresSet));
       noOfStructuresFound = discoveredStructuresSet.size();
       mainFrame.setTitle(MessageManager.formatMessage(
               "label.structure_chooser_no_of_structures",
@@ -309,157 +297,6 @@ public class StructureChooser extends GStructureChooser
   }
 
   /**
-   * Builds a query string for a given sequences using its DBRef entries
-   * 
-   * @param seq
-   *          the sequences to build a query for
-   * @return the built query string
-   */
-
-  static String buildQuery(SequenceI seq)
-  {
-    boolean isPDBRefsFound = false;
-    boolean isUniProtRefsFound = false;
-    StringBuilder queryBuilder = new StringBuilder();
-    Set<String> seqRefs = new LinkedHashSet<>();
-    
-    /*
-     * note PDBs as DBRefEntry so they are not duplicated in query
-     */
-    Set<String> pdbids = new HashSet<>();
-
-    if (seq.getAllPDBEntries() != null
-            && queryBuilder.length() < MAX_QLENGTH)
-    {
-      for (PDBEntry entry : seq.getAllPDBEntries())
-      {
-        if (isValidSeqName(entry.getId()))
-        {
-          String id = entry.getId().toLowerCase();
-          queryBuilder.append("pdb_id:").append(id).append(" OR ");
-          isPDBRefsFound = true;
-          pdbids.add(id);
-        }
-      }
-    }
-
-    List<DBRefEntry> refs = seq.getDBRefs();
-    if (refs != null && refs.size() != 0)
-    {
-      for (int ib = 0, nb = refs.size(); ib < nb; ib++)
-      {
-         DBRefEntry dbRef = refs.get(ib);
-        if (isValidSeqName(getDBRefId(dbRef))
-                && queryBuilder.length() < MAX_QLENGTH)
-        {
-          if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT))
-          {
-            queryBuilder.append("uniprot_accession:")
-                    .append(getDBRefId(dbRef)).append(" OR ");
-            queryBuilder.append("uniprot_id:").append(getDBRefId(dbRef))
-                    .append(" OR ");
-            isUniProtRefsFound = true;
-          }
-          else if (dbRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
-          {
-
-            String id = getDBRefId(dbRef).toLowerCase();
-            if (!pdbids.contains(id))
-            {
-              queryBuilder.append("pdb_id:").append(id).append(" OR ");
-              isPDBRefsFound = true;
-              pdbids.add(id);
-            }
-          }
-          else
-          {
-            seqRefs.add(getDBRefId(dbRef));
-          }
-        }
-      }
-    }
-
-    if (!isPDBRefsFound && !isUniProtRefsFound)
-    {
-      String seqName = seq.getName();
-      seqName = sanitizeSeqName(seqName);
-      String[] names = seqName.toLowerCase().split("\\|");
-      for (String name : names)
-      {
-        // System.out.println("Found name : " + name);
-        name.trim();
-        if (isValidSeqName(name))
-        {
-          seqRefs.add(name);
-        }
-      }
-
-      for (String seqRef : seqRefs)
-      {
-        queryBuilder.append("text:").append(seqRef).append(" OR ");
-      }
-    }
-
-    int endIndex = queryBuilder.lastIndexOf(" OR ");
-    if (queryBuilder.toString().length() < 6)
-    {
-      return null;
-    }
-    String query = queryBuilder.toString().substring(0, endIndex);
-    return query;
-  }
-
-  /**
-   * Remove the following special characters from input string +, -, &, !, (, ),
-   * {, }, [, ], ^, ", ~, *, ?, :, \
-   * 
-   * @param seqName
-   * @return
-   */
-  static String sanitizeSeqName(String seqName)
-  {
-    Objects.requireNonNull(seqName);
-    return seqName.replaceAll("\\[\\d*\\]", "")
-            .replaceAll("[^\\dA-Za-z|_]", "").replaceAll("\\s+", "+");
-  }
-
-  /**
-   * Ensures sequence ref names are not less than 3 characters and does not
-   * contain a database name
-   * 
-   * @param seqName
-   * @return
-   */
-  static boolean isValidSeqName(String seqName)
-  {
-    // System.out.println("seqName : " + seqName);
-    String ignoreList = "pdb,uniprot,swiss-prot";
-    if (seqName.length() < 3)
-    {
-      return false;
-    }
-    if (seqName.contains(":"))
-    {
-      return false;
-    }
-    seqName = seqName.toLowerCase();
-    for (String ignoredEntry : ignoreList.split(","))
-    {
-      if (seqName.contains(ignoredEntry))
-      {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  static String getDBRefId(DBRefEntry dbRef)
-  {
-    String ref = dbRef.getAccessionId().replaceAll("GO:", "");
-    return ref;
-  }
-
-  /**
    * Filters a given list of discovered structures based on supplied argument
    * 
    * @param fieldToFilterBy
@@ -473,7 +310,6 @@ public class StructureChooser extends GStructureChooser
       public void run()
       {
         long startTime = System.currentTimeMillis();
-        pdbRestClient = PDBFTSRestClient.getInstance();
         lbl_loading.setVisible(true);
         Collection<FTSDataColumnI> wantedFields = pdbDocFieldPrefs
                 .getStructureSummaryFields();
@@ -482,41 +318,19 @@ public class StructureChooser extends GStructureChooser
 
         for (SequenceI seq : selectedSequences)
         {
-          FTSRestRequest pdbRequest = new FTSRestRequest();
-          if (fieldToFilterBy.equalsIgnoreCase("uniprot_coverage"))
-          {
-            pdbRequest.setAllowEmptySeq(false);
-            pdbRequest.setResponseSize(1);
-            pdbRequest.setFieldToSearchBy("(");
-            pdbRequest.setSearchTerm(buildQuery(seq) + ")");
-            pdbRequest.setWantedFields(wantedFields);
-            pdbRequest.setAssociatedSequence(seq);
-            pdbRequest.setFacet(true);
-            pdbRequest.setFacetPivot(fieldToFilterBy + ",entry_entity");
-            pdbRequest.setFacetPivotMinCount(1);
-          }
-          else
-          {
-            pdbRequest.setAllowEmptySeq(false);
-            pdbRequest.setResponseSize(1);
-            pdbRequest.setFieldToSearchBy("(");
-            pdbRequest.setFieldToSortBy(fieldToFilterBy,
-                    !chk_invertFilter.isSelected());
-            pdbRequest.setSearchTerm(buildQuery(seq) + ")");
-            pdbRequest.setWantedFields(wantedFields);
-            pdbRequest.setAssociatedSequence(seq);
-          }
+          
           FTSRestResponse resultList;
           try
           {
-            resultList = pdbRestClient.executeRequest(pdbRequest);
+            resultList = data.selectFirstRankedQuery(seq, wantedFields, fieldToFilterBy,
+                    !chk_invertFilter.isSelected());
+
           } catch (Exception e)
           {
             e.printStackTrace();
             errors.add(e.getMessage());
             continue;
           }
-          lastPdbRequest = pdbRequest;
           if (resultList.getSearchSummary() != null
                   && !resultList.getSearchSummary().isEmpty())
           {
@@ -532,8 +346,7 @@ public class StructureChooser extends GStructureChooser
           Collection<FTSData> reorderedStructuresSet = new LinkedHashSet<>();
           reorderedStructuresSet.addAll(filteredResponse);
           reorderedStructuresSet.addAll(discoveredStructuresSet);
-          getResultTable().setModel(FTSRestResponse
-                  .getTableModel(lastPdbRequest, reorderedStructuresSet));
+          getResultTable().setModel(data.getTableModel(reorderedStructuresSet));
 
           FTSRestResponse.configureTableColumn(getResultTable(),
                   wantedFields, tempUserPrefs);
@@ -1223,6 +1036,8 @@ public class StructureChooser extends GStructureChooser
         isValidPBDEntry = false;
         if (text.length() > 0)
         {
+          // TODO move this pdb id search into the PDB specific FTSSearchEngine 
+          // for moment, it will work fine as is because it is self-contained
           String searchTerm = text.toLowerCase();
           searchTerm = searchTerm.split(":")[0];
           // System.out.println(">>>>> search term : " + searchTerm);
@@ -1234,7 +1049,7 @@ public class StructureChooser extends GStructureChooser
           pdbRequest.setWantedFields(wantedFields);
           pdbRequest.setSearchTerm(searchTerm + ")");
           pdbRequest.setAssociatedSequence(selectedSequence);
-          pdbRestClient = PDBFTSRestClient.getInstance();
+          FTSRestClientI pdbRestClient = PDBFTSRestClient.getInstance();
           wantedFields.add(pdbRestClient.getPrimaryKeyColumn());
           FTSRestResponse resultList;
           try
diff --git a/src/jalview/gui/StructureChooserQuerySource.java b/src/jalview/gui/StructureChooserQuerySource.java
new file mode 100644 (file)
index 0000000..c127a78
--- /dev/null
@@ -0,0 +1,292 @@
+package jalview.gui;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+
+import javax.swing.table.TableModel;
+
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.PDBEntry;
+import jalview.datamodel.SequenceI;
+import jalview.fts.api.FTSData;
+import jalview.fts.api.FTSDataColumnI;
+import jalview.fts.api.FTSRestClientI;
+import jalview.fts.core.FTSRestRequest;
+import jalview.fts.core.FTSRestResponse;
+import jalview.fts.service.pdb.PDBFTSRestClient;
+import jalview.jbgui.GStructureChooser.FilterOption;
+
+/**
+ * logic for querying sources of structural data for structures of sequences
+ * 
+ * @author jprocter
+ *
+ * @param <T>
+ */
+public class StructureChooserQuerySource
+{
+  private FTSRestRequest lastPdbRequest;
+
+  private FTSRestClientI pdbRestClient;
+
+  private static int MAX_QLENGTH = 7820;
+
+  public StructureChooserQuerySource()
+  {
+  }
+
+  public static StructureChooserQuerySource getPDBfts()
+  {
+    StructureChooserQuerySource pdbfts = new StructureChooserQuerySource();
+    pdbfts.pdbRestClient = PDBFTSRestClient.getInstance();
+    return pdbfts;
+  }
+
+  /**
+   * Builds a query string for a given sequences using its DBRef entries
+   * 
+   * @param seq
+   *          the sequences to build a query for
+   * @return the built query string
+   */
+
+  String buildQuery(SequenceI seq)
+  {
+    boolean isPDBRefsFound = false;
+    boolean isUniProtRefsFound = false;
+    StringBuilder queryBuilder = new StringBuilder();
+    Set<String> seqRefs = new LinkedHashSet<>();
+
+    /*
+     * note PDBs as DBRefEntry so they are not duplicated in query
+     */
+    Set<String> pdbids = new HashSet<>();
+
+    if (seq.getAllPDBEntries() != null
+            && queryBuilder.length() < MAX_QLENGTH)
+    {
+      for (PDBEntry entry : seq.getAllPDBEntries())
+      {
+        if (isValidSeqName(entry.getId()))
+        {
+          String id = entry.getId().toLowerCase();
+          queryBuilder.append("pdb_id:").append(id).append(" OR ");
+          isPDBRefsFound = true;
+          pdbids.add(id);
+        }
+      }
+    }
+
+    List<DBRefEntry> refs = seq.getDBRefs();
+    if (refs != null && refs.size() != 0)
+    {
+      for (int ib = 0, nb = refs.size(); ib < nb; ib++)
+      {
+        DBRefEntry dbRef = refs.get(ib);
+        if (isValidSeqName(getDBRefId(dbRef))
+                && queryBuilder.length() < MAX_QLENGTH)
+        {
+          if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT))
+          {
+            queryBuilder.append("uniprot_accession:")
+                    .append(getDBRefId(dbRef)).append(" OR ");
+            queryBuilder.append("uniprot_id:").append(getDBRefId(dbRef))
+                    .append(" OR ");
+            isUniProtRefsFound = true;
+          }
+          else if (dbRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
+          {
+
+            String id = getDBRefId(dbRef).toLowerCase();
+            if (!pdbids.contains(id))
+            {
+              queryBuilder.append("pdb_id:").append(id).append(" OR ");
+              isPDBRefsFound = true;
+              pdbids.add(id);
+            }
+          }
+          else
+          {
+            seqRefs.add(getDBRefId(dbRef));
+          }
+        }
+      }
+    }
+
+    if (!isPDBRefsFound && !isUniProtRefsFound)
+    {
+      String seqName = seq.getName();
+      seqName = sanitizeSeqName(seqName);
+      String[] names = seqName.toLowerCase().split("\\|");
+      for (String name : names)
+      {
+        // System.out.println("Found name : " + name);
+        name.trim();
+        if (isValidSeqName(name))
+        {
+          seqRefs.add(name);
+        }
+      }
+
+      for (String seqRef : seqRefs)
+      {
+        queryBuilder.append("text:").append(seqRef).append(" OR ");
+      }
+    }
+
+    int endIndex = queryBuilder.lastIndexOf(" OR ");
+    if (queryBuilder.toString().length() < 6)
+    {
+      return null;
+    }
+    String query = queryBuilder.toString().substring(0, endIndex);
+    return query;
+  }
+
+  /**
+   * Remove the following special characters from input string +, -, &, !, (, ),
+   * {, }, [, ], ^, ", ~, *, ?, :, \
+   * 
+   * @param seqName
+   * @return
+   */
+  static String sanitizeSeqName(String seqName)
+  {
+    Objects.requireNonNull(seqName);
+    return seqName.replaceAll("\\[\\d*\\]", "")
+            .replaceAll("[^\\dA-Za-z|_]", "").replaceAll("\\s+", "+");
+  }
+
+  /**
+   * Ensures sequence ref names are not less than 3 characters and does not
+   * contain a database name
+   * 
+   * @param seqName
+   * @return
+   */
+  static boolean isValidSeqName(String seqName)
+  {
+    // System.out.println("seqName : " + seqName);
+    String ignoreList = "pdb,uniprot,swiss-prot";
+    if (seqName.length() < 3)
+    {
+      return false;
+    }
+    if (seqName.contains(":"))
+    {
+      return false;
+    }
+    seqName = seqName.toLowerCase();
+    for (String ignoredEntry : ignoreList.split(","))
+    {
+      if (seqName.contains(ignoredEntry))
+      {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  static String getDBRefId(DBRefEntry dbRef)
+  {
+    String ref = dbRef.getAccessionId().replaceAll("GO:", "");
+    return ref;
+  }
+
+  /**
+   * FTSRestClient specific query builder to recover associated structure data
+   * records for a sequence
+   * 
+   * @param seq
+   *          - seq to generate a query for
+   * @param wantedFields
+   *          - fields to retrieve
+   * @param selectedFilterOpt
+   *          - criterion for ranking results (e.g. resolution)
+   * @param b
+   *          - sort ascending or descending
+   * @return
+   * @throws Exception
+   */
+  public FTSRestResponse fetchStructuresMetaData(SequenceI seq,
+          Collection<FTSDataColumnI> wantedFields,
+          FilterOption selectedFilterOpt, boolean b) throws Exception
+  {
+    FTSRestResponse resultList;
+    FTSRestRequest pdbRequest = new FTSRestRequest();
+    pdbRequest.setAllowEmptySeq(false);
+    pdbRequest.setResponseSize(500);
+    pdbRequest.setFieldToSearchBy("(");
+    pdbRequest.setFieldToSortBy(selectedFilterOpt.getValue(), b);
+    pdbRequest.setWantedFields(wantedFields);
+    pdbRequest.setSearchTerm(buildQuery(seq) + ")");
+    pdbRequest.setAssociatedSequence(seq);
+    resultList = pdbRestClient.executeRequest(pdbRequest);
+
+    lastPdbRequest = pdbRequest;
+    return resultList;
+  }
+
+  /**
+   * FTSRestClient specific query builder to pick top ranked entry from a
+   * fetchStructuresMetaData query
+   * 
+   * @param seq
+   *          - seq to generate a query for
+   * @param wantedFields
+   *          - fields to retrieve
+   * @param selectedFilterOpt
+   *          - criterion for ranking results (e.g. resolution)
+   * @param b
+   *          - sort ascending or descending
+   * @return
+   * @throws Exception
+   */
+  public FTSRestResponse selectFirstRankedQuery(SequenceI seq,
+          Collection<FTSDataColumnI> wantedFields, String fieldToFilterBy,
+          boolean b) throws Exception
+  {
+
+    FTSRestResponse resultList;
+    FTSRestRequest pdbRequest = new FTSRestRequest();
+    if (fieldToFilterBy.equalsIgnoreCase("uniprot_coverage"))
+    {
+      pdbRequest.setAllowEmptySeq(false);
+      pdbRequest.setResponseSize(1);
+      pdbRequest.setFieldToSearchBy("(");
+      pdbRequest.setSearchTerm(buildQuery(seq) + ")");
+      pdbRequest.setWantedFields(wantedFields);
+      pdbRequest.setAssociatedSequence(seq);
+      pdbRequest.setFacet(true);
+      pdbRequest.setFacetPivot(fieldToFilterBy + ",entry_entity");
+      pdbRequest.setFacetPivotMinCount(1);
+    }
+    else
+    {
+      pdbRequest.setAllowEmptySeq(false);
+      pdbRequest.setResponseSize(1);
+      pdbRequest.setFieldToSearchBy("(");
+      pdbRequest.setFieldToSortBy(fieldToFilterBy, b);
+      pdbRequest.setSearchTerm(buildQuery(seq) + ")");
+      pdbRequest.setWantedFields(wantedFields);
+      pdbRequest.setAssociatedSequence(seq);
+    }
+    resultList = pdbRestClient.executeRequest(pdbRequest);
+
+    lastPdbRequest = pdbRequest;
+    return resultList;
+  }
+
+  public TableModel getTableModel(
+          Collection<FTSData> discoveredStructuresSet)
+  {
+    return FTSRestResponse.getTableModel(lastPdbRequest,
+            discoveredStructuresSet);
+  }
+
+}
\ No newline at end of file
index 9529d9f..66e606a 100644 (file)
@@ -88,15 +88,16 @@ public class StructureChooserTest
   public void buildQueryTest()
   {
     System.out.println("seq >>>> " + seq);
-    String query = StructureChooser.buildQuery(seq);
+    StructureChooserQuerySource scquery = StructureChooserQuerySource.getPDBfts();
+    String query = scquery.buildQuery(seq);
     assertEquals("pdb_id:1tim", query);
     seq.getAllPDBEntries().clear();
-    query = StructureChooser.buildQuery(seq);
+    query = scquery.buildQuery(seq);
     assertEquals(
             "text:XYZ_1 OR text:XYZ_2 OR text:XYZ_3 OR text:XYZ_4 OR text:4kqy",
             query);
     seq.setDBRefs(null);
-    query = StructureChooser.buildQuery(seq);
+    query = scquery.buildQuery(seq);
     System.out.println(query);
     assertEquals("text:4kqy", query);
 
@@ -119,7 +120,7 @@ public class StructureChooserTest
     System.out.println("");
     System.out.println(seq.getDBRefs());
     System.out.println(query);
-    query = StructureChooser.buildQuery(seq);
+    query = scquery.buildQuery(seq);
     assertEquals(
             "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
             query);
@@ -164,17 +165,17 @@ public class StructureChooserTest
   public void sanitizeSeqNameTest()
   {
     String name = "ab_cdEF|fwxyz012349";
-    assertEquals(name, StructureChooser.sanitizeSeqName(name));
+    assertEquals(name, StructureChooserQuerySource.sanitizeSeqName(name));
 
     // remove a [nn] substring
     name = "abcde12[345]fg";
-    assertEquals("abcde12fg", StructureChooser.sanitizeSeqName(name));
+    assertEquals("abcde12fg", StructureChooserQuerySource.sanitizeSeqName(name));
 
     // remove characters other than a-zA-Z0-9 | or _
     name = "ab[cd],.\t£$*!- \\\"@:e";
-    assertEquals("abcde", StructureChooser.sanitizeSeqName(name));
+    assertEquals("abcde", StructureChooserQuerySource.sanitizeSeqName(name));
 
     name = "abcde12[345a]fg";
-    assertEquals("abcde12345afg", StructureChooser.sanitizeSeqName(name));
+    assertEquals("abcde12345afg", StructureChooserQuerySource.sanitizeSeqName(name));
   }
 }