JAL-3829 Structure chooser specific FTS interface and query source logic for PDBe...
authorJim Procter <j.procter@dundee.ac.uk>
Mon, 30 Aug 2021 09:06:08 +0000 (10:06 +0100)
committerJim Procter <j.procter@dundee.ac.uk>
Mon, 30 Aug 2021 11:17:32 +0000 (12:17 +0100)
src/jalview/fts/api/StructureFTSRestClientI.java [new file with mode: 0644]
src/jalview/gui/structurechooser/PDBStructureChooserQuerySource.java [new file with mode: 0644]
src/jalview/gui/structurechooser/ThreeDBStructureChooserQuerySource.java [new file with mode: 0644]

diff --git a/src/jalview/fts/api/StructureFTSRestClientI.java b/src/jalview/fts/api/StructureFTSRestClientI.java
new file mode 100644 (file)
index 0000000..4974b80
--- /dev/null
@@ -0,0 +1,10 @@
+package jalview.fts.api;
+
+import java.util.Collection;
+
+/**
+ * Structure chooser specific addition to the FTS client API: a client
+ * implementing this interface can report a collection of structure data
+ * columns.
+ */
+public interface StructureFTSRestClientI
+{
+
+  /**
+   * @return a collection of data columns; presumably the structure columns
+   *         that are displayed by default - TODO confirm against the
+   *         implementing clients
+   */
+  Collection<FTSDataColumnI> getAllDefaultDisplayedStructureDataColumns();
+
+}
diff --git a/src/jalview/gui/structurechooser/PDBStructureChooserQuerySource.java b/src/jalview/gui/structurechooser/PDBStructureChooserQuerySource.java
new file mode 100644 (file)
index 0000000..3d2efb2
--- /dev/null
@@ -0,0 +1,324 @@
+package jalview.gui.structurechooser;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+
+import javax.swing.JTable;
+import javax.swing.table.TableModel;
+
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.PDBEntry;
+import jalview.datamodel.SequenceI;
+import jalview.fts.api.FTSData;
+import jalview.fts.api.FTSDataColumnI;
+import jalview.fts.api.FTSRestClientI;
+import jalview.fts.core.FTSDataColumnPreferences;
+import jalview.fts.core.FTSDataColumnPreferences.PreferenceSource;
+import jalview.fts.core.FTSRestRequest;
+import jalview.fts.core.FTSRestResponse;
+import jalview.fts.service.pdb.PDBFTSRestClient;
+import jalview.jbgui.GStructureChooser.FilterOption;
+
+/**
+ * logic for querying the PDBe API for structures of sequences
+ * 
+ * @author jprocter
+ */
+public class PDBStructureChooserQuerySource
+        extends StructureChooserQuerySource
+{
+
+  /**
+   * Length (in characters) that a query under construction may reach before
+   * further PDB or DBRef terms stop being appended to it
+   */
+  private static final int MAX_QLENGTH = 7820;
+
+  /**
+   * Creates a query source backed by the PDB FTS REST client. The same client
+   * is used to execute requests and to configure the structure chooser's
+   * data column preferences.
+   */
+  public PDBStructureChooserQuerySource()
+  {
+    pdbRestClient = PDBFTSRestClient.getInstance();
+    docFieldPrefs = new FTSDataColumnPreferences(
+            PreferenceSource.STRUCTURE_CHOOSER,
+            PDBFTSRestClient.getInstance());
+  }
+
+
+  /**
+   * Builds a query string for a sequence from its PDB entries and its DBRef
+   * entries. PDB ids and UniProt accessions/ids are joined with " OR ". If
+   * neither a PDB nor a UniProt reference is found, the accessions of the other
+   * DBRefs and the '|' separated parts of the sanitised sequence name are used
+   * as free text terms instead.
+   * 
+   * @param seq
+   *          the sequence to build a query for
+   * @return the built query string, or null if no usable term was found
+   */
+  public String buildQuery(SequenceI seq)
+  {
+    boolean isPDBRefsFound = false;
+    boolean isUniProtRefsFound = false;
+    StringBuilder queryBuilder = new StringBuilder();
+    Set<String> seqRefs = new LinkedHashSet<>();
+
+    /*
+     * note PDB ids already added so they are not duplicated in query
+     */
+    Set<String> pdbids = new HashSet<>();
+
+    if (seq.getAllPDBEntries() != null)
+    {
+      for (PDBEntry entry : seq.getAllPDBEntries())
+      {
+        // the limit has to be tested per entry: before the loop the query is
+        // still empty, so a test made there can never stop anything
+        if (queryBuilder.length() >= MAX_QLENGTH)
+        {
+          break;
+        }
+        String entryId = entry.getId();
+        if (entryId != null && isValidSeqName(entryId))
+        {
+          // Locale.ROOT: ids must not change with the user's default locale
+          String id = entryId.toLowerCase(java.util.Locale.ROOT);
+          if (pdbids.add(id))
+          {
+            queryBuilder.append("pdb_id:").append(id).append(" OR ");
+          }
+          isPDBRefsFound = true;
+        }
+      }
+    }
+
+    List<DBRefEntry> refs = seq.getDBRefs();
+    if (refs != null)
+    {
+      for (DBRefEntry dbRef : refs)
+      {
+        // the query only grows, so nothing more can be added after this
+        if (queryBuilder.length() >= MAX_QLENGTH)
+        {
+          break;
+        }
+        String refId = getDBRefId(dbRef);
+        if (!isValidSeqName(refId))
+        {
+          continue;
+        }
+        // constant-first comparison tolerates a DBRef without a source
+        String source = dbRef.getSource();
+        if (DBRefSource.UNIPROT.equalsIgnoreCase(source))
+        {
+          queryBuilder.append("uniprot_accession:").append(refId)
+                  .append(" OR ");
+          queryBuilder.append("uniprot_id:").append(refId).append(" OR ");
+          isUniProtRefsFound = true;
+        }
+        else if (DBRefSource.PDB.equalsIgnoreCase(source))
+        {
+          String id = refId.toLowerCase(java.util.Locale.ROOT);
+          if (pdbids.add(id))
+          {
+            queryBuilder.append("pdb_id:").append(id).append(" OR ");
+            isPDBRefsFound = true;
+          }
+        }
+        else
+        {
+          seqRefs.add(refId);
+        }
+      }
+    }
+
+    if (!isPDBRefsFound && !isUniProtRefsFound)
+    {
+      // fall back to a free text search on other accessions and name parts
+      String seqName = seq.getName();
+      if (seqName != null)
+      {
+        String[] names = sanitizeSeqName(seqName)
+                .toLowerCase(java.util.Locale.ROOT).split("\\|");
+        for (String name : names)
+        {
+          // String.trim() returns a new string, so keep the result
+          String trimmed = name.trim();
+          if (isValidSeqName(trimmed))
+          {
+            seqRefs.add(trimmed);
+          }
+        }
+      }
+
+      for (String seqRef : seqRefs)
+      {
+        queryBuilder.append("text:").append(seqRef).append(" OR ");
+      }
+    }
+
+    if (queryBuilder.length() == 0)
+    {
+      return null;
+    }
+    // every term was appended with a trailing " OR "; drop the last one
+    queryBuilder.setLength(queryBuilder.length() - " OR ".length());
+    return queryBuilder.toString();
+  }
+
+  /**
+   * Reduces a sequence name to characters that are safe to use in a query.
+   * First any square-bracketed run of digits (e.g. "[123]") is removed
+   * entirely; then every character that is not an ASCII letter, a digit, '|'
+   * or '_' is removed. This strips whitespace as well as the special
+   * characters +, -, &, !, (, ), {, }, [, ], ^, ", ~, *, ?, :, \
+   * 
+   * @param seqName
+   *          the sequence name to sanitise; must not be null
+   * @return the sanitised name
+   * @throws NullPointerException
+   *           if seqName is null
+   */
+  public static String sanitizeSeqName(String seqName)
+  {
+    Objects.requireNonNull(seqName);
+    // no separate whitespace handling is needed: whitespace is not in the
+    // permitted character set, so the second replacement already removes it
+    return seqName.replaceAll("\\[\\d*\\]", "")
+            .replaceAll("[^\\dA-Za-z|_]", "");
+  }
+
+  /**
+   * Checks whether a sequence ref name is usable as a query term: it must be
+   * at least 3 characters long, must not contain ':' and must not contain a
+   * database name (pdb, uniprot or swiss-prot, in any letter case)
+   * 
+   * @param seqName
+   *          the name to check; null is treated as not valid
+   * @return true if the name passes all checks
+   */
+  static boolean isValidSeqName(String seqName)
+  {
+    if (seqName == null || seqName.length() < 3 || seqName.contains(":"))
+    {
+      return false;
+    }
+    // Locale.ROOT: with e.g. a Turkish default locale "UNIPROT" would lower
+    // case to a dotless i and no longer match the ignore list
+    String lowerCased = seqName.toLowerCase(java.util.Locale.ROOT);
+    String[] ignoreList = { "pdb", "uniprot", "swiss-prot" };
+    for (String ignoredEntry : ignoreList)
+    {
+      if (lowerCased.contains(ignoredEntry))
+      {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Returns the accession id of a database reference with every occurrence of
+   * the text "GO:" removed
+   * 
+   * @param dbRef
+   *          the database reference to read the accession from
+   * @return the accession id without "GO:"
+   */
+  static String getDBRefId(DBRefEntry dbRef)
+  {
+    return dbRef.getAccessionId().replace("GO:", "");
+  }
+
+  /**
+   * Queries the FTS REST client for the structure data records associated
+   * with a sequence. Up to 500 records are requested and the executed request
+   * is remembered in lastPdbRequest.
+   * 
+   * NOTE(review): when buildQuery(seq) yields null the search term sent is the
+   * literal text "null)" - confirm whether callers rely on this.
+   * 
+   * @param seq
+   *          - seq to generate a query for
+   * @param wantedFields
+   *          - fields to retrieve
+   * @param selectedFilterOpt
+   *          - criterion for ranking results (e.g. resolution); its value is
+   *          the field the results are sorted by
+   * @param b
+   *          - sort ascending or descending
+   * @return the response from the REST client
+   * @throws Exception
+   */
+  public FTSRestResponse fetchStructuresMetaData(SequenceI seq,
+          Collection<FTSDataColumnI> wantedFields,
+          FilterOption selectedFilterOpt, boolean b) throws Exception
+  {
+    FTSRestRequest request = new FTSRestRequest();
+    request.setAllowEmptySeq(false);
+    request.setResponseSize(500);
+    // the search field is an opening bracket, closed after the query below
+    request.setFieldToSearchBy("(");
+    request.setFieldToSortBy(selectedFilterOpt.getValue(), b);
+    request.setWantedFields(wantedFields);
+    String bracketedQuery = buildQuery(seq) + ")";
+    request.setSearchTerm(bracketedQuery);
+    request.setAssociatedSequence(seq);
+
+    FTSRestResponse response = pdbRestClient.executeRequest(request);
+    // only recorded once the request has been executed without an exception
+    lastPdbRequest = request;
+    return response;
+  }
+
+  /**
+   * Queries the FTS REST client for the single top ranked structure record of
+   * a sequence. For the "uniprot_coverage" criterion a facet pivot on that
+   * field and entry_entity is requested; for any other criterion the results
+   * are sorted by that field. The executed request is remembered in
+   * lastPdbRequest.
+   * 
+   * @param seq
+   *          - seq to generate a query for
+   * @param wantedFields
+   *          - fields to retrieve
+   * @param fieldToFilterBy
+   *          - name of the field used for ranking results (e.g. resolution)
+   * @param b
+   *          - sort ascending or descending (not used for uniprot_coverage)
+   * @return the response from the REST client
+   * @throws Exception
+   */
+  public FTSRestResponse selectFirstRankedQuery(SequenceI seq,
+          Collection<FTSDataColumnI> wantedFields, String fieldToFilterBy,
+          boolean b) throws Exception
+  {
+    FTSRestRequest request = new FTSRestRequest();
+    boolean rankByCoverage = fieldToFilterBy
+            .equalsIgnoreCase("uniprot_coverage");
+
+    // settings shared by both kinds of ranking
+    request.setAllowEmptySeq(false);
+    request.setResponseSize(1);
+    request.setFieldToSearchBy("(");
+    if (!rankByCoverage)
+    {
+      request.setFieldToSortBy(fieldToFilterBy, b);
+    }
+    request.setSearchTerm(buildQuery(seq) + ")");
+    request.setWantedFields(wantedFields);
+    request.setAssociatedSequence(seq);
+    if (rankByCoverage)
+    {
+      request.setFacet(true);
+      request.setFacetPivot(fieldToFilterBy + ",entry_entity");
+      request.setFacetPivotMinCount(1);
+    }
+
+    FTSRestResponse response = pdbRestClient.executeRequest(request);
+    lastPdbRequest = request;
+    return response;
+  }
+
+
+  /**
+   * Collects a PDBEntry for each selected row of the results table and adds
+   * the row's reference sequence to selectedSeqsToView. An entry already held
+   * by the sequence is reused; otherwise a new MMCIF entry is created for the
+   * row's PDB id and added to the sequence's dataset sequence.
+   * 
+   * @param restable
+   *          the results table, with "Ref Sequence" and "PDB Id" columns
+   * @param selectedRows
+   *          the selected rows, as table (view) row indices
+   * @param selectedSeqsToView
+   *          list that receives the reference sequence of each selected row
+   * @return one PDBEntry per selected row, in the order of selectedRows
+   */
+  @Override
+  public PDBEntry[] collectSelectedRows(JTable restable, int[] selectedRows,
+          List<SequenceI> selectedSeqsToView)
+  {
+    // JTable.getValueAt addresses columns by their position in the view, so
+    // look the columns up in the column model (view order) rather than using
+    // their model index, which differs once columns are moved or hidden
+    int refSeqColIndex = restable.getColumnModel()
+            .getColumnIndex("Ref Sequence");
+    int idColumnIndex = restable.getColumnModel().getColumnIndex("PDB Id");
+
+    PDBEntry[] pdbEntriesToView = new PDBEntry[selectedRows.length];
+    int count = 0;
+
+    for (int row : selectedRows)
+    {
+      String pdbIdStr = restable.getValueAt(row, idColumnIndex).toString();
+      SequenceI selectedSeq = (SequenceI) restable.getValueAt(row,
+              refSeqColIndex);
+      selectedSeqsToView.add(selectedSeq);
+      PDBEntry pdbEntry = selectedSeq.getPDBEntry(pdbIdStr);
+      if (pdbEntry == null)
+      {
+        pdbEntry = getFindEntry(pdbIdStr, selectedSeq.getAllPDBEntries());
+      }
+
+      if (pdbEntry == null)
+      {
+        // not known to the sequence yet: register a new entry for this id
+        pdbEntry = new PDBEntry();
+        pdbEntry.setId(pdbIdStr);
+        pdbEntry.setType(PDBEntry.Type.MMCIF);
+        selectedSeq.getDatasetSequence().addPDBId(pdbEntry);
+      }
+      pdbEntriesToView[count++] = pdbEntry;
+    }
+    return pdbEntriesToView;
+  }
+
+}
\ No newline at end of file
diff --git a/src/jalview/gui/structurechooser/ThreeDBStructureChooserQuerySource.java b/src/jalview/gui/structurechooser/ThreeDBStructureChooserQuerySource.java
new file mode 100644 (file)
index 0000000..6c2123b
--- /dev/null
@@ -0,0 +1,256 @@
+package jalview.gui.structurechooser;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+
+import javax.swing.JTable;
+import javax.swing.table.TableModel;
+
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.PDBEntry;
+import jalview.datamodel.SequenceI;
+import jalview.fts.api.FTSData;
+import jalview.fts.api.FTSDataColumnI;
+import jalview.fts.api.FTSRestClientI;
+import jalview.fts.core.FTSDataColumnPreferences;
+import jalview.fts.core.FTSDataColumnPreferences.PreferenceSource;
+import jalview.fts.core.FTSRestRequest;
+import jalview.fts.core.FTSRestResponse;
+import jalview.fts.service.pdb.PDBFTSRestClient;
+import jalview.fts.service.threedbeacons.TDBeaconsFTSRestClient;
+import jalview.jbgui.GStructureChooser.FilterOption;
+
+/**
+ * logic for querying the 3D-Beacons API for structures of sequences
+ * 
+ * @author jprocter
+ */
+public class ThreeDBStructureChooserQuerySource
+        extends StructureChooserQuerySource
+{
+
+  /**
+   * Length limit, in characters, for query strings
+   */
+  private static final int MAX_QLENGTH = 7820;
+
+  /**
+   * Creates a query source backed by the 3D-Beacons FTS REST client. The same
+   * client is used to execute requests and to configure the structure
+   * chooser's data column preferences.
+   */
+  public ThreeDBStructureChooserQuerySource()
+  {
+    pdbRestClient = TDBeaconsFTSRestClient.getInstance();
+    docFieldPrefs = new FTSDataColumnPreferences(
+            PreferenceSource.STRUCTURE_CHOOSER,
+            TDBeaconsFTSRestClient.getInstance());
+  }
+
+
+  /**
+   * Builds a query string for a sequence using its DBRef entries. 3d Beacons
+   * is only useful for uniprot IDs, so the query is simply the accession of
+   * the first canonical UniProt DBRef that passes isValidSeqName.
+   * 
+   * @param seq
+   *          the sequence to build a query for
+   * @return the UniProt accession to query with, or null if the sequence has
+   *         no suitable UniProt reference
+   */
+  public String buildQuery(SequenceI seq)
+  {
+    List<DBRefEntry> refs = seq.getDBRefs();
+    if (refs == null)
+    {
+      return null;
+    }
+    for (DBRefEntry dbRef : refs)
+    {
+      // constant-first comparison tolerates a DBRef without a source
+      if (DBRefSource.UNIPROT.equalsIgnoreCase(dbRef.getSource())
+              && dbRef.isCanonical())
+      {
+        // TODO: pick best Uniprot accession
+        String accession = getDBRefId(dbRef);
+        if (isValidSeqName(accession))
+        {
+          return accession;
+        }
+      }
+    }
+    return null;
+  }
+
+
+  /**
+   * Checks whether a sequence ref name is usable as a query term: it must be
+   * at least 3 characters long, must not contain ':' and must not contain a
+   * database name (pdb, uniprot or swiss-prot, in any letter case)
+   * 
+   * @param seqName
+   *          the name to check; null is treated as not valid
+   * @return true if the name passes all checks
+   */
+  static boolean isValidSeqName(String seqName)
+  {
+    if (seqName == null || seqName.length() < 3 || seqName.contains(":"))
+    {
+      return false;
+    }
+    // Locale.ROOT: with e.g. a Turkish default locale "UNIPROT" would lower
+    // case to a dotless i and no longer match the ignore list
+    String lowerCased = seqName.toLowerCase(java.util.Locale.ROOT);
+    String[] ignoreList = { "pdb", "uniprot", "swiss-prot" };
+    for (String ignoredEntry : ignoreList)
+    {
+      if (lowerCased.contains(ignoredEntry))
+      {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Returns the accession id of a database reference with every occurrence of
+   * the text "GO:" removed
+   * 
+   * @param dbRef
+   *          the database reference to read the accession from
+   * @return the accession id without "GO:"
+   */
+  static String getDBRefId(DBRefEntry dbRef)
+  {
+    return dbRef.getAccessionId().replace("GO:", "");
+  }
+
+  /**
+   * Queries the FTS REST client for the structure data records associated
+   * with a sequence. The executed request is remembered in lastPdbRequest.
+   * 
+   * @param seq
+   *          - seq to generate a query for
+   * @param wantedFields
+   *          - fields to retrieve
+   * @param selectedFilterOpt
+   *          - criterion for ranking results (e.g. resolution); currently not
+   *          used by this query source
+   * @param b
+   *          - sort ascending or descending; currently not used by this query
+   *          source
+   * @return the response from the REST client, or null if no query could be
+   *         built for the sequence
+   * @throws Exception
+   */
+  public FTSRestResponse fetchStructuresMetaData(SequenceI seq,
+          Collection<FTSDataColumnI> wantedFields,
+          FilterOption selectedFilterOpt, boolean b) throws Exception
+  {
+    FTSRestResponse resultList;
+    FTSRestRequest pdbRequest = getTDBeaconsRequest(seq, wantedFields);
+    if (pdbRequest == null)
+    {
+      // no query for this sequence: do not hand a null request to the
+      // client; report "nothing" the same way selectFirstRankedQuery does
+      return null;
+    }
+    resultList = pdbRestClient.executeRequest(pdbRequest);
+
+    lastPdbRequest = pdbRequest;
+    return resultList;
+  }
+  
+
+  /**
+   * Creates the request used to look up structures for a sequence: up to 500
+   * records, with the query built for the sequence followed by ".json" as the
+   * search term
+   * 
+   * @param seq
+   *          the sequence to generate the request for
+   * @param wantedFields
+   *          the fields to retrieve
+   * @return the configured request, or null if no query could be built for
+   *         the sequence
+   */
+  private FTSRestRequest getTDBeaconsRequest(SequenceI seq,
+          Collection<FTSDataColumnI> wantedFields)
+  {
+    FTSRestRequest request = new FTSRestRequest();
+    request.setAllowEmptySeq(false);
+    request.setResponseSize(500);
+    request.setWantedFields(wantedFields);
+
+    String accessionQuery = buildQuery(seq);
+    if (accessionQuery != null)
+    {
+      request.setSearchTerm(accessionQuery + ".json");
+      request.setAssociatedSequence(seq);
+      return request;
+    }
+    return null;
+  }
+
+
+  /**
+   * Queries the FTS REST client with the response size set to 1, to pick the
+   * first entry for a sequence. The executed request is remembered in
+   * lastPdbRequest.
+   * 
+   * @param seq
+   *          - seq to generate a query for
+   * @param wantedFields
+   *          - fields to retrieve
+   * @param fieldToFilterBy
+   *          - criterion for ranking results (e.g. resolution); not yet used,
+   *          see the TODO below
+   * @param b
+   *          - sort ascending or descending; not yet used
+   * @return the response from the REST client, or null if no query could be
+   *         built for the sequence
+   * @throws Exception
+   */
+  public FTSRestResponse selectFirstRankedQuery(SequenceI seq,
+          Collection<FTSDataColumnI> wantedFields, String fieldToFilterBy,
+          boolean b) throws Exception
+  {
+    FTSRestRequest request = getTDBeaconsRequest(seq, wantedFields);
+    if (request != null)
+    {
+      request.setResponseSize(1);
+      FTSRestResponse firstRanked = pdbRestClient.executeRequest(request);
+
+      // TODO: client side filtering - sort results and pick top one (or N)
+
+      lastPdbRequest = request;
+      return firstRanked;
+    }
+    return null;
+  }
+
+  /**
+   * Collects a PDBEntry for each selected row of the results table and adds
+   * the row's reference sequence to selectedSeqsToView. An entry already held
+   * by the sequence is reused; otherwise a new MMCIF entry is created for the
+   * row's model id and added to the sequence's dataset sequence. A new entry
+   * is given the row's URL as its retrieval URL unless the provider is PDBe.
+   * 
+   * @param restable
+   *          the results table, with "Ref Sequence", "Model id", "Url" and
+   *          "Provider" columns
+   * @param selectedRows
+   *          the selected rows, as table (view) row indices
+   * @param selectedSeqsToView
+   *          list that receives the reference sequence of each selected row
+   * @return one PDBEntry per selected row, in the order of selectedRows
+   */
+  @Override
+  public PDBEntry[] collectSelectedRows(JTable restable, int[] selectedRows,
+          List<SequenceI> selectedSeqsToView)
+  {
+    // JTable.getValueAt addresses columns by their position in the view, so
+    // look the columns up in the column model (view order) rather than using
+    // their model index, which differs once columns are moved or hidden
+    int refSeqColIndex = restable.getColumnModel()
+            .getColumnIndex("Ref Sequence");
+    int idColumnIndex = restable.getColumnModel()
+            .getColumnIndex("Model id");
+    int urlColumnIndex = restable.getColumnModel().getColumnIndex("Url");
+    int typeColumnIndex = restable.getColumnModel()
+            .getColumnIndex("Provider");
+
+    PDBEntry[] pdbEntriesToView = new PDBEntry[selectedRows.length];
+    int count = 0;
+
+    for (int row : selectedRows)
+    {
+      // unique id - could be a horrible hash
+      String pdbIdStr = restable.getValueAt(row, idColumnIndex).toString();
+      String urlStr = restable.getValueAt(row, urlColumnIndex).toString();
+      String typeColumn = restable.getValueAt(row, typeColumnIndex)
+              .toString();
+      SequenceI selectedSeq = (SequenceI) restable.getValueAt(row,
+              refSeqColIndex);
+      selectedSeqsToView.add(selectedSeq);
+      PDBEntry pdbEntry = selectedSeq.getPDBEntry(pdbIdStr);
+      if (pdbEntry == null)
+      {
+        pdbEntry = getFindEntry(pdbIdStr, selectedSeq.getAllPDBEntries());
+      }
+
+      if (pdbEntry == null)
+      {
+        // not known to the sequence yet: register a new entry for this id
+        pdbEntry = new PDBEntry();
+        pdbEntry.setId(pdbIdStr);
+        pdbEntry.setType(PDBEntry.Type.MMCIF);
+        if (!"PDBe".equalsIgnoreCase(typeColumn))
+        {
+          pdbEntry.setRetrievalUrl(urlStr);
+        }
+        selectedSeq.getDatasetSequence().addPDBId(pdbEntry);
+      }
+      pdbEntriesToView[count++] = pdbEntry;
+    }
+    return pdbEntriesToView;
+  }
+}
\ No newline at end of file