JAL-3829 do better at detecting structure type, prompt to retrieve Uniprot ids for...
author Jim Procter <j.procter@dundee.ac.uk>
Mon, 20 Sep 2021 10:19:35 +0000 (11:19 +0100)
committer Jim Procter <j.procter@dundee.ac.uk>
Mon, 20 Sep 2021 10:19:35 +0000 (11:19 +0100)
resources/fts/tdbeacons_data_columns.txt
resources/lang/Messages.properties
src/jalview/gui/StructureChooser.java
src/jalview/gui/structurechooser/StructureChooserQuerySource.java
src/jalview/gui/structurechooser/TDBResultAnalyser.java
src/jalview/gui/structurechooser/ThreeDBStructureChooserQuerySource.java

index dd6bec5..4ebcc1b 100644 (file)
@@ -58,5 +58,5 @@ Sequence Identity;sequence_identity;double|T|1;g2;80;150;85;false;false
 Created Date;created;string;g3;80;150;85;true;false
 UniProt Accession;uniprot_accession;String;g1;50;400;95;false;true
 Url;model_url;String;g3;100;150;105;true;false
-Page URL;model_page_url;String;g3;100;150;105;false;false
+Page URL;model_page_url;String;g3;100;150;105;true;false
 Ensemble Sample Url;ensembl_sample_url;String;g3;100;150;105;false;false
index 4bc88e8..92a6ab8 100644 (file)
@@ -512,6 +512,8 @@ label.load_tree_file = Load a tree file
 label.retrieve_parse_sequence_database_records_alignment_or_selected_sequences = Retrieve and parse sequence database records for the alignment or the currently selected sequences
 label.standard_databases = Standard Databases
 label.fetch_embl_uniprot = Fetch from EMBL/EMBLCDS or Uniprot/PDB and any selected DAS sources
+label.fetch_uniprot_references = Fetch Uniprot references
+label.fetch_references_for = Fetch database references for {0} sequences ?
 label.reset_min_max_colours_to_defaults = Reset min and max colours to defaults from user preferences.
 label.align_structures_using_linked_alignment_views = Superpose structures using {0} selected alignment view(s)
 label.threshold_feature_display_by_score = Threshold the feature display by score.
index 6bcac24..2e7019a 100644 (file)
@@ -37,6 +37,7 @@ import jalview.fts.core.FTSRestResponse;
 import jalview.fts.service.pdb.PDBFTSRestClient;
 import jalview.gui.structurechooser.PDBStructureChooserQuerySource;
 import jalview.gui.structurechooser.StructureChooserQuerySource;
+import jalview.gui.structurechooser.ThreeDBStructureChooserQuerySource;
 import jalview.io.DataSourceType;
 import jalview.jbgui.FilterOption;
 import jalview.jbgui.GStructureChooser;
@@ -55,6 +56,9 @@ import java.util.List;
 import java.util.Objects;
 import java.util.Set;
 import java.util.Vector;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.Executor;
+import java.util.concurrent.Executors;
 
 import javax.swing.JCheckBox;
 import javax.swing.JComboBox;
@@ -131,19 +135,21 @@ public class StructureChooser extends GStructureChooser
 
     // ensure a filter option is in force for search
     populateFilterComboBox(true, cachedPDBExists);
-    Thread discoverPDBStructuresThread = new Thread(new Runnable()
+    // todo change to futures I guess
+
+    final Runnable discoverPDBStructures = new Runnable()
     {
       @Override
       public void run()
       {
-        // looks for any existing structures already loaded 
-        // for the sequences (the cached ones) 
-        // then queries the StructureChooserQuerySource to 
+        // looks for any existing structures already loaded
+        // for the sequences (the cached ones)
+        // then queries the StructureChooserQuerySource to
         // discover more structures.
-        // 
+        //
         // Possible optimisation is to only begin querying
         // the structure chooser if there are no cached structures.
-        
+
         long startTime = System.currentTimeMillis();
         updateProgressIndicator(MessageManager
                 .getString("status.loading_cached_pdb_entries"), startTime);
@@ -159,8 +165,68 @@ public class StructureChooser extends GStructureChooser
         mainFrame.setVisible(true);
         updateCurrentView();
       }
-    });
-    discoverPDBStructuresThread.start();
+    };
+    final List<SequenceI> seqsWithoutSourceDBRef = new ArrayList<SequenceI>();
+
+    final Runnable discoverCanonicalDBrefs = new Runnable()
+    {
+      @Override
+      public void run()
+      {
+        long progressId = System.currentTimeMillis();
+
+        int y = seqsWithoutSourceDBRef.size();
+        setProgressBar(MessageManager.formatMessage(
+                "status.fetching_dbrefs_for_sequences_without_valid_refs",
+                y), progressId);
+        SequenceI[] seqWithoutSrcDBRef = seqsWithoutSourceDBRef
+                .toArray(new SequenceI[y]);
+        DBRefFetcher dbRefFetcher = new DBRefFetcher(seqWithoutSrcDBRef);
+        dbRefFetcher.fetchDBRefs(true);
+
+        setProgressBar("Fetch complete.", progressId); // todo i18n
+
+        SwingUtilities.invokeLater(discoverPDBStructures);
+      }
+    };
+
+    Executors.defaultThreadFactory().newThread(new Runnable()
+    {
+      public void run()
+      {
+
+        for (SequenceI seq : selectedSequences)
+        {
+          if (seq.isProtein())
+          {
+            int dbRef = ThreeDBStructureChooserQuerySource
+                    .checkUniprotRefs(seq.getDBRefs());
+            if (dbRef < 0)
+            {
+              seqsWithoutSourceDBRef.add(seq);
+            }
+          }
+        }
+        // retrieve database refs for protein sequences
+        if (!seqsWithoutSourceDBRef.isEmpty())
+        {
+          JvOptionPane.newOptionDialog(Desktop.getDesktop())
+                  .setResponseHandler(0, discoverCanonicalDBrefs)
+                  .setResponseHandler(1, discoverPDBStructures)
+                  .showDialog(MessageManager.formatMessage("label.fetch_references_for",seqsWithoutSourceDBRef.size()), MessageManager.getString(
+                          "label.fetch_uniprot_references"),
+                          JvOptionPane.YES_NO_OPTION,
+                          JvOptionPane.PLAIN_MESSAGE, null, new Object[]
+                          { MessageManager.getString("action.ok"),
+                              MessageManager.getString("action.cancel") },
+                          MessageManager.getString("action.ok"));
+        } else {
+          // get structures directly
+          Executors.defaultThreadFactory().newThread(discoverPDBStructures).start();
+        }
+      };
+    }).start();;
+
   }
 
   /**
@@ -799,8 +865,8 @@ public class StructureChooser extends GStructureChooser
           List<SequenceI> selectedSeqsToView = new ArrayList<>();
           for (int row : selectedRows)
           {
-            PDBEntry pdbEntry = (PDBEntry) tbl_local_pdb.getValueAt(row,
-                    pdbIdColIndex);
+            PDBEntry pdbEntry = ((PDBEntryTableModel) tbl_local_pdb.getModel()).getPDBEntryAt(row).getPdbEntry();
+            
             pdbEntriesToView[count++] = pdbEntry;
             SequenceI selectedSeq = (SequenceI) tbl_local_pdb
                     .getValueAt(row, refSeqColIndex);
@@ -1159,7 +1225,7 @@ public class StructureChooser extends GStructureChooser
         value = entry.getSequence();
         break;
       case 1:
-        value = entry.getPdbEntry();
+        value = entry.getQualifiedId();
         break;
       case 2:
         value = entry.getPdbEntry().getChainCode() == null ? "_"
@@ -1200,6 +1266,15 @@ public class StructureChooser extends GStructureChooser
       this.pdbEntry = pdbEntry;
     }
 
+    /** @return "provider:id" if the entry has a provider, else its toString() */
+    public String getQualifiedId()
+    {
+      final boolean qualified = pdbEntry.hasProvider();
+      return qualified
+              ? pdbEntry.getProvider() + ":" + pdbEntry.getId()
+              : pdbEntry.toString();
+    }
+
     public SequenceI getSequence()
     {
       return sequence;
index b5646db..1023b59 100644 (file)
@@ -216,21 +216,33 @@ public abstract class StructureChooserQuerySource
           SequenceI[] selectedSeqs)
   {
     ThreeDBStructureChooserQuerySource tdbSource = new ThreeDBStructureChooserQuerySource();
-    boolean hasUniprot=false;
+    boolean hasUniprot=false,hasCanonical=false;
     boolean hasNA=false,hasProtein=false;
-    for (SequenceI seq:selectedSeqs)
+    int protWithoutUni=0;
+    for (SequenceI seq : selectedSeqs)
     {
-      hasNA|=!seq.isProtein();
+      hasNA |= !seq.isProtein();
       hasProtein |= seq.isProtein();
       if (seq.isProtein())
       {
-        String query = tdbSource.buildQuery(seq);
-        if (query!=null && query.length()>0)
+        int refsAvailable = ThreeDBStructureChooserQuerySource.checkUniprotRefs(seq.getDBRefs());
+        if (refsAvailable > -2)
         {
-          hasUniprot=true;
+          if (refsAvailable > -1)
+            hasCanonical = true;
         }
+        hasUniprot = true;
+      }
+      else
+      {
+        protWithoutUni++;
       }
     }
+    //
+    // logic: all canonicals - no fetchdb
+    // some uniprot no canonicals: prompt do fetchDb for remaining
+    // no uniprot but protein: offer 3d-beacons search
+    //
     if (hasProtein && hasUniprot && !hasNA)
     {
       return tdbSource;
index ffac9c4..9f57100 100644 (file)
@@ -177,8 +177,8 @@ public class TDBResultAnalyser
                   return nonNullFirst(idx_mqual, o1data, o2data);
                 }
                 // models, so rank on qmean - b
-                float o1_mq = (Float) o1data[idx_mqual];
-                float o2_mq = (Float) o2data[idx_mqual];
+                double o1_mq = (Double) o1data[idx_mqual];
+                double o2_mq = (Double) o2data[idx_mqual];
                 return (o2_mq < o1_mq) ? 1 : (o2_mq == o1_mq) ? 0 : -1;
               }
             }
@@ -250,6 +250,10 @@ public class TDBResultAnalyser
         cover.andNot(scover);
       }
     }
+    if (selected.size()==0)
+    {
+      return selected;
+    }
     // final step is to sort on length - this might help the superposition
     // process
     Collections.sort(selected, new Comparator<FTSData>()
index 4229b72..9b6d8e8 100644 (file)
@@ -3,7 +3,6 @@ package jalview.gui.structurechooser;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashSet;
 import java.util.LinkedHashSet;
@@ -26,7 +25,6 @@ import jalview.fts.core.FTSRestRequest;
 import jalview.fts.core.FTSRestResponse;
 import jalview.fts.service.threedbeacons.TDBeaconsFTSRestClient;
 import jalview.jbgui.FilterOption;
-import jalview.util.MessageManager;
 
 /**
  * logic for querying the 3DBeacons API for structures of sequences
@@ -87,26 +85,38 @@ public class ThreeDBStructureChooserQuerySource
     Set<String> pdbids = new HashSet<>();
 
     List<DBRefEntry> refs = seq.getDBRefs();
+    int ib = checkUniprotRefs(refs);
+    if (ib>-1)
+    {
+      return getDBRefId(refs.get(ib));
+    }
+    return null;
+  }
+
+  /**
+   * Scans the given database references for Uniprot refs and reports the first canonical one
+   * @param refs database references to scan (null or empty is treated as having no Uniprot refs)
+   * @return -2 if no uniprot refs, -1 if uniprot refs but none canonical, otherwise index in refs of the first canonical Uniprot DBRefEntry
+   */
+  public static int checkUniprotRefs(List<DBRefEntry> refs)
+  {
+    boolean hasUniprot = false;
     if (refs != null && refs.size() != 0)
     {
       for (int ib = 0, nb = refs.size(); ib < nb; ib++)
       {
         DBRefEntry dbRef = refs.get(ib);
-        if (isValidSeqName(getDBRefId(dbRef))
-                && queryBuilder.length() < MAX_QLENGTH)
+        if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT))
         {
-          if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT)
-                  && dbRef.isCanonical())
+          hasUniprot = true;
+          if (dbRef.isCanonical())
           {
-            // TODO: pick best Uniprot accession
-            isUniProtRefsFound = true;
-            return getDBRefId(dbRef);
-
+            return ib;
           }
         }
       }
     }
-    return null;
+    return hasUniprot ? -1 : -2;
   }
 
   /**
@@ -165,17 +175,19 @@ public class ThreeDBStructureChooserQuerySource
           FilterOption selectedFilterOpt, boolean b) throws Exception
   {
     FTSRestResponse resultList;
-    if (selectedFilterOpt!=null && tdBeaconsFilter(selectedFilterOpt.getValue()))
+    if (selectedFilterOpt != null
+            && tdBeaconsFilter(selectedFilterOpt.getValue()))
     {
       FTSRestRequest tdbRequest = getTDBeaconsRequest(seq, wantedFields);
       resultList = tdbRestClient.executeRequest(tdbRequest);
-
+      
       lastTdbRequest = tdbRequest;
-
-      // Query the PDB and add additional metadata
-      FTSRestResponse pdbResponse = fetchStructuresMetaDataFor(
-              getPDBQuerySource(), resultList);
-      FTSRestResponse joinedResp = joinResponses(resultList, pdbResponse);
+      if (resultList!=null)
+      { // Query the PDB and add additional metadata
+        FTSRestResponse pdbResponse = fetchStructuresMetaDataFor(
+                getPDBQuerySource(), resultList);
+        FTSRestResponse joinedResp = joinResponses(resultList, pdbResponse);
+      }
       return resultList;
     }
     // use the PDBFTS directly
@@ -183,7 +195,7 @@ public class ThreeDBStructureChooserQuerySource
             wantedFields, selectedFilterOpt, b);
     lastTdbRequest = getPDBQuerySource().lastPdbRequest;
     lastPdbRequest = lastTdbRequest; // both queries the same - indicates we
-                                     // rank using PDBe
+    // rank using PDBe
     return resultList;
 
   }
@@ -235,22 +247,39 @@ public class ThreeDBStructureChooserQuerySource
   public void updateAvailableFilterOptions(String VIEWS_FILTER,
           List<FilterOption> xtantOptions, Collection<FTSData> tdbEntries)
   {
-    if (tdbEntries !=null && lastTdbRequest != null)
+    if (tdbEntries != null && lastTdbRequest != null)
     {
       int prov_idx = lastTdbRequest.getFieldIndex("Provider");
-
+      boolean hasPDBe=false;
       for (FTSData row : tdbEntries)
       {
         String provider = (String) row.getSummaryData()[prov_idx];
-        FilterOption providerOpt = new FilterOption("3DB Provider - " + provider,
-                FILTER_SOURCE_PREFIX + provider, VIEWS_FILTER,
-                false, this);
+        FilterOption providerOpt = new FilterOption(
+                "3DB Provider - " + provider,
+                FILTER_SOURCE_PREFIX + provider, VIEWS_FILTER, false, this);
         if (!xtantOptions.contains(providerOpt))
         {
-          xtantOptions.add(1,
-                  providerOpt);
-          tdBeaconsFilters.add(FILTER_SOURCE_PREFIX+provider);
-
+          xtantOptions.add(1, providerOpt);
+          tdBeaconsFilters.add(FILTER_SOURCE_PREFIX + provider);
+          if ("PDBe".equalsIgnoreCase(provider))
+          {
+            hasPDBe=true;
+          }
+        }
+      }
+      if (!hasPDBe)
+      {
+        // remove the PDBe options from the available filters
+        int op=0;
+        while (op<xtantOptions.size())
+        {
+          FilterOption filter = xtantOptions.get(op);
+          if (filter.getQuerySource() instanceof PDBStructureChooserQuerySource)
+          {
+            xtantOptions.remove(op);
+          } else {
+            op++;
+          }
         }
       }
     }
@@ -280,8 +309,9 @@ public class ThreeDBStructureChooserQuerySource
   @Override
   public boolean needsRefetch(FilterOption selectedFilterOpt)
   {
-    return selectedFilterOpt==null || !tdBeaconsFilter(selectedFilterOpt.getValue())
-            && lastPdbRequest != lastTdbRequest;
+    return selectedFilterOpt == null
+            || !tdBeaconsFilter(selectedFilterOpt.getValue())
+                    && lastPdbRequest != lastTdbRequest;
   }
 
   /**
@@ -304,7 +334,7 @@ public class ThreeDBStructureChooserQuerySource
           Collection<FTSDataColumnI> wantedFields, String fieldToFilterBy,
           boolean b) throws Exception
   {
-    if (fieldToFilterBy!=null && tdBeaconsFilter(fieldToFilterBy))
+    if (fieldToFilterBy != null && tdBeaconsFilter(fieldToFilterBy))
     {
       TDBResultAnalyser analyser = new TDBResultAnalyser(seq,
               collectedResults, lastTdbRequest, fieldToFilterBy,
@@ -336,6 +366,7 @@ public class ThreeDBStructureChooserQuerySource
     int idColumnIndex = restable.getColumn("Model id").getModelIndex();
     int urlColumnIndex = restable.getColumn("Url").getModelIndex();
     int typeColumnIndex = restable.getColumn("Provider").getModelIndex();
+    int humanUrl = restable.getColumn("Page URL").getModelIndex();
     int categoryColumnIndex = restable.getColumn("Model Category")
             .getModelIndex();
     final int up_start_idx = restable.getColumn("Uniprot Start")
@@ -372,6 +403,8 @@ public class ThreeDBStructureChooserQuerySource
       String urlStr = restable.getValueAt(row, urlColumnIndex).toString();
       String typeColumn = restable.getValueAt(row, typeColumnIndex)
               .toString();
+      String modelPage = humanUrl < 1 ? null
+              : (String) restable.getValueAt(row, humanUrl);
       SequenceI selectedSeq = (SequenceI) restable.getValueAt(row,
               refSeqColIndex);
       selectedSeqsToView.add(selectedSeq);
@@ -386,12 +419,16 @@ public class ThreeDBStructureChooserQuerySource
         pdbEntry = new PDBEntry();
         pdbEntry.setId(pdbIdStr);
         boolean hasCif = urlStr.toLowerCase(Locale.ENGLISH).endsWith("cif");
-        boolean probablyPdb = urlStr.toLowerCase(Locale.ENGLISH).contains("pdb");
-        pdbEntry.setType(hasCif ? PDBEntry.Type.MMCIF : probablyPdb ? PDBEntry.Type.PDB : PDBEntry.Type.FILE);
+        boolean probablyPdb = urlStr.toLowerCase(Locale.ENGLISH)
+                .contains("pdb");
+        pdbEntry.setType(hasCif ? PDBEntry.Type.MMCIF
+                : probablyPdb ? PDBEntry.Type.PDB : PDBEntry.Type.FILE);
         if (!"PDBe".equalsIgnoreCase(typeColumn))
         {
           pdbEntry.setRetrievalUrl(urlStr);
         }
+        pdbEntry.setProvider(typeColumn);
+        pdbEntry.setProviderPage(modelPage);
         selectedSeq.getDatasetSequence().addPDBId(pdbEntry);
       }
       pdbEntriesToView[count++] = pdbEntry;
@@ -443,7 +480,7 @@ public class ThreeDBStructureChooserQuerySource
   {
 
     String pdb_Query = buildPDBFTSQueryFor(upResponse);
-    if (pdb_Query.length()==0)
+    if (pdb_Query.length() == 0)
     {
       return null;
     }
@@ -456,7 +493,7 @@ public class ThreeDBStructureChooserQuerySource
     pdbRequest.setWantedFields(
             pdbquery.getDocFieldPrefs().getStructureSummaryFields());
     pdbRequest.setSearchTerm(pdb_Query + ")");
-    
+
     resultList = pdbquery.executePDBFTSRestRequest(pdbRequest);
 
     lastPdbRequest = pdbRequest;