From: Jim Procter Date: Mon, 20 Sep 2021 10:19:35 +0000 (+0100) Subject: JAL-3829 do better at detecting structure type, prompt to retrieve Uniprot ids for... X-Git-Tag: Release_2_11_2_0~34^2~3^2~9 X-Git-Url: http://source.jalview.org/gitweb/?p=jalview.git;a=commitdiff_plain;h=9018c06add8548b4921a4c21f21a1b85a0bea1de JAL-3829 do better at detecting structure type, prompt to retrieve Uniprot ids for proteins without canonical Ids and show provider in the Structure Chooser’s cache view --- diff --git a/resources/fts/tdbeacons_data_columns.txt b/resources/fts/tdbeacons_data_columns.txt index dd6bec5..4ebcc1b 100644 --- a/resources/fts/tdbeacons_data_columns.txt +++ b/resources/fts/tdbeacons_data_columns.txt @@ -58,5 +58,5 @@ Sequence Identity;sequence_identity;double|T|1;g2;80;150;85;false;false Created Date;created;string;g3;80;150;85;true;false UniProt Accession;uniprot_accession;String;g1;50;400;95;false;true Url;model_url;String;g3;100;150;105;true;false -Page URL;model_page_url;String;g3;100;150;105;false;false +Page URL;model_page_url;String;g3;100;150;105;true;false Ensemble Sample Url;ensembl_sample_url;String;g3;100;150;105;false;false diff --git a/resources/lang/Messages.properties b/resources/lang/Messages.properties index 4bc88e8..92a6ab8 100644 --- a/resources/lang/Messages.properties +++ b/resources/lang/Messages.properties @@ -512,6 +512,8 @@ label.load_tree_file = Load a tree file label.retrieve_parse_sequence_database_records_alignment_or_selected_sequences = Retrieve and parse sequence database records for the alignment or the currently selected sequences label.standard_databases = Standard Databases label.fetch_embl_uniprot = Fetch from EMBL/EMBLCDS or Uniprot/PDB and any selected DAS sources +label.fetch_uniprot_references = Fetch Uniprot references +label.fetch_references_for = Fetch database references for {0} sequences ? label.reset_min_max_colours_to_defaults = Reset min and max colours to defaults from user preferences. label.align_structures_using_linked_alignment_views = Superpose structures using {0} selected alignment view(s) label.threshold_feature_display_by_score = Threshold the feature display by score. diff --git a/src/jalview/gui/StructureChooser.java b/src/jalview/gui/StructureChooser.java index 6bcac24..2e7019a 100644 --- a/src/jalview/gui/StructureChooser.java +++ b/src/jalview/gui/StructureChooser.java @@ -37,6 +37,7 @@ import jalview.fts.core.FTSRestResponse; import jalview.fts.service.pdb.PDBFTSRestClient; import jalview.gui.structurechooser.PDBStructureChooserQuerySource; import jalview.gui.structurechooser.StructureChooserQuerySource; +import jalview.gui.structurechooser.ThreeDBStructureChooserQuerySource; import jalview.io.DataSourceType; import jalview.jbgui.FilterOption; import jalview.jbgui.GStructureChooser; @@ -55,6 +56,9 @@ import java.util.List; import java.util.Objects; import java.util.Set; import java.util.Vector; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Executor; +import java.util.concurrent.Executors; import javax.swing.JCheckBox; import javax.swing.JComboBox; @@ -131,19 +135,21 @@ public class StructureChooser extends GStructureChooser // ensure a filter option is in force for search populateFilterComboBox(true, cachedPDBExists); - Thread discoverPDBStructuresThread = new Thread(new Runnable() + // todo change to futures I guess + + final Runnable discoverPDBStructures = new Runnable() { @Override public void run() { - // looks for any existing structures already loaded - // for the sequences (the cached ones) - // then queries the StructureChooserQuerySource to + // looks for any existing structures already loaded + // for the sequences (the cached ones) + // then queries the StructureChooserQuerySource to // discover more structures. - // + // // Possible optimisation is to only begin querying // the structure chooser if there are no cached structures. - + long startTime = System.currentTimeMillis(); updateProgressIndicator(MessageManager .getString("status.loading_cached_pdb_entries"), startTime); @@ -159,8 +165,68 @@ public class StructureChooser extends GStructureChooser mainFrame.setVisible(true); updateCurrentView(); } - }); - discoverPDBStructuresThread.start(); + }; + final List seqsWithoutSourceDBRef = new ArrayList(); + + final Runnable discoverCanonicalDBrefs = new Runnable() + { + @Override + public void run() + { + long progressId = System.currentTimeMillis(); + + int y = seqsWithoutSourceDBRef.size(); + setProgressBar(MessageManager.formatMessage( + "status.fetching_dbrefs_for_sequences_without_valid_refs", + y), progressId); + SequenceI[] seqWithoutSrcDBRef = seqsWithoutSourceDBRef + .toArray(new SequenceI[y]); + DBRefFetcher dbRefFetcher = new DBRefFetcher(seqWithoutSrcDBRef); + dbRefFetcher.fetchDBRefs(true); + + setProgressBar("Fetch complete.", progressId); // todo i18n + + SwingUtilities.invokeLater(discoverPDBStructures); + } + }; + + Executors.defaultThreadFactory().newThread(new Runnable() + { + public void run() + { + + for (SequenceI seq : selectedSequences) + { + if (seq.isProtein()) + { + int dbRef = ThreeDBStructureChooserQuerySource + .checkUniprotRefs(seq.getDBRefs()); + if (dbRef < 0) + { + seqsWithoutSourceDBRef.add(seq); + } + } + } + // retrieve database refs for protein sequences + if (!seqsWithoutSourceDBRef.isEmpty()) + { + JvOptionPane.newOptionDialog(Desktop.getDesktop()) + .setResponseHandler(0, discoverCanonicalDBrefs) + .setResponseHandler(1, discoverPDBStructures) + .showDialog(MessageManager.formatMessage("label.fetch_references_for",seqsWithoutSourceDBRef.size()), MessageManager.getString( + "label.fetch_uniprot_references"), + JvOptionPane.YES_NO_OPTION, + JvOptionPane.PLAIN_MESSAGE, null, new Object[] + { MessageManager.getString("action.ok"), + MessageManager.getString("action.cancel") }, + MessageManager.getString("action.ok")); + } else { + // get structures directly + Executors.defaultThreadFactory().newThread(discoverPDBStructures).start(); + } + }; + }).start();; + } /** @@ -799,8 +865,8 @@ public class StructureChooser extends GStructureChooser List selectedSeqsToView = new ArrayList<>(); for (int row : selectedRows) { - PDBEntry pdbEntry = (PDBEntry) tbl_local_pdb.getValueAt(row, - pdbIdColIndex); + PDBEntry pdbEntry = ((PDBEntryTableModel) tbl_local_pdb.getModel()).getPDBEntryAt(row).getPdbEntry(); + pdbEntriesToView[count++] = pdbEntry; SequenceI selectedSeq = (SequenceI) tbl_local_pdb .getValueAt(row, refSeqColIndex); @@ -1159,7 +1225,7 @@ public class StructureChooser extends GStructureChooser value = entry.getSequence(); break; case 1: - value = entry.getPdbEntry(); + value = entry.getQualifiedId(); break; case 2: value = entry.getPdbEntry().getChainCode() == null ? "_" @@ -1200,6 +1266,15 @@ public class StructureChooser extends GStructureChooser this.pdbEntry = pdbEntry; } + public String getQualifiedId() + { + if (pdbEntry.hasProvider()) + { + return pdbEntry.getProvider()+":"+pdbEntry.getId(); + } + return pdbEntry.toString(); + } + public SequenceI getSequence() { return sequence; diff --git a/src/jalview/gui/structurechooser/StructureChooserQuerySource.java b/src/jalview/gui/structurechooser/StructureChooserQuerySource.java index b5646db..1023b59 100644 --- a/src/jalview/gui/structurechooser/StructureChooserQuerySource.java +++ b/src/jalview/gui/structurechooser/StructureChooserQuerySource.java @@ -216,21 +216,33 @@ public abstract class StructureChooserQuerySource SequenceI[] selectedSeqs) { ThreeDBStructureChooserQuerySource tdbSource = new ThreeDBStructureChooserQuerySource(); - boolean hasUniprot=false; + boolean hasUniprot=false,hasCanonical=false; boolean hasNA=false,hasProtein=false; - for (SequenceI seq:selectedSeqs) + int protWithoutUni=0; + for (SequenceI seq : selectedSeqs) { - hasNA|=!seq.isProtein(); + hasNA |= !seq.isProtein(); hasProtein |= seq.isProtein(); if (seq.isProtein()) { - String query = tdbSource.buildQuery(seq); - if (query!=null && query.length()>0) + int refsAvailable = ThreeDBStructureChooserQuerySource.checkUniprotRefs(seq.getDBRefs()); + if (refsAvailable > -2) { - hasUniprot=true; + if (refsAvailable > -1) + hasCanonical = true; } + hasUniprot = true; + } + else + { + protWithoutUni++; } } + // + // logic: all canonicals - no fetchdb + // some uniprot no canonicals: prompt do fetchDb for remaining + // no uniprot but protein: offer 3d-beacons search + // if (hasProtein && hasUniprot && !hasNA) { return tdbSource; diff --git a/src/jalview/gui/structurechooser/TDBResultAnalyser.java b/src/jalview/gui/structurechooser/TDBResultAnalyser.java index ffac9c4..9f57100 100644 --- a/src/jalview/gui/structurechooser/TDBResultAnalyser.java +++ b/src/jalview/gui/structurechooser/TDBResultAnalyser.java @@ -177,8 +177,8 @@ public class TDBResultAnalyser return nonNullFirst(idx_mqual, o1data, o2data); } // models, so rank on qmean - b - float o1_mq = (Float) o1data[idx_mqual]; - float o2_mq = (Float) o2data[idx_mqual]; + double o1_mq = (Double) o1data[idx_mqual]; + double o2_mq = (Double) o2data[idx_mqual]; return (o2_mq < o1_mq) ? 1 : (o2_mq == o1_mq) ? 0 : -1; } } @@ -250,6 +250,10 @@ public class TDBResultAnalyser cover.andNot(scover); } } + if (selected.size()==0) + { + return selected; + } // final step is to sort on length - this might help the superposition // process Collections.sort(selected, new Comparator() diff --git a/src/jalview/gui/structurechooser/ThreeDBStructureChooserQuerySource.java b/src/jalview/gui/structurechooser/ThreeDBStructureChooserQuerySource.java index 4229b72..9b6d8e8 100644 --- a/src/jalview/gui/structurechooser/ThreeDBStructureChooserQuerySource.java +++ b/src/jalview/gui/structurechooser/ThreeDBStructureChooserQuerySource.java @@ -3,7 +3,6 @@ package jalview.gui.structurechooser; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.LinkedHashSet; @@ -26,7 +25,6 @@ import jalview.fts.core.FTSRestRequest; import jalview.fts.core.FTSRestResponse; import jalview.fts.service.threedbeacons.TDBeaconsFTSRestClient; import jalview.jbgui.FilterOption; -import jalview.util.MessageManager; /** * logic for querying the 3DBeacons API for structures of sequences @@ -87,26 +85,38 @@ public class ThreeDBStructureChooserQuerySource Set pdbids = new HashSet<>(); List refs = seq.getDBRefs(); + int ib = checkUniprotRefs(refs); + if (ib>-1) + { + return getDBRefId(refs.get(ib)); + } + return null; + } + + /** + * Searches DBRefEntry for uniprot refs + * @param seq + * @return -2 if no uniprot refs, -1 if no canonical ref., otherwise index of Uniprot canonical DBRefEntry + */ + public static int checkUniprotRefs(List refs) + { + boolean hasUniprot = false; if (refs != null && refs.size() != 0) { for (int ib = 0, nb = refs.size(); ib < nb; ib++) { DBRefEntry dbRef = refs.get(ib); - if (isValidSeqName(getDBRefId(dbRef)) - && queryBuilder.length() < MAX_QLENGTH) + if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT)) { - if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) - && dbRef.isCanonical()) + hasUniprot = true; + if (dbRef.isCanonical()) { - // TODO: pick best Uniprot accession - isUniProtRefsFound = true; - return getDBRefId(dbRef); - + return ib; } } } } - return null; + return hasUniprot ? -1 : -2; } /** @@ -165,17 +175,19 @@ public class ThreeDBStructureChooserQuerySource FilterOption selectedFilterOpt, boolean b) throws Exception { FTSRestResponse resultList; - if (selectedFilterOpt!=null && tdBeaconsFilter(selectedFilterOpt.getValue())) + if (selectedFilterOpt != null + && tdBeaconsFilter(selectedFilterOpt.getValue())) { FTSRestRequest tdbRequest = getTDBeaconsRequest(seq, wantedFields); resultList = tdbRestClient.executeRequest(tdbRequest); - + lastTdbRequest = tdbRequest; - - // Query the PDB and add additional metadata - FTSRestResponse pdbResponse = fetchStructuresMetaDataFor( - getPDBQuerySource(), resultList); - FTSRestResponse joinedResp = joinResponses(resultList, pdbResponse); + if (resultList!=null) + { // Query the PDB and add additional metadata + FTSRestResponse pdbResponse = fetchStructuresMetaDataFor( + getPDBQuerySource(), resultList); + FTSRestResponse joinedResp = joinResponses(resultList, pdbResponse); + } return resultList; } // use the PDBFTS directly @@ -183,7 +195,7 @@ public class ThreeDBStructureChooserQuerySource wantedFields, selectedFilterOpt, b); lastTdbRequest = getPDBQuerySource().lastPdbRequest; lastPdbRequest = lastTdbRequest; // both queries the same - indicates we - // rank using PDBe + // rank using PDBe return resultList; } @@ -235,22 +247,39 @@ public class ThreeDBStructureChooserQuerySource public void updateAvailableFilterOptions(String VIEWS_FILTER, List xtantOptions, Collection tdbEntries) { - if (tdbEntries !=null && lastTdbRequest != null) + if (tdbEntries != null && lastTdbRequest != null) { int prov_idx = lastTdbRequest.getFieldIndex("Provider"); - + boolean hasPDBe=false; for (FTSData row : tdbEntries) { String provider = (String) row.getSummaryData()[prov_idx]; - FilterOption providerOpt = new FilterOption("3DB Provider - " + provider, - FILTER_SOURCE_PREFIX + provider, VIEWS_FILTER, - false, this); + FilterOption providerOpt = new FilterOption( + "3DB Provider - " + provider, + FILTER_SOURCE_PREFIX + provider, VIEWS_FILTER, false, this); if (!xtantOptions.contains(providerOpt)) { - xtantOptions.add(1, - providerOpt); - tdBeaconsFilters.add(FILTER_SOURCE_PREFIX+provider); - + xtantOptions.add(1, providerOpt); + tdBeaconsFilters.add(FILTER_SOURCE_PREFIX + provider); + if ("PDBe".equalsIgnoreCase(provider)) + { + hasPDBe=true; + } + } + } + if (!hasPDBe) + { + // remove the PDBe options from the available filters + int op=0; + while (op wantedFields, String fieldToFilterBy, boolean b) throws Exception { - if (fieldToFilterBy!=null && tdBeaconsFilter(fieldToFilterBy)) + if (fieldToFilterBy != null && tdBeaconsFilter(fieldToFilterBy)) { TDBResultAnalyser analyser = new TDBResultAnalyser(seq, collectedResults, lastTdbRequest, fieldToFilterBy, @@ -336,6 +366,7 @@ public class ThreeDBStructureChooserQuerySource int idColumnIndex = restable.getColumn("Model id").getModelIndex(); int urlColumnIndex = restable.getColumn("Url").getModelIndex(); int typeColumnIndex = restable.getColumn("Provider").getModelIndex(); + int humanUrl = restable.getColumn("Page URL").getModelIndex(); int categoryColumnIndex = restable.getColumn("Model Category") .getModelIndex(); final int up_start_idx = restable.getColumn("Uniprot Start") @@ -372,6 +403,8 @@ public class ThreeDBStructureChooserQuerySource String urlStr = restable.getValueAt(row, urlColumnIndex).toString(); String typeColumn = restable.getValueAt(row, typeColumnIndex) .toString(); + String modelPage = humanUrl < 1 ? null + : (String) restable.getValueAt(row, humanUrl); SequenceI selectedSeq = (SequenceI) restable.getValueAt(row, refSeqColIndex); selectedSeqsToView.add(selectedSeq); @@ -386,12 +419,16 @@ public class ThreeDBStructureChooserQuerySource pdbEntry = new PDBEntry(); pdbEntry.setId(pdbIdStr); boolean hasCif = urlStr.toLowerCase(Locale.ENGLISH).endsWith("cif"); - boolean probablyPdb = urlStr.toLowerCase(Locale.ENGLISH).contains("pdb"); - pdbEntry.setType(hasCif ? PDBEntry.Type.MMCIF : probablyPdb ? PDBEntry.Type.PDB : PDBEntry.Type.FILE); + boolean probablyPdb = urlStr.toLowerCase(Locale.ENGLISH) + .contains("pdb"); + pdbEntry.setType(hasCif ? PDBEntry.Type.MMCIF + : probablyPdb ? PDBEntry.Type.PDB : PDBEntry.Type.FILE); if (!"PDBe".equalsIgnoreCase(typeColumn)) { pdbEntry.setRetrievalUrl(urlStr); } + pdbEntry.setProvider(typeColumn); + pdbEntry.setProviderPage(modelPage); selectedSeq.getDatasetSequence().addPDBId(pdbEntry); } pdbEntriesToView[count++] = pdbEntry; @@ -443,7 +480,7 @@ public class ThreeDBStructureChooserQuerySource { String pdb_Query = buildPDBFTSQueryFor(upResponse); - if (pdb_Query.length()==0) + if (pdb_Query.length() == 0) { return null; } @@ -456,7 +493,7 @@ public class ThreeDBStructureChooserQuerySource pdbRequest.setWantedFields( pdbquery.getDocFieldPrefs().getStructureSummaryFields()); pdbRequest.setSearchTerm(pdb_Query + ")"); - + resultList = pdbquery.executePDBFTSRestRequest(pdbRequest); lastPdbRequest = pdbRequest;