X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fgui%2Fstructurechooser%2FThreeDBStructureChooserQuerySource.java;h=669e58a07130a41cfdd990e411c182786832e852;hb=584ab9644955bec02d96448361b8aac5b9542599;hp=1c04d3ad6923423945537faebfdd54b2ad47b54b;hpb=db005f38bdad89aef8e22750682edf2eecccc0e0;p=jalview.git diff --git a/src/jalview/gui/structurechooser/ThreeDBStructureChooserQuerySource.java b/src/jalview/gui/structurechooser/ThreeDBStructureChooserQuerySource.java index 1c04d3a..669e58a 100644 --- a/src/jalview/gui/structurechooser/ThreeDBStructureChooserQuerySource.java +++ b/src/jalview/gui/structurechooser/ThreeDBStructureChooserQuerySource.java @@ -3,15 +3,16 @@ package jalview.gui.structurechooser; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; +import java.util.Locale; import java.util.Set; import javax.swing.JTable; +import jalview.bin.Console; import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; import jalview.datamodel.PDBEntry; @@ -23,9 +24,9 @@ import jalview.fts.core.FTSDataColumnPreferences; import jalview.fts.core.FTSDataColumnPreferences.PreferenceSource; import jalview.fts.core.FTSRestRequest; import jalview.fts.core.FTSRestResponse; +import jalview.fts.service.threedbeacons.TDB_FTSData; import jalview.fts.service.threedbeacons.TDBeaconsFTSRestClient; import jalview.jbgui.FilterOption; -import jalview.util.MessageManager; /** * logic for querying the 3DBeacons API for structures of sequences @@ -36,7 +37,13 @@ public class ThreeDBStructureChooserQuerySource extends StructureChooserQuerySource { - private static int MAX_QLENGTH = 7820; + private Set tdBeaconsFilters = null, defaultFilters = null; + + public static final String FILTER_TDBEACONS_COVERAGE = "3d_beacons_coverage"; + + public static final String FILTER_FIRST_BEST_COVERAGE = "3d_beacons_first_best_coverage"; + + private static final String FILTER_SOURCE_PREFIX = "only_"; protected FTSRestRequest lastTdbRequest; @@ -46,11 +53,14 @@ public class ThreeDBStructureChooserQuerySource public ThreeDBStructureChooserQuerySource() { + defaultFilters = new LinkedHashSet(); + defaultFilters.add(FILTER_TDBEACONS_COVERAGE); + defaultFilters.add(FILTER_FIRST_BEST_COVERAGE); + tdbRestClient = TDBeaconsFTSRestClient.getInstance(); docFieldPrefs = new FTSDataColumnPreferences( PreferenceSource.STRUCTURE_CHOOSER, TDBeaconsFTSRestClient.getInstance()); - } /** @@ -64,37 +74,41 @@ public class ThreeDBStructureChooserQuerySource public String buildQuery(SequenceI seq) { - boolean isPDBRefsFound = false; - boolean isUniProtRefsFound = false; - StringBuilder queryBuilder = new StringBuilder(); - Set seqRefs = new LinkedHashSet<>(); - - /* - * note PDBs as DBRefEntry so they are not duplicated in query - */ - Set pdbids = new HashSet<>(); - List refs = seq.getDBRefs(); + int ib = checkUniprotRefs(refs); + if (ib > -1) + { + return getDBRefId(refs.get(ib)); + } + return null; + } + + /** + * Searches DBRefEntry for uniprot refs + * + * @param seq + * @return -2 if no uniprot refs, -1 if no canonical ref., otherwise index of + * Uniprot canonical DBRefEntry + */ + public static int checkUniprotRefs(List refs) + { + boolean hasUniprot = false; if (refs != null && refs.size() != 0) { for (int ib = 0, nb = refs.size(); ib < nb; ib++) { DBRefEntry dbRef = refs.get(ib); - if (isValidSeqName(getDBRefId(dbRef)) - && queryBuilder.length() < MAX_QLENGTH) + if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT)) { - if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) - && dbRef.isCanonical()) + hasUniprot = true; + if (dbRef.isCanonical()) { - // TODO: pick best Uniprot accession - isUniProtRefsFound = true; - return getDBRefId(dbRef); - + return ib; } } } } - return null; + return hasUniprot ? -1 : -2; } /** @@ -106,7 +120,6 @@ public class ThreeDBStructureChooserQuerySource */ static boolean isValidSeqName(String seqName) { - // System.out.println("seqName : " + seqName); String ignoreList = "pdb,uniprot,swiss-prot"; if (seqName.length() < 3) { @@ -116,7 +129,7 @@ public class ThreeDBStructureChooserQuerySource { return false; } - seqName = seqName.toLowerCase(); + seqName = seqName.toLowerCase(Locale.ROOT); for (String ignoredEntry : ignoreList.split(",")) { if (seqName.contains(ignoredEntry)) @@ -153,11 +166,41 @@ public class ThreeDBStructureChooserQuerySource FilterOption selectedFilterOpt, boolean b) throws Exception { FTSRestResponse resultList; - FTSRestRequest tdbRequest = getTDBeaconsRequest(seq, wantedFields); - resultList = tdbRestClient.executeRequest(tdbRequest); + if (selectedFilterOpt != null + && tdBeaconsFilter(selectedFilterOpt.getValue())) + { + FTSRestRequest tdbRequest = getTDBeaconsRequest(seq, wantedFields); + resultList = tdbRestClient.executeRequest(tdbRequest); + + lastTdbRequest = tdbRequest; + if (resultList != null) + { // Query the PDB and add additional metadata + FTSRestResponse pdbResponse = fetchStructuresMetaDataFor( + getPDBQuerySource(), resultList); - lastTdbRequest = tdbRequest; + resultList = joinResponses(resultList, pdbResponse); + } + return resultList; + } + // use the PDBFTS directly + resultList = getPDBQuerySource().fetchStructuresMetaData(seq, + wantedFields, selectedFilterOpt, b); + lastTdbRequest = getPDBQuerySource().lastPdbRequest; + lastPdbRequest = lastTdbRequest; // both queries the same - indicates we + // rank using PDBe return resultList; + + } + + PDBStructureChooserQuerySource pdbQuerySource = null; + + private PDBStructureChooserQuerySource getPDBQuerySource() + { + if (pdbQuerySource == null) + { + pdbQuerySource = new PDBStructureChooserQuerySource(); + } + return pdbQuerySource; } private FTSRestRequest getTDBeaconsRequest(SequenceI seq, @@ -180,35 +223,92 @@ public class ThreeDBStructureChooserQuerySource @Override public List getAvailableFilterOptions(String VIEWS_FILTER) { - List filters = new ArrayList(); - filters.add( - new FilterOption(MessageManager.getString("label.best_quality"), - "overall_quality", VIEWS_FILTER, false)); - filters.add(new FilterOption( - MessageManager.getString("label.best_resolution"), "resolution", - VIEWS_FILTER, false)); - filters.add(new FilterOption( - MessageManager.getString("label.most_protein_chain"), - "number_of_protein_chains", VIEWS_FILTER, false)); - filters.add(new FilterOption( - MessageManager.getString("label.most_bound_molecules"), - "number_of_bound_molecules", VIEWS_FILTER, false)); - filters.add(new FilterOption( - MessageManager.getString("label.most_polymer_residues"), - "number_of_polymer_residues", VIEWS_FILTER, true)); + List filters = getPDBQuerySource() + .getAvailableFilterOptions(VIEWS_FILTER); + tdBeaconsFilters = new LinkedHashSet(); + tdBeaconsFilters.addAll(defaultFilters); + filters.add(0, new FilterOption("Best 3D-Beacons Coverage", + FILTER_FIRST_BEST_COVERAGE, VIEWS_FILTER, false, this)); + filters.add(1, new FilterOption("Multiple 3D-Beacons Coverage", + FILTER_TDBEACONS_COVERAGE, VIEWS_FILTER, true, this)); return filters; } - /** - * model categories - update as needed. warnings output if unknown types - * encountered. - * - * Order denotes 'trust' - */ - private static List EXP_CATEGORIES = Arrays - .asList(new String[] - { "EXPERIMENTALLY DETERMINED", "DEEP LEARNING", "TEMPLATE-BASED" }); + @Override + public void updateAvailableFilterOptions(String VIEWS_FILTER, + List xtantOptions, Collection tdbEntries) + { + if (tdbEntries != null && lastTdbRequest != null) + { + boolean hasPDBe = false; + for (FTSData _row : tdbEntries) + { + // tdb returns custom object + TDB_FTSData row = (TDB_FTSData) _row; + String provider = (String) row.getProvider(); + FilterOption providerOpt = new FilterOption( + "3DB Provider - " + provider, + FILTER_SOURCE_PREFIX + provider, VIEWS_FILTER, false, this); + if (!xtantOptions.contains(providerOpt)) + { + xtantOptions.add(1, providerOpt); + tdBeaconsFilters.add(FILTER_SOURCE_PREFIX + provider); + if ("PDBe".equalsIgnoreCase(provider)) + { + hasPDBe = true; + } + } + } + if (!hasPDBe) + { + // remove the PDBe options from the available filters + int op = 0; + while (op < xtantOptions.size()) + { + FilterOption filter = xtantOptions.get(op); + if (filter + .getQuerySource() instanceof PDBStructureChooserQuerySource) + { + xtantOptions.remove(op); + } + else + { + op++; + } + } + } + } + + } + + private boolean tdBeaconsFilter(String fieldToFilterBy) + { + return tdBeaconsFilters != null + && tdBeaconsFilters.contains(fieldToFilterBy); + } + + private String remove_prefix(String fieldToFilterBy) + { + if (tdBeaconsFilters != null + && tdBeaconsFilters.contains(fieldToFilterBy) + && !defaultFilters.contains(fieldToFilterBy)) + { + return fieldToFilterBy.substring(FILTER_SOURCE_PREFIX.length()); + } + else + { + return null; + } + } + + @Override + public boolean needsRefetch(FilterOption selectedFilterOpt) + { + return selectedFilterOpt == null + || !tdBeaconsFilter(selectedFilterOpt.getValue()) + && lastPdbRequest != lastTdbRequest; + } /** * FTSRestClient specific query builder to pick top ranked entry from a @@ -230,101 +330,25 @@ public class ThreeDBStructureChooserQuerySource Collection wantedFields, String fieldToFilterBy, boolean b) throws Exception { - - List filteredResponse = new ArrayList(); - final int idx_ups = lastTdbRequest.getFieldIndex("Uniprot Start"); - final int idx_upe = lastTdbRequest.getFieldIndex("Uniprot End"); - final int idx_mcat = lastTdbRequest.getFieldIndex("Model Category"); - final int idx_mqual = lastTdbRequest.getFieldIndex("Qmean"); - final int idx_resol = lastTdbRequest.getFieldIndex("Resolution"); - - // ignore anything outside the sequence region - for (FTSData row : collectedResults) + if (fieldToFilterBy != null && tdBeaconsFilter(fieldToFilterBy)) { - int up_s = (Integer) row.getSummaryData()[idx_ups]; - int up_e = (Integer) row.getSummaryData()[idx_upe]; + TDBResultAnalyser analyser = new TDBResultAnalyser(seq, + collectedResults, lastTdbRequest, fieldToFilterBy, + remove_prefix(fieldToFilterBy)); - if (seq == row.getSummaryData()[0] && up_e > seq.getStart() - && up_s < seq.getEnd()) - { - filteredResponse.add(row); - } - } - // sort according to decreasing length, - // increasing start - Collections.sort(filteredResponse, new Comparator() - { - - private final int scoreCategory(String cat) - { - // TODO: make quicker - int idx = EXP_CATEGORIES.indexOf(cat.toUpperCase()); - if (idx == -1) - { - System.out.println("Unknown category: '" + cat + "'"); - } - return -EXP_CATEGORIES.size() - idx; - } + FTSRestResponse resultList = new FTSRestResponse(); - @Override - public int compare(FTSData o1, FTSData o2) - { - int o1_s = (Integer) o1.getSummaryData()[idx_ups]; - int o1_e = (Integer) o1.getSummaryData()[idx_upe]; - int o1_cat = scoreCategory((String) o1.getSummaryData()[idx_mcat]); - int o2_s = (Integer) o2.getSummaryData()[idx_ups]; - int o2_e = (Integer) o2.getSummaryData()[idx_upe]; - int o2_cat = scoreCategory((String) o2.getSummaryData()[idx_mcat]); - - if (o1_cat == o2_cat) - { - if (o1_s == o2_s) - { - int o1_xtent = o1_e - o1_s; - int o2_xtent = o2_e - o2_s; - if (o1_xtent == o2_xtent) - { - if (o1_cat == scoreCategory(EXP_CATEGORIES.get(0))) - { - // experimental structures, so rank on quality - double o1_res = (Double) o1.getSummaryData()[idx_resol]; - double o2_res = (Double) o2.getSummaryData()[idx_resol]; - return (o2_res < o1_res) ? 1 : (o2_res == o1_res) ? 0 : -1; - } - else - { - // models, so rank on qmean - float o1_mq = (Float) o1.getSummaryData()[idx_mqual]; - float o2_mq = (Float) o2.getSummaryData()[idx_mqual]; - return (o2_mq < o1_mq) ? 1 : (o2_mq == o1_mq) ? 0 : -1; - } - } - else - { - return o1_xtent - o2_xtent; - } - } - else - { - return o1_s - o2_s; - } - } - else - { - return o2_cat - o1_cat; - } - } + List filteredResponse = analyser.getFilteredResponse(); - @Override - public boolean equals(Object obj) - { - return super.equals(obj); - } - }); - FTSRestResponse resultList = new FTSRestResponse(); - resultList.setNumberOfItemsFound(filteredResponse.size()); - resultList.setSearchSummary(filteredResponse); - return resultList; + List selectedStructures = analyser + .selectStructures(filteredResponse); + resultList.setNumberOfItemsFound(selectedStructures.size()); + resultList.setSearchSummary(selectedStructures); + return resultList; + } + // Fall back to PDBe rankings + return getPDBQuerySource().selectFirstRankedQuery(seq, collectedResults, + wantedFields, fieldToFilterBy, b); } @Override @@ -338,10 +362,35 @@ public class ThreeDBStructureChooserQuerySource int idColumnIndex = restable.getColumn("Model id").getModelIndex(); int urlColumnIndex = restable.getColumn("Url").getModelIndex(); int typeColumnIndex = restable.getColumn("Provider").getModelIndex(); - int categoryColumnIndex = restable.getColumn("Model Category") + int humanUrl = restable.getColumn("Page URL").getModelIndex(); + int modelformat = restable.getColumn("Model Format").getModelIndex(); + final int up_start_idx = restable.getColumn("Uniprot Start") .getModelIndex(); + final int up_end_idx = restable.getColumn("Uniprot End") + .getModelIndex(); + int i = 0; - for (int row : selectedRows) + // bleugh! + Integer[] sellist = new Integer[selectedRows.length]; + for (Integer row : selectedRows) + { + sellist[i++] = row; + } + // Sort rows by coverage + Arrays.sort(sellist, new Comparator() + { + @Override + public int compare(Integer o1, Integer o2) + { + int o1_xt = ((Integer) restable.getValueAt(o1, up_end_idx)) + - (Integer) restable.getValueAt(o1, up_start_idx); + int o2_xt = ((Integer) restable.getValueAt(o2, up_end_idx)) + - (Integer) restable.getValueAt(o2, up_start_idx); + return o2_xt - o1_xt; + } + }); + + for (int row : sellist) { // unique id - could be a horrible hash @@ -349,6 +398,10 @@ public class ThreeDBStructureChooserQuerySource String urlStr = restable.getValueAt(row, urlColumnIndex).toString(); String typeColumn = restable.getValueAt(row, typeColumnIndex) .toString(); + String modelPage = humanUrl < 1 ? null + : (String) restable.getValueAt(row, humanUrl); + String strucFormat = restable.getValueAt(row, modelformat).toString(); + SequenceI selectedSeq = (SequenceI) restable.getValueAt(row, refSeqColIndex); selectedSeqsToView.add(selectedSeq); @@ -362,11 +415,22 @@ public class ThreeDBStructureChooserQuerySource { pdbEntry = new PDBEntry(); pdbEntry.setId(pdbIdStr); - pdbEntry.setType(PDBEntry.Type.MMCIF); + pdbEntry.setAuthoritative(true); + try + { + pdbEntry.setType(PDBEntry.Type.valueOf(strucFormat)); + } catch (Exception q) + { + Console.warn("Unknown filetype for 3D Beacons Model from: " + + strucFormat + " - " + pdbIdStr + " - " + modelPage); + } + if (!"PDBe".equalsIgnoreCase(typeColumn)) { pdbEntry.setRetrievalUrl(urlStr); } + pdbEntry.setProvider(typeColumn); + pdbEntry.setProviderPage(modelPage); selectedSeq.getDatasetSequence().addPDBId(pdbEntry); } pdbEntriesToView[count++] = pdbEntry; @@ -390,7 +454,7 @@ public class ThreeDBStructureChooserQuerySource public String buildPDBFTSQueryFor(FTSRestResponse upResponse) { - List pdbIds = new ArrayList(); + Set pdbIds = new HashSet(); int idx_modelId = getLastFTSRequest().getFieldIndex("Model id"); int idx_provider = getLastFTSRequest().getFieldIndex("Provider"); for (FTSData row : upResponse.getSearchSummary()) @@ -418,7 +482,10 @@ public class ThreeDBStructureChooserQuerySource { String pdb_Query = buildPDBFTSQueryFor(upResponse); - + if (pdb_Query.length() == 0) + { + return null; + } FTSRestResponse resultList; FTSRestRequest pdbRequest = new FTSRestRequest(); pdbRequest.setAllowEmptySeq(false); @@ -428,6 +495,7 @@ public class ThreeDBStructureChooserQuerySource pdbRequest.setWantedFields( pdbquery.getDocFieldPrefs().getStructureSummaryFields()); pdbRequest.setSearchTerm(pdb_Query + ")"); + resultList = pdbquery.executePDBFTSRestRequest(pdbRequest); lastPdbRequest = pdbRequest; @@ -437,29 +505,64 @@ public class ThreeDBStructureChooserQuerySource public FTSRestResponse joinResponses(FTSRestResponse upResponse, FTSRestResponse pdbResponse) { + boolean hasPdbResp = lastPdbRequest != null; + int idx_provider = getLastFTSRequest().getFieldIndex("Provider"); // join on int idx_modelId = getLastFTSRequest().getFieldIndex("Model id"); - int pdbIdx = lastPdbRequest.getFieldIndex("pdb_id"); - for (FTSData row : upResponse.getSearchSummary()) + int pdbIdx = hasPdbResp ? lastPdbRequest.getFieldIndex("PDB Id") : -1; + int pdbTitle_idx = hasPdbResp ? lastPdbRequest.getFieldIndex("Title") + : -1; + int tdbTitle_idx = getLastFTSRequest().getFieldIndex("Title"); + + for (final FTSData row : upResponse.getSearchSummary()) { String id = (String) row.getSummaryData()[idx_modelId]; String provider = (String) row.getSummaryData()[idx_provider]; if ("PDBe".equalsIgnoreCase(provider)) { - for (FTSData pdbrow : pdbResponse.getSearchSummary()) + if (!hasPdbResp) { - String pdbid = (String) pdbrow.getSummaryData()[pdbIdx]; - if (id.equalsIgnoreCase(pdbid)) + System.out.println( + "Warning: seems like we couldn't get to the PDBe search interface."); + } + else + { + for (final FTSData pdbrow : pdbResponse.getSearchSummary()) { - // often multiple entries per PDB ID so we bail after first - // get wanted fields - // append to FTSRestResponse array + String pdbid = (String) pdbrow.getSummaryData()[pdbIdx]; + if (id.equalsIgnoreCase(pdbid)) + { + row.getSummaryData()[tdbTitle_idx] = pdbrow + .getSummaryData()[pdbTitle_idx]; + } } } + + } + else + { + row.getSummaryData()[tdbTitle_idx] = "Model from TDB"; + } + } + return upResponse; + } + + public TDB_FTSData getFTSDataFor(JTable restable, int selectedRow, + Collection discoveredStructuresSet) + { + int idColumnIndex = restable.getColumn("Model id").getModelIndex(); + + String modelId = (String) restable.getValueAt(selectedRow, + idColumnIndex); + for (FTSData row : discoveredStructuresSet) + { + if (row instanceof TDB_FTSData + && ((TDB_FTSData) row).getModelId().equals(modelId)) + { + return ((TDB_FTSData) row); } } - // TODO Auto-generated method stub return null; }