From: Jim Procter
Date: Mon, 30 Aug 2021 09:06:08 +0000 (+0100)
Subject: JAL-3829 Structure chooser specific FTS interface and query source logic for PDBe...
X-Git-Tag: Release_2_11_2_0~39^2~35
X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=3471202f54daaef6d58a7d204fff9ef3a8857040;p=jalview.git

JAL-3829 Structure chooser specific FTS interface and query source logic for PDBe and 3D-Beacons
---

diff --git a/src/jalview/fts/api/StructureFTSRestClientI.java b/src/jalview/fts/api/StructureFTSRestClientI.java
new file mode 100644
index 0000000..4974b80
--- /dev/null
+++ b/src/jalview/fts/api/StructureFTSRestClientI.java
@@ -0,0 +1,19 @@
+package jalview.fts.api;
+
+import java.util.Collection;
+
+/**
+ * Structure chooser specific FTS client interface
+ */
+public interface StructureFTSRestClientI
+{
+
+  /**
+   * NOTE(review): element type assumed to be FTSDataColumnI, as in the
+   * structure chooser query sources - confirm
+   *
+   * @return the structure data columns displayed by default
+   */
+  Collection<FTSDataColumnI> getAllDefaultDisplayedStructureDataColumns();
+
+}
diff --git a/src/jalview/gui/structurechooser/PDBStructureChooserQuerySource.java b/src/jalview/gui/structurechooser/PDBStructureChooserQuerySource.java
new file mode 100644
index 0000000..3d2efb2
--- /dev/null
+++ b/src/jalview/gui/structurechooser/PDBStructureChooserQuerySource.java
@@ -0,0 +1,373 @@
+package jalview.gui.structurechooser;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Objects;
+import java.util.Set;
+
+import javax.swing.JTable;
+import javax.swing.table.TableModel;
+
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.PDBEntry;
+import jalview.datamodel.SequenceI;
+import jalview.fts.api.FTSData;
+import jalview.fts.api.FTSDataColumnI;
+import jalview.fts.api.FTSRestClientI;
+import jalview.fts.core.FTSDataColumnPreferences;
+import jalview.fts.core.FTSDataColumnPreferences.PreferenceSource;
+import jalview.fts.core.FTSRestRequest;
+import jalview.fts.core.FTSRestResponse;
+import jalview.fts.service.pdb.PDBFTSRestClient;
+import jalview.jbgui.GStructureChooser.FilterOption;
+
+/**
+ * logic for querying the PDBe API for structures of sequences
+ *
+ * @author jprocter
+ */
+public class PDBStructureChooserQuerySource
+        extends StructureChooserQuerySource
+{
+
+  /** no further terms are added to a query that has reached this length */
+  private static final int MAX_QLENGTH = 7820;
+
+  /** separator written after every query term */
+  private static final String OR = " OR ";
+
+  /** a name containing any of these (compared in lower case) is rejected */
+  private static final String[] IGNORED_NAMES = { "pdb", "uniprot",
+      "swiss-prot" };
+
+  /**
+   * Creates a query source that uses PDBFTSRestClient.getInstance() for its
+   * requests and for its structure chooser column preferences.
+   */
+  public PDBStructureChooserQuerySource()
+  {
+    pdbRestClient = PDBFTSRestClient.getInstance();
+    docFieldPrefs = new FTSDataColumnPreferences(
+            PreferenceSource.STRUCTURE_CHOOSER,
+            PDBFTSRestClient.getInstance());
+  }
+
+  /**
+   * Builds a query string for a sequence from its PDB entries and DBRef
+   * entries. Each PDB id is queried once; each UniProt reference is queried
+   * both as an accession and as an id. Only when no PDB or UniProt term was
+   * added are free-text terms used instead: the ids of the other DBRefs and
+   * the '|' separated parts of the sanitised sequence name.
+   *
+   * @param seq
+   *          the sequence to build a query for
+   * @return the query terms joined by " OR ", or null if there are none
+   */
+  public String buildQuery(SequenceI seq)
+  {
+    boolean isPDBRefsFound = false;
+    boolean isUniProtRefsFound = false;
+    StringBuilder queryBuilder = new StringBuilder();
+    Set<String> seqRefs = new LinkedHashSet<>();
+
+    /*
+     * PDB ids already in the query, so that none is added twice
+     */
+    Set<String> pdbids = new HashSet<>();
+
+    if (seq.getAllPDBEntries() != null)
+    {
+      for (PDBEntry entry : seq.getAllPDBEntries())
+      {
+        if (queryBuilder.length() >= MAX_QLENGTH)
+        {
+          break;
+        }
+        if (isValidSeqName(entry.getId()))
+        {
+          String id = entry.getId().toLowerCase(Locale.ROOT);
+          if (pdbids.add(id))
+          {
+            queryBuilder.append("pdb_id:").append(id).append(OR);
+            isPDBRefsFound = true;
+          }
+        }
+      }
+    }
+
+    List<DBRefEntry> refs = seq.getDBRefs();
+    if (refs != null)
+    {
+      for (DBRefEntry dbRef : refs)
+      {
+        String refId = getDBRefId(dbRef);
+        if (!isValidSeqName(refId)
+                || queryBuilder.length() >= MAX_QLENGTH)
+        {
+          continue;
+        }
+        String source = dbRef.getSource();
+        if (DBRefSource.UNIPROT.equalsIgnoreCase(source))
+        {
+          queryBuilder.append("uniprot_accession:").append(refId)
+                  .append(OR);
+          queryBuilder.append("uniprot_id:").append(refId).append(OR);
+          isUniProtRefsFound = true;
+        }
+        else if (DBRefSource.PDB.equalsIgnoreCase(source))
+        {
+          String id = refId.toLowerCase(Locale.ROOT);
+          if (pdbids.add(id))
+          {
+            queryBuilder.append("pdb_id:").append(id).append(OR);
+            isPDBRefsFound = true;
+          }
+        }
+        else
+        {
+          seqRefs.add(refId);
+        }
+      }
+    }
+
+    if (!isPDBRefsFound && !isUniProtRefsFound)
+    {
+      String seqName = sanitizeSeqName(seq.getName());
+      for (String name : seqName.toLowerCase(Locale.ROOT).split("\\|"))
+      {
+        // no trim needed: sanitizeSeqName leaves no whitespace behind
+        if (isValidSeqName(name))
+        {
+          seqRefs.add(name);
+        }
+      }
+
+      for (String seqRef : seqRefs)
+      {
+        queryBuilder.append("text:").append(seqRef).append(OR);
+      }
+    }
+
+    if (queryBuilder.length() == 0)
+    {
+      return null;
+    }
+    // every term is followed by a separator; drop the last one
+    return queryBuilder.substring(0, queryBuilder.length() - OR.length());
+  }
+
+  /**
+   * Strips special characters from a sequence name. First every '[' ... ']'
+   * pair that encloses only digits (or nothing) is removed together with
+   * its content; then every character that is not an ASCII letter, a
+   * digit, '|' or '_' is removed. That covers all whitespace and the
+   * special characters +, -, &, !, (, ), {, }, [, ], ^, ", ~, *, ?, :, \
+   *
+   * @param seqName
+   *          the name to sanitise, must not be null
+   * @return the sanitised name
+   * @throws NullPointerException
+   *           if seqName is null
+   */
+  public static String sanitizeSeqName(String seqName)
+  {
+    Objects.requireNonNull(seqName);
+    return seqName.replaceAll("\\[\\d*\\]", "")
+            .replaceAll("[^\\dA-Za-z|_]", "");
+  }
+
+  /**
+   * Ensures a sequence ref name is not less than 3 characters, has no ':'
+   * and does not contain a database name
+   *
+   * @param seqName
+   *          the name to check, may be null
+   * @return true if the name passes all checks, false for null
+   */
+  static boolean isValidSeqName(String seqName)
+  {
+    if (seqName == null || seqName.length() < 3 || seqName.contains(":"))
+    {
+      return false;
+    }
+    String lowerCased = seqName.toLowerCase(Locale.ROOT);
+    for (String ignoredEntry : IGNORED_NAMES)
+    {
+      if (lowerCased.contains(ignoredEntry))
+      {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * @param dbRef
+   *          the database reference to read
+   * @return the accession id of the reference with every "GO:" removed, or
+   *         null if it has no accession id
+   */
+  static String getDBRefId(DBRefEntry dbRef)
+  {
+    String accession = dbRef.getAccessionId();
+    return accession == null ? null : accession.replace("GO:", "");
+  }
+
+  /**
+   * FTSRestClient specific query builder to recover associated structure data
+   * records for a sequence
+   *
+   * @param seq
+   *          - seq to generate a query for
+   * @param wantedFields
+   *          - fields to retrieve
+   * @param selectedFilterOpt
+   *          - criterion for ranking results (e.g. resolution)
+   * @param b
+   *          - sort ascending or descending
+   * @return the response, or null if no query could be built for seq
+   * @throws Exception
+   */
+  public FTSRestResponse fetchStructuresMetaData(SequenceI seq,
+          Collection<FTSDataColumnI> wantedFields,
+          FilterOption selectedFilterOpt, boolean b) throws Exception
+  {
+    FTSRestRequest pdbRequest = newPdbRequest(seq, wantedFields, 500);
+    if (pdbRequest == null)
+    {
+      return null;
+    }
+    pdbRequest.setFieldToSortBy(selectedFilterOpt.getValue(), b);
+    FTSRestResponse resultList = pdbRestClient.executeRequest(pdbRequest);
+
+    lastPdbRequest = pdbRequest;
+    return resultList;
+  }
+
+  /**
+   * FTSRestClient specific query builder to pick top ranked entry from a
+   * fetchStructuresMetaData query
+   *
+   * @param seq
+   *          - seq to generate a query for
+   * @param wantedFields
+   *          - fields to retrieve
+   * @param fieldToFilterBy
+   *          - criterion for ranking results (e.g. resolution);
+   *          "uniprot_coverage" sets up a facet pivot instead of a sort
+   * @param b
+   *          - sort ascending or descending
+   * @return the response, or null if no query could be built for seq
+   * @throws Exception
+   */
+  public FTSRestResponse selectFirstRankedQuery(SequenceI seq,
+          Collection<FTSDataColumnI> wantedFields, String fieldToFilterBy,
+          boolean b) throws Exception
+  {
+    FTSRestRequest pdbRequest = newPdbRequest(seq, wantedFields, 1);
+    if (pdbRequest == null)
+    {
+      return null;
+    }
+    if (fieldToFilterBy.equalsIgnoreCase("uniprot_coverage"))
+    {
+      pdbRequest.setFacet(true);
+      pdbRequest.setFacetPivot(fieldToFilterBy + ",entry_entity");
+      pdbRequest.setFacetPivotMinCount(1);
+    }
+    else
+    {
+      pdbRequest.setFieldToSortBy(fieldToFilterBy, b);
+    }
+    FTSRestResponse resultList = pdbRestClient.executeRequest(pdbRequest);
+
+    lastPdbRequest = pdbRequest;
+    return resultList;
+  }
+
+  /**
+   * Creates the request settings shared by fetchStructuresMetaData and
+   * selectFirstRankedQuery
+   *
+   * @param seq
+   *          - seq to generate a query for
+   * @param wantedFields
+   *          - fields to retrieve
+   * @param responseSize
+   *          - value for the request's response size
+   * @return the request, or null if buildQuery yields no query for seq
+   */
+  private FTSRestRequest newPdbRequest(SequenceI seq,
+          Collection<FTSDataColumnI> wantedFields, int responseSize)
+  {
+    String query = buildQuery(seq);
+    if (query == null)
+    {
+      // appending ")" to a null query would give the search term "null)"
+      return null;
+    }
+    FTSRestRequest pdbRequest = new FTSRestRequest();
+    pdbRequest.setAllowEmptySeq(false);
+    pdbRequest.setResponseSize(responseSize);
+    // the "(" given as search field is closed by the ")" after the query
+    pdbRequest.setFieldToSearchBy("(");
+    pdbRequest.setWantedFields(wantedFields);
+    pdbRequest.setSearchTerm(query + ")");
+    pdbRequest.setAssociatedSequence(seq);
+    return pdbRequest;
+  }
+
+  /**
+   * Gets a PDBEntry for each selected row of a results table. When neither
+   * getPDBEntry on the row's sequence nor getFindEntry yields one, an mmCIF
+   * typed entry is created and added to the sequence's dataset sequence.
+   *
+   * @param restable
+   *          table with "Ref Sequence" and "PDB Id" columns
+   * @param selectedRows
+   *          rows to collect, as indices of the table view
+   * @param selectedSeqsToView
+   *          list that receives the sequence of each collected row
+   * @return one entry per selected row, in the order of selectedRows
+   */
+  @Override
+  public PDBEntry[] collectSelectedRows(JTable restable, int[] selectedRows,
+          List<SequenceI> selectedSeqsToView)
+  {
+    // JTable.getValueAt takes view indices, so look up the columns' view
+    // positions rather than their model indices
+    int refSeqColIndex = restable.getColumnModel()
+            .getColumnIndex("Ref Sequence");
+    int idColumnIndex = restable.getColumnModel().getColumnIndex("PDB Id");
+
+    PDBEntry[] pdbEntriesToView = new PDBEntry[selectedRows.length];
+    int count = 0;
+    for (int row : selectedRows)
+    {
+      String pdbIdStr = restable.getValueAt(row, idColumnIndex).toString();
+      SequenceI selectedSeq = (SequenceI) restable.getValueAt(row,
+              refSeqColIndex);
+      selectedSeqsToView.add(selectedSeq);
+      PDBEntry pdbEntry = selectedSeq.getPDBEntry(pdbIdStr);
+      if (pdbEntry == null)
+      {
+        pdbEntry = getFindEntry(pdbIdStr, selectedSeq.getAllPDBEntries());
+      }
+
+      if (pdbEntry == null)
+      {
+        pdbEntry = new PDBEntry();
+        pdbEntry.setId(pdbIdStr);
+        pdbEntry.setType(PDBEntry.Type.MMCIF);
+        selectedSeq.getDatasetSequence().addPDBId(pdbEntry);
+      }
+      pdbEntriesToView[count++] = pdbEntry;
+    }
+    return pdbEntriesToView;
+  }
+
+}
\ No newline at end of file
diff --git a/src/jalview/gui/structurechooser/ThreeDBStructureChooserQuerySource.java b/src/jalview/gui/structurechooser/ThreeDBStructureChooserQuerySource.java
new file mode 100644
index 0000000..6c2123b
--- /dev/null
+++ b/src/jalview/gui/structurechooser/ThreeDBStructureChooserQuerySource.java
@@ -0,0 +1,255 @@
+package jalview.gui.structurechooser;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+
+import javax.swing.JTable;
+import javax.swing.table.TableModel;
+
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.PDBEntry;
+import jalview.datamodel.SequenceI;
+import jalview.fts.api.FTSData;
+import jalview.fts.api.FTSDataColumnI;
+import jalview.fts.api.FTSRestClientI;
+import jalview.fts.core.FTSDataColumnPreferences;
+import jalview.fts.core.FTSDataColumnPreferences.PreferenceSource;
+import jalview.fts.core.FTSRestRequest;
+import jalview.fts.core.FTSRestResponse;
+import jalview.fts.service.pdb.PDBFTSRestClient;
+import jalview.fts.service.threedbeacons.TDBeaconsFTSRestClient;
+import jalview.jbgui.GStructureChooser.FilterOption;
+
+/**
+ * logic for querying the 3D-Beacons API for structures of sequences
+ *
+ * @author jprocter
+ */
+public class ThreeDBStructureChooserQuerySource
+        extends StructureChooserQuerySource
+{
+
+  /**
+   * Creates a query source that uses TDBeaconsFTSRestClient.getInstance()
+   * for its requests and for its structure chooser column
+   * preferences.
+   */
+  public ThreeDBStructureChooserQuerySource()
+  {
+    pdbRestClient = TDBeaconsFTSRestClient.getInstance();
+    docFieldPrefs = new FTSDataColumnPreferences(
+            PreferenceSource.STRUCTURE_CHOOSER,
+            TDBeaconsFTSRestClient.getInstance());
+  }
+
+  /**
+   * Builds a query string for a given sequence using its DBRef entries. 3d
+   * Beacons is only useful for uniprot IDs, so the query is the id of the
+   * first canonical UniProt DBRef that has a valid id.
+   *
+   * @param seq
+   *          the sequence to build a query for
+   * @return the built query string, or null if seq has no such DBRef
+   */
+  public String buildQuery(SequenceI seq)
+  {
+    List<DBRefEntry> refs = seq.getDBRefs();
+    if (refs == null)
+    {
+      return null;
+    }
+    for (DBRefEntry dbRef : refs)
+    {
+      String refId = getDBRefId(dbRef);
+      if (isValidSeqName(refId)
+              && DBRefSource.UNIPROT.equalsIgnoreCase(dbRef.getSource())
+              && dbRef.isCanonical())
+      {
+        // TODO: pick best Uniprot accession
+        return refId;
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Ensures sequence ref names are not less than 3 characters and does not
+   * contain a database name
+   *
+   * @param seqName
+   *          the name to check
+   * @return the result of the same check of PDBStructureChooserQuerySource
+   */
+  static boolean isValidSeqName(String seqName)
+  {
+    return PDBStructureChooserQuerySource.isValidSeqName(seqName);
+  }
+
+  /**
+   * @param dbRef
+   *          the database reference to read
+   * @return the id that PDBStructureChooserQuerySource derives for dbRef
+   */
+  static String getDBRefId(DBRefEntry dbRef)
+  {
+    return PDBStructureChooserQuerySource.getDBRefId(dbRef);
+  }
+
+  /**
+   * FTSRestClient specific query builder to recover associated structure data
+   * records for a sequence
+   *
+   * @param seq
+   *          - seq to generate a query for
+   * @param wantedFields
+   *          - fields to retrieve
+   * @param selectedFilterOpt
+   *          - criterion for ranking results; not used by this source
+   * @param b
+   *          - sort ascending or descending; not used by this source
+   * @return the response, or null if no query could be built for seq
+   * @throws Exception
+   */
+  public FTSRestResponse fetchStructuresMetaData(SequenceI seq,
+          Collection<FTSDataColumnI> wantedFields,
+          FilterOption selectedFilterOpt, boolean b) throws Exception
+  {
+    FTSRestRequest pdbRequest = getTDBeaconsRequest(seq, wantedFields);
+    if (pdbRequest == null)
+    {
+      return null;
+    }
+    FTSRestResponse resultList = pdbRestClient.executeRequest(pdbRequest);
+
+    lastPdbRequest = pdbRequest;
+    return resultList;
+  }
+
+  /**
+   * Creates a request with a response size of 500 whose search term is the
+   * query for the sequence followed by ".json"
+   *
+   * @param seq
+   *          - seq to generate a query for
+   * @param wantedFields
+   *          - fields to retrieve
+   * @return the request, or null if buildQuery yields no query for seq
+   */
+  private FTSRestRequest getTDBeaconsRequest(SequenceI seq,
+          Collection<FTSDataColumnI> wantedFields)
+  {
+    String query = buildQuery(seq);
+    if (query == null)
+    {
+      return null;
+    }
+    FTSRestRequest pdbRequest = new FTSRestRequest();
+    pdbRequest.setAllowEmptySeq(false);
+    pdbRequest.setResponseSize(500);
+    pdbRequest.setWantedFields(wantedFields);
+    pdbRequest.setSearchTerm(query + ".json");
+    pdbRequest.setAssociatedSequence(seq);
+    return pdbRequest;
+  }
+
+  /**
+   * FTSRestClient specific query builder to pick top ranked entry from a
+   * fetchStructuresMetaData query
+   *
+   * @param seq
+   *          - seq to generate a query for
+   * @param wantedFields
+   *          - fields to retrieve
+   * @param fieldToFilterBy
+   *          - criterion for ranking results; not used by this source
+   * @param b
+   *          - sort ascending or descending; not used by this source
+   * @return the response, or null if no query could be built for seq
+   * @throws Exception
+   */
+  public FTSRestResponse selectFirstRankedQuery(SequenceI seq,
+          Collection<FTSDataColumnI> wantedFields, String fieldToFilterBy,
+          boolean b) throws Exception
+  {
+    FTSRestRequest pdbRequest = getTDBeaconsRequest(seq, wantedFields);
+    if (pdbRequest == null)
+    {
+      return null;
+    }
+    pdbRequest.setResponseSize(1);
+    FTSRestResponse resultList = pdbRestClient.executeRequest(pdbRequest);
+
+    // TODO: client side filtering - sort results and pick top one (or N)
+
+    lastPdbRequest = pdbRequest;
+    return resultList;
+  }
+
+  /**
+   * Gets a PDBEntry for each selected row of a results table. When neither
+   * getPDBEntry on the row's sequence nor getFindEntry yields one, an mmCIF
+   * typed entry is created and added to the sequence's dataset sequence; it
+   * is given the row's URL unless the provider is "PDBe" (ignoring case).
+   *
+   * @param restable
+   *          table with "Ref Sequence", "Model id", "Url" and "Provider"
+   *          columns
+   * @param selectedRows
+   *          rows to collect, as indices of the table view
+   * @param selectedSeqsToView
+   *          list that receives the sequence of each collected row
+   * @return one entry per selected row, in the order of selectedRows
+   */
+  @Override
+  public PDBEntry[] collectSelectedRows(JTable restable, int[] selectedRows,
+          List<SequenceI> selectedSeqsToView)
+  {
+    // JTable.getValueAt takes view indices, so look up the columns' view
+    // positions rather than their model indices
+    int refSeqColIndex = restable.getColumnModel()
+            .getColumnIndex("Ref Sequence");
+    int idColumnIndex = restable.getColumnModel()
+            .getColumnIndex("Model id");
+    int urlColumnIndex = restable.getColumnModel().getColumnIndex("Url");
+    int typeColumnIndex = restable.getColumnModel()
+            .getColumnIndex("Provider");
+
+    PDBEntry[] pdbEntriesToView = new PDBEntry[selectedRows.length];
+    int count = 0;
+    for (int row : selectedRows)
+    {
+      // unique id - could be a horrible hash
+      String pdbIdStr = restable.getValueAt(row, idColumnIndex).toString();
+      String urlStr = restable.getValueAt(row, urlColumnIndex).toString();
+      String typeColumn = restable.getValueAt(row, typeColumnIndex)
+              .toString();
+      SequenceI selectedSeq = (SequenceI) restable.getValueAt(row,
+              refSeqColIndex);
+      selectedSeqsToView.add(selectedSeq);
+      PDBEntry pdbEntry = selectedSeq.getPDBEntry(pdbIdStr);
+      if (pdbEntry == null)
+      {
+        pdbEntry = getFindEntry(pdbIdStr, selectedSeq.getAllPDBEntries());
+      }
+
+      if (pdbEntry == null)
+      {
+        pdbEntry = new PDBEntry();
+        pdbEntry.setId(pdbIdStr);
+        pdbEntry.setType(PDBEntry.Type.MMCIF);
+        if (!"PDBe".equalsIgnoreCase(typeColumn))
+        {
+          pdbEntry.setRetrievalUrl(urlStr);
+        }
+        selectedSeq.getDatasetSequence().addPDBId(pdbEntry);
+      }
+      pdbEntriesToView[count++] = pdbEntry;
+    }
+    return pdbEntriesToView;
+  }
+}
\ No newline at end of file