From cd8070808d46db2a7cdf130c84dbc04c59c20188 Mon Sep 17 00:00:00 2001 From: Jim Procter Date: Mon, 23 Aug 2021 17:45:04 +0100 Subject: [PATCH] =?utf8?q?JAL-3829=20pushed=20the=20PDBFTSClient=20specific=20?= =?utf8?q?code=20to=20new=20=E2=80=98StructureChooserQuerySource=E2=80=99=20?= =?utf8?q?class=20in=20preparation=20for=20adapting=20it=20to=20query=203d-b?= =?utf8?q?eacons?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- src/jalview/gui/StructureChooser.java | 219 ++-------------- src/jalview/gui/StructureChooserQuerySource.java | 292 ++++++++++++++++++++++ test/jalview/gui/StructureChooserTest.java | 17 +- 3 files changed, 318 insertions(+), 210 deletions(-) create mode 100644 src/jalview/gui/StructureChooserQuerySource.java diff --git a/src/jalview/gui/StructureChooser.java b/src/jalview/gui/StructureChooser.java index 33d8c33..99f71dd 100644 --- a/src/jalview/gui/StructureChooser.java +++ b/src/jalview/gui/StructureChooser.java @@ -36,6 +36,7 @@ import jalview.fts.core.FTSRestResponse; import jalview.fts.service.pdb.PDBFTSRestClient; import jalview.io.DataSourceType; import jalview.jbgui.GStructureChooser; +import jalview.jbgui.GStructureChooser.FilterOption; import jalview.structure.StructureMapping; import jalview.structure.StructureSelectionManager; import jalview.util.MessageManager; @@ -71,8 +72,6 @@ public class StructureChooser extends GStructureChooser { private static final String AUTOSUPERIMPOSE = "AUTOSUPERIMPOSE"; - private static int MAX_QLENGTH = 7820; - private SequenceI selectedSequence; private SequenceI[] selectedSequences; @@ -81,9 +80,7 @@ public class StructureChooser extends GStructureChooser private Collection discoveredStructuresSet; - private FTSRestRequest lastPdbRequest; - - private FTSRestClientI pdbRestClient; + private StructureChooserQuerySource data = StructureChooserQuerySource.getPDBfts(); private String selectedPdbFileName; @@ -217,36 +214,28 @@ public class StructureChooser extends GStructureChooser void fetchStructuresMetaData() { long startTime = System.currentTimeMillis(); - pdbRestClient = PDBFTSRestClient.getInstance(); Collection wantedFields = pdbDocFieldPrefs .getStructureSummaryFields(); discoveredStructuresSet = new LinkedHashSet<>(); HashSet errors = new HashSet<>(); + + FilterOption selectedFilterOpt = ((FilterOption) cmb_filterOption + .getSelectedItem()); + for (SequenceI seq : selectedSequences) { - FTSRestRequest pdbRequest = new FTSRestRequest(); - pdbRequest.setAllowEmptySeq(false); - pdbRequest.setResponseSize(500); - pdbRequest.setFieldToSearchBy("("); - FilterOption selectedFilterOpt = ((FilterOption) cmb_filterOption - .getSelectedItem()); - pdbRequest.setFieldToSortBy(selectedFilterOpt.getValue(), - !chk_invertFilter.isSelected()); - pdbRequest.setWantedFields(wantedFields); - pdbRequest.setSearchTerm(buildQuery(seq) + ")"); - pdbRequest.setAssociatedSequence(seq); + FTSRestResponse resultList; try { - resultList = pdbRestClient.executeRequest(pdbRequest); + resultList = data.fetchStructuresMetaData(seq, wantedFields, selectedFilterOpt, !chk_invertFilter.isSelected()); } catch (Exception e) { e.printStackTrace(); errors.add(e.getMessage()); continue; } - lastPdbRequest = pdbRequest; if (resultList.getSearchSummary() != null && !resultList.getSearchSummary().isEmpty()) { @@ -260,8 +249,7 @@ public class StructureChooser extends GStructureChooser if (discoveredStructuresSet != null && !discoveredStructuresSet.isEmpty()) { - getResultTable().setModel(FTSRestResponse - .getTableModel(lastPdbRequest, discoveredStructuresSet)); + getResultTable().setModel(data.getTableModel(discoveredStructuresSet)); noOfStructuresFound = discoveredStructuresSet.size(); mainFrame.setTitle(MessageManager.formatMessage( "label.structure_chooser_no_of_structures", @@ -309,157 +297,6 @@ public class StructureChooser extends GStructureChooser } /** - * Builds a query string for a given sequences using its DBRef entries - * - * @param seq - * the sequences to build a query for - * @return the built query string - */ - - static String buildQuery(SequenceI seq) - { - boolean isPDBRefsFound = false; - boolean isUniProtRefsFound = false; - StringBuilder queryBuilder = new StringBuilder(); - Set seqRefs = new LinkedHashSet<>(); - - /* - * note PDBs as DBRefEntry so they are not duplicated in query - */ - Set pdbids = new HashSet<>(); - - if (seq.getAllPDBEntries() != null - && queryBuilder.length() < MAX_QLENGTH) - { - for (PDBEntry entry : seq.getAllPDBEntries()) - { - if (isValidSeqName(entry.getId())) - { - String id = entry.getId().toLowerCase(); - queryBuilder.append("pdb_id:").append(id).append(" OR "); - isPDBRefsFound = true; - pdbids.add(id); - } - } - } - - List refs = seq.getDBRefs(); - if (refs != null && refs.size() != 0) - { - for (int ib = 0, nb = refs.size(); ib < nb; ib++) - { - DBRefEntry dbRef = refs.get(ib); - if (isValidSeqName(getDBRefId(dbRef)) - && queryBuilder.length() < MAX_QLENGTH) - { - if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT)) - { - queryBuilder.append("uniprot_accession:") - .append(getDBRefId(dbRef)).append(" OR "); - queryBuilder.append("uniprot_id:").append(getDBRefId(dbRef)) - .append(" OR "); - isUniProtRefsFound = true; - } - else if (dbRef.getSource().equalsIgnoreCase(DBRefSource.PDB)) - { - - String id = getDBRefId(dbRef).toLowerCase(); - if (!pdbids.contains(id)) - { - queryBuilder.append("pdb_id:").append(id).append(" OR "); - isPDBRefsFound = true; - pdbids.add(id); - } - } - else - { - seqRefs.add(getDBRefId(dbRef)); - } - } - } - } - - if (!isPDBRefsFound && !isUniProtRefsFound) - { - String seqName = seq.getName(); - seqName = sanitizeSeqName(seqName); - String[] names = seqName.toLowerCase().split("\\|"); - for (String name : names) - { - // System.out.println("Found name : " + name); - name.trim(); - if (isValidSeqName(name)) - { - seqRefs.add(name); - } - } - - for (String seqRef : seqRefs) - { - queryBuilder.append("text:").append(seqRef).append(" OR "); - } - } - - int endIndex = queryBuilder.lastIndexOf(" OR "); - if (queryBuilder.toString().length() < 6) - { - return null; - } - String query = queryBuilder.toString().substring(0, endIndex); - return query; - } - - /** - * Remove the following special characters from input string +, -, &, !, (, ), - * {, }, [, ], ^, ", ~, *, ?, :, \ - * - * @param seqName - * @return - */ - static String sanitizeSeqName(String seqName) - { - Objects.requireNonNull(seqName); - return seqName.replaceAll("\\[\\d*\\]", "") - .replaceAll("[^\\dA-Za-z|_]", "").replaceAll("\\s+", "+"); - } - - /** - * Ensures sequence ref names are not less than 3 characters and does not - * contain a database name - * - * @param seqName - * @return - */ - static boolean isValidSeqName(String seqName) - { - // System.out.println("seqName : " + seqName); - String ignoreList = "pdb,uniprot,swiss-prot"; - if (seqName.length() < 3) - { - return false; - } - if (seqName.contains(":")) - { - return false; - } - seqName = seqName.toLowerCase(); - for (String ignoredEntry : ignoreList.split(",")) - { - if (seqName.contains(ignoredEntry)) - { - return false; - } - } - return true; - } - - static String getDBRefId(DBRefEntry dbRef) - { - String ref = dbRef.getAccessionId().replaceAll("GO:", ""); - return ref; - } - - /** * Filters a given list of discovered structures based on supplied argument * * @param fieldToFilterBy @@ -473,7 +310,6 @@ public class StructureChooser extends GStructureChooser public void run() { long startTime = System.currentTimeMillis(); - pdbRestClient = PDBFTSRestClient.getInstance(); lbl_loading.setVisible(true); Collection wantedFields = pdbDocFieldPrefs .getStructureSummaryFields(); @@ -482,41 +318,19 @@ public class StructureChooser extends GStructureChooser for (SequenceI seq : selectedSequences) { - FTSRestRequest pdbRequest = new FTSRestRequest(); - if (fieldToFilterBy.equalsIgnoreCase("uniprot_coverage")) - { - pdbRequest.setAllowEmptySeq(false); - pdbRequest.setResponseSize(1); - pdbRequest.setFieldToSearchBy("("); - pdbRequest.setSearchTerm(buildQuery(seq) + ")"); - pdbRequest.setWantedFields(wantedFields); - pdbRequest.setAssociatedSequence(seq); - pdbRequest.setFacet(true); - pdbRequest.setFacetPivot(fieldToFilterBy + ",entry_entity"); - pdbRequest.setFacetPivotMinCount(1); - } - else - { - pdbRequest.setAllowEmptySeq(false); - pdbRequest.setResponseSize(1); - pdbRequest.setFieldToSearchBy("("); - pdbRequest.setFieldToSortBy(fieldToFilterBy, - !chk_invertFilter.isSelected()); - pdbRequest.setSearchTerm(buildQuery(seq) + ")"); - pdbRequest.setWantedFields(wantedFields); - pdbRequest.setAssociatedSequence(seq); - } + FTSRestResponse resultList; try { - resultList = pdbRestClient.executeRequest(pdbRequest); + resultList = data.selectFirstRankedQuery(seq, wantedFields, fieldToFilterBy, + !chk_invertFilter.isSelected()); + } catch (Exception e) { e.printStackTrace(); errors.add(e.getMessage()); continue; } - lastPdbRequest = pdbRequest; if (resultList.getSearchSummary() != null && !resultList.getSearchSummary().isEmpty()) { @@ -532,8 +346,7 @@ public class StructureChooser extends GStructureChooser Collection reorderedStructuresSet = new LinkedHashSet<>(); reorderedStructuresSet.addAll(filteredResponse); reorderedStructuresSet.addAll(discoveredStructuresSet); - getResultTable().setModel(FTSRestResponse - .getTableModel(lastPdbRequest, reorderedStructuresSet)); + getResultTable().setModel(data.getTableModel(reorderedStructuresSet)); FTSRestResponse.configureTableColumn(getResultTable(), wantedFields, tempUserPrefs); @@ -1223,6 +1036,8 @@ public class StructureChooser extends GStructureChooser isValidPBDEntry = false; if (text.length() > 0) { + // TODO move this pdb id search into the PDB specific FTSSearchEngine + // for moment, it will work fine as is because it is self-contained String searchTerm = text.toLowerCase(); searchTerm = searchTerm.split(":")[0]; // System.out.println(">>>>> search term : " + searchTerm); @@ -1234,7 +1049,7 @@ public class StructureChooser extends GStructureChooser pdbRequest.setWantedFields(wantedFields); pdbRequest.setSearchTerm(searchTerm + ")"); pdbRequest.setAssociatedSequence(selectedSequence); - pdbRestClient = PDBFTSRestClient.getInstance(); + FTSRestClientI pdbRestClient = PDBFTSRestClient.getInstance(); wantedFields.add(pdbRestClient.getPrimaryKeyColumn()); FTSRestResponse resultList; try diff --git a/src/jalview/gui/StructureChooserQuerySource.java b/src/jalview/gui/StructureChooserQuerySource.java new file mode 100644 index 0000000..c127a78 --- /dev/null +++ b/src/jalview/gui/StructureChooserQuerySource.java @@ -0,0 +1,292 @@ +package jalview.gui; + +import java.util.Collection; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; + +import javax.swing.table.TableModel; + +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.DBRefSource; +import jalview.datamodel.PDBEntry; +import jalview.datamodel.SequenceI; +import jalview.fts.api.FTSData; +import jalview.fts.api.FTSDataColumnI; +import jalview.fts.api.FTSRestClientI; +import jalview.fts.core.FTSRestRequest; +import jalview.fts.core.FTSRestResponse; +import jalview.fts.service.pdb.PDBFTSRestClient; +import jalview.jbgui.GStructureChooser.FilterOption; + +/** + * logic for querying sources of structural data for structures of sequences + * + * @author jprocter + * + * @param + */ +public class StructureChooserQuerySource +{ + private FTSRestRequest lastPdbRequest; + + private FTSRestClientI pdbRestClient; + + private static int MAX_QLENGTH = 7820; + + public StructureChooserQuerySource() + { + } + + public static StructureChooserQuerySource getPDBfts() + { + StructureChooserQuerySource pdbfts = new StructureChooserQuerySource(); + pdbfts.pdbRestClient = PDBFTSRestClient.getInstance(); + return pdbfts; + } + + /** + * Builds a query string for a given sequences using its DBRef entries + * + * @param seq + * the sequences to build a query for + * @return the built query string + */ + + String buildQuery(SequenceI seq) + { + boolean isPDBRefsFound = false; + boolean isUniProtRefsFound = false; + StringBuilder queryBuilder = new StringBuilder(); + Set seqRefs = new LinkedHashSet<>(); + + /* + * note PDBs as DBRefEntry so they are not duplicated in query + */ + Set pdbids = new HashSet<>(); + + if (seq.getAllPDBEntries() != null + && queryBuilder.length() < MAX_QLENGTH) + { + for (PDBEntry entry : seq.getAllPDBEntries()) + { + if (isValidSeqName(entry.getId())) + { + String id = entry.getId().toLowerCase(); + queryBuilder.append("pdb_id:").append(id).append(" OR "); + isPDBRefsFound = true; + pdbids.add(id); + } + } + } + + List refs = seq.getDBRefs(); + if (refs != null && refs.size() != 0) + { + for (int ib = 0, nb = refs.size(); ib < nb; ib++) + { + DBRefEntry dbRef = refs.get(ib); + if (isValidSeqName(getDBRefId(dbRef)) + && queryBuilder.length() < MAX_QLENGTH) + { + if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT)) + { + queryBuilder.append("uniprot_accession:") + .append(getDBRefId(dbRef)).append(" OR "); + queryBuilder.append("uniprot_id:").append(getDBRefId(dbRef)) + .append(" OR "); + isUniProtRefsFound = true; + } + else if (dbRef.getSource().equalsIgnoreCase(DBRefSource.PDB)) + { + + String id = getDBRefId(dbRef).toLowerCase(); + if (!pdbids.contains(id)) + { + queryBuilder.append("pdb_id:").append(id).append(" OR "); + isPDBRefsFound = true; + pdbids.add(id); + } + } + else + { + seqRefs.add(getDBRefId(dbRef)); + } + } + } + } + + if (!isPDBRefsFound && !isUniProtRefsFound) + { + String seqName = seq.getName(); + seqName = sanitizeSeqName(seqName); + String[] names = seqName.toLowerCase().split("\\|"); + for (String name : names) + { + // System.out.println("Found name : " + name); + name.trim(); + if (isValidSeqName(name)) + { + seqRefs.add(name); + } + } + + for (String seqRef : seqRefs) + { + queryBuilder.append("text:").append(seqRef).append(" OR "); + } + } + + int endIndex = queryBuilder.lastIndexOf(" OR "); + if (queryBuilder.toString().length() < 6) + { + return null; + } + String query = queryBuilder.toString().substring(0, endIndex); + return query; + } + + /** + * Remove the following special characters from input string +, -, &, !, (, ), + * {, }, [, ], ^, ", ~, *, ?, :, \ + * + * @param seqName + * @return + */ + static String sanitizeSeqName(String seqName) + { + Objects.requireNonNull(seqName); + return seqName.replaceAll("\\[\\d*\\]", "") + .replaceAll("[^\\dA-Za-z|_]", "").replaceAll("\\s+", "+"); + } + + /** + * Ensures sequence ref names are not less than 3 characters and does not + * contain a database name + * + * @param seqName + * @return + */ + static boolean isValidSeqName(String seqName) + { + // System.out.println("seqName : " + seqName); + String ignoreList = "pdb,uniprot,swiss-prot"; + if (seqName.length() < 3) + { + return false; + } + if (seqName.contains(":")) + { + return false; + } + seqName = seqName.toLowerCase(); + for (String ignoredEntry : ignoreList.split(",")) + { + if (seqName.contains(ignoredEntry)) + { + return false; + } + } + return true; + } + + static String getDBRefId(DBRefEntry dbRef) + { + String ref = dbRef.getAccessionId().replaceAll("GO:", ""); + return ref; + } + + /** + * FTSRestClient specific query builder to recover associated structure data + * records for a sequence + * + * @param seq + * - seq to generate a query for + * @param wantedFields + * - fields to retrieve + * @param selectedFilterOpt + * - criterion for ranking results (e.g. resolution) + * @param b + * - sort ascending or descending + * @return + * @throws Exception + */ + public FTSRestResponse fetchStructuresMetaData(SequenceI seq, + Collection wantedFields, + FilterOption selectedFilterOpt, boolean b) throws Exception + { + FTSRestResponse resultList; + FTSRestRequest pdbRequest = new FTSRestRequest(); + pdbRequest.setAllowEmptySeq(false); + pdbRequest.setResponseSize(500); + pdbRequest.setFieldToSearchBy("("); + pdbRequest.setFieldToSortBy(selectedFilterOpt.getValue(), b); + pdbRequest.setWantedFields(wantedFields); + pdbRequest.setSearchTerm(buildQuery(seq) + ")"); + pdbRequest.setAssociatedSequence(seq); + resultList = pdbRestClient.executeRequest(pdbRequest); + + lastPdbRequest = pdbRequest; + return resultList; + } + + /** + * FTSRestClient specific query builder to pick top ranked entry from a + * fetchStructuresMetaData query + * + * @param seq + * - seq to generate a query for + * @param wantedFields + * - fields to retrieve + * @param selectedFilterOpt + * - criterion for ranking results (e.g. resolution) + * @param b + * - sort ascending or descending + * @return + * @throws Exception + */ + public FTSRestResponse selectFirstRankedQuery(SequenceI seq, + Collection wantedFields, String fieldToFilterBy, + boolean b) throws Exception + { + + FTSRestResponse resultList; + FTSRestRequest pdbRequest = new FTSRestRequest(); + if (fieldToFilterBy.equalsIgnoreCase("uniprot_coverage")) + { + pdbRequest.setAllowEmptySeq(false); + pdbRequest.setResponseSize(1); + pdbRequest.setFieldToSearchBy("("); + pdbRequest.setSearchTerm(buildQuery(seq) + ")"); + pdbRequest.setWantedFields(wantedFields); + pdbRequest.setAssociatedSequence(seq); + pdbRequest.setFacet(true); + pdbRequest.setFacetPivot(fieldToFilterBy + ",entry_entity"); + pdbRequest.setFacetPivotMinCount(1); + } + else + { + pdbRequest.setAllowEmptySeq(false); + pdbRequest.setResponseSize(1); + pdbRequest.setFieldToSearchBy("("); + pdbRequest.setFieldToSortBy(fieldToFilterBy, b); + pdbRequest.setSearchTerm(buildQuery(seq) + ")"); + pdbRequest.setWantedFields(wantedFields); + pdbRequest.setAssociatedSequence(seq); + } + resultList = pdbRestClient.executeRequest(pdbRequest); + + lastPdbRequest = pdbRequest; + return resultList; + } + + public TableModel getTableModel( + Collection discoveredStructuresSet) + { + return FTSRestResponse.getTableModel(lastPdbRequest, + discoveredStructuresSet); + } + +} \ No newline at end of file diff --git a/test/jalview/gui/StructureChooserTest.java b/test/jalview/gui/StructureChooserTest.java index 9529d9f..66e606a 100644 --- a/test/jalview/gui/StructureChooserTest.java +++ b/test/jalview/gui/StructureChooserTest.java @@ -88,15 +88,16 @@ public class StructureChooserTest public void buildQueryTest() { System.out.println("seq >>>> " + seq); - String query = StructureChooser.buildQuery(seq); + StructureChooserQuerySource scquery = StructureChooserQuerySource.getPDBfts(); + String query = scquery.buildQuery(seq); assertEquals("pdb_id:1tim", query); seq.getAllPDBEntries().clear(); - query = StructureChooser.buildQuery(seq); + query = scquery.buildQuery(seq); assertEquals( "text:XYZ_1 OR text:XYZ_2 OR text:XYZ_3 OR text:XYZ_4 OR text:4kqy", query); seq.setDBRefs(null); - query = StructureChooser.buildQuery(seq); + query = scquery.buildQuery(seq); System.out.println(query); assertEquals("text:4kqy", query); @@ -119,7 +120,7 @@ public class StructureChooserTest System.out.println(""); System.out.println(seq.getDBRefs()); System.out.println(query); - query = StructureChooser.buildQuery(seq); + query = scquery.buildQuery(seq); assertEquals( "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz", query); @@ -164,17 +165,17 @@ public class StructureChooserTest public void sanitizeSeqNameTest() { String name = "ab_cdEF|fwxyz012349"; - assertEquals(name, StructureChooser.sanitizeSeqName(name)); + assertEquals(name, StructureChooserQuerySource.sanitizeSeqName(name)); // remove a [nn] substring name = "abcde12[345]fg"; - assertEquals("abcde12fg", StructureChooser.sanitizeSeqName(name)); + assertEquals("abcde12fg", StructureChooserQuerySource.sanitizeSeqName(name)); // remove characters other than a-zA-Z0-9 | or _ name = "ab[cd],.\t£$*!- \\\"@:e"; - assertEquals("abcde", StructureChooser.sanitizeSeqName(name)); + assertEquals("abcde", StructureChooserQuerySource.sanitizeSeqName(name)); name = "abcde12[345a]fg"; - assertEquals("abcde12345afg", StructureChooser.sanitizeSeqName(name)); + assertEquals("abcde12345afg", StructureChooserQuerySource.sanitizeSeqName(name)); } } -- 1.7.10.2