import jalview.fts.service.pdb.PDBFTSRestClient;
import jalview.io.DataSourceType;
import jalview.jbgui.GStructureChooser;
+import jalview.jbgui.GStructureChooser.FilterOption;
import jalview.structure.StructureMapping;
import jalview.structure.StructureSelectionManager;
import jalview.util.MessageManager;
{
private static final String AUTOSUPERIMPOSE = "AUTOSUPERIMPOSE";
- private static int MAX_QLENGTH = 7820;
-
private SequenceI selectedSequence;
private SequenceI[] selectedSequences;
private Collection<FTSData> discoveredStructuresSet;
- private FTSRestRequest lastPdbRequest;
-
- private FTSRestClientI pdbRestClient;
+ private StructureChooserQuerySource data = StructureChooserQuerySource.getPDBfts();
private String selectedPdbFileName;
void fetchStructuresMetaData()
{
long startTime = System.currentTimeMillis();
- pdbRestClient = PDBFTSRestClient.getInstance();
Collection<FTSDataColumnI> wantedFields = pdbDocFieldPrefs
.getStructureSummaryFields();
discoveredStructuresSet = new LinkedHashSet<>();
HashSet<String> errors = new HashSet<>();
+
+ FilterOption selectedFilterOpt = ((FilterOption) cmb_filterOption
+ .getSelectedItem());
+
for (SequenceI seq : selectedSequences)
{
- FTSRestRequest pdbRequest = new FTSRestRequest();
- pdbRequest.setAllowEmptySeq(false);
- pdbRequest.setResponseSize(500);
- pdbRequest.setFieldToSearchBy("(");
- FilterOption selectedFilterOpt = ((FilterOption) cmb_filterOption
- .getSelectedItem());
- pdbRequest.setFieldToSortBy(selectedFilterOpt.getValue(),
- !chk_invertFilter.isSelected());
- pdbRequest.setWantedFields(wantedFields);
- pdbRequest.setSearchTerm(buildQuery(seq) + ")");
- pdbRequest.setAssociatedSequence(seq);
+
FTSRestResponse resultList;
try
{
- resultList = pdbRestClient.executeRequest(pdbRequest);
+ resultList = data.fetchStructuresMetaData(seq, wantedFields, selectedFilterOpt, !chk_invertFilter.isSelected());
} catch (Exception e)
{
e.printStackTrace();
errors.add(e.getMessage());
continue;
}
- lastPdbRequest = pdbRequest;
if (resultList.getSearchSummary() != null
&& !resultList.getSearchSummary().isEmpty())
{
if (discoveredStructuresSet != null
&& !discoveredStructuresSet.isEmpty())
{
- getResultTable().setModel(FTSRestResponse
- .getTableModel(lastPdbRequest, discoveredStructuresSet));
+ getResultTable().setModel(data.getTableModel(discoveredStructuresSet));
noOfStructuresFound = discoveredStructuresSet.size();
mainFrame.setTitle(MessageManager.formatMessage(
"label.structure_chooser_no_of_structures",
}
/**
- * Builds a query string for a given sequences using its DBRef entries
- *
- * @param seq
- * the sequences to build a query for
- * @return the built query string
- */
-
- static String buildQuery(SequenceI seq)
- {
- boolean isPDBRefsFound = false;
- boolean isUniProtRefsFound = false;
- StringBuilder queryBuilder = new StringBuilder();
- Set<String> seqRefs = new LinkedHashSet<>();
-
- /*
- * note PDBs as DBRefEntry so they are not duplicated in query
- */
- Set<String> pdbids = new HashSet<>();
-
- if (seq.getAllPDBEntries() != null
- && queryBuilder.length() < MAX_QLENGTH)
- {
- for (PDBEntry entry : seq.getAllPDBEntries())
- {
- if (isValidSeqName(entry.getId()))
- {
- String id = entry.getId().toLowerCase();
- queryBuilder.append("pdb_id:").append(id).append(" OR ");
- isPDBRefsFound = true;
- pdbids.add(id);
- }
- }
- }
-
- List<DBRefEntry> refs = seq.getDBRefs();
- if (refs != null && refs.size() != 0)
- {
- for (int ib = 0, nb = refs.size(); ib < nb; ib++)
- {
- DBRefEntry dbRef = refs.get(ib);
- if (isValidSeqName(getDBRefId(dbRef))
- && queryBuilder.length() < MAX_QLENGTH)
- {
- if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT))
- {
- queryBuilder.append("uniprot_accession:")
- .append(getDBRefId(dbRef)).append(" OR ");
- queryBuilder.append("uniprot_id:").append(getDBRefId(dbRef))
- .append(" OR ");
- isUniProtRefsFound = true;
- }
- else if (dbRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
- {
-
- String id = getDBRefId(dbRef).toLowerCase();
- if (!pdbids.contains(id))
- {
- queryBuilder.append("pdb_id:").append(id).append(" OR ");
- isPDBRefsFound = true;
- pdbids.add(id);
- }
- }
- else
- {
- seqRefs.add(getDBRefId(dbRef));
- }
- }
- }
- }
-
- if (!isPDBRefsFound && !isUniProtRefsFound)
- {
- String seqName = seq.getName();
- seqName = sanitizeSeqName(seqName);
- String[] names = seqName.toLowerCase().split("\\|");
- for (String name : names)
- {
- // System.out.println("Found name : " + name);
- name.trim();
- if (isValidSeqName(name))
- {
- seqRefs.add(name);
- }
- }
-
- for (String seqRef : seqRefs)
- {
- queryBuilder.append("text:").append(seqRef).append(" OR ");
- }
- }
-
- int endIndex = queryBuilder.lastIndexOf(" OR ");
- if (queryBuilder.toString().length() < 6)
- {
- return null;
- }
- String query = queryBuilder.toString().substring(0, endIndex);
- return query;
- }
-
- /**
- * Remove the following special characters from input string +, -, &, !, (, ),
- * {, }, [, ], ^, ", ~, *, ?, :, \
- *
- * @param seqName
- * @return
- */
- static String sanitizeSeqName(String seqName)
- {
- Objects.requireNonNull(seqName);
- return seqName.replaceAll("\\[\\d*\\]", "")
- .replaceAll("[^\\dA-Za-z|_]", "").replaceAll("\\s+", "+");
- }
-
- /**
- * Ensures sequence ref names are not less than 3 characters and does not
- * contain a database name
- *
- * @param seqName
- * @return
- */
- static boolean isValidSeqName(String seqName)
- {
- // System.out.println("seqName : " + seqName);
- String ignoreList = "pdb,uniprot,swiss-prot";
- if (seqName.length() < 3)
- {
- return false;
- }
- if (seqName.contains(":"))
- {
- return false;
- }
- seqName = seqName.toLowerCase();
- for (String ignoredEntry : ignoreList.split(","))
- {
- if (seqName.contains(ignoredEntry))
- {
- return false;
- }
- }
- return true;
- }
-
- static String getDBRefId(DBRefEntry dbRef)
- {
- String ref = dbRef.getAccessionId().replaceAll("GO:", "");
- return ref;
- }
-
- /**
* Filters a given list of discovered structures based on supplied argument
*
* @param fieldToFilterBy
public void run()
{
long startTime = System.currentTimeMillis();
- pdbRestClient = PDBFTSRestClient.getInstance();
lbl_loading.setVisible(true);
Collection<FTSDataColumnI> wantedFields = pdbDocFieldPrefs
.getStructureSummaryFields();
for (SequenceI seq : selectedSequences)
{
- FTSRestRequest pdbRequest = new FTSRestRequest();
- if (fieldToFilterBy.equalsIgnoreCase("uniprot_coverage"))
- {
- pdbRequest.setAllowEmptySeq(false);
- pdbRequest.setResponseSize(1);
- pdbRequest.setFieldToSearchBy("(");
- pdbRequest.setSearchTerm(buildQuery(seq) + ")");
- pdbRequest.setWantedFields(wantedFields);
- pdbRequest.setAssociatedSequence(seq);
- pdbRequest.setFacet(true);
- pdbRequest.setFacetPivot(fieldToFilterBy + ",entry_entity");
- pdbRequest.setFacetPivotMinCount(1);
- }
- else
- {
- pdbRequest.setAllowEmptySeq(false);
- pdbRequest.setResponseSize(1);
- pdbRequest.setFieldToSearchBy("(");
- pdbRequest.setFieldToSortBy(fieldToFilterBy,
- !chk_invertFilter.isSelected());
- pdbRequest.setSearchTerm(buildQuery(seq) + ")");
- pdbRequest.setWantedFields(wantedFields);
- pdbRequest.setAssociatedSequence(seq);
- }
+
FTSRestResponse resultList;
try
{
- resultList = pdbRestClient.executeRequest(pdbRequest);
+ resultList = data.selectFirstRankedQuery(seq, wantedFields, fieldToFilterBy,
+ !chk_invertFilter.isSelected());
+
} catch (Exception e)
{
e.printStackTrace();
errors.add(e.getMessage());
continue;
}
- lastPdbRequest = pdbRequest;
if (resultList.getSearchSummary() != null
&& !resultList.getSearchSummary().isEmpty())
{
Collection<FTSData> reorderedStructuresSet = new LinkedHashSet<>();
reorderedStructuresSet.addAll(filteredResponse);
reorderedStructuresSet.addAll(discoveredStructuresSet);
- getResultTable().setModel(FTSRestResponse
- .getTableModel(lastPdbRequest, reorderedStructuresSet));
+ getResultTable().setModel(data.getTableModel(reorderedStructuresSet));
FTSRestResponse.configureTableColumn(getResultTable(),
wantedFields, tempUserPrefs);
isValidPBDEntry = false;
if (text.length() > 0)
{
+ // TODO move this pdb id search into the PDB specific FTSSearchEngine
+ // for moment, it will work fine as is because it is self-contained
String searchTerm = text.toLowerCase();
searchTerm = searchTerm.split(":")[0];
// System.out.println(">>>>> search term : " + searchTerm);
pdbRequest.setWantedFields(wantedFields);
pdbRequest.setSearchTerm(searchTerm + ")");
pdbRequest.setAssociatedSequence(selectedSequence);
- pdbRestClient = PDBFTSRestClient.getInstance();
+ FTSRestClientI pdbRestClient = PDBFTSRestClient.getInstance();
wantedFields.add(pdbRestClient.getPrimaryKeyColumn());
FTSRestResponse resultList;
try
--- /dev/null
+package jalview.gui;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Objects;
+import java.util.Set;
+
+import javax.swing.table.TableModel;
+
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.PDBEntry;
+import jalview.datamodel.SequenceI;
+import jalview.fts.api.FTSData;
+import jalview.fts.api.FTSDataColumnI;
+import jalview.fts.api.FTSRestClientI;
+import jalview.fts.core.FTSRestRequest;
+import jalview.fts.core.FTSRestResponse;
+import jalview.fts.service.pdb.PDBFTSRestClient;
+import jalview.jbgui.GStructureChooser.FilterOption;
+
+/**
+ * Logic for querying sources of structural data for structures of sequences.
+ *
+ * @author jprocter
+ */
+public class StructureChooserQuerySource
+{
+  /**
+   * The request most recently executed without error by
+   * fetchStructuresMetaData or selectFirstRankedQuery; read by getTableModel
+   */
+  private FTSRestRequest lastPdbRequest;
+
+  /**
+   * Client used to execute requests; assigned by getPDBfts()
+   */
+  private FTSRestClientI pdbRestClient;
+
+  /**
+   * Once a query under construction has reached this many characters,
+   * buildQuery stops adding further database reference terms to it
+   */
+  private static final int MAX_QLENGTH = 7820;
+
+ /**
+ * Creates a query source without assigning a REST client: the body is empty,
+ * so pdbRestClient keeps its default value of null. getPDBfts() returns an
+ * instance that has the client set.
+ */
+ public StructureChooserQuerySource()
+ {
+ }
+
+  /**
+   * Factory for a query source that sends its requests through the
+   * PDBFTSRestClient instance
+   *
+   * @return a new query source whose REST client is
+   *         PDBFTSRestClient.getInstance()
+   */
+  public static StructureChooserQuerySource getPDBfts()
+  {
+    final StructureChooserQuerySource source = new StructureChooserQuerySource();
+    source.pdbRestClient = PDBFTSRestClient.getInstance();
+    return source;
+  }
+
+  /**
+   * Builds a query string for a sequence from its PDB entries and its DBRef
+   * entries. If neither a PDB nor a UniProt reference is found, the query
+   * falls back to "text:" terms built from the sanitized sequence name and
+   * from the accessions of any other DBRefs. Terms are joined with " OR ".
+   *
+   * @param seq
+   *          the sequence to build a query for
+   * @return the built query string, or null if no usable term was found
+   */
+  String buildQuery(SequenceI seq)
+  {
+    boolean isPDBRefsFound = false;
+    boolean isUniProtRefsFound = false;
+    StringBuilder queryBuilder = new StringBuilder();
+    Set<String> seqRefs = new LinkedHashSet<>();
+
+    /*
+     * note PDBs as DBRefEntry so they are not duplicated in query
+     */
+    Set<String> pdbids = new HashSet<>();
+
+    if (seq.getAllPDBEntries() != null)
+    {
+      for (PDBEntry entry : seq.getAllPDBEntries())
+      {
+        /*
+         * the length limit is tested for every entry (as for DBRefs below);
+         * a single test before the loop would always pass because the
+         * builder is still empty at that point
+         */
+        if (queryBuilder.length() < MAX_QLENGTH
+                && isValidSeqName(entry.getId()))
+        {
+          // Locale.ROOT: ids must lower-case the same way in every locale
+          String id = entry.getId().toLowerCase(Locale.ROOT);
+          queryBuilder.append("pdb_id:").append(id).append(" OR ");
+          isPDBRefsFound = true;
+          pdbids.add(id);
+        }
+      }
+    }
+
+    List<DBRefEntry> refs = seq.getDBRefs();
+    if (refs != null)
+    {
+      for (DBRefEntry dbRef : refs)
+      {
+        String refId = getDBRefId(dbRef);
+        if (!isValidSeqName(refId) || queryBuilder.length() >= MAX_QLENGTH)
+        {
+          continue;
+        }
+        if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT))
+        {
+          queryBuilder.append("uniprot_accession:").append(refId)
+                  .append(" OR ");
+          queryBuilder.append("uniprot_id:").append(refId).append(" OR ");
+          isUniProtRefsFound = true;
+        }
+        else if (dbRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
+        {
+          String id = refId.toLowerCase(Locale.ROOT);
+          // Set.add answers false for an id already added from a PDB entry
+          if (pdbids.add(id))
+          {
+            queryBuilder.append("pdb_id:").append(id).append(" OR ");
+            isPDBRefsFound = true;
+          }
+        }
+        else
+        {
+          seqRefs.add(refId);
+        }
+      }
+    }
+
+    if (!isPDBRefsFound && !isUniProtRefsFound)
+    {
+      String seqName = sanitizeSeqName(seq.getName());
+      for (String name : seqName.toLowerCase(Locale.ROOT).split("\\|"))
+      {
+        // no trim needed: sanitizeSeqName() leaves only letters, digits,
+        // '|' and '_' in the name
+        if (isValidSeqName(name))
+        {
+          seqRefs.add(name);
+        }
+      }
+
+      for (String seqRef : seqRefs)
+      {
+        queryBuilder.append("text:").append(seqRef).append(" OR ");
+      }
+    }
+
+    if (queryBuilder.length() < 6)
+    {
+      return null;
+    }
+    // drop the trailing " OR " separator
+    return queryBuilder.substring(0, queryBuilder.lastIndexOf(" OR "));
+  }
+
+  /**
+   * Removes any "[digits]" substring (brackets included) from the input, then
+   * removes every remaining character that is not an ASCII letter, a digit,
+   * '|' or '_'. Whitespace is therefore removed as well.
+   *
+   * @param seqName
+   *          the sequence name to clean up; must not be null
+   * @return the sanitized name
+   * @throws NullPointerException
+   *           if seqName is null
+   */
+  static String sanitizeSeqName(String seqName)
+  {
+    Objects.requireNonNull(seqName);
+    String withoutIndices = seqName.replaceAll("\\[\\d*\\]", "");
+    // no separate whitespace step is needed: the negated class below also
+    // matches, and so removes, every whitespace character
+    return withoutIndices.replaceAll("[^\\dA-Za-z|_]", "");
+  }
+
+  /**
+   * Answers true if the given name is usable as a search term: it is at least
+   * 3 characters long, contains no ':' and does not contain (ignoring case)
+   * any of the database names "pdb", "uniprot" or "swiss-prot".
+   *
+   * @param seqName
+   *          the candidate name; null is treated as not valid
+   * @return true if the name may be used in a query, else false
+   */
+  static boolean isValidSeqName(String seqName)
+  {
+    if (seqName == null || seqName.length() < 3 || seqName.contains(":"))
+    {
+      return false;
+    }
+    // Locale.ROOT so that matching does not depend on the default locale
+    // (a Turkish locale, for example, lower-cases 'I' to a dotless i)
+    String lowerCased = seqName.toLowerCase(Locale.ROOT);
+    for (String ignoredEntry : new String[] { "pdb", "uniprot",
+        "swiss-prot" })
+    {
+      if (lowerCased.contains(ignoredEntry))
+      {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Returns the accession id of a database reference with every occurrence
+   * of "GO:" removed
+   *
+   * @param dbRef
+   *          the database reference
+   * @return the accession id without any "GO:" substring
+   */
+  static String getDBRefId(DBRefEntry dbRef)
+  {
+    return dbRef.getAccessionId().replaceAll("GO:", "");
+  }
+
+  /**
+   * Builds and executes a request (response size 500) for the structure data
+   * records associated with a sequence. The search term is the result of
+   * buildQuery(seq) wrapped in parentheses. The request is remembered for use
+   * by getTableModel once it has executed without throwing.
+   *
+   * @param seq
+   *          - seq to generate a query for
+   * @param wantedFields
+   *          - fields to retrieve
+   * @param selectedFilterOpt
+   *          - criterion for ranking results (e.g. resolution)
+   * @param b
+   *          - sort direction flag, passed as the second argument of
+   *          setFieldToSortBy
+   * @return the response returned by the REST client
+   * @throws Exception
+   *           any exception raised while building or executing the request
+   */
+  public FTSRestResponse fetchStructuresMetaData(SequenceI seq,
+          Collection<FTSDataColumnI> wantedFields,
+          FilterOption selectedFilterOpt, boolean b) throws Exception
+  {
+    final FTSRestRequest request = new FTSRestRequest();
+    request.setAllowEmptySeq(false);
+    request.setResponseSize(500);
+    request.setFieldToSearchBy("(");
+    request.setFieldToSortBy(selectedFilterOpt.getValue(), b);
+    request.setWantedFields(wantedFields);
+    request.setSearchTerm(buildQuery(seq) + ")");
+    request.setAssociatedSequence(seq);
+
+    final FTSRestResponse response = pdbRestClient.executeRequest(request);
+
+    // reached only if executeRequest did not throw
+    lastPdbRequest = request;
+    return response;
+  }
+
+  /**
+   * Builds and executes a request with a response size of 1 for a sequence.
+   * When fieldToFilterBy is "uniprot_coverage" (ignoring case) the request
+   * uses a facet pivot on that field and "entry_entity"; otherwise it sorts
+   * by fieldToFilterBy. The request is remembered for use by getTableModel
+   * once it has executed without throwing.
+   *
+   * @param seq
+   *          - seq to generate a query for
+   * @param wantedFields
+   *          - fields to retrieve
+   * @param fieldToFilterBy
+   *          - name of the field used for ranking results
+   * @param b
+   *          - sort direction flag, passed as the second argument of
+   *          setFieldToSortBy (not used for "uniprot_coverage")
+   * @return the response returned by the REST client
+   * @throws Exception
+   *           any exception raised while building or executing the request
+   */
+  public FTSRestResponse selectFirstRankedQuery(SequenceI seq,
+          Collection<FTSDataColumnI> wantedFields, String fieldToFilterBy,
+          boolean b) throws Exception
+  {
+    final FTSRestRequest request = new FTSRestRequest();
+    final boolean byCoverage = fieldToFilterBy
+            .equalsIgnoreCase("uniprot_coverage");
+
+    // settings shared by both kinds of request
+    request.setAllowEmptySeq(false);
+    request.setResponseSize(1);
+    request.setFieldToSearchBy("(");
+    if (!byCoverage)
+    {
+      request.setFieldToSortBy(fieldToFilterBy, b);
+    }
+    request.setSearchTerm(buildQuery(seq) + ")");
+    request.setWantedFields(wantedFields);
+    request.setAssociatedSequence(seq);
+    if (byCoverage)
+    {
+      request.setFacet(true);
+      request.setFacetPivot(fieldToFilterBy + ",entry_entity");
+      request.setFacetPivotMinCount(1);
+    }
+
+    final FTSRestResponse response = pdbRestClient.executeRequest(request);
+
+    // reached only if executeRequest did not throw
+    lastPdbRequest = request;
+    return response;
+  }
+
+  /**
+   * Builds a table model for the given structure records, passing along the
+   * request most recently stored by fetchStructuresMetaData or
+   * selectFirstRankedQuery (null if neither has completed yet)
+   *
+   * @param discoveredStructuresSet
+   *          the structure records to show
+   * @return the model returned by FTSRestResponse.getTableModel
+   */
+  public TableModel getTableModel(
+          Collection<FTSData> discoveredStructuresSet)
+  {
+    final FTSRestRequest request = lastPdbRequest;
+    return FTSRestResponse.getTableModel(request, discoveredStructuresSet);
+  }
+
+}
\ No newline at end of file
public void buildQueryTest()
{
System.out.println("seq >>>> " + seq);
- String query = StructureChooser.buildQuery(seq);
+ StructureChooserQuerySource scquery = StructureChooserQuerySource.getPDBfts();
+ String query = scquery.buildQuery(seq);
assertEquals("pdb_id:1tim", query);
seq.getAllPDBEntries().clear();
- query = StructureChooser.buildQuery(seq);
+ query = scquery.buildQuery(seq);
assertEquals(
"text:XYZ_1 OR text:XYZ_2 OR text:XYZ_3 OR text:XYZ_4 OR text:4kqy",
query);
seq.setDBRefs(null);
- query = StructureChooser.buildQuery(seq);
+ query = scquery.buildQuery(seq);
System.out.println(query);
assertEquals("text:4kqy", query);
System.out.println("");
System.out.println(seq.getDBRefs());
System.out.println(query);
- query = StructureChooser.buildQuery(seq);
+ query = scquery.buildQuery(seq);
assertEquals(
"uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
query);
public void sanitizeSeqNameTest()
{
String name = "ab_cdEF|fwxyz012349";
- assertEquals(name, StructureChooser.sanitizeSeqName(name));
+ assertEquals(name, StructureChooserQuerySource.sanitizeSeqName(name));
// remove a [nn] substring
name = "abcde12[345]fg";
- assertEquals("abcde12fg", StructureChooser.sanitizeSeqName(name));
+ assertEquals("abcde12fg", StructureChooserQuerySource.sanitizeSeqName(name));
// remove characters other than a-zA-Z0-9 | or _
name = "ab[cd],.\t£$*!- \\\"@:e";
- assertEquals("abcde", StructureChooser.sanitizeSeqName(name));
+ assertEquals("abcde", StructureChooserQuerySource.sanitizeSeqName(name));
name = "abcde12[345a]fg";
- assertEquals("abcde12345afg", StructureChooser.sanitizeSeqName(name));
+ assertEquals("abcde12345afg", StructureChooserQuerySource.sanitizeSeqName(name));
}
}