1 package jalview.gui.structurechooser;
3 import java.util.Locale;
5 import java.util.ArrayList;
6 import java.util.Collection;
7 import java.util.HashSet;
8 import java.util.LinkedHashSet;
10 import java.util.Objects;
13 import javax.swing.JTable;
14 import javax.swing.table.TableModel;
16 import jalview.datamodel.DBRefEntry;
17 import jalview.datamodel.DBRefSource;
18 import jalview.datamodel.PDBEntry;
19 import jalview.datamodel.SequenceI;
20 import jalview.fts.api.FTSData;
21 import jalview.fts.api.FTSDataColumnI;
22 import jalview.fts.api.FTSRestClientI;
23 import jalview.fts.core.FTSDataColumnPreferences;
24 import jalview.fts.core.FTSDataColumnPreferences.PreferenceSource;
25 import jalview.fts.core.FTSRestRequest;
26 import jalview.fts.core.FTSRestResponse;
27 import jalview.fts.service.pdb.PDBFTSRestClient;
28 import jalview.jbgui.FilterOption;
29 import jalview.util.MessageManager;
32 * logic for querying the PDBe API for structures of sequences
36 public class PDBStructureChooserQuerySource
37 extends StructureChooserQuerySource
40 private static int MAX_QLENGTH = 7820;
42 protected FTSRestRequest lastPdbRequest;
44 protected FTSRestClientI pdbRestClient;
/**
 * Creates a query source backed by the PDB full-text-search client, with
 * column preferences taken from the structure chooser preference source.
 */
public PDBStructureChooserQuerySource()
{
  this.pdbRestClient = PDBFTSRestClient.getInstance();
  this.docFieldPrefs = new FTSDataColumnPreferences(
          PreferenceSource.STRUCTURE_CHOOSER, PDBFTSRestClient.getInstance());
}
57 * Builds a query string for a given sequence using its DBRef entries
60 * the sequence to build a query for
61 * @return the built query string
// Builds the search query for a sequence as a run of "field:value" terms, each
// followed by " OR ", drawn from the sequence's PDB entries and DB references.
// NOTE(review): this listing appears to have lines elided (braces and some
// statements); the notes below describe only what is visible - confirm against
// the complete file.
64 public String buildQuery(SequenceI seq)
// Track whether any PDB or UniProt term was added; the name-based fallback
// further down only runs when neither was.
66 boolean isPDBRefsFound = false;
67 boolean isUniProtRefsFound = false;
68 StringBuilder queryBuilder = new StringBuilder();
// Candidate free-text terms, kept in insertion order.
69 Set<String> seqRefs = new LinkedHashSet<>();
72 * note PDBs as DBRefEntry so they are not duplicated in query
74 Set<String> pdbids = new HashSet<>();
// queryBuilder is still empty at this point, so the length test below cannot
// fail here; only the null check is effective.
76 if (seq.getAllPDBEntries() != null
77 && queryBuilder.length() < MAX_QLENGTH)
79 for (PDBEntry entry : seq.getAllPDBEntries())
81 if (isValidSeqName(entry.getId()))
// PDB ids are lower-cased (locale-independently) before being used as a term.
83 String id = entry.getId().toLowerCase(Locale.ROOT);
84 queryBuilder.append("pdb_id:").append(id).append(" OR ");
85 isPDBRefsFound = true;
91 List<DBRefEntry> refs = seq.getDBRefs();
92 if (refs != null && refs.size() != 0)
94 for (int ib = 0, nb = refs.size(); ib < nb; ib++)
96 DBRefEntry dbRef = refs.get(ib);
// A ref is only considered when its id passes isValidSeqName and the query is
// still shorter than MAX_QLENGTH.
97 if (isValidSeqName(getDBRefId(dbRef))
98 && queryBuilder.length() < MAX_QLENGTH)
100 if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT))
// A UniProt ref contributes the same id under two fields: uniprot_accession
// and uniprot_id.
102 queryBuilder.append("uniprot_accession:")
103 .append(getDBRefId(dbRef)).append(" OR ");
104 queryBuilder.append("uniprot_id:").append(getDBRefId(dbRef))
106 isUniProtRefsFound = true;
108 else if (dbRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
111 String id = getDBRefId(dbRef).toLowerCase(Locale.ROOT);
// Skip PDB ids already recorded in pdbids.
// NOTE(review): no statement adding to pdbids is visible in this excerpt -
// confirm ids are recorded, otherwise this check never suppresses a duplicate.
112 if (!pdbids.contains(id))
114 queryBuilder.append("pdb_id:").append(id).append(" OR ");
115 isPDBRefsFound = true;
// The ref id is kept as a candidate free-text term.
// NOTE(review): the branch enclosing this line is not visible; presumably it
// handles refs that are neither UniProt nor PDB - confirm.
121 seqRefs.add(getDBRefId(dbRef));
// Fallback when no PDB or UniProt term was added: take the sanitized,
// lower-cased sequence name and split it on '|' into candidate names.
127 if (!isPDBRefsFound && !isUniProtRefsFound)
129 String seqName = seq.getName();
130 seqName = sanitizeSeqName(seqName);
131 String[] names = seqName.toLowerCase(Locale.ROOT).split("\\|");
132 for (String name : names)
134 // System.out.println("Found name : " + name);
136 if (isValidSeqName(name))
// Every collected candidate becomes a "text:" term.
142 for (String seqRef : seqRefs)
144 queryBuilder.append("text:").append(seqRef).append(" OR ");
// Index of the last " OR " separator, used below to cut the trailing one off.
148 int endIndex = queryBuilder.lastIndexOf(" OR ");
// NOTE(review): the body of this guard is not visible. If no term was
// appended, endIndex is -1, so the guard presumably returns before the
// substring call below - confirm.
149 if (queryBuilder.toString().length() < 6)
153 String query = queryBuilder.toString().substring(0, endIndex);
158 * Remove the following special characters from input string +, -, &, !, (, ),
159 * {, }, [, ], ^, ", ~, *, ?, :, \
164 public static String sanitizeSeqName(String seqName)
166 Objects.requireNonNull(seqName);
167 return seqName.replaceAll("\\[\\d*\\]", "")
168 .replaceAll("[^\\dA-Za-z|_]", "").replaceAll("\\s+", "+");
172 * Ensures sequence ref names are not less than 3 characters and does not
173 * contain a database name
178 static boolean isValidSeqName(String seqName)
180 // System.out.println("seqName : " + seqName);
181 String ignoreList = "pdb,uniprot,swiss-prot";
182 if (seqName.length() < 3)
186 if (seqName.contains(":"))
190 seqName = seqName.toLowerCase(Locale.ROOT);
191 for (String ignoredEntry : ignoreList.split(","))
193 if (seqName.contains(ignoredEntry))
201 static String getDBRefId(DBRefEntry dbRef)
203 String ref = dbRef.getAccessionId().replaceAll("GO:", "");
208 * FTSRestClient specific query builder to recover associated structure data
209 * records for a sequence
212 * - seq to generate a query for
213 * @param wantedFields
214 * - fields to retrieve
215 * @param selectedFilterOpt
216 * - criterion for ranking results (e.g. resolution)
218 * - sort ascending or descending
222 public FTSRestResponse fetchStructuresMetaData(SequenceI seq,
223 Collection<FTSDataColumnI> wantedFields,
224 FilterOption selectedFilterOpt, boolean b) throws Exception
226 FTSRestResponse resultList;
227 FTSRestRequest pdbRequest = new FTSRestRequest();
228 pdbRequest.setAllowEmptySeq(false);
229 pdbRequest.setResponseSize(500);
230 pdbRequest.setFieldToSearchBy("(");
231 pdbRequest.setFieldToSortBy(selectedFilterOpt.getValue(), b);
232 pdbRequest.setWantedFields(wantedFields);
233 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
234 pdbRequest.setAssociatedSequence(seq);
235 resultList = pdbRestClient.executeRequest(pdbRequest);
237 lastPdbRequest = pdbRequest;
240 public List<FilterOption> getAvailableFilterOptions(String VIEWS_FILTER)
242 List<FilterOption> filters = new ArrayList<FilterOption>();
243 filters.add(new FilterOption("PDBe "+
244 MessageManager.getString("label.best_quality"),
245 "overall_quality", VIEWS_FILTER, false,this));
246 filters.add(new FilterOption("PDBe "+
247 MessageManager.getString("label.best_resolution"),
248 "resolution", VIEWS_FILTER, false,this));
249 filters.add(new FilterOption("PDBe "+
250 MessageManager.getString("label.most_protein_chain"),
251 "number_of_protein_chains", VIEWS_FILTER, false,this));
252 filters.add(new FilterOption("PDBe "+
253 MessageManager.getString("label.most_bound_molecules"),
254 "number_of_bound_molecules", VIEWS_FILTER, false,this));
255 filters.add(new FilterOption("PDBe "+
256 MessageManager.getString("label.most_polymer_residues"),
257 "number_of_polymer_residues", VIEWS_FILTER, true,this));
// Reports whether a fresh fetch is required before the given filter option can
// be applied.
// NOTE(review): the method body is not visible in this excerpt; going by the
// inline comment below it presumably always returns false - confirm.
263 public boolean needsRefetch(FilterOption selectedFilterOpt)
265 // PDBe queries never need a refetch first
270 * FTSRestClient specific query builder to pick top ranked entry from a
271 * fetchStructuresMetaData query
274 * - seq to generate a query for
275 * @param wantedFields
276 * - fields to retrieve
277 * @param selectedFilterOpt
278 * - criterion for ranking results (e.g. resolution)
280 * - sort ascending or descending
284 public FTSRestResponse selectFirstRankedQuery(SequenceI seq, Collection<FTSData> collectedResults,
285 Collection<FTSDataColumnI> wantedFields, String fieldToFilterBy,
286 boolean b) throws Exception
289 FTSRestResponse resultList;
290 FTSRestRequest pdbRequest = new FTSRestRequest();
291 if (fieldToFilterBy.equalsIgnoreCase("uniprot_coverage"))
293 pdbRequest.setAllowEmptySeq(false);
294 pdbRequest.setResponseSize(1);
295 pdbRequest.setFieldToSearchBy("(");
296 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
297 pdbRequest.setWantedFields(wantedFields);
298 pdbRequest.setAssociatedSequence(seq);
299 pdbRequest.setFacet(true);
300 pdbRequest.setFacetPivot(fieldToFilterBy + ",entry_entity");
301 pdbRequest.setFacetPivotMinCount(1);
305 pdbRequest.setAllowEmptySeq(false);
306 pdbRequest.setResponseSize(1);
307 pdbRequest.setFieldToSearchBy("(");
308 pdbRequest.setFieldToSortBy(fieldToFilterBy, b);
309 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
310 pdbRequest.setWantedFields(wantedFields);
311 pdbRequest.setAssociatedSequence(seq);
313 resultList = pdbRestClient.executeRequest(pdbRequest);
315 lastPdbRequest = pdbRequest;
// Converts the selected rows of the results table into PDBEntry objects, one
// per selected row, and appends each row's reference sequence to
// selectedSeqsToView.
// NOTE(review): this listing appears to have lines elided (braces, expression
// tails, the declaration of `count`); the notes below describe only what is
// visible - confirm against the complete file.
321 public PDBEntry[] collectSelectedRows(JTable restable, int[] selectedRows,
322 List<SequenceI> selectedSeqsToView)
// Locate the "Ref Sequence" column (the rest of this expression is not visible).
324 int refSeqColIndex = restable.getColumn("Ref Sequence")
327 PDBEntry[] pdbEntriesToView=new PDBEntry[selectedRows.length];
// The -1 is overwritten unconditionally two lines below; fromTDB is not read
// anywhere in the visible lines.
329 int idColumnIndex=-1;
330 boolean fromTDB=true;
// NOTE(review): this is a *model* index, but it is later passed to
// JTable.getValueAt, which takes a *view* column index; the two differ once
// columns are reordered - confirm this is intended.
331 idColumnIndex = restable.getColumn("PDB Id").getModelIndex();
333 for (int row : selectedRows)
336 String pdbIdStr = restable.getValueAt(row,idColumnIndex)
338 SequenceI selectedSeq = (SequenceI) restable.getValueAt(row,
// The caller's list is mutated: every selected row contributes its sequence.
340 selectedSeqsToView.add(selectedSeq);
// Look the entry up on the sequence first, then via getFindEntry over all of
// the sequence's PDB entries.
341 PDBEntry pdbEntry = selectedSeq.getPDBEntry(pdbIdStr);
342 if (pdbEntry == null)
344 pdbEntry = getFindEntry(pdbIdStr,
345 selectedSeq.getAllPDBEntries());
// Still not found: create a new MMCIF-typed entry for this id and register it
// on the dataset sequence.
348 if (pdbEntry == null)
350 pdbEntry = new PDBEntry();
351 pdbEntry.setId(pdbIdStr);
352 pdbEntry.setType(PDBEntry.Type.MMCIF);
353 selectedSeq.getDatasetSequence().addPDBId(pdbEntry);
// NOTE(review): `count` is not declared in the visible lines; presumably it
// starts at 0 before the loop - confirm.
355 pdbEntriesToView[count++] = pdbEntry;
357 return pdbEntriesToView;
362 protected FTSRestRequest getLastFTSRequest()
364 return lastPdbRequest;
368 public FTSRestResponse executePDBFTSRestRequest(FTSRestRequest pdbRequest) throws Exception
370 return pdbRestClient.executeRequest(pdbRequest);