1 package jalview.gui.structurechooser;
3 import java.util.ArrayList;
4 import java.util.Collection;
5 import java.util.HashSet;
6 import java.util.LinkedHashSet;
8 import java.util.Objects;
11 import javax.swing.JTable;
12 import javax.swing.table.TableModel;
14 import jalview.datamodel.DBRefEntry;
15 import jalview.datamodel.DBRefSource;
16 import jalview.datamodel.PDBEntry;
17 import jalview.datamodel.SequenceI;
18 import jalview.fts.api.FTSData;
19 import jalview.fts.api.FTSDataColumnI;
20 import jalview.fts.api.FTSRestClientI;
21 import jalview.fts.core.FTSDataColumnPreferences;
22 import jalview.fts.core.FTSDataColumnPreferences.PreferenceSource;
23 import jalview.fts.core.FTSRestRequest;
24 import jalview.fts.core.FTSRestResponse;
25 import jalview.fts.service.pdb.PDBFTSRestClient;
26 import jalview.jbgui.FilterOption;
27 import jalview.util.MessageManager;
30 * logic for querying the PDBe API for structures of sequences
34 public class PDBStructureChooserQuerySource
35 extends StructureChooserQuerySource
// Length threshold (in characters) for the query under construction:
// buildQuery only appends further terms while queryBuilder.length() is
// below this value.
// NOTE(review): named like a constant but not declared final - confirm
// nothing reassigns it before tightening the declaration.
38 private static int MAX_QLENGTH = 7820;
// The request most recently executed by fetchStructuresMetaData or
// selectFirstRankedQuery; handed out by getLastFTSRequest().
40 protected FTSRestRequest lastPdbRequest;
// Client used to execute every request in this class; assigned from
// PDBFTSRestClient.getInstance() in the constructor.
42 protected FTSRestClientI pdbRestClient;
// Creates a query source that talks to the PDBFTSRestClient instance and
// sets up the column preferences for the STRUCTURE_CHOOSER preference source.
44 public PDBStructureChooserQuerySource()
46 pdbRestClient = PDBFTSRestClient.getInstance();
// docFieldPrefs is not declared among the fields visible in this class -
// presumably inherited from StructureChooserQuerySource; confirm.
47 docFieldPrefs = new FTSDataColumnPreferences(
48 PreferenceSource.STRUCTURE_CHOOSER,
49 PDBFTSRestClient.getInstance());
55 * Builds a query string for a given sequence using its DBRef entries
58 * the sequence to build a query for
59 * @return the built query string
// Assembles the search expression for seq as a run of "field:value OR "
// terms: pdb_id terms from seq.getAllPDBEntries(), then uniprot_accession /
// uniprot_id / pdb_id terms from seq.getDBRefs(), then text terms from the
// ids collected in seqRefs. The query is the builder content up to the last
// " OR " separator.
// NOTE(review): braces and several short statements between the numbered
// lines are not visible in this listing, so the exact nesting and the
// return statements could not be checked here.
62 public String buildQuery(SequenceI seq)
64 boolean isPDBRefsFound = false;
65 boolean isUniProtRefsFound = false;
66 StringBuilder queryBuilder = new StringBuilder();
// LinkedHashSet: text terms are emitted in insertion order, without repeats.
67 Set<String> seqRefs = new LinkedHashSet<>();
70 * note PDBs as DBRefEntry so they are not duplicated in query
// pdbids is consulted further down to skip PDB DBRefs already queried.
// NOTE(review): no statement adding ids to pdbids is visible in this listing -
// confirm the ids of the PDB entries below are recorded in it.
72 Set<String> pdbids = new HashSet<>();
// No append to queryBuilder is visible before this point, so the length test
// is evaluated on an empty builder; it is not repeated per entry in the
// visible lines of the loop.
74 if (seq.getAllPDBEntries() != null
75 && queryBuilder.length() < MAX_QLENGTH)
77 for (PDBEntry entry : seq.getAllPDBEntries())
79 if (isValidSeqName(entry.getId()))
// PDB ids are lower-cased before they go into the pdb_id term.
81 String id = entry.getId().toLowerCase();
82 queryBuilder.append("pdb_id:").append(id).append(" OR ");
83 isPDBRefsFound = true;
89 List<DBRefEntry> refs = seq.getDBRefs();
90 if (refs != null && refs.size() != 0)
92 for (int ib = 0, nb = refs.size(); ib < nb; ib++)
94 DBRefEntry dbRef = refs.get(ib);
// A DBRef is considered only if its id passes isValidSeqName and the query
// built so far is still shorter than MAX_QLENGTH.
95 if (isValidSeqName(getDBRefId(dbRef))
96 && queryBuilder.length() < MAX_QLENGTH)
// UniProt references are searched both as accession and as id.
98 if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT))
100 queryBuilder.append("uniprot_accession:")
101 .append(getDBRefId(dbRef)).append(" OR ");
102 queryBuilder.append("uniprot_id:").append(getDBRefId(dbRef))
104 isUniProtRefsFound = true;
// PDB references add a pdb_id term unless the lower-cased id is in pdbids.
106 else if (dbRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
109 String id = getDBRefId(dbRef).toLowerCase();
110 if (!pdbids.contains(id))
112 queryBuilder.append("pdb_id:").append(id).append(" OR ");
113 isPDBRefsFound = true;
// Remembers the DBRef id for the free-text terms appended further down.
// Which branch this statement belongs to cannot be determined without the
// braces - presumably the fall-through for other sources; confirm.
119 seqRefs.add(getDBRefId(dbRef));
// Fallback when neither PDB nor UniProt references were found: the sequence
// name is sanitised (sanitizeSeqName throws NullPointerException for a null
// name), lower-cased and split on '|' into candidate names.
125 if (!isPDBRefsFound && !isUniProtRefsFound)
127 String seqName = seq.getName();
128 seqName = sanitizeSeqName(seqName);
129 String[] names = seqName.toLowerCase().split("\\|");
130 for (String name : names)
132 // System.out.println("Found name : " + name);
// The statement executed for a valid name is not visible here - presumably it
// adds the name to seqRefs; confirm.
134 if (isValidSeqName(name))
// Every collected reference becomes a free-text search term.
140 for (String seqRef : seqRefs)
142 queryBuilder.append("text:").append(seqRef).append(" OR ");
// endIndex is the start of the trailing " OR ", or -1 if no term was ever
// appended. The body of the length check below is not visible; presumably it
// returns early so that substring(0, -1) is never reached - confirm.
146 int endIndex = queryBuilder.lastIndexOf(" OR ");
147 if (queryBuilder.toString().length() < 6)
// Drops the trailing " OR " separator.
151 String query = queryBuilder.toString().substring(0, endIndex);
156 * Removes the following special characters from the input string: +, -, &, !, (, ),
157 * {, }, [, ], ^, ", ~, *, ?, :, \
// Returns seqName with bracketed digit groups such as "[12]" (or "[]")
// removed, and then every character other than digits, ASCII letters, '|'
// and '_' removed. Throws NullPointerException when seqName is null.
162 public static String sanitizeSeqName(String seqName)
164 Objects.requireNonNull(seqName);
// NOTE(review): the second replaceAll already strips whitespace (it is not in
// the allowed character class), so the final replaceAll("\\s+", "+") can never
// match - confirm whether spaces were meant to be turned into '+'.
165 return seqName.replaceAll("\\[\\d*\\]", "")
166 .replaceAll("[^\\dA-Za-z|_]", "").replaceAll("\\s+", "+");
170 * Ensures a sequence ref name is at least 3 characters long and does not
171 * contain a database name
// Checks seqName against three conditions: shorter than 3 characters,
// containing ':', or containing (case-insensitively) one of the entries of
// ignoreList. The statements run when a condition matches, and the final
// return, are not visible in this listing - presumably every match rejects
// the name and anything else is accepted; confirm.
// seqName is dereferenced without a null check.
176 static boolean isValidSeqName(String seqName)
178 // System.out.println("seqName : " + seqName);
// Comma-separated database names that must not occur anywhere in the name.
179 String ignoreList = "pdb,uniprot,swiss-prot";
180 if (seqName.length() < 3)
184 if (seqName.contains(":"))
// Lower-cased so that the substring tests below ignore case.
188 seqName = seqName.toLowerCase();
189 for (String ignoredEntry : ignoreList.split(","))
191 if (seqName.contains(ignoredEntry))
// Derives the id used in queries from dbRef's accession id, with every
// occurrence of "GO:" removed. The return statement is not visible in this
// listing - presumably ref is returned; confirm.
199 static String getDBRefId(DBRefEntry dbRef)
201 String ref = dbRef.getAccessionId().replaceAll("GO:", "");
206 * FTSRestClient specific query builder to recover associated structure data
207 * records for a sequence
210 * - seq to generate a query for
211 * @param wantedFields
212 * - fields to retrieve
213 * @param selectedFilterOpt
214 * - criterion for ranking results (e.g. resolution)
216 * - sort ascending or descending
// Builds a request for up to 500 records matching buildQuery(seq), sorted on
// selectedFilterOpt.getValue() with the direction flag b, executes it through
// pdbRestClient and records it as lastPdbRequest. The return statement is not
// visible in this listing - presumably resultList is returned; confirm.
220 public FTSRestResponse fetchStructuresMetaData(SequenceI seq,
221 Collection<FTSDataColumnI> wantedFields,
222 FilterOption selectedFilterOpt, boolean b) throws Exception
224 FTSRestResponse resultList;
225 FTSRestRequest pdbRequest = new FTSRestRequest();
226 pdbRequest.setAllowEmptySeq(false);
227 pdbRequest.setResponseSize(500);
// The search field is "(" and the term below ends with ")": presumably the
// client concatenates field and term so the whole OR-expression ends up in
// parentheses - verify against FTSRestRequest / the REST client.
228 pdbRequest.setFieldToSearchBy("(");
229 pdbRequest.setFieldToSortBy(selectedFilterOpt.getValue(), b);
230 pdbRequest.setWantedFields(wantedFields);
// NOTE(review): the result of buildQuery is concatenated without a null
// check - confirm buildQuery cannot return null here.
231 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
232 pdbRequest.setAssociatedSequence(seq);
233 resultList = pdbRestClient.executeRequest(pdbRequest);
// Assigned after executeRequest, so a request that throws is not recorded.
235 lastPdbRequest = pdbRequest;
// Builds the list of filter options offered for this source: one
// FilterOption per result field (overall_quality, resolution,
// number_of_protein_chains, number_of_bound_molecules,
// number_of_polymer_residues), each labelled from the message bundle, tagged
// with VIEWS_FILTER and bound to this query source. The return statement is
// not visible in this listing - presumably filters is returned; confirm.
// NOTE(review): only the last option passes true as the boolean argument; its
// meaning is defined by FilterOption's constructor, which is not shown here.
238 public List<FilterOption> getAvailableFilterOptions(String VIEWS_FILTER)
240 List<FilterOption> filters = new ArrayList<FilterOption>();
241 filters.add(new FilterOption(
242 MessageManager.getString("label.best_quality"),
243 "overall_quality", VIEWS_FILTER, false,this));
244 filters.add(new FilterOption(
245 MessageManager.getString("label.best_resolution"),
246 "resolution", VIEWS_FILTER, false,this));
247 filters.add(new FilterOption(
248 MessageManager.getString("label.most_protein_chain"),
249 "number_of_protein_chains", VIEWS_FILTER, false,this));
250 filters.add(new FilterOption(
251 MessageManager.getString("label.most_bound_molecules"),
252 "number_of_bound_molecules", VIEWS_FILTER, false,this));
253 filters.add(new FilterOption(
254 MessageManager.getString("label.most_polymer_residues"),
255 "number_of_polymer_residues", VIEWS_FILTER, true,this));
// Reports whether results must be fetched again before selectedFilterOpt can
// be applied. The return statement is not visible in this listing; going by
// the comment below it is presumably a constant false - confirm.
261 public boolean needsRefetch(FilterOption selectedFilterOpt)
263 // PDBe queries never need a refetch first
268 * FTSRestClient specific query builder to pick top ranked entry from a
269 * fetchStructuresMetaData query
272 * - seq to generate a query for
273 * @param wantedFields
274 * - fields to retrieve
275 * @param fieldToFilterBy
276 * - criterion for ranking results (e.g. resolution)
278 * - sort ascending or descending
// Builds a single-record request for buildQuery(seq), executes it through
// pdbRestClient and records it as lastPdbRequest. Two request shapes are
// configured: a facet-pivot request when fieldToFilterBy is
// "uniprot_coverage" (ignoring case), and a request sorted on fieldToFilterBy
// with the direction flag b. The "else" between the two and the return
// statement are not visible in this listing - presumably the second shape is
// the else branch and resultList is returned; confirm.
// NOTE(review): collectedResults is not used in any visible line.
282 public FTSRestResponse selectFirstRankedQuery(SequenceI seq, Collection<FTSData> collectedResults,
283 Collection<FTSDataColumnI> wantedFields, String fieldToFilterBy,
284 boolean b) throws Exception
287 FTSRestResponse resultList;
288 FTSRestRequest pdbRequest = new FTSRestRequest();
289 if (fieldToFilterBy.equalsIgnoreCase("uniprot_coverage"))
// Facet-pivot shape: no sort field is set; results are pivoted on
// "<fieldToFilterBy>,entry_entity" with a minimum count of 1.
291 pdbRequest.setAllowEmptySeq(false);
292 pdbRequest.setResponseSize(1);
293 pdbRequest.setFieldToSearchBy("(");
294 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
295 pdbRequest.setWantedFields(wantedFields);
296 pdbRequest.setAssociatedSequence(seq);
297 pdbRequest.setFacet(true);
298 pdbRequest.setFacetPivot(fieldToFilterBy + ",entry_entity");
299 pdbRequest.setFacetPivotMinCount(1);
// Sorted shape: same query, ordered on fieldToFilterBy, top record only.
303 pdbRequest.setAllowEmptySeq(false);
304 pdbRequest.setResponseSize(1);
305 pdbRequest.setFieldToSearchBy("(");
306 pdbRequest.setFieldToSortBy(fieldToFilterBy, b);
307 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
308 pdbRequest.setWantedFields(wantedFields);
309 pdbRequest.setAssociatedSequence(seq);
311 resultList = pdbRestClient.executeRequest(pdbRequest);
// Assigned after executeRequest, so a request that throws is not recorded.
313 lastPdbRequest = pdbRequest;
// Resolves each selected table row to a PDBEntry. For every row the PDB id
// and the associated sequence are read from the table, the sequence is
// appended to selectedSeqsToView (the caller's list is modified), and the
// entry is looked up on the sequence, then via getFindEntry; if still
// missing, a new MMCIF-typed entry is created and added to the sequence's
// dataset sequence. Returns one entry per selected row.
// NOTE(review): several continuation lines and the declaration of count are
// not visible in this listing; comments describe the visible statements only.
319 public PDBEntry[] collectSelectedRows(JTable restable, int[] selectedRows,
320 List<SequenceI> selectedSeqsToView)
// NOTE(review): the column indices used below come from getModelIndex()
// (model coordinates), while JTable.getValueAt takes view coordinates - these
// agree only while the columns have not been reordered; confirm.
322 int refSeqColIndex = restable.getColumn("Ref Sequence")
325 PDBEntry[] pdbEntriesToView=new PDBEntry[selectedRows.length];
327 int idColumnIndex=-1;
// fromTDB is not read in any visible line.
328 boolean fromTDB=true;
329 idColumnIndex = restable.getColumn("PDB Id").getModelIndex();
331 for (int row : selectedRows)
334 String pdbIdStr = restable.getValueAt(row,idColumnIndex)
336 SequenceI selectedSeq = (SequenceI) restable.getValueAt(row,
338 selectedSeqsToView.add(selectedSeq);
// First lookup: the entry registered on the sequence under this id.
339 PDBEntry pdbEntry = selectedSeq.getPDBEntry(pdbIdStr);
340 if (pdbEntry == null)
// Second lookup: search all of the sequence's PDB entries via getFindEntry
// (defined outside this view).
342 pdbEntry = getFindEntry(pdbIdStr,
343 selectedSeq.getAllPDBEntries());
// Still unknown: create a new entry and attach it to the dataset sequence.
// NOTE(review): getDatasetSequence() is dereferenced without a null check.
346 if (pdbEntry == null)
348 pdbEntry = new PDBEntry();
349 pdbEntry.setId(pdbIdStr);
350 pdbEntry.setType(PDBEntry.Type.MMCIF);
351 selectedSeq.getDatasetSequence().addPDBId(pdbEntry);
353 pdbEntriesToView[count++] = pdbEntry;
355 return pdbEntriesToView;
// Returns the request recorded by the last successful call to
// fetchStructuresMetaData or selectFirstRankedQuery; null until one of them
// has completed, since the field has no initialiser.
360 protected FTSRestRequest getLastFTSRequest()
362 return lastPdbRequest;
// Executes the given, already configured request through pdbRestClient and
// returns its response. Unlike fetchStructuresMetaData and
// selectFirstRankedQuery, this does not update lastPdbRequest.
366 public FTSRestResponse executePDBFTSRestRequest(FTSRestRequest pdbRequest) throws Exception
368 return pdbRestClient.executeRequest(pdbRequest);