1 package jalview.gui.structurechooser;
3 import java.util.Locale;
5 import java.util.ArrayList;
6 import java.util.Collection;
7 import java.util.HashSet;
8 import java.util.LinkedHashSet;
10 import java.util.Objects;
13 import javax.swing.JTable;
14 import javax.swing.table.TableModel;
16 import jalview.datamodel.DBRefEntry;
17 import jalview.datamodel.DBRefSource;
18 import jalview.datamodel.PDBEntry;
19 import jalview.datamodel.SequenceI;
20 import jalview.fts.api.FTSData;
21 import jalview.fts.api.FTSDataColumnI;
22 import jalview.fts.api.FTSRestClientI;
23 import jalview.fts.core.FTSDataColumnPreferences;
24 import jalview.fts.core.FTSDataColumnPreferences.PreferenceSource;
25 import jalview.fts.core.FTSRestRequest;
26 import jalview.fts.core.FTSRestResponse;
27 import jalview.fts.service.pdb.PDBFTSRestClient;
28 import jalview.jbgui.FilterOption;
29 import jalview.util.MessageManager;
32 * logic for querying the PDBe API for structures of sequences
36 public class PDBStructureChooserQuerySource
37 extends StructureChooserQuerySource
// Length limit, in characters, compared against the query builder in buildQuery
// before PDB-entry and DBRef terms are appended.
40 private static int MAX_QLENGTH = 7820;
// Request most recently executed by fetchStructuresMetaData or
// selectFirstRankedQuery; handed out by getLastFTSRequest.
42 protected FTSRestRequest lastPdbRequest;
// Client that executes every request of this query source; assigned from
// PDBFTSRestClient.getInstance() in the constructor.
44 protected FTSRestClientI pdbRestClient;
/**
 * Sets up the query source: the REST client is taken from
 * PDBFTSRestClient.getInstance() and the column preferences are created for
 * PreferenceSource.STRUCTURE_CHOOSER against that same client class.
 */
public PDBStructureChooserQuerySource()
{
  this.pdbRestClient = PDBFTSRestClient.getInstance();
  docFieldPrefs = new FTSDataColumnPreferences(
          PreferenceSource.STRUCTURE_CHOOSER,
          PDBFTSRestClient.getInstance());
}
56 * Builds a query string for a given sequence using its DBRef entries
59 * the sequence to build a query for
60 * @return the built query string
// Assembles a search expression for seq as a run of field:value terms, each
// followed by the separator OR, drawn from the PDB entries and DBRefs of seq.
// NOTE(review): this listing appears to be missing lines (block braces, some
// short statements and the return statements); the comments below describe
// only the statements that are visible - confirm against the full file.
63 public String buildQuery(SequenceI seq)
// flags recording whether any pdb_id or uniprot term has been appended
65 boolean isPDBRefsFound = false;
66 boolean isUniProtRefsFound = false;
67 StringBuilder queryBuilder = new StringBuilder();
// ids that are later emitted as text: terms; insertion order is kept
68 Set<String> seqRefs = new LinkedHashSet<>();
71 * note PDBs as DBRefEntry so they are not duplicated in query
73 Set<String> pdbids = new HashSet<>();
// the length test here runs once, while the builder is still empty
75 if (seq.getAllPDBEntries() != null
76 && queryBuilder.length() < MAX_QLENGTH)
78 for (PDBEntry entry : seq.getAllPDBEntries())
80 if (isValidSeqName(entry.getId()))
// PDB ids are lower-cased independently of the default locale
82 String id = entry.getId().toLowerCase(Locale.ROOT);
83 queryBuilder.append("pdb_id:").append(id).append(" OR ");
84 isPDBRefsFound = true;
90 List<DBRefEntry> refs = seq.getDBRefs();
91 if (refs != null && refs.size() != 0)
93 for (int ib = 0, nb = refs.size(); ib < nb; ib++)
95 DBRefEntry dbRef = refs.get(ib);
// a ref is used only if its id passes isValidSeqName and the query built so
// far is still shorter than MAX_QLENGTH
96 if (isValidSeqName(getDBRefId(dbRef))
97 && queryBuilder.length() < MAX_QLENGTH)
99 if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT))
// a UniProt ref is searched both as an accession and as an id
101 queryBuilder.append("uniprot_accession:")
102 .append(getDBRefId(dbRef)).append(" OR ");
103 queryBuilder.append("uniprot_id:").append(getDBRefId(dbRef))
105 isUniProtRefsFound = true;
107 else if (dbRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
110 String id = getDBRefId(dbRef).toLowerCase(Locale.ROOT);
// skip PDB ids already recorded in pdbids.
// NOTE(review): no pdbids.add(...) call is visible in this listing - confirm
// the set is actually populated.
111 if (!pdbids.contains(id))
113 queryBuilder.append("pdb_id:").append(id).append(" OR ");
114 isPDBRefsFound = true;
120 seqRefs.add(getDBRefId(dbRef));
// checked when neither a PDB nor a UniProt term was produced above
126 if (!isPDBRefsFound && !isUniProtRefsFound)
128 String seqName = seq.getName();
// sanitizeSeqName rejects a null name with a NullPointerException
129 seqName = sanitizeSeqName(seqName);
// the cleaned, lower-cased name is split on the | character
130 String[] names = seqName.toLowerCase(Locale.ROOT).split("\\|");
131 for (String name : names)
133 // System.out.println("Found name : " + name);
135 if (isValidSeqName(name))
// every collected reference becomes a free-text term
141 for (String seqRef : seqRefs)
143 queryBuilder.append("text:").append(seqRef).append(" OR ");
// position of the last separator, used below to cut it off
147 int endIndex = queryBuilder.lastIndexOf(" OR ");
// builders shorter than 6 characters are handled separately; the body of this
// branch is not visible in this listing
148 if (queryBuilder.toString().length() < 6)
152 String query = queryBuilder.toString().substring(0, endIndex);
157 * Remove the following special characters from input string +, -, &, !, (, ),
158 * {, }, [, ], ^, ", ~, *, ?, :, \
163 public static String sanitizeSeqName(String seqName)
165 Objects.requireNonNull(seqName);
166 return seqName.replaceAll("\\[\\d*\\]", "")
167 .replaceAll("[^\\dA-Za-z|_]", "").replaceAll("\\s+", "+");
171 * Ensures sequence ref names are not less than 3 characters and does not
172 * contain a database name
177 static boolean isValidSeqName(String seqName)
179 // System.out.println("seqName : " + seqName);
180 String ignoreList = "pdb,uniprot,swiss-prot";
181 if (seqName.length() < 3)
185 if (seqName.contains(":"))
189 seqName = seqName.toLowerCase(Locale.ROOT);
190 for (String ignoredEntry : ignoreList.split(","))
192 if (seqName.contains(ignoredEntry))
200 static String getDBRefId(DBRefEntry dbRef)
202 String ref = dbRef.getAccessionId().replaceAll("GO:", "");
207 * FTSRestClient specific query builder to recover associated structure data
208 * records for a sequence
211 * - seq to generate a query for
212 * @param wantedFields
213 * - fields to retrieve
214 * @param selectedFilterOpt
215 * - criterion for ranking results (e.g. resolution)
217 * - sort ascending or descending
221 public FTSRestResponse fetchStructuresMetaData(SequenceI seq,
222 Collection<FTSDataColumnI> wantedFields,
223 FilterOption selectedFilterOpt, boolean b) throws Exception
225 FTSRestResponse resultList;
226 FTSRestRequest pdbRequest = new FTSRestRequest();
227 pdbRequest.setAllowEmptySeq(false);
228 pdbRequest.setResponseSize(500);
229 pdbRequest.setFieldToSearchBy("(");
230 pdbRequest.setFieldToSortBy(selectedFilterOpt.getValue(), b);
231 pdbRequest.setWantedFields(wantedFields);
232 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
233 pdbRequest.setAssociatedSequence(seq);
234 resultList = pdbRestClient.executeRequest(pdbRequest);
236 lastPdbRequest = pdbRequest;
240 public List<FilterOption> getAvailableFilterOptions(String VIEWS_FILTER)
242 List<FilterOption> filters = new ArrayList<FilterOption>();
243 filters.add(new FilterOption(
244 "PDBe " + MessageManager.getString("label.best_quality"),
245 "overall_quality", VIEWS_FILTER, false, this));
246 filters.add(new FilterOption(
247 "PDBe " + MessageManager.getString("label.best_resolution"),
248 "resolution", VIEWS_FILTER, false, this));
249 filters.add(new FilterOption(
250 "PDBe " + MessageManager.getString("label.most_protein_chain"),
251 "number_of_protein_chains", VIEWS_FILTER, false, this));
252 filters.add(new FilterOption(
253 "PDBe " + MessageManager
254 .getString("label.most_bound_molecules"),
255 "number_of_bound_molecules", VIEWS_FILTER, false, this));
256 filters.add(new FilterOption(
257 "PDBe " + MessageManager
258 .getString("label.most_polymer_residues"),
259 "number_of_polymer_residues", VIEWS_FILTER, true, this));
265 public boolean needsRefetch(FilterOption selectedFilterOpt)
267 // PDBe queries never need a refetch first
272 * FTSRestClient specific query builder to pick top ranked entry from a
273 * fetchStructuresMetaData query
276 * - seq to generate a query for
277 * @param wantedFields
278 * - fields to retrieve
279 * @param selectedFilterOpt
280 * - criterion for ranking results (e.g. resolution)
282 * - sort ascending or descending
286 public FTSRestResponse selectFirstRankedQuery(SequenceI seq,
287 Collection<FTSData> collectedResults,
288 Collection<FTSDataColumnI> wantedFields, String fieldToFilterBy,
289 boolean b) throws Exception
292 FTSRestResponse resultList;
293 FTSRestRequest pdbRequest = new FTSRestRequest();
294 if (fieldToFilterBy.equalsIgnoreCase("uniprot_coverage"))
296 pdbRequest.setAllowEmptySeq(false);
297 pdbRequest.setResponseSize(1);
298 pdbRequest.setFieldToSearchBy("(");
299 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
300 pdbRequest.setWantedFields(wantedFields);
301 pdbRequest.setAssociatedSequence(seq);
302 pdbRequest.setFacet(true);
303 pdbRequest.setFacetPivot(fieldToFilterBy + ",entry_entity");
304 pdbRequest.setFacetPivotMinCount(1);
308 pdbRequest.setAllowEmptySeq(false);
309 pdbRequest.setResponseSize(1);
310 pdbRequest.setFieldToSearchBy("(");
311 pdbRequest.setFieldToSortBy(fieldToFilterBy, b);
312 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
313 pdbRequest.setWantedFields(wantedFields);
314 pdbRequest.setAssociatedSequence(seq);
316 resultList = pdbRestClient.executeRequest(pdbRequest);
318 lastPdbRequest = pdbRequest;
323 public PDBEntry[] collectSelectedRows(JTable restable, int[] selectedRows,
324 List<SequenceI> selectedSeqsToView)
326 int refSeqColIndex = restable.getColumn("Ref Sequence").getModelIndex();
328 PDBEntry[] pdbEntriesToView = new PDBEntry[selectedRows.length];
330 int idColumnIndex = -1;
331 boolean fromTDB = true;
332 idColumnIndex = restable.getColumn("PDB Id").getModelIndex();
334 for (int row : selectedRows)
337 String pdbIdStr = restable.getValueAt(row, idColumnIndex).toString();
338 SequenceI selectedSeq = (SequenceI) restable.getValueAt(row,
340 selectedSeqsToView.add(selectedSeq);
341 PDBEntry pdbEntry = selectedSeq.getPDBEntry(pdbIdStr);
342 if (pdbEntry == null)
344 pdbEntry = getFindEntry(pdbIdStr, selectedSeq.getAllPDBEntries());
347 if (pdbEntry == null)
349 pdbEntry = new PDBEntry();
350 pdbEntry.setId(pdbIdStr);
351 pdbEntry.setType(PDBEntry.Type.MMCIF);
352 selectedSeq.getDatasetSequence().addPDBId(pdbEntry);
354 pdbEntriesToView[count++] = pdbEntry;
356 return pdbEntriesToView;
360 protected FTSRestRequest getLastFTSRequest()
362 return lastPdbRequest;
// Passes the given request to pdbRestClient.executeRequest and returns the
// response it produces; lastPdbRequest is not touched by the visible code.
// NOTE(review): a line between the signature and the body is not shown in
// this listing (possibly a throws clause) - confirm before changing the
// signature.
365 public FTSRestResponse executePDBFTSRestRequest(FTSRestRequest pdbRequest)
368 return pdbRestClient.executeRequest(pdbRequest);