1 package jalview.gui.structurechooser;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Set;

import javax.swing.JTable;
import javax.swing.table.TableModel;

import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.PDBEntry;
import jalview.datamodel.SequenceI;
import jalview.fts.api.FTSData;
import jalview.fts.api.FTSDataColumnI;
import jalview.fts.api.FTSRestClientI;
import jalview.fts.core.FTSDataColumnPreferences;
import jalview.fts.core.FTSDataColumnPreferences.PreferenceSource;
import jalview.fts.core.FTSRestRequest;
import jalview.fts.core.FTSRestResponse;
import jalview.fts.service.pdb.PDBFTSRestClient;
import jalview.jbgui.FilterOption;
import jalview.util.MessageManager;
30 * logic for querying the PDBe API for structures of sequences
34 public class PDBStructureChooserQuerySource
35 extends StructureChooserQuerySource
38 private static int MAX_QLENGTH = 7820;
40 protected FTSRestRequest lastPdbRequest;
42 protected FTSRestClientI pdbRestClient;
/**
 * Creates a query source that uses the shared {@link PDBFTSRestClient}
 * instance, and sets up the structure chooser's column preferences for that
 * client.
 */
public PDBStructureChooserQuerySource()
{
  this.pdbRestClient = PDBFTSRestClient.getInstance();
  this.docFieldPrefs = new FTSDataColumnPreferences(
          PreferenceSource.STRUCTURE_CHOOSER,
          PDBFTSRestClient.getInstance());
}
55 * Builds a query string for a given sequences using its DBRef entries
58 * the sequences to build a query for
59 * @return the built query string
62 public String buildQuery(SequenceI seq)
64 boolean isPDBRefsFound = false;
65 boolean isUniProtRefsFound = false;
66 StringBuilder queryBuilder = new StringBuilder();
67 Set<String> seqRefs = new LinkedHashSet<>();
70 * note PDBs as DBRefEntry so they are not duplicated in query
72 Set<String> pdbids = new HashSet<>();
74 if (seq.getAllPDBEntries() != null
75 && queryBuilder.length() < MAX_QLENGTH)
77 for (PDBEntry entry : seq.getAllPDBEntries())
79 if (isValidSeqName(entry.getId()))
81 String id = entry.getId().toLowerCase();
82 queryBuilder.append("pdb_id:").append(id).append(" OR ");
83 isPDBRefsFound = true;
89 List<DBRefEntry> refs = seq.getDBRefs();
90 if (refs != null && refs.size() != 0)
92 for (int ib = 0, nb = refs.size(); ib < nb; ib++)
94 DBRefEntry dbRef = refs.get(ib);
95 if (isValidSeqName(getDBRefId(dbRef))
96 && queryBuilder.length() < MAX_QLENGTH)
98 if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT))
100 queryBuilder.append("uniprot_accession:")
101 .append(getDBRefId(dbRef)).append(" OR ");
102 queryBuilder.append("uniprot_id:").append(getDBRefId(dbRef))
104 isUniProtRefsFound = true;
106 else if (dbRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
109 String id = getDBRefId(dbRef).toLowerCase();
110 if (!pdbids.contains(id))
112 queryBuilder.append("pdb_id:").append(id).append(" OR ");
113 isPDBRefsFound = true;
119 seqRefs.add(getDBRefId(dbRef));
125 if (!isPDBRefsFound && !isUniProtRefsFound)
127 String seqName = seq.getName();
128 seqName = sanitizeSeqName(seqName);
129 String[] names = seqName.toLowerCase().split("\\|");
130 for (String name : names)
132 // System.out.println("Found name : " + name);
134 if (isValidSeqName(name))
140 for (String seqRef : seqRefs)
142 queryBuilder.append("text:").append(seqRef).append(" OR ");
146 int endIndex = queryBuilder.lastIndexOf(" OR ");
147 if (queryBuilder.toString().length() < 6)
151 String query = queryBuilder.toString().substring(0, endIndex);
156 * Remove the following special characters from input string +, -, &, !, (, ),
157 * {, }, [, ], ^, ", ~, *, ?, :, \
162 public static String sanitizeSeqName(String seqName)
164 Objects.requireNonNull(seqName);
165 return seqName.replaceAll("\\[\\d*\\]", "")
166 .replaceAll("[^\\dA-Za-z|_]", "").replaceAll("\\s+", "+");
170 * Ensures sequence ref names are not less than 3 characters and does not
171 * contain a database name
176 static boolean isValidSeqName(String seqName)
178 // System.out.println("seqName : " + seqName);
179 String ignoreList = "pdb,uniprot,swiss-prot";
180 if (seqName.length() < 3)
184 if (seqName.contains(":"))
188 seqName = seqName.toLowerCase();
189 for (String ignoredEntry : ignoreList.split(","))
191 if (seqName.contains(ignoredEntry))
199 static String getDBRefId(DBRefEntry dbRef)
201 String ref = dbRef.getAccessionId().replaceAll("GO:", "");
206 * FTSRestClient specific query builder to recover associated structure data
207 * records for a sequence
210 * - seq to generate a query for
211 * @param wantedFields
212 * - fields to retrieve
213 * @param selectedFilterOpt
214 * - criterion for ranking results (e.g. resolution)
216 * - sort ascending or descending
220 public FTSRestResponse fetchStructuresMetaData(SequenceI seq,
221 Collection<FTSDataColumnI> wantedFields,
222 FilterOption selectedFilterOpt, boolean b) throws Exception
224 FTSRestResponse resultList;
225 FTSRestRequest pdbRequest = new FTSRestRequest();
226 pdbRequest.setAllowEmptySeq(false);
227 pdbRequest.setResponseSize(500);
228 pdbRequest.setFieldToSearchBy("(");
229 pdbRequest.setFieldToSortBy(selectedFilterOpt.getValue(), b);
230 pdbRequest.setWantedFields(wantedFields);
231 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
232 pdbRequest.setAssociatedSequence(seq);
233 resultList = pdbRestClient.executeRequest(pdbRequest);
235 lastPdbRequest = pdbRequest;
238 public List<FilterOption> getAvailableFilterOptions(String VIEWS_FILTER)
240 List<FilterOption> filters = new ArrayList<FilterOption>();
241 filters.add(new FilterOption(
242 MessageManager.getString("label.best_quality"),
243 "overall_quality", VIEWS_FILTER, false));
244 filters.add(new FilterOption(
245 MessageManager.getString("label.best_resolution"),
246 "resolution", VIEWS_FILTER, false));
247 filters.add(new FilterOption(
248 MessageManager.getString("label.most_protein_chain"),
249 "number_of_protein_chains", VIEWS_FILTER, false));
250 filters.add(new FilterOption(
251 MessageManager.getString("label.most_bound_molecules"),
252 "number_of_bound_molecules", VIEWS_FILTER, false));
253 filters.add(new FilterOption(
254 MessageManager.getString("label.most_polymer_residues"),
255 "number_of_polymer_residues", VIEWS_FILTER, true));
261 * FTSRestClient specific query builder to pick top ranked entry from a
262 * fetchStructuresMetaData query
265 * - seq to generate a query for
266 * @param wantedFields
267 * - fields to retrieve
268 * @param selectedFilterOpt
269 * - criterion for ranking results (e.g. resolution)
271 * - sort ascending or descending
275 public FTSRestResponse selectFirstRankedQuery(SequenceI seq,
276 Collection<FTSDataColumnI> wantedFields, String fieldToFilterBy,
277 boolean b) throws Exception
280 FTSRestResponse resultList;
281 FTSRestRequest pdbRequest = new FTSRestRequest();
282 if (fieldToFilterBy.equalsIgnoreCase("uniprot_coverage"))
284 pdbRequest.setAllowEmptySeq(false);
285 pdbRequest.setResponseSize(1);
286 pdbRequest.setFieldToSearchBy("(");
287 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
288 pdbRequest.setWantedFields(wantedFields);
289 pdbRequest.setAssociatedSequence(seq);
290 pdbRequest.setFacet(true);
291 pdbRequest.setFacetPivot(fieldToFilterBy + ",entry_entity");
292 pdbRequest.setFacetPivotMinCount(1);
296 pdbRequest.setAllowEmptySeq(false);
297 pdbRequest.setResponseSize(1);
298 pdbRequest.setFieldToSearchBy("(");
299 pdbRequest.setFieldToSortBy(fieldToFilterBy, b);
300 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
301 pdbRequest.setWantedFields(wantedFields);
302 pdbRequest.setAssociatedSequence(seq);
304 resultList = pdbRestClient.executeRequest(pdbRequest);
306 lastPdbRequest = pdbRequest;
312 public PDBEntry[] collectSelectedRows(JTable restable, int[] selectedRows,
313 List<SequenceI> selectedSeqsToView)
315 int refSeqColIndex = restable.getColumn("Ref Sequence")
318 PDBEntry[] pdbEntriesToView=new PDBEntry[selectedRows.length];
320 int idColumnIndex=-1;
321 boolean fromTDB=true;
322 idColumnIndex = restable.getColumn("PDB Id").getModelIndex();
324 for (int row : selectedRows)
327 String pdbIdStr = restable.getValueAt(row,idColumnIndex)
329 SequenceI selectedSeq = (SequenceI) restable.getValueAt(row,
331 selectedSeqsToView.add(selectedSeq);
332 PDBEntry pdbEntry = selectedSeq.getPDBEntry(pdbIdStr);
333 if (pdbEntry == null)
335 pdbEntry = getFindEntry(pdbIdStr,
336 selectedSeq.getAllPDBEntries());
339 if (pdbEntry == null)
341 pdbEntry = new PDBEntry();
342 pdbEntry.setId(pdbIdStr);
343 pdbEntry.setType(PDBEntry.Type.MMCIF);
344 selectedSeq.getDatasetSequence().addPDBId(pdbEntry);
346 pdbEntriesToView[count++] = pdbEntry;
348 return pdbEntriesToView;
353 protected FTSRestRequest getLastFTSRequest()
355 return lastPdbRequest;