1 package jalview.gui.structurechooser;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Set;

import javax.swing.JTable;
import javax.swing.table.TableModel;

import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.PDBEntry;
import jalview.datamodel.SequenceI;
import jalview.fts.api.FTSData;
import jalview.fts.api.FTSDataColumnI;
import jalview.fts.api.FTSRestClientI;
import jalview.fts.core.FTSDataColumnPreferences;
import jalview.fts.core.FTSDataColumnPreferences.PreferenceSource;
import jalview.fts.core.FTSRestRequest;
import jalview.fts.core.FTSRestResponse;
import jalview.fts.service.pdb.PDBFTSRestClient;
import jalview.jbgui.FilterOption;
import jalview.util.MessageManager;

/**
 * Logic for querying the PDBe API for structures of sequences.
 */
34 public class PDBStructureChooserQuerySource
35 extends StructureChooserQuerySource
38 private static int MAX_QLENGTH = 7820;
/**
 * Creates a query source bound to the shared {@link PDBFTSRestClient}
 * instance, with column preferences taken from the structure chooser
 * preference source.
 */
public PDBStructureChooserQuerySource()
{
  pdbRestClient = PDBFTSRestClient.getInstance();
  docFieldPrefs = new FTSDataColumnPreferences(
          PreferenceSource.STRUCTURE_CHOOSER,
          PDBFTSRestClient.getInstance());
}

51 * Builds a query string for a given sequences using its DBRef entries
54 * the sequences to build a query for
55 * @return the built query string
58 public String buildQuery(SequenceI seq)
60 boolean isPDBRefsFound = false;
61 boolean isUniProtRefsFound = false;
62 StringBuilder queryBuilder = new StringBuilder();
63 Set<String> seqRefs = new LinkedHashSet<>();
66 * note PDBs as DBRefEntry so they are not duplicated in query
68 Set<String> pdbids = new HashSet<>();
70 if (seq.getAllPDBEntries() != null
71 && queryBuilder.length() < MAX_QLENGTH)
73 for (PDBEntry entry : seq.getAllPDBEntries())
75 if (isValidSeqName(entry.getId()))
77 String id = entry.getId().toLowerCase();
78 queryBuilder.append("pdb_id:").append(id).append(" OR ");
79 isPDBRefsFound = true;
85 List<DBRefEntry> refs = seq.getDBRefs();
86 if (refs != null && refs.size() != 0)
88 for (int ib = 0, nb = refs.size(); ib < nb; ib++)
90 DBRefEntry dbRef = refs.get(ib);
91 if (isValidSeqName(getDBRefId(dbRef))
92 && queryBuilder.length() < MAX_QLENGTH)
94 if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT))
96 queryBuilder.append("uniprot_accession:")
97 .append(getDBRefId(dbRef)).append(" OR ");
98 queryBuilder.append("uniprot_id:").append(getDBRefId(dbRef))
100 isUniProtRefsFound = true;
102 else if (dbRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
105 String id = getDBRefId(dbRef).toLowerCase();
106 if (!pdbids.contains(id))
108 queryBuilder.append("pdb_id:").append(id).append(" OR ");
109 isPDBRefsFound = true;
115 seqRefs.add(getDBRefId(dbRef));
121 if (!isPDBRefsFound && !isUniProtRefsFound)
123 String seqName = seq.getName();
124 seqName = sanitizeSeqName(seqName);
125 String[] names = seqName.toLowerCase().split("\\|");
126 for (String name : names)
128 // System.out.println("Found name : " + name);
130 if (isValidSeqName(name))
136 for (String seqRef : seqRefs)
138 queryBuilder.append("text:").append(seqRef).append(" OR ");
142 int endIndex = queryBuilder.lastIndexOf(" OR ");
143 if (queryBuilder.toString().length() < 6)
147 String query = queryBuilder.toString().substring(0, endIndex);
152 * Remove the following special characters from input string +, -, &, !, (, ),
153 * {, }, [, ], ^, ", ~, *, ?, :, \
158 public static String sanitizeSeqName(String seqName)
160 Objects.requireNonNull(seqName);
161 return seqName.replaceAll("\\[\\d*\\]", "")
162 .replaceAll("[^\\dA-Za-z|_]", "").replaceAll("\\s+", "+");
166 * Ensures sequence ref names are not less than 3 characters and does not
167 * contain a database name
172 static boolean isValidSeqName(String seqName)
174 // System.out.println("seqName : " + seqName);
175 String ignoreList = "pdb,uniprot,swiss-prot";
176 if (seqName.length() < 3)
180 if (seqName.contains(":"))
184 seqName = seqName.toLowerCase();
185 for (String ignoredEntry : ignoreList.split(","))
187 if (seqName.contains(ignoredEntry))
195 static String getDBRefId(DBRefEntry dbRef)
197 String ref = dbRef.getAccessionId().replaceAll("GO:", "");
202 * FTSRestClient specific query builder to recover associated structure data
203 * records for a sequence
206 * - seq to generate a query for
207 * @param wantedFields
208 * - fields to retrieve
209 * @param selectedFilterOpt
210 * - criterion for ranking results (e.g. resolution)
212 * - sort ascending or descending
216 public FTSRestResponse fetchStructuresMetaData(SequenceI seq,
217 Collection<FTSDataColumnI> wantedFields,
218 FilterOption selectedFilterOpt, boolean b) throws Exception
220 FTSRestResponse resultList;
221 FTSRestRequest pdbRequest = new FTSRestRequest();
222 pdbRequest.setAllowEmptySeq(false);
223 pdbRequest.setResponseSize(500);
224 pdbRequest.setFieldToSearchBy("(");
225 pdbRequest.setFieldToSortBy(selectedFilterOpt.getValue(), b);
226 pdbRequest.setWantedFields(wantedFields);
227 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
228 pdbRequest.setAssociatedSequence(seq);
229 resultList = pdbRestClient.executeRequest(pdbRequest);
231 lastPdbRequest = pdbRequest;
234 public List<FilterOption> getAvailableFilterOptions(String VIEWS_FILTER)
236 List<FilterOption> filters = new ArrayList<FilterOption>();
237 filters.add(new FilterOption(
238 MessageManager.getString("label.best_quality"),
239 "overall_quality", VIEWS_FILTER, false));
240 filters.add(new FilterOption(
241 MessageManager.getString("label.best_resolution"),
242 "resolution", VIEWS_FILTER, false));
243 filters.add(new FilterOption(
244 MessageManager.getString("label.most_protein_chain"),
245 "number_of_protein_chains", VIEWS_FILTER, false));
246 filters.add(new FilterOption(
247 MessageManager.getString("label.most_bound_molecules"),
248 "number_of_bound_molecules", VIEWS_FILTER, false));
249 filters.add(new FilterOption(
250 MessageManager.getString("label.most_polymer_residues"),
251 "number_of_polymer_residues", VIEWS_FILTER, true));
257 * FTSRestClient specific query builder to pick top ranked entry from a
258 * fetchStructuresMetaData query
261 * - seq to generate a query for
262 * @param wantedFields
263 * - fields to retrieve
264 * @param selectedFilterOpt
265 * - criterion for ranking results (e.g. resolution)
267 * - sort ascending or descending
271 public FTSRestResponse selectFirstRankedQuery(SequenceI seq,
272 Collection<FTSDataColumnI> wantedFields, String fieldToFilterBy,
273 boolean b) throws Exception
276 FTSRestResponse resultList;
277 FTSRestRequest pdbRequest = new FTSRestRequest();
278 if (fieldToFilterBy.equalsIgnoreCase("uniprot_coverage"))
280 pdbRequest.setAllowEmptySeq(false);
281 pdbRequest.setResponseSize(1);
282 pdbRequest.setFieldToSearchBy("(");
283 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
284 pdbRequest.setWantedFields(wantedFields);
285 pdbRequest.setAssociatedSequence(seq);
286 pdbRequest.setFacet(true);
287 pdbRequest.setFacetPivot(fieldToFilterBy + ",entry_entity");
288 pdbRequest.setFacetPivotMinCount(1);
292 pdbRequest.setAllowEmptySeq(false);
293 pdbRequest.setResponseSize(1);
294 pdbRequest.setFieldToSearchBy("(");
295 pdbRequest.setFieldToSortBy(fieldToFilterBy, b);
296 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
297 pdbRequest.setWantedFields(wantedFields);
298 pdbRequest.setAssociatedSequence(seq);
300 resultList = pdbRestClient.executeRequest(pdbRequest);
302 lastPdbRequest = pdbRequest;
308 public PDBEntry[] collectSelectedRows(JTable restable, int[] selectedRows,
309 List<SequenceI> selectedSeqsToView)
311 int refSeqColIndex = restable.getColumn("Ref Sequence")
314 PDBEntry[] pdbEntriesToView=new PDBEntry[selectedRows.length];
316 int idColumnIndex=-1;
317 boolean fromTDB=true;
318 idColumnIndex = restable.getColumn("PDB Id").getModelIndex();
320 for (int row : selectedRows)
323 String pdbIdStr = restable.getValueAt(row,idColumnIndex)
325 SequenceI selectedSeq = (SequenceI) restable.getValueAt(row,
327 selectedSeqsToView.add(selectedSeq);
328 PDBEntry pdbEntry = selectedSeq.getPDBEntry(pdbIdStr);
329 if (pdbEntry == null)
331 pdbEntry = getFindEntry(pdbIdStr,
332 selectedSeq.getAllPDBEntries());
335 if (pdbEntry == null)
337 pdbEntry = new PDBEntry();
338 pdbEntry.setId(pdbIdStr);
339 pdbEntry.setType(PDBEntry.Type.MMCIF);
340 selectedSeq.getDatasetSequence().addPDBId(pdbEntry);
342 pdbEntriesToView[count++] = pdbEntry;
344 return pdbEntriesToView;