1 package jalview.gui.structurechooser;
3 import java.util.ArrayList;
4 import java.util.Arrays;
5 import java.util.Collection;
6 import java.util.Collections;
7 import java.util.Comparator;
8 import java.util.HashSet;
9 import java.util.LinkedHashSet;
10 import java.util.List;
13 import javax.swing.JTable;
15 import jalview.datamodel.DBRefEntry;
16 import jalview.datamodel.DBRefSource;
17 import jalview.datamodel.PDBEntry;
18 import jalview.datamodel.SequenceI;
19 import jalview.fts.api.FTSData;
20 import jalview.fts.api.FTSDataColumnI;
21 import jalview.fts.api.FTSRestClientI;
22 import jalview.fts.core.FTSDataColumnPreferences;
23 import jalview.fts.core.FTSDataColumnPreferences.PreferenceSource;
24 import jalview.fts.core.FTSRestRequest;
25 import jalview.fts.core.FTSRestResponse;
26 import jalview.fts.service.threedbeacons.TDBeaconsFTSRestClient;
27 import jalview.jbgui.FilterOption;
28 import jalview.util.MessageManager;
/**
 * Logic for querying the 3D-Beacons API for structures of sequences.
 */
35 public class ThreeDBStructureChooserQuerySource
36 extends StructureChooserQuerySource
// Presumably the maximum length, in characters, allowed for a generated
// query string - TODO confirm.
39 private static int MAX_QLENGTH = 7820;
// The request most recently executed by fetchStructuresMetaData; its field
// indices are used later to read columns out of result rows.
41 protected FTSRestRequest lastTdbRequest;
// REST client used to execute 3D-Beacons requests; assigned in the constructor.
43 protected FTSRestClientI tdbRestClient;
// The request most recently executed by fetchStructuresMetaDataFor; read by
// joinResponses to locate the "pdb_id" column.
45 private FTSRestRequest lastPdbRequest;
/**
 * Creates a query source that talks to the client returned by
 * {@code TDBeaconsFTSRestClient.getInstance()} and sets up the structure
 * chooser's column preferences for that client.
 */
public ThreeDBStructureChooserQuerySource()
{
  this.tdbRestClient = TDBeaconsFTSRestClient.getInstance();
  this.docFieldPrefs = new FTSDataColumnPreferences(
          PreferenceSource.STRUCTURE_CHOOSER,
          TDBeaconsFTSRestClient.getInstance());
}
57 * Builds a query string for a given sequences using its DBRef entries 3d
58 * Beacons is only useful for uniprot IDs
61 * the sequences to build a query for
62 * @return the built query string
65 public String buildQuery(SequenceI seq)
67 boolean isPDBRefsFound = false;
68 boolean isUniProtRefsFound = false;
69 StringBuilder queryBuilder = new StringBuilder();
70 Set<String> seqRefs = new LinkedHashSet<>();
73 * note PDBs as DBRefEntry so they are not duplicated in query
75 Set<String> pdbids = new HashSet<>();
77 List<DBRefEntry> refs = seq.getDBRefs();
78 if (refs != null && refs.size() != 0)
80 for (int ib = 0, nb = refs.size(); ib < nb; ib++)
82 DBRefEntry dbRef = refs.get(ib);
83 if (isValidSeqName(getDBRefId(dbRef))
84 && queryBuilder.length() < MAX_QLENGTH)
86 if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT)
87 && dbRef.isCanonical())
89 // TODO: pick best Uniprot accession
90 isUniProtRefsFound = true;
91 return getDBRefId(dbRef);
101 * Ensures sequence ref names are not less than 3 characters and does not
102 * contain a database name
107 static boolean isValidSeqName(String seqName)
109 // System.out.println("seqName : " + seqName);
110 String ignoreList = "pdb,uniprot,swiss-prot";
111 if (seqName.length() < 3)
115 if (seqName.contains(":"))
119 seqName = seqName.toLowerCase();
120 for (String ignoredEntry : ignoreList.split(","))
122 if (seqName.contains(ignoredEntry))
130 static String getDBRefId(DBRefEntry dbRef)
132 String ref = dbRef.getAccessionId().replaceAll("GO:", "");
137 * FTSRestClient specific query builder to recover associated structure data
138 * records for a sequence
141 * - seq to generate a query for
142 * @param wantedFields
143 * - fields to retrieve
144 * @param selectedFilterOpt
145 * - criterion for ranking results (e.g. resolution)
147 * - sort ascending or descending
151 public FTSRestResponse fetchStructuresMetaData(SequenceI seq,
152 Collection<FTSDataColumnI> wantedFields,
153 FilterOption selectedFilterOpt, boolean b) throws Exception
155 FTSRestResponse resultList;
156 FTSRestRequest tdbRequest = getTDBeaconsRequest(seq, wantedFields);
157 resultList = tdbRestClient.executeRequest(tdbRequest);
159 lastTdbRequest = tdbRequest;
// Prepares the request sent to 3D-Beacons for one sequence: empty sequences
// are disallowed, the response size is 500, the caller's wanted fields are
// requested, and the search term is buildQuery(seq) with ".json" appended.
163 private FTSRestRequest getTDBeaconsRequest(SequenceI seq,
164 Collection<FTSDataColumnI> wantedFields)
166 FTSRestRequest pdbRequest = new FTSRestRequest();
167 pdbRequest.setAllowEmptySeq(false);
168 pdbRequest.setResponseSize(500);
169 pdbRequest.setWantedFields(wantedFields);
170 String query = buildQuery(seq);
// NOTE(review): the lines between the buildQuery call and setSearchTerm are
// not visible in this extract. If buildQuery can yield null, a guard is
// needed here, otherwise the search term becomes "null.json" - confirm.
175 pdbRequest.setSearchTerm(query + ".json");
// the sequence is attached to the request alongside the search term
176 pdbRequest.setAssociatedSequence(seq);
181 public List<FilterOption> getAvailableFilterOptions(String VIEWS_FILTER)
183 List<FilterOption> filters = new ArrayList<FilterOption>();
185 new FilterOption(MessageManager.getString("label.best_quality"),
186 "overall_quality", VIEWS_FILTER, false));
187 filters.add(new FilterOption(
188 MessageManager.getString("label.best_resolution"), "resolution",
189 VIEWS_FILTER, false));
190 filters.add(new FilterOption(
191 MessageManager.getString("label.most_protein_chain"),
192 "number_of_protein_chains", VIEWS_FILTER, false));
193 filters.add(new FilterOption(
194 MessageManager.getString("label.most_bound_molecules"),
195 "number_of_bound_molecules", VIEWS_FILTER, false));
196 filters.add(new FilterOption(
197 MessageManager.getString("label.most_polymer_residues"),
198 "number_of_polymer_residues", VIEWS_FILTER, true));
204 * model categories - update as needed. warnings output if unknown types
207 * Order denotes 'trust'
209 private static List<String> EXP_CATEGORIES = Arrays
211 { "EXPERIMENTALLY DETERMINED", "DEEP LEARNING", "TEMPLATE-BASED" });
214 * FTSRestClient specific query builder to pick top ranked entry from a
215 * fetchStructuresMetaData query
218 * - seq to generate a query for
219 * @param wantedFields
220 * - fields to retrieve
221 * @param selectedFilterOpt
222 * - criterion for ranking results (e.g. resolution)
224 * - sort ascending or descending
228 public FTSRestResponse selectFirstRankedQuery(SequenceI seq,
229 Collection<FTSData> collectedResults,
230 Collection<FTSDataColumnI> wantedFields, String fieldToFilterBy,
231 boolean b) throws Exception
234 List<FTSData> filteredResponse = new ArrayList<FTSData>();
235 final int idx_ups = lastTdbRequest.getFieldIndex("Uniprot Start");
236 final int idx_upe = lastTdbRequest.getFieldIndex("Uniprot End");
237 final int idx_mcat = lastTdbRequest.getFieldIndex("Model Category");
238 final int idx_mqual = lastTdbRequest.getFieldIndex("Qmean");
239 final int idx_resol = lastTdbRequest.getFieldIndex("Resolution");
241 // ignore anything outside the sequence region
242 for (FTSData row : collectedResults)
244 int up_s = (Integer) row.getSummaryData()[idx_ups];
245 int up_e = (Integer) row.getSummaryData()[idx_upe];
247 if (seq == row.getSummaryData()[0] && up_e > seq.getStart()
248 && up_s < seq.getEnd())
250 filteredResponse.add(row);
253 // sort according to decreasing length,
255 Collections.sort(filteredResponse, new Comparator<FTSData>()
258 private final int scoreCategory(String cat)
260 // TODO: make quicker
261 int idx = EXP_CATEGORIES.indexOf(cat.toUpperCase());
264 System.out.println("Unknown category: '" + cat + "'");
266 return -EXP_CATEGORIES.size() - idx;
270 public int compare(FTSData o1, FTSData o2)
272 int o1_s = (Integer) o1.getSummaryData()[idx_ups];
273 int o1_e = (Integer) o1.getSummaryData()[idx_upe];
274 int o1_cat = scoreCategory((String) o1.getSummaryData()[idx_mcat]);
275 int o2_s = (Integer) o2.getSummaryData()[idx_ups];
276 int o2_e = (Integer) o2.getSummaryData()[idx_upe];
277 int o2_cat = scoreCategory((String) o2.getSummaryData()[idx_mcat]);
279 if (o1_cat == o2_cat)
283 int o1_xtent = o1_e - o1_s;
284 int o2_xtent = o2_e - o2_s;
285 if (o1_xtent == o2_xtent)
287 if (o1_cat == scoreCategory(EXP_CATEGORIES.get(0)))
289 // experimental structures, so rank on quality
290 double o1_res = (Double) o1.getSummaryData()[idx_resol];
291 double o2_res = (Double) o2.getSummaryData()[idx_resol];
292 return (o2_res < o1_res) ? 1 : (o2_res == o1_res) ? 0 : -1;
296 // models, so rank on qmean
297 float o1_mq = (Float) o1.getSummaryData()[idx_mqual];
298 float o2_mq = (Float) o2.getSummaryData()[idx_mqual];
299 return (o2_mq < o1_mq) ? 1 : (o2_mq == o1_mq) ? 0 : -1;
304 return o1_xtent - o2_xtent;
314 return o2_cat - o1_cat;
319 public boolean equals(Object obj)
321 return super.equals(obj);
324 FTSRestResponse resultList = new FTSRestResponse();
325 resultList.setNumberOfItemsFound(filteredResponse.size());
326 resultList.setSearchSummary(filteredResponse);
331 public PDBEntry[] collectSelectedRows(JTable restable, int[] selectedRows,
332 List<SequenceI> selectedSeqsToView)
334 int refSeqColIndex = restable.getColumn("Ref Sequence").getModelIndex();
336 PDBEntry[] pdbEntriesToView = new PDBEntry[selectedRows.length];
338 int idColumnIndex = restable.getColumn("Model id").getModelIndex();
339 int urlColumnIndex = restable.getColumn("Url").getModelIndex();
340 int typeColumnIndex = restable.getColumn("Provider").getModelIndex();
341 int categoryColumnIndex = restable.getColumn("Model Category")
344 for (int row : selectedRows)
346 // unique id - could be a horrible hash
348 String pdbIdStr = restable.getValueAt(row, idColumnIndex).toString();
349 String urlStr = restable.getValueAt(row, urlColumnIndex).toString();
350 String typeColumn = restable.getValueAt(row, typeColumnIndex)
352 SequenceI selectedSeq = (SequenceI) restable.getValueAt(row,
354 selectedSeqsToView.add(selectedSeq);
355 PDBEntry pdbEntry = selectedSeq.getPDBEntry(pdbIdStr);
356 if (pdbEntry == null)
358 pdbEntry = getFindEntry(pdbIdStr, selectedSeq.getAllPDBEntries());
361 if (pdbEntry == null)
363 pdbEntry = new PDBEntry();
364 pdbEntry.setId(pdbIdStr);
365 pdbEntry.setType(PDBEntry.Type.MMCIF);
366 if (!"PDBe".equalsIgnoreCase(typeColumn))
368 pdbEntry.setRetrievalUrl(urlStr);
370 selectedSeq.getDatasetSequence().addPDBId(pdbEntry);
372 pdbEntriesToView[count++] = pdbEntry;
374 return pdbEntriesToView;
378 protected FTSRestRequest getLastFTSRequest()
380 return lastTdbRequest;
/**
 * Generates a query for PDBFTS to retrieve structure metadata.
 *
 * @param upResponse
 *          the response whose rows are examined to build the query
 */
// Builds the id part of a PDBe FTS query from a 3D-Beacons response: each
// row's model id and provider are read, rows whose provider is "PDBe"
// (ignoring case) are singled out, and the ids held in pdbIds are returned
// joined with " OR ".
391 public String buildPDBFTSQueryFor(FTSRestResponse upResponse)
393 List<String> pdbIds = new ArrayList<String>();
// column positions come from the most recent 3D-Beacons request
394 int idx_modelId = getLastFTSRequest().getFieldIndex("Model id");
395 int idx_provider = getLastFTSRequest().getFieldIndex("Provider");
396 for (FTSData row : upResponse.getSearchSummary())
398 String id = (String) row.getSummaryData()[idx_modelId];
399 String provider = (String) row.getSummaryData()[idx_provider];
// NOTE(review): the body of this branch is not visible in this extract;
// presumably it adds id to pdbIds - confirm.
400 if ("PDBe".equalsIgnoreCase(provider))
// String.join already returns a String, so the trailing toString() is
// redundant. An empty pdbIds yields an empty string.
405 return String.join(" OR ", pdbIds).toString();
409 * query PDBe for structure metadata
413 * @return FTSRestResponse via PDBStructureChooserQuerySource
415 public FTSRestResponse fetchStructuresMetaDataFor(
416 PDBStructureChooserQuerySource pdbquery,
417 FTSRestResponse upResponse) throws Exception
420 String pdb_Query = buildPDBFTSQueryFor(upResponse);
422 FTSRestResponse resultList;
423 FTSRestRequest pdbRequest = new FTSRestRequest();
424 pdbRequest.setAllowEmptySeq(false);
425 pdbRequest.setResponseSize(500);
426 pdbRequest.setFieldToSearchBy("(");
427 // pdbRequest.setFieldToSortBy("pdb_id");
428 pdbRequest.setWantedFields(
429 pdbquery.getDocFieldPrefs().getStructureSummaryFields());
430 pdbRequest.setSearchTerm(pdb_Query + ")");
431 resultList = pdbquery.executePDBFTSRestRequest(pdbRequest);
433 lastPdbRequest = pdbRequest;
437 public FTSRestResponse joinResponses(FTSRestResponse upResponse,
438 FTSRestResponse pdbResponse)
440 int idx_provider = getLastFTSRequest().getFieldIndex("Provider");
442 int idx_modelId = getLastFTSRequest().getFieldIndex("Model id");
443 int pdbIdx = lastPdbRequest.getFieldIndex("pdb_id");
444 for (FTSData row : upResponse.getSearchSummary())
446 String id = (String) row.getSummaryData()[idx_modelId];
447 String provider = (String) row.getSummaryData()[idx_provider];
448 if ("PDBe".equalsIgnoreCase(provider))
450 for (FTSData pdbrow : pdbResponse.getSearchSummary())
452 String pdbid = (String) pdbrow.getSummaryData()[pdbIdx];
453 if (id.equalsIgnoreCase(pdbid))
455 // often multiple entries per PDB ID so we bail after first
457 // append to FTSRestResponse array
462 // TODO Auto-generated method stub