2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.gui.structurechooser;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Set;

import javax.swing.JTable;
import javax.swing.table.TableModel;

import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.PDBEntry;
import jalview.datamodel.SequenceI;
import jalview.fts.api.FTSData;
import jalview.fts.api.FTSDataColumnI;
import jalview.fts.api.FTSRestClientI;
import jalview.fts.core.FTSDataColumnPreferences;
import jalview.fts.core.FTSDataColumnPreferences.PreferenceSource;
import jalview.fts.core.FTSRestRequest;
import jalview.fts.core.FTSRestResponse;
import jalview.fts.service.pdb.PDBFTSRestClient;
import jalview.jbgui.FilterOption;
import jalview.util.MessageManager;
52 * logic for querying the PDBe API for structures of sequences
56 public class PDBStructureChooserQuerySource
57 extends StructureChooserQuerySource
60 private static int MAX_QLENGTH = 7820;
62 protected FTSRestRequest lastPdbRequest;
64 protected FTSRestClientI pdbRestClient;
66 public PDBStructureChooserQuerySource()
68 pdbRestClient = PDBFTSRestClient.getInstance();
69 docFieldPrefs = new FTSDataColumnPreferences(
70 PreferenceSource.STRUCTURE_CHOOSER,
71 PDBFTSRestClient.getInstance());
76 * Builds a query string for a given sequences using its DBRef entries
79 * the sequences to build a query for
80 * @return the built query string
83 public String buildQuery(SequenceI seq)
85 boolean isPDBRefsFound = false;
86 boolean isUniProtRefsFound = false;
87 StringBuilder queryBuilder = new StringBuilder();
88 Set<String> seqRefs = new LinkedHashSet<>();
91 * note PDBs as DBRefEntry so they are not duplicated in query
93 Set<String> pdbids = new HashSet<>();
95 if (seq.getAllPDBEntries() != null
96 && queryBuilder.length() < MAX_QLENGTH)
98 for (PDBEntry entry : seq.getAllPDBEntries())
100 if (isValidSeqName(entry.getId()))
102 String id = entry.getId().toLowerCase(Locale.ROOT);
103 queryBuilder.append("pdb_id:").append(id).append(" OR ");
104 isPDBRefsFound = true;
110 List<DBRefEntry> refs = seq.getDBRefs();
111 if (refs != null && refs.size() != 0)
113 for (int ib = 0, nb = refs.size(); ib < nb; ib++)
115 DBRefEntry dbRef = refs.get(ib);
116 if (isValidSeqName(getDBRefId(dbRef))
117 && queryBuilder.length() < MAX_QLENGTH)
119 if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT))
121 queryBuilder.append("uniprot_accession:")
122 .append(getDBRefId(dbRef)).append(" OR ");
123 queryBuilder.append("uniprot_id:").append(getDBRefId(dbRef))
125 isUniProtRefsFound = true;
127 else if (dbRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
130 String id = getDBRefId(dbRef).toLowerCase(Locale.ROOT);
131 if (!pdbids.contains(id))
133 queryBuilder.append("pdb_id:").append(id).append(" OR ");
134 isPDBRefsFound = true;
140 seqRefs.add(getDBRefId(dbRef));
146 if (!isPDBRefsFound && !isUniProtRefsFound)
148 String seqName = seq.getName();
149 seqName = sanitizeSeqName(seqName);
150 String[] names = seqName.toLowerCase(Locale.ROOT).split("\\|");
151 for (String name : names)
153 // jalview.bin.Console.outPrintln("Found name : " + name);
155 if (isValidSeqName(name))
161 for (String seqRef : seqRefs)
163 queryBuilder.append("text:").append(seqRef).append(" OR ");
167 int endIndex = queryBuilder.lastIndexOf(" OR ");
168 if (queryBuilder.toString().length() < 6)
172 String query = queryBuilder.toString().substring(0, endIndex);
177 * Remove the following special characters from input string +, -, &, !, (, ),
178 * {, }, [, ], ^, ", ~, *, ?, :, \
183 public static String sanitizeSeqName(String seqName)
185 Objects.requireNonNull(seqName);
186 return seqName.replaceAll("\\[\\d*\\]", "")
187 .replaceAll("[^\\dA-Za-z|_]", "").replaceAll("\\s+", "+");
191 * Ensures sequence ref names are not less than 3 characters and does not
192 * contain a database name
197 static boolean isValidSeqName(String seqName)
199 // jalview.bin.Console.outPrintln("seqName : " + seqName);
200 String ignoreList = "pdb,uniprot,swiss-prot";
201 if (seqName.length() < 3)
205 if (seqName.contains(":"))
209 seqName = seqName.toLowerCase(Locale.ROOT);
210 for (String ignoredEntry : ignoreList.split(","))
212 if (seqName.contains(ignoredEntry))
220 static String getDBRefId(DBRefEntry dbRef)
222 String ref = dbRef.getAccessionId().replaceAll("GO:", "");
227 * FTSRestClient specific query builder to recover associated structure data
228 * records for a sequence
231 * - seq to generate a query for
232 * @param wantedFields
233 * - fields to retrieve
234 * @param selectedFilterOpt
235 * - criterion for ranking results (e.g. resolution)
237 * - sort ascending or descending
241 public FTSRestResponse fetchStructuresMetaData(SequenceI seq,
242 Collection<FTSDataColumnI> wantedFields,
243 FilterOption selectedFilterOpt, boolean b) throws Exception
245 FTSRestResponse resultList;
246 FTSRestRequest pdbRequest = new FTSRestRequest();
247 pdbRequest.setAllowEmptySeq(false);
248 pdbRequest.setResponseSize(500);
249 pdbRequest.setFieldToSearchBy("(");
250 pdbRequest.setFieldToSortBy(selectedFilterOpt.getValue(), b);
251 pdbRequest.setWantedFields(wantedFields);
252 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
253 pdbRequest.setAssociatedSequence(seq);
254 resultList = pdbRestClient.executeRequest(pdbRequest);
256 lastPdbRequest = pdbRequest;
260 public List<FilterOption> getAvailableFilterOptions(String VIEWS_FILTER)
262 List<FilterOption> filters = new ArrayList<FilterOption>();
263 filters.add(new FilterOption(
264 "PDBe " + MessageManager.getString("label.best_quality"),
265 "overall_quality", VIEWS_FILTER, false, this));
266 filters.add(new FilterOption(
267 "PDBe " + MessageManager.getString("label.best_resolution"),
268 "resolution", VIEWS_FILTER, false, this));
269 filters.add(new FilterOption(
270 "PDBe " + MessageManager.getString("label.most_protein_chain"),
271 "number_of_protein_chains", VIEWS_FILTER, false, this));
272 filters.add(new FilterOption(
273 "PDBe " + MessageManager
274 .getString("label.most_bound_molecules"),
275 "number_of_bound_molecules", VIEWS_FILTER, false, this));
276 filters.add(new FilterOption(
277 "PDBe " + MessageManager
278 .getString("label.most_polymer_residues"),
279 "number_of_polymer_residues", VIEWS_FILTER, true, this));
285 public boolean needsRefetch(FilterOption selectedFilterOpt)
287 // PDBe queries never need a refetch first
292 * FTSRestClient specific query builder to pick top ranked entry from a
293 * fetchStructuresMetaData query
296 * - seq to generate a query for
297 * @param wantedFields
298 * - fields to retrieve
299 * @param selectedFilterOpt
300 * - criterion for ranking results (e.g. resolution)
302 * - sort ascending or descending
306 public FTSRestResponse selectFirstRankedQuery(SequenceI seq,
307 Collection<FTSData> collectedResults,
308 Collection<FTSDataColumnI> wantedFields, String fieldToFilterBy,
309 boolean b) throws Exception
312 FTSRestResponse resultList;
313 FTSRestRequest pdbRequest = new FTSRestRequest();
314 if (fieldToFilterBy.equalsIgnoreCase("uniprot_coverage"))
316 pdbRequest.setAllowEmptySeq(false);
317 pdbRequest.setResponseSize(1);
318 pdbRequest.setFieldToSearchBy("(");
319 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
320 pdbRequest.setWantedFields(wantedFields);
321 pdbRequest.setAssociatedSequence(seq);
322 pdbRequest.setFacet(true);
323 pdbRequest.setFacetPivot(fieldToFilterBy + ",entry_entity");
324 pdbRequest.setFacetPivotMinCount(1);
328 pdbRequest.setAllowEmptySeq(false);
329 pdbRequest.setResponseSize(1);
330 pdbRequest.setFieldToSearchBy("(");
331 pdbRequest.setFieldToSortBy(fieldToFilterBy, b);
332 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
333 pdbRequest.setWantedFields(wantedFields);
334 pdbRequest.setAssociatedSequence(seq);
336 resultList = pdbRestClient.executeRequest(pdbRequest);
338 lastPdbRequest = pdbRequest;
343 public PDBEntry[] collectSelectedRows(JTable restable, int[] selectedRows,
344 List<SequenceI> selectedSeqsToView)
346 int refSeqColIndex = restable.getColumn("Ref Sequence").getModelIndex();
348 PDBEntry[] pdbEntriesToView = new PDBEntry[selectedRows.length];
350 int idColumnIndex = -1;
351 boolean fromTDB = true;
352 idColumnIndex = restable.getColumn("PDB Id").getModelIndex();
354 for (int row : selectedRows)
357 String pdbIdStr = restable.getValueAt(row, idColumnIndex).toString();
358 SequenceI selectedSeq = (SequenceI) restable.getValueAt(row,
360 selectedSeqsToView.add(selectedSeq);
361 PDBEntry pdbEntry = selectedSeq.getPDBEntry(pdbIdStr);
362 if (pdbEntry == null)
364 pdbEntry = getFindEntry(pdbIdStr, selectedSeq.getAllPDBEntries());
367 if (pdbEntry == null)
369 pdbEntry = new PDBEntry();
370 pdbEntry.setId(pdbIdStr);
371 pdbEntry.setType(PDBEntry.Type.MMCIF);
372 selectedSeq.getDatasetSequence().addPDBId(pdbEntry);
374 pdbEntriesToView[count++] = pdbEntry;
376 return pdbEntriesToView;
380 protected FTSRestRequest getLastFTSRequest()
382 return lastPdbRequest;
385 public FTSRestResponse executePDBFTSRestRequest(FTSRestRequest pdbRequest)
388 return pdbRestClient.executeRequest(pdbRequest);