2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.gui.structurechooser;
23 import java.util.Locale;
25 import java.util.ArrayList;
26 import java.util.Collection;
27 import java.util.HashSet;
28 import java.util.LinkedHashSet;
29 import java.util.List;
30 import java.util.Objects;
33 import javax.swing.JTable;
35 import jalview.datamodel.DBRefEntry;
36 import jalview.datamodel.DBRefSource;
37 import jalview.datamodel.PDBEntry;
38 import jalview.datamodel.SequenceI;
39 import jalview.fts.api.FTSData;
40 import jalview.fts.api.FTSDataColumnI;
41 import jalview.fts.api.FTSRestClientI;
42 import jalview.fts.core.FTSDataColumnPreferences;
43 import jalview.fts.core.FTSDataColumnPreferences.PreferenceSource;
44 import jalview.fts.core.FTSRestRequest;
45 import jalview.fts.core.FTSRestResponse;
46 import jalview.fts.service.pdb.PDBFTSRestClient;
47 import jalview.jbgui.FilterOption;
48 import jalview.struture.PDBEntryUtils;
49 import jalview.util.MessageManager;
52 * logic for querying the PDBe API for structures of sequences
56 public class PDBStructureChooserQuerySource
57 extends StructureChooserQuerySource
60 private static int MAX_QLENGTH = 7820;
62 protected FTSRestRequest lastPdbRequest;
64 protected FTSRestClientI pdbRestClient;
/**
 * Creates a query source that executes its requests through the client
 * returned by {@link PDBFTSRestClient#getInstance()} and sets up the
 * structure chooser's column preferences for that client.
 */
public PDBStructureChooserQuerySource()
{
  this.pdbRestClient = PDBFTSRestClient.getInstance();

  // column preferences are created for the STRUCTURE_CHOOSER preference
  // source, again using the PDBFTSRestClient instance
  this.docFieldPrefs = new FTSDataColumnPreferences(
          PreferenceSource.STRUCTURE_CHOOSER,
          PDBFTSRestClient.getInstance());
}
76 * Builds a query string for a given sequences using its DBRef entries
79 * the sequences to build a query for
80 * @return the built query string
83 public String buildQuery(SequenceI seq)
85 boolean isPDBRefsFound = false;
86 boolean isUniProtRefsFound = false;
87 StringBuilder queryBuilder = new StringBuilder();
88 Set<String> seqRefs = new LinkedHashSet<>();
89 SequenceI ds = seq.getDatasetSequence();
90 while (ds.getDatasetSequence()!=null) {
91 ds = ds.getDatasetSequence();
95 * note PDBs as DBRefEntry so they are not duplicated in query
97 Set<String> pdbids = new HashSet<>();
98 if (queryBuilder.length() < MAX_QLENGTH)
100 Set<PDBEntry> gatheredEntries = PDBEntryUtils.gatherPDBEntries(seq, true);
101 for (PDBEntry entry : gatheredEntries)
103 if (isValidSeqName(entry.getId()))
105 String id = entry.getId().toLowerCase(Locale.ROOT);
106 queryBuilder.append("pdb_id:").append(id).append(" OR ");
107 isPDBRefsFound = true;
113 List<DBRefEntry> refs = seq.getDBRefs();
114 if (refs != null && refs.size() != 0)
116 for (int ib = 0, nb = refs.size(); ib < nb; ib++)
118 DBRefEntry dbRef = refs.get(ib);
119 if (isValidSeqName(getDBRefId(dbRef))
120 && queryBuilder.length() < MAX_QLENGTH)
122 if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT))
124 queryBuilder.append("uniprot_accession:")
125 .append(getDBRefId(dbRef)).append(" OR ");
126 queryBuilder.append("uniprot_id:").append(getDBRefId(dbRef))
128 isUniProtRefsFound = true;
130 else if (dbRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
133 String id = getDBRefId(dbRef).toLowerCase(Locale.ROOT);
134 if (!pdbids.contains(id))
136 queryBuilder.append("pdb_id:").append(id).append(" OR ");
137 isPDBRefsFound = true;
143 seqRefs.add(getDBRefId(dbRef));
149 if (!isPDBRefsFound && !isUniProtRefsFound && pdbids.isEmpty())
151 String seqName = seq.getName();
152 seqName = sanitizeSeqName(seqName);
153 String[] names = seqName.toLowerCase(Locale.ROOT).split("\\|");
154 for (String name : names)
156 // jalview.bin.Console.outPrintln("Found name : " + name);
158 if (isValidSeqName(name))
164 for (String seqRef : seqRefs)
166 queryBuilder.append("text:").append(seqRef).append(" OR ");
170 int endIndex = queryBuilder.lastIndexOf(" OR ");
171 if (queryBuilder.toString().length() < 6)
175 String query = queryBuilder.toString().substring(0, endIndex);
180 * Remove the following special characters from input string +, -, &, !, (, ),
181 * {, }, [, ], ^, ", ~, *, ?, :, \
186 public static String sanitizeSeqName(String seqName)
188 Objects.requireNonNull(seqName);
189 return seqName.replaceAll("\\[\\d*\\]", "")
190 .replaceAll("[^\\dA-Za-z|_]", "").replaceAll("\\s+", "+");
194 * Ensures sequence ref names are not less than 3 characters and does not
195 * contain a database name
200 static boolean isValidSeqName(String seqName)
202 // jalview.bin.Console.outPrintln("seqName : " + seqName);
203 String ignoreList = "pdb,uniprot,swiss-prot";
204 if (seqName.length() < 3)
208 if (seqName.contains(":"))
212 seqName = seqName.toLowerCase(Locale.ROOT);
213 for (String ignoredEntry : ignoreList.split(","))
215 if (seqName.contains(ignoredEntry))
223 static String getDBRefId(DBRefEntry dbRef)
225 String ref = dbRef.getAccessionId().replaceAll("GO:", "");
230 * FTSRestClient specific query builder to recover associated structure data
231 * records for a sequence
234 * - seq to generate a query for
235 * @param wantedFields
236 * - fields to retrieve
237 * @param selectedFilterOpt
238 * - criterion for ranking results (e.g. resolution)
240 * - sort ascending or descending
244 public FTSRestResponse fetchStructuresMetaData(SequenceI seq,
245 Collection<FTSDataColumnI> wantedFields,
246 FilterOption selectedFilterOpt, boolean b) throws Exception
248 FTSRestResponse resultList;
249 FTSRestRequest pdbRequest = new FTSRestRequest();
250 pdbRequest.setAllowEmptySeq(false);
251 pdbRequest.setResponseSize(500);
252 pdbRequest.setFieldToSearchBy("(");
253 pdbRequest.setFieldToSortBy(selectedFilterOpt.getValue(), b);
254 pdbRequest.setWantedFields(wantedFields);
255 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
256 pdbRequest.setAssociatedSequence(seq);
257 resultList = pdbRestClient.executeRequest(pdbRequest);
259 lastPdbRequest = pdbRequest;
263 public List<FilterOption> getAvailableFilterOptions(String VIEWS_FILTER)
265 List<FilterOption> filters = new ArrayList<FilterOption>();
266 filters.add(new FilterOption(
267 "PDBe " + MessageManager.getString("label.best_quality"),
268 "overall_quality", VIEWS_FILTER, false, this));
269 filters.add(new FilterOption(
270 "PDBe " + MessageManager.getString("label.best_resolution"),
271 "resolution", VIEWS_FILTER, false, this));
272 filters.add(new FilterOption(
273 "PDBe " + MessageManager.getString("label.most_protein_chain"),
274 "number_of_protein_chains", VIEWS_FILTER, false, this));
275 filters.add(new FilterOption(
276 "PDBe " + MessageManager
277 .getString("label.most_bound_molecules"),
278 "number_of_bound_molecules", VIEWS_FILTER, false, this));
279 filters.add(new FilterOption(
280 "PDBe " + MessageManager
281 .getString("label.most_polymer_residues"),
282 "number_of_polymer_residues", VIEWS_FILTER, true, this));
288 public boolean needsRefetch(FilterOption selectedFilterOpt)
290 // PDBe queries never need a refetch first
295 * FTSRestClient specific query builder to pick top ranked entry from a
296 * fetchStructuresMetaData query
299 * - seq to generate a query for
300 * @param wantedFields
301 * - fields to retrieve
302 * @param selectedFilterOpt
303 * - criterion for ranking results (e.g. resolution)
305 * - sort ascending or descending
309 public FTSRestResponse selectFirstRankedQuery(SequenceI seq,
310 Collection<FTSData> collectedResults,
311 Collection<FTSDataColumnI> wantedFields, String fieldToFilterBy,
312 boolean b) throws Exception
315 FTSRestResponse resultList;
316 FTSRestRequest pdbRequest = new FTSRestRequest();
317 if (fieldToFilterBy.equalsIgnoreCase("uniprot_coverage"))
319 pdbRequest.setAllowEmptySeq(false);
320 pdbRequest.setResponseSize(1);
321 pdbRequest.setFieldToSearchBy("(");
322 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
323 pdbRequest.setWantedFields(wantedFields);
324 pdbRequest.setAssociatedSequence(seq);
325 pdbRequest.setFacet(true);
326 pdbRequest.setFacetPivot(fieldToFilterBy + ",entry_entity");
327 pdbRequest.setFacetPivotMinCount(1);
331 pdbRequest.setAllowEmptySeq(false);
332 pdbRequest.setResponseSize(1);
333 pdbRequest.setFieldToSearchBy("(");
334 pdbRequest.setFieldToSortBy(fieldToFilterBy, b);
335 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
336 pdbRequest.setWantedFields(wantedFields);
337 pdbRequest.setAssociatedSequence(seq);
339 resultList = pdbRestClient.executeRequest(pdbRequest);
341 lastPdbRequest = pdbRequest;
346 public PDBEntry[] collectSelectedRows(JTable restable, int[] selectedRows,
347 List<SequenceI> selectedSeqsToView)
349 int refSeqColIndex = restable.getColumn("Ref Sequence").getModelIndex();
351 PDBEntry[] pdbEntriesToView = new PDBEntry[selectedRows.length];
353 int idColumnIndex = -1;
354 idColumnIndex = restable.getColumn("PDB Id").getModelIndex();
356 for (int row : selectedRows)
359 String pdbIdStr = restable.getValueAt(row, idColumnIndex).toString();
360 SequenceI selectedSeq = (SequenceI) restable.getValueAt(row,
362 selectedSeqsToView.add(selectedSeq);
363 PDBEntry pdbEntry = selectedSeq.getPDBEntry(pdbIdStr);
364 if (pdbEntry == null)
366 pdbEntry = getFindEntry(pdbIdStr, selectedSeq.getAllPDBEntries());
369 if (pdbEntry == null)
371 pdbEntry = new PDBEntry();
372 pdbEntry.setId(pdbIdStr);
373 pdbEntry.setType(PDBEntry.Type.MMCIF);
374 selectedSeq.getDatasetSequence().addPDBId(pdbEntry);
376 pdbEntriesToView[count++] = pdbEntry;
378 return pdbEntriesToView;
382 protected FTSRestRequest getLastFTSRequest()
384 return lastPdbRequest;
387 public FTSRestResponse executePDBFTSRestRequest(FTSRestRequest pdbRequest)
390 return pdbRestClient.executeRequest(pdbRequest);