1 package jalview.gui.structurechooser;
3 import java.util.Collection;
4 import java.util.HashSet;
5 import java.util.LinkedHashSet;
7 import java.util.Objects;
10 import javax.swing.JTable;
11 import javax.swing.table.TableModel;
13 import jalview.datamodel.DBRefEntry;
14 import jalview.datamodel.DBRefSource;
15 import jalview.datamodel.PDBEntry;
16 import jalview.datamodel.SequenceI;
17 import jalview.fts.api.FTSData;
18 import jalview.fts.api.FTSDataColumnI;
19 import jalview.fts.api.FTSRestClientI;
20 import jalview.fts.core.FTSDataColumnPreferences;
21 import jalview.fts.core.FTSDataColumnPreferences.PreferenceSource;
22 import jalview.fts.core.FTSRestRequest;
23 import jalview.fts.core.FTSRestResponse;
24 import jalview.fts.service.pdb.PDBFTSRestClient;
25 import jalview.jbgui.GStructureChooser.FilterOption;
28 * logic for querying the PDBe API for structures of sequences
32 public class PDBStructureChooserQuerySource
33 extends StructureChooserQuerySource
36 private static int MAX_QLENGTH = 7820;
/**
 * Sets up this query source to use the client returned by
 * {@link PDBFTSRestClient#getInstance()}, and creates the structure chooser
 * column preferences for that client.
 */
public PDBStructureChooserQuerySource()
{
  this.pdbRestClient = PDBFTSRestClient.getInstance();
  this.docFieldPrefs = new FTSDataColumnPreferences(
          PreferenceSource.STRUCTURE_CHOOSER,
          PDBFTSRestClient.getInstance());
}
49 * Builds a query string for a given sequences using its DBRef entries
52 * the sequences to build a query for
53 * @return the built query string
56 public String buildQuery(SequenceI seq)
58 boolean isPDBRefsFound = false;
59 boolean isUniProtRefsFound = false;
60 StringBuilder queryBuilder = new StringBuilder();
61 Set<String> seqRefs = new LinkedHashSet<>();
64 * note PDBs as DBRefEntry so they are not duplicated in query
66 Set<String> pdbids = new HashSet<>();
68 if (seq.getAllPDBEntries() != null
69 && queryBuilder.length() < MAX_QLENGTH)
71 for (PDBEntry entry : seq.getAllPDBEntries())
73 if (isValidSeqName(entry.getId()))
75 String id = entry.getId().toLowerCase();
76 queryBuilder.append("pdb_id:").append(id).append(" OR ");
77 isPDBRefsFound = true;
83 List<DBRefEntry> refs = seq.getDBRefs();
84 if (refs != null && refs.size() != 0)
86 for (int ib = 0, nb = refs.size(); ib < nb; ib++)
88 DBRefEntry dbRef = refs.get(ib);
89 if (isValidSeqName(getDBRefId(dbRef))
90 && queryBuilder.length() < MAX_QLENGTH)
92 if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT))
94 queryBuilder.append("uniprot_accession:")
95 .append(getDBRefId(dbRef)).append(" OR ");
96 queryBuilder.append("uniprot_id:").append(getDBRefId(dbRef))
98 isUniProtRefsFound = true;
100 else if (dbRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
103 String id = getDBRefId(dbRef).toLowerCase();
104 if (!pdbids.contains(id))
106 queryBuilder.append("pdb_id:").append(id).append(" OR ");
107 isPDBRefsFound = true;
113 seqRefs.add(getDBRefId(dbRef));
119 if (!isPDBRefsFound && !isUniProtRefsFound)
121 String seqName = seq.getName();
122 seqName = sanitizeSeqName(seqName);
123 String[] names = seqName.toLowerCase().split("\\|");
124 for (String name : names)
126 // System.out.println("Found name : " + name);
128 if (isValidSeqName(name))
134 for (String seqRef : seqRefs)
136 queryBuilder.append("text:").append(seqRef).append(" OR ");
140 int endIndex = queryBuilder.lastIndexOf(" OR ");
141 if (queryBuilder.toString().length() < 6)
145 String query = queryBuilder.toString().substring(0, endIndex);
150 * Remove the following special characters from input string +, -, &, !, (, ),
151 * {, }, [, ], ^, ", ~, *, ?, :, \
156 public static String sanitizeSeqName(String seqName)
158 Objects.requireNonNull(seqName);
159 return seqName.replaceAll("\\[\\d*\\]", "")
160 .replaceAll("[^\\dA-Za-z|_]", "").replaceAll("\\s+", "+");
164 * Ensures sequence ref names are not less than 3 characters and does not
165 * contain a database name
170 static boolean isValidSeqName(String seqName)
172 // System.out.println("seqName : " + seqName);
173 String ignoreList = "pdb,uniprot,swiss-prot";
174 if (seqName.length() < 3)
178 if (seqName.contains(":"))
182 seqName = seqName.toLowerCase();
183 for (String ignoredEntry : ignoreList.split(","))
185 if (seqName.contains(ignoredEntry))
193 static String getDBRefId(DBRefEntry dbRef)
195 String ref = dbRef.getAccessionId().replaceAll("GO:", "");
200 * FTSRestClient specific query builder to recover associated structure data
201 * records for a sequence
204 * - seq to generate a query for
205 * @param wantedFields
206 * - fields to retrieve
207 * @param selectedFilterOpt
208 * - criterion for ranking results (e.g. resolution)
210 * - sort ascending or descending
214 public FTSRestResponse fetchStructuresMetaData(SequenceI seq,
215 Collection<FTSDataColumnI> wantedFields,
216 FilterOption selectedFilterOpt, boolean b) throws Exception
218 FTSRestResponse resultList;
219 FTSRestRequest pdbRequest = new FTSRestRequest();
220 pdbRequest.setAllowEmptySeq(false);
221 pdbRequest.setResponseSize(500);
222 pdbRequest.setFieldToSearchBy("(");
223 pdbRequest.setFieldToSortBy(selectedFilterOpt.getValue(), b);
224 pdbRequest.setWantedFields(wantedFields);
225 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
226 pdbRequest.setAssociatedSequence(seq);
227 resultList = pdbRestClient.executeRequest(pdbRequest);
229 lastPdbRequest = pdbRequest;
234 * FTSRestClient specific query builder to pick top ranked entry from a
235 * fetchStructuresMetaData query
238 * - seq to generate a query for
239 * @param wantedFields
240 * - fields to retrieve
241 * @param selectedFilterOpt
242 * - criterion for ranking results (e.g. resolution)
244 * - sort ascending or descending
248 public FTSRestResponse selectFirstRankedQuery(SequenceI seq,
249 Collection<FTSDataColumnI> wantedFields, String fieldToFilterBy,
250 boolean b) throws Exception
253 FTSRestResponse resultList;
254 FTSRestRequest pdbRequest = new FTSRestRequest();
255 if (fieldToFilterBy.equalsIgnoreCase("uniprot_coverage"))
257 pdbRequest.setAllowEmptySeq(false);
258 pdbRequest.setResponseSize(1);
259 pdbRequest.setFieldToSearchBy("(");
260 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
261 pdbRequest.setWantedFields(wantedFields);
262 pdbRequest.setAssociatedSequence(seq);
263 pdbRequest.setFacet(true);
264 pdbRequest.setFacetPivot(fieldToFilterBy + ",entry_entity");
265 pdbRequest.setFacetPivotMinCount(1);
269 pdbRequest.setAllowEmptySeq(false);
270 pdbRequest.setResponseSize(1);
271 pdbRequest.setFieldToSearchBy("(");
272 pdbRequest.setFieldToSortBy(fieldToFilterBy, b);
273 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
274 pdbRequest.setWantedFields(wantedFields);
275 pdbRequest.setAssociatedSequence(seq);
277 resultList = pdbRestClient.executeRequest(pdbRequest);
279 lastPdbRequest = pdbRequest;
285 public PDBEntry[] collectSelectedRows(JTable restable, int[] selectedRows,
286 List<SequenceI> selectedSeqsToView)
288 int refSeqColIndex = restable.getColumn("Ref Sequence")
291 PDBEntry[] pdbEntriesToView=new PDBEntry[selectedRows.length];
293 int idColumnIndex=-1;
294 boolean fromTDB=true;
295 idColumnIndex = restable.getColumn("PDB Id").getModelIndex();
297 for (int row : selectedRows)
300 String pdbIdStr = restable.getValueAt(row,idColumnIndex)
302 SequenceI selectedSeq = (SequenceI) restable.getValueAt(row,
304 selectedSeqsToView.add(selectedSeq);
305 PDBEntry pdbEntry = selectedSeq.getPDBEntry(pdbIdStr);
306 if (pdbEntry == null)
308 pdbEntry = getFindEntry(pdbIdStr,
309 selectedSeq.getAllPDBEntries());
312 if (pdbEntry == null)
314 pdbEntry = new PDBEntry();
315 pdbEntry.setId(pdbIdStr);
316 pdbEntry.setType(PDBEntry.Type.MMCIF);
317 selectedSeq.getDatasetSequence().addPDBId(pdbEntry);
319 pdbEntriesToView[count++] = pdbEntry;
321 return pdbEntriesToView;