3 import java.util.Collection;
4 import java.util.HashSet;
5 import java.util.LinkedHashSet;
7 import java.util.Objects;
10 import javax.swing.table.TableModel;
12 import jalview.datamodel.DBRefEntry;
13 import jalview.datamodel.DBRefSource;
14 import jalview.datamodel.PDBEntry;
15 import jalview.datamodel.SequenceI;
16 import jalview.fts.api.FTSData;
17 import jalview.fts.api.FTSDataColumnI;
18 import jalview.fts.api.FTSRestClientI;
19 import jalview.fts.core.FTSDataColumnPreferences;
20 import jalview.fts.core.FTSDataColumnPreferences.PreferenceSource;
21 import jalview.fts.core.FTSRestRequest;
22 import jalview.fts.core.FTSRestResponse;
23 import jalview.fts.service.pdb.PDBFTSRestClient;
24 import jalview.jbgui.GStructureChooser.FilterOption;
27 * logic for querying sources of structural data for structures of sequences
33 public class StructureChooserQuerySource
// Most recent request built by fetchStructuresMetaData / selectFirstRankedQuery;
// getTableModel reads it.
35 private FTSRestRequest lastPdbRequest;
// REST client used to execute the structure queries; assigned in getPDBfts().
37 private FTSRestClientI pdbRestClient;
// Column preferences exposed through getDocFieldPrefs / setDocFieldPrefs.
39 private FTSDataColumnPreferences docFieldPrefs;
41 private static int MAX_QLENGTH = 7820;
// Public no-argument constructor; getPDBfts() in this class instantiates through it.
// NOTE(review): the constructor body is not visible in this view.
43 public StructureChooserQuerySource()
// Static factory: creates a StructureChooserQuerySource and wires it to the
// PDB FTS REST client.
47 public static StructureChooserQuerySource getPDBfts()
49 StructureChooserQuerySource pdbfts = new StructureChooserQuerySource();
// Queries issued by the new object go through the PDBFTSRestClient instance.
50 pdbfts.pdbRestClient = PDBFTSRestClient.getInstance();
// Column preferences are created for the STRUCTURE_CHOOSER preference source,
// again against PDBFTSRestClient.getInstance().
51 pdbfts.docFieldPrefs = new FTSDataColumnPreferences(
52 PreferenceSource.STRUCTURE_CHOOSER,
53 PDBFTSRestClient.getInstance());
// NOTE(review): the return statement is not visible in this view; presumably
// pdbfts is returned — confirm against the full file.
// Accessor for the column preferences of this query source.
// NOTE(review): the method body is not visible in this view; presumably it
// returns the docFieldPrefs field — confirm.
57 public FTSDataColumnPreferences getDocFieldPrefs()
// Replaces the column preferences held by this query source with the supplied object.
62 public void setDocFieldPrefs(FTSDataColumnPreferences docFieldPrefs)
// The parameter shadows the field, hence the explicit this-qualification.
64 this.docFieldPrefs = docFieldPrefs;
68 * Builds a query string for a given sequence using its PDB entries and DBRef entries
71 * the sequence to build a query for
72 * @return the built query string
// Assembles an " OR "-separated search expression for seq from its PDB entries
// and database references; when neither a PDB nor a UniProt term was produced,
// terms derived from the sequence name are used instead.
// NOTE(review): several interior lines of this method (including where pdbids
// is filled and the return statements) are not visible in this view — confirm
// the details against the full file.
75 String buildQuery(SequenceI seq)
// Flags recording whether any PDB / UniProt reference contributed a term; they
// gate the name-based fallback further down.
77 boolean isPDBRefsFound = false;
78 boolean isUniProtRefsFound = false;
79 StringBuilder queryBuilder = new StringBuilder();
// Insertion-ordered set of free-text candidates, emitted later as "text:" terms.
80 Set<String> seqRefs = new LinkedHashSet<>();
83 * note PDBs as DBRefEntry so they are not duplicated in query
85 Set<String> pdbids = new HashSet<>();
// NOTE(review): queryBuilder is still empty when this condition is evaluated and
// the length check sits outside the loop, so it does not cap the number of
// pdb_id terms appended from the PDB entries.
87 if (seq.getAllPDBEntries() != null
88 && queryBuilder.length() < MAX_QLENGTH)
90 for (PDBEntry entry : seq.getAllPDBEntries())
92 if (isValidSeqName(entry.getId()))
// PDB ids are lower-cased before being written as pdb_id terms.
94 String id = entry.getId().toLowerCase();
95 queryBuilder.append("pdb_id:").append(id).append(" OR ");
96 isPDBRefsFound = true;
// Second source of terms: the database cross-references of the sequence.
102 List<DBRefEntry> refs = seq.getDBRefs();
103 if (refs != null && refs.size() != 0)
105 for (int ib = 0, nb = refs.size(); ib < nb; ib++)
107 DBRefEntry dbRef = refs.get(ib);
// A ref is considered only if its id passes isValidSeqName and the query built
// so far is still shorter than MAX_QLENGTH.
108 if (isValidSeqName(getDBRefId(dbRef))
109 && queryBuilder.length() < MAX_QLENGTH)
111 if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT))
// A UniProt ref is searched both as an accession and as an id.
113 queryBuilder.append("uniprot_accession:")
114 .append(getDBRefId(dbRef)).append(" OR ");
115 queryBuilder.append("uniprot_id:").append(getDBRefId(dbRef))
117 isUniProtRefsFound = true;
119 else if (dbRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
// A PDB ref contributes a term only if its lower-cased id is not already in pdbids.
122 String id = getDBRefId(dbRef).toLowerCase();
123 if (!pdbids.contains(id))
125 queryBuilder.append("pdb_id:").append(id).append(" OR ");
126 isPDBRefsFound = true;
// NOTE(review): presumably the branch for refs that are neither UniProt nor PDB;
// the enclosing else is not visible in this view.
132 seqRefs.add(getDBRefId(dbRef));
// Fallback, entered only when no PDB and no UniProt term was produced.
138 if (!isPDBRefsFound && !isUniProtRefsFound)
140 String seqName = seq.getName();
// sanitizeSeqName rejects null with a NullPointerException.
141 seqName = sanitizeSeqName(seqName);
// The sanitised name is lower-cased and split on the '|' character.
142 String[] names = seqName.toLowerCase().split("\\|");
143 for (String name : names)
145 // System.out.println("Found name : " + name);
147 if (isValidSeqName(name))
// Every collected seqRef becomes a free-text term.
153 for (String seqRef : seqRefs)
155 queryBuilder.append("text:").append(seqRef).append(" OR ");
// Position of the last " OR " separator; used below to cut the query just before it.
159 int endIndex = queryBuilder.lastIndexOf(" OR ");
// NOTE(review): the statement guarded by this length check is not visible here
// (presumably an early return for an effectively empty query) — confirm.
160 if (queryBuilder.toString().length() < 6)
// substring(0, endIndex) keeps everything before the last " OR ".
164 String query = queryBuilder.toString().substring(0, endIndex);
169 * Remove the following special characters from input string +, -, &, !, (, ),
170 * {, }, [, ], ^, ", ~, *, ?, :, \
175 static String sanitizeSeqName(String seqName)
177 Objects.requireNonNull(seqName);
178 return seqName.replaceAll("\\[\\d*\\]", "")
179 .replaceAll("[^\\dA-Za-z|_]", "").replaceAll("\\s+", "+");
183 * Ensures sequence ref names are not less than 3 characters and does not
184 * contain a database name
189 static boolean isValidSeqName(String seqName)
191 // System.out.println("seqName : " + seqName);
192 String ignoreList = "pdb,uniprot,swiss-prot";
193 if (seqName.length() < 3)
197 if (seqName.contains(":"))
201 seqName = seqName.toLowerCase();
202 for (String ignoredEntry : ignoreList.split(","))
204 if (seqName.contains(ignoredEntry))
212 static String getDBRefId(DBRefEntry dbRef)
214 String ref = dbRef.getAccessionId().replaceAll("GO:", "");
219 * FTSRestClient specific query builder to recover associated structure data
220 * records for a sequence
223 * - seq to generate a query for
224 * @param wantedFields
225 * - fields to retrieve
226 * @param selectedFilterOpt
227 * - criterion for ranking results (e.g. resolution)
229 * - sort ascending or descending
// Builds an FTSRestRequest for seq, executes it through pdbRestClient and
// remembers the request in lastPdbRequest. The boolean b is forwarded as the
// second argument of setFieldToSortBy.
233 public FTSRestResponse fetchStructuresMetaData(SequenceI seq,
234 Collection<FTSDataColumnI> wantedFields,
235 FilterOption selectedFilterOpt, boolean b) throws Exception
237 FTSRestResponse resultList;
238 FTSRestRequest pdbRequest = new FTSRestRequest();
239 pdbRequest.setAllowEmptySeq(false);
// Response size is set to 500 for this query.
240 pdbRequest.setResponseSize(500);
// The search expression is parenthesised: "(" is supplied as the
// field-to-search-by and the closing ")" is appended to the search term below.
241 pdbRequest.setFieldToSearchBy("(");
242 pdbRequest.setFieldToSortBy(selectedFilterOpt.getValue(), b);
243 pdbRequest.setWantedFields(wantedFields);
// NOTE(review): if buildQuery(seq) can yield null, this concatenation produces
// the literal text "null)" — confirm how an empty query is meant to be handled.
244 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
245 pdbRequest.setAssociatedSequence(seq);
246 resultList = pdbRestClient.executeRequest(pdbRequest);
// Remember the request; getTableModel passes it to FTSRestResponse.getTableModel.
248 lastPdbRequest = pdbRequest;
// NOTE(review): the return statement is not visible in this view; presumably
// resultList is returned — confirm.
253 * FTSRestClient specific query builder to pick top ranked entry from a
254 * fetchStructuresMetaData query
257 * - seq to generate a query for
258 * @param wantedFields
259 * - fields to retrieve
260 * @param fieldToFilterBy
261 * - criterion for ranking results (e.g. resolution)
263 * - sort ascending or descending
// Builds a request with response size 1 for seq, executes it through
// pdbRestClient and remembers the request in lastPdbRequest. Two request
// variants are visible: a facet-pivot variant for "uniprot_coverage" and a
// sorted variant that forwards b to setFieldToSortBy.
267 public FTSRestResponse selectFirstRankedQuery(SequenceI seq,
268 Collection<FTSDataColumnI> wantedFields, String fieldToFilterBy,
269 boolean b) throws Exception
272 FTSRestResponse resultList;
273 FTSRestRequest pdbRequest = new FTSRestRequest();
// "uniprot_coverage" (compared ignoring case) is handled with a facet pivot
// instead of a sort field.
274 if (fieldToFilterBy.equalsIgnoreCase("uniprot_coverage"))
276 pdbRequest.setAllowEmptySeq(false);
277 pdbRequest.setResponseSize(1);
// As in fetchStructuresMetaData, the query is wrapped in parentheses via the
// "(" search field and the ")" appended to the search term.
278 pdbRequest.setFieldToSearchBy("(");
279 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
280 pdbRequest.setWantedFields(wantedFields);
281 pdbRequest.setAssociatedSequence(seq);
// Faceting is enabled, pivoting on "<fieldToFilterBy>,entry_entity" with a
// minimum pivot count of 1.
282 pdbRequest.setFacet(true);
283 pdbRequest.setFacetPivot(fieldToFilterBy + ",entry_entity");
284 pdbRequest.setFacetPivotMinCount(1);
// NOTE(review): the lines separating this group from the one above are not
// visible here; presumably this is the else branch. It repeats the common
// setup, adds setFieldToSortBy(fieldToFilterBy, b) and configures no facets.
288 pdbRequest.setAllowEmptySeq(false);
289 pdbRequest.setResponseSize(1);
290 pdbRequest.setFieldToSearchBy("(");
291 pdbRequest.setFieldToSortBy(fieldToFilterBy, b);
292 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
293 pdbRequest.setWantedFields(wantedFields);
294 pdbRequest.setAssociatedSequence(seq);
// Execute whichever variant was configured, then remember the request for getTableModel.
296 resultList = pdbRestClient.executeRequest(pdbRequest);
298 lastPdbRequest = pdbRequest;
// NOTE(review): the return statement is not visible in this view; presumably
// resultList is returned — confirm.
302 public TableModel getTableModel(
303 Collection<FTSData> discoveredStructuresSet)
305 return FTSRestResponse.getTableModel(lastPdbRequest,
306 discoveredStructuresSet);
309 public FTSDataColumnPreferences getInitialFieldPreferences()
311 // TODO Auto-generated method stub