3 import java.util.Collection;
4 import java.util.HashSet;
5 import java.util.LinkedHashSet;
7 import java.util.Objects;
10 import javax.swing.table.TableModel;
12 import jalview.datamodel.DBRefEntry;
13 import jalview.datamodel.DBRefSource;
14 import jalview.datamodel.PDBEntry;
15 import jalview.datamodel.SequenceI;
16 import jalview.fts.api.FTSData;
17 import jalview.fts.api.FTSDataColumnI;
18 import jalview.fts.api.FTSRestClientI;
19 import jalview.fts.core.FTSRestRequest;
20 import jalview.fts.core.FTSRestResponse;
21 import jalview.fts.service.pdb.PDBFTSRestClient;
22 import jalview.jbgui.GStructureChooser.FilterOption;
25 * logic for querying sources of structural data for structures of sequences
31 public class StructureChooserQuerySource
33 private FTSRestRequest lastPdbRequest;
35 private FTSRestClientI pdbRestClient;
37 private static int MAX_QLENGTH = 7820;
39 public StructureChooserQuerySource()
43 public static StructureChooserQuerySource getPDBfts()
45 StructureChooserQuerySource pdbfts = new StructureChooserQuerySource();
46 pdbfts.pdbRestClient = PDBFTSRestClient.getInstance();
51 * Builds a query string for a given sequences using its DBRef entries
54 * the sequences to build a query for
55 * @return the built query string
58 String buildQuery(SequenceI seq)
60 boolean isPDBRefsFound = false;
61 boolean isUniProtRefsFound = false;
62 StringBuilder queryBuilder = new StringBuilder();
63 Set<String> seqRefs = new LinkedHashSet<>();
66 * note PDBs as DBRefEntry so they are not duplicated in query
68 Set<String> pdbids = new HashSet<>();
70 if (seq.getAllPDBEntries() != null
71 && queryBuilder.length() < MAX_QLENGTH)
73 for (PDBEntry entry : seq.getAllPDBEntries())
75 if (isValidSeqName(entry.getId()))
77 String id = entry.getId().toLowerCase();
78 queryBuilder.append("pdb_id:").append(id).append(" OR ");
79 isPDBRefsFound = true;
85 List<DBRefEntry> refs = seq.getDBRefs();
86 if (refs != null && refs.size() != 0)
88 for (int ib = 0, nb = refs.size(); ib < nb; ib++)
90 DBRefEntry dbRef = refs.get(ib);
91 if (isValidSeqName(getDBRefId(dbRef))
92 && queryBuilder.length() < MAX_QLENGTH)
94 if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT))
96 queryBuilder.append("uniprot_accession:")
97 .append(getDBRefId(dbRef)).append(" OR ");
98 queryBuilder.append("uniprot_id:").append(getDBRefId(dbRef))
100 isUniProtRefsFound = true;
102 else if (dbRef.getSource().equalsIgnoreCase(DBRefSource.PDB))
105 String id = getDBRefId(dbRef).toLowerCase();
106 if (!pdbids.contains(id))
108 queryBuilder.append("pdb_id:").append(id).append(" OR ");
109 isPDBRefsFound = true;
115 seqRefs.add(getDBRefId(dbRef));
121 if (!isPDBRefsFound && !isUniProtRefsFound)
123 String seqName = seq.getName();
124 seqName = sanitizeSeqName(seqName);
125 String[] names = seqName.toLowerCase().split("\\|");
126 for (String name : names)
128 // System.out.println("Found name : " + name);
130 if (isValidSeqName(name))
136 for (String seqRef : seqRefs)
138 queryBuilder.append("text:").append(seqRef).append(" OR ");
142 int endIndex = queryBuilder.lastIndexOf(" OR ");
143 if (queryBuilder.toString().length() < 6)
147 String query = queryBuilder.toString().substring(0, endIndex);
152 * Remove the following special characters from input string +, -, &, !, (, ),
153 * {, }, [, ], ^, ", ~, *, ?, :, \
158 static String sanitizeSeqName(String seqName)
160 Objects.requireNonNull(seqName);
161 return seqName.replaceAll("\\[\\d*\\]", "")
162 .replaceAll("[^\\dA-Za-z|_]", "").replaceAll("\\s+", "+");
166 * Ensures sequence ref names are not less than 3 characters and does not
167 * contain a database name
172 static boolean isValidSeqName(String seqName)
174 // System.out.println("seqName : " + seqName);
175 String ignoreList = "pdb,uniprot,swiss-prot";
176 if (seqName.length() < 3)
180 if (seqName.contains(":"))
184 seqName = seqName.toLowerCase();
185 for (String ignoredEntry : ignoreList.split(","))
187 if (seqName.contains(ignoredEntry))
195 static String getDBRefId(DBRefEntry dbRef)
197 String ref = dbRef.getAccessionId().replaceAll("GO:", "");
202 * FTSRestClient specific query builder to recover associated structure data
203 * records for a sequence
206 * - seq to generate a query for
207 * @param wantedFields
208 * - fields to retrieve
209 * @param selectedFilterOpt
210 * - criterion for ranking results (e.g. resolution)
212 * - sort ascending or descending
216 public FTSRestResponse fetchStructuresMetaData(SequenceI seq,
217 Collection<FTSDataColumnI> wantedFields,
218 FilterOption selectedFilterOpt, boolean b) throws Exception
220 FTSRestResponse resultList;
221 FTSRestRequest pdbRequest = new FTSRestRequest();
222 pdbRequest.setAllowEmptySeq(false);
223 pdbRequest.setResponseSize(500);
224 pdbRequest.setFieldToSearchBy("(");
225 pdbRequest.setFieldToSortBy(selectedFilterOpt.getValue(), b);
226 pdbRequest.setWantedFields(wantedFields);
227 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
228 pdbRequest.setAssociatedSequence(seq);
229 resultList = pdbRestClient.executeRequest(pdbRequest);
231 lastPdbRequest = pdbRequest;
236 * FTSRestClient specific query builder to pick top ranked entry from a
237 * fetchStructuresMetaData query
240 * - seq to generate a query for
241 * @param wantedFields
242 * - fields to retrieve
243 * @param selectedFilterOpt
244 * - criterion for ranking results (e.g. resolution)
246 * - sort ascending or descending
250 public FTSRestResponse selectFirstRankedQuery(SequenceI seq,
251 Collection<FTSDataColumnI> wantedFields, String fieldToFilterBy,
252 boolean b) throws Exception
255 FTSRestResponse resultList;
256 FTSRestRequest pdbRequest = new FTSRestRequest();
257 if (fieldToFilterBy.equalsIgnoreCase("uniprot_coverage"))
259 pdbRequest.setAllowEmptySeq(false);
260 pdbRequest.setResponseSize(1);
261 pdbRequest.setFieldToSearchBy("(");
262 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
263 pdbRequest.setWantedFields(wantedFields);
264 pdbRequest.setAssociatedSequence(seq);
265 pdbRequest.setFacet(true);
266 pdbRequest.setFacetPivot(fieldToFilterBy + ",entry_entity");
267 pdbRequest.setFacetPivotMinCount(1);
271 pdbRequest.setAllowEmptySeq(false);
272 pdbRequest.setResponseSize(1);
273 pdbRequest.setFieldToSearchBy("(");
274 pdbRequest.setFieldToSortBy(fieldToFilterBy, b);
275 pdbRequest.setSearchTerm(buildQuery(seq) + ")");
276 pdbRequest.setWantedFields(wantedFields);
277 pdbRequest.setAssociatedSequence(seq);
279 resultList = pdbRestClient.executeRequest(pdbRequest);
281 lastPdbRequest = pdbRequest;
285 public TableModel getTableModel(
286 Collection<FTSData> discoveredStructuresSet)
288 return FTSRestResponse.getTableModel(lastPdbRequest,
289 discoveredStructuresSet);