package jalview.gui.structurechooser;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

import javax.swing.JTable;

import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.PDBEntry;
import jalview.datamodel.SequenceI;
import jalview.fts.api.FTSData;
import jalview.fts.api.FTSDataColumnI;
import jalview.fts.api.FTSRestClientI;
import jalview.fts.core.FTSDataColumnPreferences;
import jalview.fts.core.FTSDataColumnPreferences.PreferenceSource;
import jalview.fts.core.FTSRestRequest;
import jalview.fts.core.FTSRestResponse;
import jalview.fts.service.threedbeacons.TDBeaconsFTSRestClient;
import jalview.jbgui.FilterOption;
import jalview.util.MessageManager;

/**
 * Logic for querying the 3D-Beacons API for structures of sequences.
 * 
 * @author jprocter
 */
public class ThreeDBStructureChooserQuerySource
        extends StructureChooserQuerySource
{
  /**
   * Model categories - update as needed. A warning is printed when an unknown
   * category is encountered.
   * 
   * Order denotes 'trust': the first entry is the most trusted.
   */
  private static final List<String> EXP_CATEGORIES = Arrays.asList(
          "EXPERIMENTALLY DETERMINED", "DEEP LEARNING", "TEMPLATE-BASED");

  /**
   * Lower-case fragments that mark a reference id as a database name rather
   * than an accession.
   */
  private static final String[] IGNORED_NAME_FRAGMENTS = { "pdb", "uniprot",
      "swiss-prot" };

  /** The most recent 3D-Beacons request that was actually executed. */
  protected FTSRestRequest lastTdbRequest;

  protected FTSRestClientI tdbRestClient;

  /** The most recent PDBe request issued by fetchStructuresMetaDataFor. */
  private FTSRestRequest lastPdbRequest;

  public ThreeDBStructureChooserQuerySource()
  {
    tdbRestClient = TDBeaconsFTSRestClient.getInstance();
    docFieldPrefs = new FTSDataColumnPreferences(
            PreferenceSource.STRUCTURE_CHOOSER,
            TDBeaconsFTSRestClient.getInstance());
  }

  /**
   * Builds a query string for a given sequence using its DBRef entries. 3D
   * Beacons is only useful for Uniprot IDs, so the query is the accession of
   * the first canonical Uniprot reference that has a valid id.
   * 
   * @param seq
   *          the sequence to build a query for
   * @return the accession to query, or null if the sequence has no usable
   *         canonical Uniprot reference
   */
  public String buildQuery(SequenceI seq)
  {
    List<DBRefEntry> refs = seq.getDBRefs();
    if (refs == null)
    {
      return null;
    }
    for (int ib = 0, nb = refs.size(); ib < nb; ib++)
    {
      DBRefEntry dbRef = refs.get(ib);
      String accession = getDBRefId(dbRef);
      // TODO: pick best Uniprot accession
      // constant-first comparison so a reference without a source is skipped
      // rather than raising a NullPointerException
      if (isValidSeqName(accession)
              && DBRefSource.UNIPROT.equalsIgnoreCase(dbRef.getSource())
              && dbRef.isCanonical())
      {
        return accession;
      }
    }
    return null;
  }

  /**
   * Ensures a sequence ref name is at least 3 characters long, has no ':' and
   * does not contain a database name.
   * 
   * @param seqName
   *          the name to check (may be null)
   * @return true if the name looks like a usable accession; false otherwise
   *         (including for null)
   */
  static boolean isValidSeqName(String seqName)
  {
    if (seqName == null || seqName.length() < 3 || seqName.contains(":"))
    {
      return false;
    }
    // Locale.ROOT keeps the match independent of the user's default locale
    String lowered = seqName.toLowerCase(Locale.ROOT);
    for (String ignoredEntry : IGNORED_NAME_FRAGMENTS)
    {
      if (lowered.contains(ignoredEntry))
      {
        return false;
      }
    }
    return true;
  }

  /**
   * Returns the accession id of a database reference with every occurrence of
   * "GO:" removed.
   * 
   * @param dbRef
   *          the reference to read
   * @return the cleaned accession, or null if the reference has no accession
   */
  static String getDBRefId(DBRefEntry dbRef)
  {
    String accession = dbRef.getAccessionId();
    return accession == null ? null : accession.replace("GO:", "");
  }

  /**
   * FTSRestClient specific query builder to recover associated structure data
   * records for a sequence.
   * 
   * @param seq
   *          - seq to generate a query for
   * @param wantedFields
   *          - fields to retrieve
   * @param selectedFilterOpt
   *          - criterion for ranking results (e.g. resolution); not used by
   *          this implementation
   * @param b
   *          - sort ascending or descending; not used by this implementation
   * @return the response from the 3D-Beacons client, or null when no query
   *         could be built for the sequence (in which case nothing is sent
   *         and the last recorded request is left untouched)
   * @throws Exception
   */
  public FTSRestResponse fetchStructuresMetaData(SequenceI seq,
          Collection<FTSDataColumnI> wantedFields,
          FilterOption selectedFilterOpt, boolean b) throws Exception
  {
    FTSRestRequest tdbRequest = getTDBeaconsRequest(seq, wantedFields);
    if (tdbRequest == null)
    {
      // no canonical Uniprot accession, so there is nothing to ask for
      return null;
    }
    FTSRestResponse resultList = tdbRestClient.executeRequest(tdbRequest);
    lastTdbRequest = tdbRequest;
    return resultList;
  }

  /**
   * Creates the 3D-Beacons request for a sequence.
   * 
   * @param seq
   *          the sequence to query for
   * @param wantedFields
   *          fields to retrieve
   * @return the request, or null if buildQuery yields no query for seq
   */
  private FTSRestRequest getTDBeaconsRequest(SequenceI seq,
          Collection<FTSDataColumnI> wantedFields)
  {
    String query = buildQuery(seq);
    if (query == null)
    {
      return null;
    }
    FTSRestRequest tdbRequest = new FTSRestRequest();
    tdbRequest.setAllowEmptySeq(false);
    tdbRequest.setResponseSize(500);
    tdbRequest.setWantedFields(wantedFields);
    tdbRequest.setSearchTerm(query + ".json");
    tdbRequest.setAssociatedSequence(seq);
    return tdbRequest;
  }

  /**
   * Lists the filter options offered for 3D-Beacons results.
   * 
   * @param VIEWS_FILTER
   *          the view name passed through to every option
   * @return a new, modifiable list of filter options
   */
  @Override
  public List<FilterOption> getAvailableFilterOptions(String VIEWS_FILTER)
  {
    List<FilterOption> filters = new ArrayList<>();
    filters.add(
            new FilterOption(MessageManager.getString("label.best_quality"),
                    "overall_quality", VIEWS_FILTER, false));
    filters.add(new FilterOption(
            MessageManager.getString("label.best_resolution"),
            "resolution", VIEWS_FILTER, false));
    filters.add(new FilterOption(
            MessageManager.getString("label.most_protein_chain"),
            "number_of_protein_chains", VIEWS_FILTER, false));
    filters.add(new FilterOption(
            MessageManager.getString("label.most_bound_molecules"),
            "number_of_bound_molecules", VIEWS_FILTER, false));
    filters.add(new FilterOption(
            MessageManager.getString("label.most_polymer_residues"),
            "number_of_polymer_residues", VIEWS_FILTER, true));
    return filters;
  }

  /**
   * Maps a model category to its trust rank: 0 is the most trusted category.
   * Unknown (or missing) categories are reported on stdout and ranked after
   * every known category, so they can never outrank a recognised one.
   * 
   * @param cat
   *          the category label from a result row (may be null)
   * @return index into EXP_CATEGORIES, or EXP_CATEGORIES.size() if unknown
   */
  private static int categoryRank(String cat)
  {
    int idx = (cat == null) ? -1
            : EXP_CATEGORIES.indexOf(cat.toUpperCase(Locale.ROOT));
    if (idx == -1)
    {
      System.out.println("Unknown category: '" + cat + "'");
      return EXP_CATEGORIES.size();
    }
    return idx;
  }

  /**
   * Orders result rows by: category trust (most trusted first), then
   * increasing Uniprot start, then increasing covered extent (end - start),
   * and finally by increasing resolution for the most trusted category or
   * increasing Qmean for every other category.
   * 
   * NOTE(review): an earlier comment described the intent as 'decreasing
   * length, increasing start'; the extent ordering implemented here is
   * increasing - confirm which is wanted.
   */
  private static final class TrustRankComparator
          implements Comparator<FTSData>
  {
    private final int startIdx;

    private final int endIdx;

    private final int categoryIdx;

    private final int qmeanIdx;

    private final int resolutionIdx;

    TrustRankComparator(int startIdx, int endIdx, int categoryIdx,
            int qmeanIdx, int resolutionIdx)
    {
      this.startIdx = startIdx;
      this.endIdx = endIdx;
      this.categoryIdx = categoryIdx;
      this.qmeanIdx = qmeanIdx;
      this.resolutionIdx = resolutionIdx;
    }

    @Override
    public int compare(FTSData o1, FTSData o2)
    {
      Object[] d1 = o1.getSummaryData();
      Object[] d2 = o2.getSummaryData();

      int o1_s = (Integer) d1[startIdx];
      int o1_e = (Integer) d1[endIdx];
      int o1_rank = categoryRank((String) d1[categoryIdx]);
      int o2_s = (Integer) d2[startIdx];
      int o2_e = (Integer) d2[endIdx];
      int o2_rank = categoryRank((String) d2[categoryIdx]);

      if (o1_rank != o2_rank)
      {
        return Integer.compare(o1_rank, o2_rank);
      }
      if (o1_s != o2_s)
      {
        return Integer.compare(o1_s, o2_s);
      }
      int o1_xtent = o1_e - o1_s;
      int o2_xtent = o2_e - o2_s;
      if (o1_xtent != o2_xtent)
      {
        return Integer.compare(o1_xtent, o2_xtent);
      }
      if (o1_rank == 0)
      {
        // experimental structures, so rank on quality
        // Double.compare keeps the ordering consistent even for NaN
        return Double.compare((Double) d1[resolutionIdx],
                (Double) d2[resolutionIdx]);
      }
      // models, so rank on qmean
      return Float.compare((Float) d1[qmeanIdx], (Float) d2[qmeanIdx]);
    }
  }

  /**
   * FTSRestClient specific query builder to pick top ranked entries from a
   * fetchStructuresMetaData query. Rows are kept only if their first summary
   * column is the given sequence (same object) and their Uniprot range
   * overlaps the sequence's start/end; the survivors are then sorted with
   * TrustRankComparator. Field positions are taken from the last executed
   * 3D-Beacons request.
   * 
   * @param seq
   *          - seq the results must belong to
   * @param collectedResults
   *          - rows to filter and rank
   * @param wantedFields
   *          - fields to retrieve; not used by this implementation
   * @param fieldToFilterBy
   *          - criterion for ranking results; not used by this implementation
   * @param b
   *          - sort ascending or descending; not used by this implementation
   * @return a response holding the filtered, sorted rows and their count
   * @throws Exception
   */
  public FTSRestResponse selectFirstRankedQuery(SequenceI seq,
          Collection<FTSData> collectedResults,
          Collection<FTSDataColumnI> wantedFields, String fieldToFilterBy,
          boolean b) throws Exception
  {
    final int idx_ups = lastTdbRequest.getFieldIndex("Uniprot Start");
    final int idx_upe = lastTdbRequest.getFieldIndex("Uniprot End");
    final int idx_mcat = lastTdbRequest.getFieldIndex("Model Category");
    final int idx_mqual = lastTdbRequest.getFieldIndex("Qmean");
    final int idx_resol = lastTdbRequest.getFieldIndex("Resolution");

    // ignore anything outside the sequence region
    List<FTSData> filteredResponse = new ArrayList<>();
    for (FTSData row : collectedResults)
    {
      Object[] summary = row.getSummaryData();
      int up_s = (Integer) summary[idx_ups];
      int up_e = (Integer) summary[idx_upe];
      // identity comparison: the row must refer to this very sequence object
      if (seq == summary[0] && up_e > seq.getStart()
              && up_s < seq.getEnd())
      {
        filteredResponse.add(row);
      }
    }

    Collections.sort(filteredResponse, new TrustRankComparator(idx_ups,
            idx_upe, idx_mcat, idx_mqual, idx_resol));

    FTSRestResponse resultList = new FTSRestResponse();
    resultList.setNumberOfItemsFound(filteredResponse.size());
    resultList.setSearchSummary(filteredResponse);
    return resultList;
  }

  /**
   * Resolves the selected table rows to PDB entries, creating and attaching a
   * new entry to the row's dataset sequence when none exists yet. Each row's
   * reference sequence is also appended to selectedSeqsToView.
   * 
   * @param restable
   *          the results table; must have "Ref Sequence", "Model id", "Url"
   *          and "Provider" columns
   * @param selectedRows
   *          row indices to collect
   * @param selectedSeqsToView
   *          list that receives the reference sequence of every row
   * @return one PDB entry per selected row, in row order
   */
  @Override
  public PDBEntry[] collectSelectedRows(JTable restable,
          int[] selectedRows, List<SequenceI> selectedSeqsToView)
  {
    int refSeqColIndex = restable.getColumn("Ref Sequence")
            .getModelIndex();
    int idColumnIndex = restable.getColumn("Model id").getModelIndex();
    int urlColumnIndex = restable.getColumn("Url").getModelIndex();
    int typeColumnIndex = restable.getColumn("Provider").getModelIndex();

    PDBEntry[] pdbEntriesToView = new PDBEntry[selectedRows.length];
    int count = 0;
    for (int row : selectedRows)
    {
      // unique id - could be a horrible hash
      String pdbIdStr = restable.getValueAt(row, idColumnIndex).toString();
      String urlStr = restable.getValueAt(row, urlColumnIndex).toString();
      String typeColumn = restable.getValueAt(row, typeColumnIndex)
              .toString();
      SequenceI selectedSeq = (SequenceI) restable.getValueAt(row,
              refSeqColIndex);
      selectedSeqsToView.add(selectedSeq);

      PDBEntry pdbEntry = selectedSeq.getPDBEntry(pdbIdStr);
      if (pdbEntry == null)
      {
        pdbEntry = getFindEntry(pdbIdStr, selectedSeq.getAllPDBEntries());
      }
      if (pdbEntry == null)
      {
        pdbEntry = new PDBEntry();
        pdbEntry.setId(pdbIdStr);
        pdbEntry.setType(PDBEntry.Type.MMCIF);
        // only non-PDBe providers get an explicit retrieval URL
        if (!"PDBe".equalsIgnoreCase(typeColumn))
        {
          pdbEntry.setRetrievalUrl(urlStr);
        }
        selectedSeq.getDatasetSequence().addPDBId(pdbEntry);
      }
      pdbEntriesToView[count++] = pdbEntry;
    }
    return pdbEntriesToView;
  }

  @Override
  protected FTSRestRequest getLastFTSRequest()
  {
    return lastTdbRequest;
  }

  /**
   * Generates a query for PDBFTS to retrieve structure metadata: the model
   * ids of all rows whose provider is PDBe, joined with " OR ".
   * 
   * @param upResponse
   *          a 3D-Beacons response whose rows follow the layout of the last
   *          executed 3D-Beacons request
   * @return the joined ids; an empty string if no row comes from PDBe
   */
  public String buildPDBFTSQueryFor(FTSRestResponse upResponse)
  {
    List<String> pdbIds = new ArrayList<>();
    int idx_modelId = getLastFTSRequest().getFieldIndex("Model id");
    int idx_provider = getLastFTSRequest().getFieldIndex("Provider");
    for (FTSData row : upResponse.getSearchSummary())
    {
      String id = (String) row.getSummaryData()[idx_modelId];
      String provider = (String) row.getSummaryData()[idx_provider];
      if ("PDBe".equalsIgnoreCase(provider))
      {
        pdbIds.add(id);
      }
    }
    return String.join(" OR ", pdbIds);
  }

  /**
   * Queries PDBe for structure metadata of the PDBe models in a 3D-Beacons
   * response, and records the request as the last PDBe request.
   * 
   * NOTE(review): when upResponse has no PDBe rows the search field "(" and
   * the term ")" are still sent - confirm the PDBe client copes with that.
   * 
   * @param pdbquery
   *          query source used to pick the wanted fields and run the request
   * @param upResponse
   *          the 3D-Beacons response to look up
   * @return FTSRestResponse via PDBStructureChooserQuerySource
   * @throws Exception
   */
  public FTSRestResponse fetchStructuresMetaDataFor(
          PDBStructureChooserQuerySource pdbquery,
          FTSRestResponse upResponse) throws Exception
  {
    String pdb_Query = buildPDBFTSQueryFor(upResponse);

    FTSRestRequest pdbRequest = new FTSRestRequest();
    pdbRequest.setAllowEmptySeq(false);
    pdbRequest.setResponseSize(500);
    // "(" as the field and ")" on the term presumably wrap the OR-list in
    // parentheses once the client concatenates them - verify against client
    pdbRequest.setFieldToSearchBy("(");
    pdbRequest.setWantedFields(
            pdbquery.getDocFieldPrefs().getStructureSummaryFields());
    pdbRequest.setSearchTerm(pdb_Query + ")");

    FTSRestResponse resultList = pdbquery
            .executePDBFTSRestRequest(pdbRequest);
    lastPdbRequest = pdbRequest;
    return resultList;
  }

  /**
   * Intended to join a 3D-Beacons response with PDBe metadata on model id.
   * Not implemented yet: matching rows are located but nothing is collected.
   * 
   * @param upResponse
   *          the 3D-Beacons response
   * @param pdbResponse
   *          the PDBe response for the same models
   * @return always null
   */
  public FTSRestResponse joinResponses(FTSRestResponse upResponse,
          FTSRestResponse pdbResponse)
  {
    int idx_provider = getLastFTSRequest().getFieldIndex("Provider");
    // join on
    int idx_modelId = getLastFTSRequest().getFieldIndex("Model id");
    int pdbIdx = lastPdbRequest.getFieldIndex("pdb_id");
    for (FTSData row : upResponse.getSearchSummary())
    {
      String id = (String) row.getSummaryData()[idx_modelId];
      String provider = (String) row.getSummaryData()[idx_provider];
      if ("PDBe".equalsIgnoreCase(provider))
      {
        for (FTSData pdbrow : pdbResponse.getSearchSummary())
        {
          String pdbid = (String) pdbrow.getSummaryData()[pdbIdx];
          if (id.equalsIgnoreCase(pdbid))
          {
            // often multiple entries per PDB ID so we bail after first
            // get wanted fields
            // append to FTSRestResponse array
          }
        }
      }
    }
    // TODO Auto-generated method stub
    return null;
  }
}