package jalview.gui.structurechooser; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Set; import javax.swing.JTable; import jalview.datamodel.DBRefEntry; import jalview.datamodel.DBRefSource; import jalview.datamodel.PDBEntry; import jalview.datamodel.SequenceI; import jalview.fts.api.FTSData; import jalview.fts.api.FTSDataColumnI; import jalview.fts.api.FTSRestClientI; import jalview.fts.core.FTSDataColumnPreferences; import jalview.fts.core.FTSDataColumnPreferences.PreferenceSource; import jalview.fts.core.FTSRestRequest; import jalview.fts.core.FTSRestResponse; import jalview.fts.service.threedbeacons.TDBeaconsFTSRestClient; import jalview.jbgui.FilterOption; import jalview.util.MessageManager; /** * logic for querying the 3DBeacons API for structures of sequences * * @author jprocter */ public class ThreeDBStructureChooserQuerySource extends StructureChooserQuerySource { private static int MAX_QLENGTH = 7820; protected FTSRestRequest lastTdbRequest; protected FTSRestClientI tdbRestClient; private FTSRestRequest lastPdbRequest; public ThreeDBStructureChooserQuerySource() { tdbRestClient = TDBeaconsFTSRestClient.getInstance(); docFieldPrefs = new FTSDataColumnPreferences( PreferenceSource.STRUCTURE_CHOOSER, TDBeaconsFTSRestClient.getInstance()); } /** * Builds a query string for a given sequences using its DBRef entries 3d * Beacons is only useful for uniprot IDs * * @param seq * the sequences to build a query for * @return the built query string */ public String buildQuery(SequenceI seq) { boolean isPDBRefsFound = false; boolean isUniProtRefsFound = false; StringBuilder queryBuilder = new StringBuilder(); Set seqRefs = new LinkedHashSet<>(); /* * note PDBs as DBRefEntry so they are not duplicated in query */ Set pdbids = new HashSet<>(); List refs = seq.getDBRefs(); if (refs != null && refs.size() != 0) { for (int ib = 0, nb = refs.size(); ib < nb; ib++) { DBRefEntry dbRef = refs.get(ib); if (isValidSeqName(getDBRefId(dbRef)) && queryBuilder.length() < MAX_QLENGTH) { if (dbRef.getSource().equalsIgnoreCase(DBRefSource.UNIPROT) && dbRef.isCanonical()) { // TODO: pick best Uniprot accession isUniProtRefsFound = true; return getDBRefId(dbRef); } } } } return null; } /** * Ensures sequence ref names are not less than 3 characters and does not * contain a database name * * @param seqName * @return */ static boolean isValidSeqName(String seqName) { // System.out.println("seqName : " + seqName); String ignoreList = "pdb,uniprot,swiss-prot"; if (seqName.length() < 3) { return false; } if (seqName.contains(":")) { return false; } seqName = seqName.toLowerCase(); for (String ignoredEntry : ignoreList.split(",")) { if (seqName.contains(ignoredEntry)) { return false; } } return true; } static String getDBRefId(DBRefEntry dbRef) { String ref = dbRef.getAccessionId().replaceAll("GO:", ""); return ref; } /** * FTSRestClient specific query builder to recover associated structure data * records for a sequence * * @param seq * - seq to generate a query for * @param wantedFields * - fields to retrieve * @param selectedFilterOpt * - criterion for ranking results (e.g. resolution) * @param b * - sort ascending or descending * @return * @throws Exception */ public FTSRestResponse fetchStructuresMetaData(SequenceI seq, Collection wantedFields, FilterOption selectedFilterOpt, boolean b) throws Exception { FTSRestResponse resultList; FTSRestRequest tdbRequest = getTDBeaconsRequest(seq, wantedFields); resultList = tdbRestClient.executeRequest(tdbRequest); lastTdbRequest = tdbRequest; // Query the PDB and add additional metadata FTSRestResponse pdbResponse = fetchStructuresMetaDataFor(getPDBQuerySource(), resultList); FTSRestResponse joinedResp = joinResponses(resultList, pdbResponse); return resultList; } PDBStructureChooserQuerySource pdbQuerySource=null; private PDBStructureChooserQuerySource getPDBQuerySource() { if (pdbQuerySource==null) { pdbQuerySource = new PDBStructureChooserQuerySource(); } return pdbQuerySource; } private FTSRestRequest getTDBeaconsRequest(SequenceI seq, Collection wantedFields) { FTSRestRequest pdbRequest = new FTSRestRequest(); pdbRequest.setAllowEmptySeq(false); pdbRequest.setResponseSize(500); pdbRequest.setWantedFields(wantedFields); String query = buildQuery(seq); if (query == null) { return null; } pdbRequest.setSearchTerm(query + ".json"); pdbRequest.setAssociatedSequence(seq); return pdbRequest; } @Override public List getAvailableFilterOptions(String VIEWS_FILTER) { List filters = new ArrayList(); filters.add( new FilterOption(MessageManager.getString("label.best_quality"), "overall_quality", VIEWS_FILTER, false)); filters.add(new FilterOption( MessageManager.getString("label.best_resolution"), "resolution", VIEWS_FILTER, false)); filters.add(new FilterOption( MessageManager.getString("label.most_protein_chain"), "number_of_protein_chains", VIEWS_FILTER, false)); filters.add(new FilterOption( MessageManager.getString("label.most_bound_molecules"), "number_of_bound_molecules", VIEWS_FILTER, false)); filters.add(new FilterOption( MessageManager.getString("label.most_polymer_residues"), "number_of_polymer_residues", VIEWS_FILTER, true)); return filters; } /** * FTSRestClient specific query builder to pick top ranked entry from a * fetchStructuresMetaData query * * @param seq * - seq to generate a query for * @param wantedFields * - fields to retrieve * @param selectedFilterOpt * - criterion for ranking results (e.g. resolution) * @param b * - sort ascending or descending * @return * @throws Exception */ public FTSRestResponse selectFirstRankedQuery(SequenceI seq, Collection collectedResults, Collection wantedFields, String fieldToFilterBy, boolean b) throws Exception { TDBResultAnalyser analyser= new TDBResultAnalyser(seq,collectedResults, lastTdbRequest); FTSRestResponse resultList = new FTSRestResponse(); List filteredResponse = analyser.getFilteredResponse(); List selectedStructures = analyser.selectStructures(filteredResponse); resultList.setNumberOfItemsFound(selectedStructures.size()); resultList.setSearchSummary(selectedStructures); return resultList; } @Override public PDBEntry[] collectSelectedRows(JTable restable, int[] selectedRows, List selectedSeqsToView) { int refSeqColIndex = restable.getColumn("Ref Sequence").getModelIndex(); PDBEntry[] pdbEntriesToView = new PDBEntry[selectedRows.length]; int count = 0; int idColumnIndex = restable.getColumn("Model id").getModelIndex(); int urlColumnIndex = restable.getColumn("Url").getModelIndex(); int typeColumnIndex = restable.getColumn("Provider").getModelIndex(); int categoryColumnIndex = restable.getColumn("Model Category") .getModelIndex(); final int up_start_idx = restable.getColumn("Uniprot Start").getModelIndex(); final int up_end_idx = restable.getColumn("Uniprot End").getModelIndex(); int i=0; // bleugh! Integer[] sellist = new Integer[selectedRows.length]; for (Integer row: selectedRows) { sellist[i++] = row; } // Sort rows by coverage Arrays.sort(sellist,new Comparator() { @Override public int compare(Integer o1, Integer o2) { int o1_xt = ((Integer)restable.getValueAt(o1, up_end_idx)) - (Integer)restable.getValueAt(o1, up_start_idx); int o2_xt = ((Integer)restable.getValueAt(o2, up_end_idx)) - (Integer)restable.getValueAt(o2, up_start_idx); return o2_xt-o1_xt; } }); for (int row : sellist) { // unique id - could be a horrible hash String pdbIdStr = restable.getValueAt(row, idColumnIndex).toString(); String urlStr = restable.getValueAt(row, urlColumnIndex).toString(); String typeColumn = restable.getValueAt(row, typeColumnIndex) .toString(); SequenceI selectedSeq = (SequenceI) restable.getValueAt(row, refSeqColIndex); selectedSeqsToView.add(selectedSeq); PDBEntry pdbEntry = selectedSeq.getPDBEntry(pdbIdStr); if (pdbEntry == null) { pdbEntry = getFindEntry(pdbIdStr, selectedSeq.getAllPDBEntries()); } if (pdbEntry == null) { pdbEntry = new PDBEntry(); pdbEntry.setId(pdbIdStr); pdbEntry.setType(PDBEntry.Type.MMCIF); if (!"PDBe".equalsIgnoreCase(typeColumn)) { pdbEntry.setRetrievalUrl(urlStr); } selectedSeq.getDatasetSequence().addPDBId(pdbEntry); } pdbEntriesToView[count++] = pdbEntry; } return pdbEntriesToView; } @Override protected FTSRestRequest getLastFTSRequest() { return lastTdbRequest; } /** * generate a query for PDBFTS to retrieve structure metadata * * @param ftsRestRequest * @param upResponse * @return */ public String buildPDBFTSQueryFor(FTSRestResponse upResponse) { List pdbIds = new ArrayList(); int idx_modelId = getLastFTSRequest().getFieldIndex("Model id"); int idx_provider = getLastFTSRequest().getFieldIndex("Provider"); for (FTSData row : upResponse.getSearchSummary()) { String id = (String) row.getSummaryData()[idx_modelId]; String provider = (String) row.getSummaryData()[idx_provider]; if ("PDBe".equalsIgnoreCase(provider)) { pdbIds.add(id); } } return String.join(" OR ", pdbIds).toString(); } /** * query PDBe for structure metadata * * @param pdbquery * @param upResponse * @return FTSRestResponse via PDBStructureChooserQuerySource */ public FTSRestResponse fetchStructuresMetaDataFor( PDBStructureChooserQuerySource pdbquery, FTSRestResponse upResponse) throws Exception { String pdb_Query = buildPDBFTSQueryFor(upResponse); FTSRestResponse resultList; FTSRestRequest pdbRequest = new FTSRestRequest(); pdbRequest.setAllowEmptySeq(false); pdbRequest.setResponseSize(500); pdbRequest.setFieldToSearchBy("("); // pdbRequest.setFieldToSortBy("pdb_id"); pdbRequest.setWantedFields( pdbquery.getDocFieldPrefs().getStructureSummaryFields()); pdbRequest.setSearchTerm(pdb_Query + ")"); resultList = pdbquery.executePDBFTSRestRequest(pdbRequest); lastPdbRequest = pdbRequest; return resultList; } public FTSRestResponse joinResponses(FTSRestResponse upResponse, FTSRestResponse pdbResponse) { int idx_provider = getLastFTSRequest().getFieldIndex("Provider"); // join on int idx_modelId = getLastFTSRequest().getFieldIndex("Model id"); int pdbIdx = lastPdbRequest.getFieldIndex("PDB Id"); int pdbTitle_idx = lastPdbRequest.getFieldIndex("Title"); int tdbTitle_idx = getLastFTSRequest().getFieldIndex("Title"); List joinedRows = new ArrayList(); for (final FTSData row : upResponse.getSearchSummary()) { String id = (String) row.getSummaryData()[idx_modelId]; String provider = (String) row.getSummaryData()[idx_provider]; if ("PDBe".equalsIgnoreCase(provider)) { for (final FTSData pdbrow : pdbResponse.getSearchSummary()) { String pdbid = (String) pdbrow.getSummaryData()[pdbIdx]; if (id.equalsIgnoreCase(pdbid)) { row.getSummaryData()[tdbTitle_idx] = pdbrow .getSummaryData()[pdbTitle_idx]; } } } else { row.getSummaryData()[tdbTitle_idx] = "Model from TDB"; } } return upResponse; } }