/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
package jalview.gui.structurechooser;

import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;

import java.util.Collection;
import java.util.List;
import java.util.Vector;

import org.junit.Assert;
import org.testng.AssertJUnit;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;

import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.PDBEntry;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
import jalview.fts.api.FTSData;
import jalview.fts.core.FTSRestRequest;
import jalview.fts.core.FTSRestResponse;
import jalview.fts.service.pdb.PDBFTSRestClient;
import jalview.fts.threedbeacons.TDBeaconsFTSRestClientTest;
import jalview.gui.JvOptionPane;
import jalview.gui.StructureChooser;
import jalview.jbgui.FilterOption;

/**
 * Tests for the query sources used by the structure chooser: query-string
 * construction for the PDB and 3D-Beacons sources, the cascaded 3D-Beacons
 * then PDBe metadata retrieval, and sequence-name sanitisation.
 */
public class StructureChooserQuerySourceTest
{

  /**
   * Puts JvOptionPane into non-interactive mode with a CANCEL mock response
   * before any test in this class runs.
   */
  @BeforeClass(alwaysRun = true)
  public void setUpJvOptionPane()
  {
    JvOptionPane.setInteractiveMode(false);
    JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
  }

  Sequence seq, upSeq;

  /**
   * Builds the two fixture sequences used by the tests:
   * <ul>
   * <li>{@code seq} - a 26 residue sequence named "PDB|4kqy|4KQY|A" with four
   * source-less DBRefs (XYZ_1..XYZ_4) and a single PDB entry "1tim"</li>
   * <li>{@code upSeq} - sequence P38398 with one UNIPROT DBRef constructed
   * with a trailing {@code true} flag</li>
   * </ul>
   */
  // same set up as for structurechooser test
  @BeforeMethod(alwaysRun = true)
  public void setUp() throws Exception
  {
    seq = new Sequence("PDB|4kqy|4KQY|A", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1,
            26);
    seq.createDatasetSequence();
    for (int x = 1; x < 5; x++)
    {
      DBRefEntry dbRef = new DBRefEntry();
      dbRef.setAccessionId("XYZ_" + x);
      seq.addDBRef(dbRef);
    }

    PDBEntry dbRef = new PDBEntry();
    dbRef.setId("1tim");

    Vector<PDBEntry> pdbIds = new Vector<>();
    pdbIds.add(dbRef);

    seq.setPDBId(pdbIds);

    // Uniprot sequence for 3D-Beacons mocks
    upSeq = new Sequence("P38398",
            "MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQCPLCKNDITKRS\n"
                    + "LQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEVSIIQSMGYRNRAKRLLQSEPEN\n"
                    + "PSLQETSLSVQLSNLGTVRTLRTKQRIQPQKTSVYIELGSDSSEDTVNKATYCSVGDQELLQITPQGTRDEI\n"
                    + "SLDSAKKAACEFSETDVTNTEHHQPSNNDLNTTEKRAAERHPEKYQGSSVSNLHVEPCGTNTHASSLQHENS\n"
                    + "SLLLTKDRMNVEKAEFCNKSKQPGLARSQHNRWAGSKETCNDRRTPSTEKKVDLNADPLCERKEWNKQKLPC\n"
                    + "SENPRDTEDVPWITLNSSIQKVNEWFSRSDELLGSDDSHDGESESNAKVADVLDVLNEVDEYSGSSEKIDLL\n"
                    + "ASDPHEALICKSERVHSKSVESNIEDKIFGKTYRKKASLPNLSHVTENLIIGAFVTEPQIIQERPLTNKLKR\n"
                    + "KRRPTSGLHPEDFIKKADLAVQKTPEMINQGTNQTEQNGQVMNITNSGHENKTKGDSIQNEKNPNPIESLEK\n"
                    + "ESAFKTKAEPISSSISNMELELNIHNSKAPKKNRLRRKSSTRHIHALELVVSRNLSPPNCTELQIDSCSSSE\n"
                    + "EIKKKKYNQMPVRHSRNLQLMEGKEPATGAKKSNKPNEQTSKRHDSDTFPELKLTNAPGSFTKCSNTSELKE\n"
                    + "FVNPSLPREEKEEKLETVKVSNNAEDPKDLMLSGERVLQTERSVESSSISLVPGTDYGTQESISLLEVSTLG\n"
                    + "KAKTEPNKCVSQCAAFENPKGLIHGCSKDNRNDTEGFKYPLGHEVNHSRETSIEMEESELDAQYLQNTFKVS\n"
                    + "KRQSFAPFSNPGNAEEECATFSAHSGSLKKQSPKVTFECEQKEENQGKNESNIKPVQTVNITAGFPVVGQKD\n"
                    + "KPVDNAKCSIKGGSRFCLSSQFRGNETGLITPNKHGLLQNPYRIPPLFPIKSFVKTKCKKNLLEENFEEHSM\n"
                    + "SPEREMGNENIPSTVSTISRNNIRENVFKEASSSNINEVGSSTNEVGSSINEIGSSDENIQAELGRNRGPKL\n"
                    + "NAMLRLGVLQPEVYKQSLPGSNCKHPEIKKQEYEEVVQTVNTDFSPYLISDNLEQPMGSSHASQVCSETPDD\n"
                    + "LLDDGEIKEDTSFAENDIKESSAVFSKSVQKGELSRSPSPFTHTHLAQGYRRGAKKLESSEENLSSEDEELP\n"
                    + "CFQHLLFGKVNNIPSQSTRHSTVATECLSKNTEENLLSLKNSLNDCSNQVILAKASQEHHLSEETKCSASLF\n"
                    + "SSQCSELEDLTANTNTQDPFLIGSSKQMRHQSESQGVGLSDKELVSDDEERGTGLEENNQEEQSMDSNLGEA\n"
                    + "ASGCESETSVSEDCSGLSSQSDILTTQQRDTMQHNLIKLQQEMAELEAVLEQHGSQPSNSYPSIISDSSALE\n"
                    + "DLRNPEQSTSEKAVLTSQKSSEYPISQNPEGLSADKFEVSADSSTSKNKEPGVERSSPSKCPSLDDRWYMHS\n"
                    + "CSGSLQNRNYPSQEELIKVVDVEEQQLEESGPHDLTETSYLPRQDLEGTPYLESGISLFSDDPESDPSEDRA\n"
                    + "PESARVGNIPSSTSALKVPQLKVAESAQSPAAAHTTDTAGYNAMEESVSREKPELTASTERVNKRMSMVVSG\n"
                    + "LTPEEFMLVYKFARKHHITLTNLITEETTHVVMKTDAEFVCERTLKYFLGIAGGKWVVSYFWVTQSIKERKM\n"
                    + "LNEHDFEVRGDVVNGRNHQGPKRARESQDRKIFRGLEICCYGPFTNMPTDQLEWMVQLCGASVVKELSSFTL\n"
                    + "GTGVHPIVVVQPDAWTEDNGFHAIGQMCEAPVVTREWVLDSVALYQCQELDTYLIPQIPHSHY\n"
                    + "",
            1, 1863);
    upSeq.createDatasetSequence();
    upSeq.setDescription("Breast cancer type 1 susceptibility protein");
    upSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "P38398", null, true));
  }

  /**
   * Drops the fixture sequences so each test starts from a fresh
   * {@link #setUp()}.
   */
  @AfterMethod(alwaysRun = true)
  public void tearDown() throws Exception
  {
    seq = null;
    upSeq = null;
  }

  /**
   * Checks the query string built for {@code seq} as its PDB entries and
   * DBRefs are progressively removed and then replaced with UNIPROT/PDB refs.
   */
  @SuppressWarnings("deprecation")
  @Test(groups = { "Functional" })
  public void buildPDBQueryTest()
  {
    System.out.println("seq >>>> " + seq);

    StructureChooserQuerySource scquery = StructureChooserQuerySource
            .getQuerySourceFor(new SequenceI[]
            { seq });
    AssertJUnit
            .assertTrue(scquery instanceof PDBStructureChooserQuerySource);

    // with a PDB entry present, the query is built from the PDB id
    String query = scquery.buildQuery(seq);
    AssertJUnit.assertEquals("pdb_id:1tim", query);

    // without PDB entries, the DBRef accessions and the id from the sequence
    // name are used as free-text terms
    seq.getAllPDBEntries().clear();
    query = scquery.buildQuery(seq);
    AssertJUnit.assertEquals(
            "text:XYZ_1 OR text:XYZ_2 OR text:XYZ_3 OR text:XYZ_4 OR text:4kqy",
            query);

    // without DBRefs, only the id from the sequence name remains
    seq.setDBRefs(null);
    query = scquery.buildQuery(seq);
    System.out.println(query);
    AssertJUnit.assertEquals("text:4kqy", query);

    DBRefEntry uniprotDBRef = new DBRefEntry();
    uniprotDBRef.setAccessionId("P12345");
    uniprotDBRef.setSource(DBRefSource.UNIPROT);
    seq.addDBRef(uniprotDBRef);

    DBRefEntry pdbDBRef = new DBRefEntry();
    pdbDBRef.setAccessionId("1XYZ");
    pdbDBRef.setSource(DBRefSource.PDB);
    seq.addDBRef(pdbDBRef);

    for (int x = 1; x < 5; x++)
    {
      DBRefEntry dbRef = new DBRefEntry();
      dbRef.setAccessionId("XYZ_" + x);
      seq.addDBRef(dbRef);
    }
    System.out.println("");
    System.out.println(seq.getDBRefs());
    System.out.println(query);
    // once UNIPROT and PDB refs exist, the source-less XYZ_n refs no longer
    // appear in the query
    query = scquery.buildQuery(seq);
    AssertJUnit.assertEquals(
            "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
            query);
  }

  /**
   * Checks the query built for {@code upSeq} by the 3D-Beacons query source:
   * it is the UNIPROT accession while the DBRef is present, and null once all
   * DBRefs have been removed.
   */
  @SuppressWarnings("deprecation")
  @Test(groups = { "Functional" })
  public void buildThreeDBQueryTest()
  {
    System.out.println("seq >>>> " + upSeq);
    TDBeaconsFTSRestClientTest.setMock();
    PDBFTSRestClient.setMock();
    StructureChooserQuerySource scquery = StructureChooserQuerySource
            .getQuerySourceFor(new SequenceI[]
            { upSeq });
    // gets the lightweight proxy rather than the
    // ThreeDBStructureChooserQuerySource
    AssertJUnit.assertTrue(
            scquery instanceof ThreeDBStructureChooserQuerySource);
    String query = scquery.buildQuery(upSeq);
    AssertJUnit.assertEquals("P38398", query);

    // query shouldn't change regardless of additional entries
    // because 3DBeacons requires canonical entries.
    upSeq.getAllPDBEntries().clear();
    query = scquery.buildQuery(upSeq);
    AssertJUnit.assertEquals("P38398", query);
    upSeq.setDBRefs(null);
    query = scquery.buildQuery(upSeq);
    /*
     * legacy projects/datasets will not have canonical flags set for uniprot dbrefs
     * graceful behaviour would be to
     *  - pick one ? not possible
     *  - iterate through all until a 200 is obtained ?
     *  ---> ideal but could be costly
     *  ---> better to do a direct retrieval from uniprot to work out which is the canonical identifier..
     *  ----> need a test to check that accessions can be promoted to canonical!
     */
    // FIXME - need to be able to use ID to query here ?
    AssertJUnit.assertEquals(null, query);

    // TODO:
    /**
     * set of sequences:
     * - no protein -> TDB not applicable, query PDBe only (consider RNA or DNA - specific query adapter ?)
     * - protein but no uniprot -> first consider trying to get uniprot refs (need a mark to say none are available)
     * - protein and uniprot - no canonicals -> resolve to uniprot automatically to get canonicals
     * - query uniprot against 3DBeacons
     * --> decorate experimental structures with additional data from PDBe
     * - query remaining against PDBe
     * Ranking
     * - 3D Beacons
     *  --> in memory ranking - no need to query twice
     *  Rank by
     *  - experimental > AlphaFold -> Model
     *  - start > end
     *  -> filters for
     *  -> experimental only
     *  -> experimental plus best models for other regions
     *  -> "best cover"
     *  -> need to be able to select correct reference (the longest one that covers all) for superposition
     */
    //
    // DBRefEntry uniprotDBRef = new DBRefEntry();
    // uniprotDBRef.setAccessionId("P12345");
    // uniprotDBRef.setSource(DBRefSource.UNIPROT);
    // upSeq.addDBRef(uniprotDBRef);
    //
    // DBRefEntry pdbDBRef = new DBRefEntry();
    // pdbDBRef.setAccessionId("1XYZ");
    // pdbDBRef.setSource(DBRefSource.PDB);
    // upSeq.addDBRef(pdbDBRef);
    //
    // for (int x = 1; x < 5; x++)
    // {
    // DBRefEntry dbRef = new DBRefEntry();
    // dbRef.setAccessionId("XYZ_" + x);
    // seq.addDBRef(dbRef);
    // }
    // System.out.println("");
    // System.out.println(seq.getDBRefs());
    // System.out.println(query);
    // query = scquery.buildQuery(seq);
    // assertEquals(
    // "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
    // query);
  }

  /**
   * Exercises the cascade of a 3D-Beacons query followed by a PDBe query for
   * {@code upSeq}: checks the first two view filter options, ranks the
   * single-best response, fetches the many-best response, retrieves PDBe
   * metadata for it and joins the two responses. Any exception fails the test.
   */
  @Test(groups = { "Functional" })
  public void cascadingThreeDBandPDBQuerys()
  {
    TDBeaconsFTSRestClientTest.setMock();
    PDBFTSRestClient.setMock();
    ThreeDBStructureChooserQuerySource tdbquery = new ThreeDBStructureChooserQuerySource();
    PDBStructureChooserQuerySource pdbquery = new PDBStructureChooserQuerySource();

    FTSRestResponse upResponse = null;
    FTSRestResponse pdbResponse = null;
    // TODO test available options
    // Best coverage
    // Best Alphafold Model
    // Best model (by confidence score)
    // Will also need to develop a more sophisticated filtering system
    List<FilterOption> opts = tdbquery
            .getAvailableFilterOptions(StructureChooser.VIEWS_FILTER);
    FilterOption opt_singlebest = opts.get(0);
    FilterOption opt_manybest = opts.get(1);
    assertEquals(opt_singlebest.getValue(),
            ThreeDBStructureChooserQuerySource.FILTER_FIRST_BEST_COVERAGE);
    assertEquals(opt_manybest.getValue(),
            ThreeDBStructureChooserQuerySource.FILTER_TDBEACONS_COVERAGE);

    try
    {
      upResponse = tdbquery.fetchStructuresMetaData(upSeq,
              tdbquery.getDocFieldPrefs().getStructureSummaryFields(),
              opt_singlebest, false);
      tdbquery.updateAvailableFilterOptions(StructureChooser.VIEWS_FILTER,
              opts, upResponse.getSearchSummary());
      // test ranking without additional PDBe data
      FTSRestResponse firstRanked = tdbquery.selectFirstRankedQuery(upSeq,
              upResponse.getSearchSummary(),
              tdbquery.getDocFieldPrefs().getStructureSummaryFields(),
              opt_singlebest.getValue(), false);
      assertEquals(firstRanked.getNumberOfItemsFound(), 1);
      // many best response
      upResponse = tdbquery.fetchStructuresMetaData(upSeq,
              tdbquery.getDocFieldPrefs().getStructureSummaryFields(),
              opt_manybest, false);
      // NOTE(review): this comparison was garbled in the file as received
      // ("getNumberOfItemsFound()0"); reconstructed as single-best being
      // smaller than many-best - confirm against upstream, as further
      // statements may have been lost at this point
      assertTrue(firstRanked.getNumberOfItemsFound() < upResponse
              .getNumberOfItemsFound());
      pdbResponse = tdbquery.fetchStructuresMetaDataFor(pdbquery,
              upResponse);
      assertTrue(pdbResponse.getNumberOfItemsFound() > 0);
      FTSRestResponse joinedResp = tdbquery.joinResponses(upResponse,
              pdbResponse);
      // joining must not change the number of items reported
      assertEquals(upResponse.getNumberOfItemsFound(),
              joinedResp.getNumberOfItemsFound());

    } catch (Exception x)
    {
      x.printStackTrace();
      Assert.fail("Unexpected Exception");
    }
    StructureChooserQuerySource scquery = StructureChooserQuerySource
            .getQuerySourceFor(new SequenceI[]
            { upSeq });

  }

  /**
   * Checks {@code PDBStructureChooserQuerySource.sanitizeSeqName} against
   * names containing bracketed substrings and punctuation.
   */
  @Test(groups = { "Functional" })
  public void sanitizeSeqNameTest()
  {
    String name = "ab_cdEF|fwxyz012349";
    AssertJUnit.assertEquals(name,
            PDBStructureChooserQuerySource.sanitizeSeqName(name));

    // remove a [nn] substring
    name = "abcde12[345]fg";
    AssertJUnit.assertEquals("abcde12fg",
            PDBStructureChooserQuerySource.sanitizeSeqName(name));

    // remove characters other than a-zA-Z0-9 | or _
    name = "ab[cd],.\t£$*!- \\\"@:e";
    AssertJUnit.assertEquals("abcde",
            PDBStructureChooserQuerySource.sanitizeSeqName(name));

    // a bracketed substring that is not purely numeric keeps its contents
    name = "abcde12[345a]fg";
    AssertJUnit.assertEquals("abcde12345afg",
            PDBStructureChooserQuerySource.sanitizeSeqName(name));
  }
}