X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=test%2Fjalview%2Fgui%2Fstructurechooser%2FStructureChooserQuerySourceTest.java;fp=test%2Fjalview%2Fgui%2Fstructurechooser%2FStructureChooserQuerySourceTest.java;h=1912f146bb8cb06cdcb10cc61463a7eacd2d949b;hb=304e64fb34b32659be1bbfd39fb4e15b2f79586e;hp=0000000000000000000000000000000000000000;hpb=cfb79b69d9fa44595560659bd95d1d1cd27677ad;p=jalview.git
diff --git a/test/jalview/gui/structurechooser/StructureChooserQuerySourceTest.java b/test/jalview/gui/structurechooser/StructureChooserQuerySourceTest.java
new file mode 100644
index 0000000..1912f14
--- /dev/null
+++ b/test/jalview/gui/structurechooser/StructureChooserQuerySourceTest.java
@@ -0,0 +1,337 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.gui.structurechooser;
+
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Vector;
+
+import org.junit.Assert;
+import org.testng.AssertJUnit;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.PDBEntry;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceI;
+import jalview.fts.api.FTSData;
+import jalview.fts.core.FTSRestRequest;
+import jalview.fts.core.FTSRestResponse;
+import jalview.fts.service.pdb.PDBFTSRestClient;
+import jalview.fts.service.pdb.PDBFTSRestClientTest;
+import jalview.fts.threedbeacons.TDBeaconsFTSRestClientTest;
+import jalview.gui.JvOptionPane;
+import jalview.gui.StructureChooser;
+import jalview.jbgui.FilterOption;
+
+public class StructureChooserQuerySourceTest
+{
+
+ /**
+   * Switches {@link JvOptionPane} to non-interactive mode and sets its mock
+   * response to {@code CANCEL_OPTION} once, before any test in this class runs
+   * (presumably so that dialogs raised during a test never wait for a user).
+   */
+  @BeforeClass(alwaysRun = true)
+  public void setUpJvOptionPane()
+  {
+    // order kept as in the original: disable interaction, then set the reply
+    JvOptionPane.setInteractiveMode(false);
+    JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
+  }
+
+ /** Fixture built by setUp(): "PDB|4kqy|4KQY|A" with a PDB entry "1tim". */
+  Sequence seq;
+
+  /** Fixture built by setUp(): sequence "P38398" with a UNIPROT DBRef. */
+  Sequence upSeq;
+
+  /** Fixture built by setUp(): "INS_HUMAN" with a UNIPROT DBRef to P01308. */
+  Sequence upSeq_insulin;
+
+ /**
+   * Rebuilds the sequence fixtures before every test method (same set up as
+   * for the structure chooser test):
+   * <ul>
+   * <li>{@code seq} - "PDB|4kqy|4KQY|A", residues 1-26, with four DBRefs that
+   * only have an accession (XYZ_1 .. XYZ_4) and one PDB entry with id
+   * "1tim"</li>
+   * <li>{@code upSeq} - "P38398", used with the 3D-Beacons mocks, carrying a
+   * UNIPROT DBRef to P38398</li>
+   * <li>{@code upSeq_insulin} - "INS_HUMAN", carrying a UNIPROT DBRef to
+   * P01308</li>
+   * </ul>
+   * The trailing {@code true} passed to the UNIPROT {@link DBRefEntry}
+   * constructor is presumably the "canonical" flag - TODO confirm.
+   *
+   * @throws Exception
+   *           declared for the test framework; nothing here throws explicitly
+   */
+  @BeforeMethod(alwaysRun = true)
+  public void setUp() throws Exception
+  {
+    seq = new Sequence("PDB|4kqy|4KQY|A", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1,
+            26);
+    seq.createDatasetSequence();
+    for (int i = 1; i < 5; i++)
+    {
+      DBRefEntry ref = new DBRefEntry();
+      ref.setAccessionId("XYZ_" + i);
+      seq.addDBRef(ref);
+    }
+
+    PDBEntry pdbEntry = new PDBEntry();
+    pdbEntry.setId("1tim");
+
+    // explicit element type: the declaration was a raw Vector
+    Vector<PDBEntry> pdbIds = new Vector<>();
+    pdbIds.add(pdbEntry);
+
+    seq.setPDBId(pdbIds);
+
+    // Uniprot sequence for 3D-Beacons mocks
+    // NOTE(review): every chunk of the literal below ends in an embedded \n,
+    // and those characters are passed to the Sequence constructor as part of
+    // the residue string - confirm that this is intended.
+    upSeq = new Sequence("P38398",
+            "MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQCPLCKNDITKRS\n"
+                    + "LQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEVSIIQSMGYRNRAKRLLQSEPEN\n"
+                    + "PSLQETSLSVQLSNLGTVRTLRTKQRIQPQKTSVYIELGSDSSEDTVNKATYCSVGDQELLQITPQGTRDEI\n"
+                    + "SLDSAKKAACEFSETDVTNTEHHQPSNNDLNTTEKRAAERHPEKYQGSSVSNLHVEPCGTNTHASSLQHENS\n"
+                    + "SLLLTKDRMNVEKAEFCNKSKQPGLARSQHNRWAGSKETCNDRRTPSTEKKVDLNADPLCERKEWNKQKLPC\n"
+                    + "SENPRDTEDVPWITLNSSIQKVNEWFSRSDELLGSDDSHDGESESNAKVADVLDVLNEVDEYSGSSEKIDLL\n"
+                    + "ASDPHEALICKSERVHSKSVESNIEDKIFGKTYRKKASLPNLSHVTENLIIGAFVTEPQIIQERPLTNKLKR\n"
+                    + "KRRPTSGLHPEDFIKKADLAVQKTPEMINQGTNQTEQNGQVMNITNSGHENKTKGDSIQNEKNPNPIESLEK\n"
+                    + "ESAFKTKAEPISSSISNMELELNIHNSKAPKKNRLRRKSSTRHIHALELVVSRNLSPPNCTELQIDSCSSSE\n"
+                    + "EIKKKKYNQMPVRHSRNLQLMEGKEPATGAKKSNKPNEQTSKRHDSDTFPELKLTNAPGSFTKCSNTSELKE\n"
+                    + "FVNPSLPREEKEEKLETVKVSNNAEDPKDLMLSGERVLQTERSVESSSISLVPGTDYGTQESISLLEVSTLG\n"
+                    + "KAKTEPNKCVSQCAAFENPKGLIHGCSKDNRNDTEGFKYPLGHEVNHSRETSIEMEESELDAQYLQNTFKVS\n"
+                    + "KRQSFAPFSNPGNAEEECATFSAHSGSLKKQSPKVTFECEQKEENQGKNESNIKPVQTVNITAGFPVVGQKD\n"
+                    + "KPVDNAKCSIKGGSRFCLSSQFRGNETGLITPNKHGLLQNPYRIPPLFPIKSFVKTKCKKNLLEENFEEHSM\n"
+                    + "SPEREMGNENIPSTVSTISRNNIRENVFKEASSSNINEVGSSTNEVGSSINEIGSSDENIQAELGRNRGPKL\n"
+                    + "NAMLRLGVLQPEVYKQSLPGSNCKHPEIKKQEYEEVVQTVNTDFSPYLISDNLEQPMGSSHASQVCSETPDD\n"
+                    + "LLDDGEIKEDTSFAENDIKESSAVFSKSVQKGELSRSPSPFTHTHLAQGYRRGAKKLESSEENLSSEDEELP\n"
+                    + "CFQHLLFGKVNNIPSQSTRHSTVATECLSKNTEENLLSLKNSLNDCSNQVILAKASQEHHLSEETKCSASLF\n"
+                    + "SSQCSELEDLTANTNTQDPFLIGSSKQMRHQSESQGVGLSDKELVSDDEERGTGLEENNQEEQSMDSNLGEA\n"
+                    + "ASGCESETSVSEDCSGLSSQSDILTTQQRDTMQHNLIKLQQEMAELEAVLEQHGSQPSNSYPSIISDSSALE\n"
+                    + "DLRNPEQSTSEKAVLTSQKSSEYPISQNPEGLSADKFEVSADSSTSKNKEPGVERSSPSKCPSLDDRWYMHS\n"
+                    + "CSGSLQNRNYPSQEELIKVVDVEEQQLEESGPHDLTETSYLPRQDLEGTPYLESGISLFSDDPESDPSEDRA\n"
+                    + "PESARVGNIPSSTSALKVPQLKVAESAQSPAAAHTTDTAGYNAMEESVSREKPELTASTERVNKRMSMVVSG\n"
+                    + "LTPEEFMLVYKFARKHHITLTNLITEETTHVVMKTDAEFVCERTLKYFLGIAGGKWVVSYFWVTQSIKERKM\n"
+                    + "LNEHDFEVRGDVVNGRNHQGPKRARESQDRKIFRGLEICCYGPFTNMPTDQLEWMVQLCGASVVKELSSFTL\n"
+                    + "GTGVHPIVVVQPDAWTEDNGFHAIGQMCEAPVVTREWVLDSVALYQCQELDTYLIPQIPHSHY\n"
+                    + "",
+            1, 1863);
+    upSeq.createDatasetSequence();
+    upSeq.setDescription("Breast cancer type 1 susceptibility protein");
+    upSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "P38398", null, true));
+
+    upSeq_insulin = new Sequence("INS_HUMAN",
+            "MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGP"
+                    + "GAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN");
+    upSeq_insulin.createDatasetSequence();
+    upSeq_insulin.setDescription("Insulin");
+    upSeq_insulin
+            .addDBRef(new DBRefEntry("UNIPROT", "0", "P01308", null, true));
+  }
+
+/**
+   * Drops the references to all three sequence fixtures after each test
+   * method; setUp() creates new ones before the next test.
+   *
+   * @throws Exception
+   *           declared for the test framework; nothing here throws explicitly
+   */
+  @AfterMethod(alwaysRun = true)
+  public void tearDown() throws Exception
+  {
+    seq = null;
+    upSeq = null;
+    // previously left set, unlike the other two fixtures created by setUp()
+    upSeq_insulin = null;
+  }
+
+ @SuppressWarnings("deprecation")
+ @Test(groups = { "Functional" })
+ public void buildPDBQueryTest()
+ {
+ System.out.println("seq >>>> " + seq);
+
+ StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { seq});
+ AssertJUnit.assertTrue(scquery instanceof PDBStructureChooserQuerySource);
+ String query = scquery.buildQuery(seq);
+ AssertJUnit.assertEquals("pdb_id:1tim", query);
+ seq.getAllPDBEntries().clear();
+ query = scquery.buildQuery(seq);
+ AssertJUnit.assertEquals(
+ "text:XYZ_1 OR text:XYZ_2 OR text:XYZ_3 OR text:XYZ_4 OR text:4kqy",
+ query);
+ seq.setDBRefs(null);
+ query = scquery.buildQuery(seq);
+ System.out.println(query);
+ AssertJUnit.assertEquals("text:4kqy", query);
+
+ DBRefEntry uniprotDBRef = new DBRefEntry();
+ uniprotDBRef.setAccessionId("P12345");
+ uniprotDBRef.setSource(DBRefSource.UNIPROT);
+ seq.addDBRef(uniprotDBRef);
+
+ DBRefEntry pdbDBRef = new DBRefEntry();
+ pdbDBRef.setAccessionId("1XYZ");
+ pdbDBRef.setSource(DBRefSource.PDB);
+ seq.addDBRef(pdbDBRef);
+
+ for (int x = 1; x < 5; x++)
+ {
+ DBRefEntry dbRef = new DBRefEntry();
+ dbRef.setAccessionId("XYZ_" + x);
+ seq.addDBRef(dbRef);
+ }
+ System.out.println("");
+ System.out.println(seq.getDBRefs());
+ System.out.println(query);
+ query = scquery.buildQuery(seq);
+ AssertJUnit.assertEquals(
+ "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
+ query);
+ }
+
+ @SuppressWarnings("deprecation")
+ @Test(groups = { "Functional" })
+ public void buildThreeDBQueryTest()
+ {
+ System.out.println("seq >>>> " + upSeq);
+ TDBeaconsFTSRestClientTest.setMock();
+ PDBFTSRestClientTest.setMock();
+ StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { upSeq});
+ // gets the lightweight proxy rather than the ThreeDBStructureChooserQuerySource
+ AssertJUnit.assertTrue(scquery instanceof ThreeDBStructureChooserQuerySource);
+ String query = scquery.buildQuery(upSeq);
+ AssertJUnit.assertEquals("P38398", query);
+
+ // query shouldn't change regardless of additional entries
+ // because 3DBeacons requires canonical entries.
+ upSeq.getAllPDBEntries().clear();
+ query = scquery.buildQuery(upSeq);
+ AssertJUnit.assertEquals("P38398", query);
+ upSeq.setDBRefs(null);
+ query = scquery.buildQuery(upSeq);
+ /*
+ * legacy projects/datasets will not have canonical flags set for uniprot dbrefs
+ * graceful behaviour would be to
+ * - pick one ? not possible
+ * - iterate through all until a 200 is obtained ?
+ * ---> ideal but could be costly
+ * ---> better to do a direct retrieval from uniprot to work out which is the canonical identifier..
+ * ----> need a test to check that accessions can be promoted to canonical!
+ */
+ //FIXME - need to be able to use ID to query here ?
+ AssertJUnit.assertEquals(null, query);
+
+
+
+ // TODO:
+ /**
+ * set of sequences:
+ * - no protein -> TDB not applicable, query PDBe only (consider RNA or DNA - specific query adapter ?)
+ * - protein but no uniprot -> first consider trying to get uniprot refs (need a mark to say none are available)
+ * - protein and uniprot - no canonicals -> resolve to uniprot automatically to get canonicals
+ * - query uniprot against 3DBeacons
+ * --> decorate experimental structures with additional data from PDBe
+ * - query remaining against PDBe
+ * Ranking
+ * - 3D Beacons
+ * --> in memory ranking - no need to query twice
+ * Rank by
+ * - experimental > AlphaFold -> Model
+ * - start > end
+ * -> filters for
+ * -> experimental only
+ * -> experimental plus best models for other regions
+ * -> "best cover"
+ * -> need to be able to select correct reference (the longest one that covers all) for superposition
+ */
+//
+// DBRefEntry uniprotDBRef = new DBRefEntry();
+// uniprotDBRef.setAccessionId("P12345");
+// uniprotDBRef.setSource(DBRefSource.UNIPROT);
+// upSeq.addDBRef(uniprotDBRef);
+//
+// DBRefEntry pdbDBRef = new DBRefEntry();
+// pdbDBRef.setAccessionId("1XYZ");
+// pdbDBRef.setSource(DBRefSource.PDB);
+// upSeq.addDBRef(pdbDBRef);
+//
+// for (int x = 1; x < 5; x++)
+// {
+// DBRefEntry dbRef = new DBRefEntry();
+// dbRef.setAccessionId("XYZ_" + x);
+// seq.addDBRef(dbRef);
+// }
+// System.out.println("");
+// System.out.println(seq.getDBRefs());
+// System.out.println(query);
+// query = scquery.buildQuery(seq);
+// assertEquals(
+// "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
+// query);
+ }
+ /**
+   * Runs a 3D-Beacons structure query for the given sequence, ranks the
+   * result, then fetches PDBe metadata for the 3D-Beacons response and joins
+   * the two responses. Both REST clients have setMock() called on them first
+   * (presumably so that canned responses are used instead of the network).
+   *
+   * @param testUpSeq
+   *          a sequence supplied by the {@code testUpSeqs} data provider
+   */
+  @Test(groups = { "Functional" }, dataProvider = "testUpSeqs")
+  public void cascadingThreeDBandPDBQuerys(SequenceI testUpSeq)
+  {
+    TDBeaconsFTSRestClientTest.setMock();
+    PDBFTSRestClientTest.setMock();
+    ThreeDBStructureChooserQuerySource tdbquery = new ThreeDBStructureChooserQuerySource();
+    PDBStructureChooserQuerySource pdbquery = new PDBStructureChooserQuerySource();
+
+    FTSRestResponse upResponse = null;
+    FTSRestResponse pdbResponse = null;
+    // TODO test available options
+    // Best coverage
+    // Best Alphafold Model
+    // Best model (by confidence score)
+    // Will also need to develop a more sophisticated filtering system
+
+    // element type restored: a raw List cannot be read into FilterOption
+    List<FilterOption> opts = tdbquery
+            .getAvailableFilterOptions(StructureChooser.VIEWS_FILTER);
+    FilterOption opt_singlebest = opts.get(0);
+    FilterOption opt_manybest = opts.get(1);
+    assertEquals(opt_singlebest.getValue(),
+            ThreeDBStructureChooserQuerySource.FILTER_FIRST_BEST_COVERAGE);
+    assertEquals(opt_manybest.getValue(),
+            ThreeDBStructureChooserQuerySource.FILTER_TDBEACONS_COVERAGE);
+
+    try
+    {
+      upResponse = tdbquery.fetchStructuresMetaData(testUpSeq,
+              tdbquery.getDocFieldPrefs().getStructureSummaryFields(),
+              opt_singlebest, false);
+      tdbquery.updateAvailableFilterOptions(StructureChooser.VIEWS_FILTER,
+              opts, upResponse.getSearchSummary());
+      // test ranking without additional PDBe data
+      FTSRestResponse firstRanked = tdbquery.selectFirstRankedQuery(
+              testUpSeq, upResponse.getSearchSummary(),
+              tdbquery.getDocFieldPrefs().getStructureSummaryFields(),
+              opt_singlebest.getValue(), false);
+      assertEquals(firstRanked.getNumberOfItemsFound(), 1);
+      // many best response
+      upResponse = tdbquery.fetchStructuresMetaData(testUpSeq,
+              tdbquery.getDocFieldPrefs().getStructureSummaryFields(),
+              opt_manybest, false);
+      // NOTE(review): this statement was garbled ("size()0") - the text
+      // between a '<' and a later '>' had been stripped. Reconstructed as
+      // "single best result is smaller than the many-best response"; any
+      // further statements lost in the same gap must be restored from the
+      // upstream file - confirm.
+      assertTrue(firstRanked.getSearchSummary().size() < upResponse
+              .getSearchSummary().size());
+      pdbResponse = tdbquery.fetchStructuresMetaDataFor(pdbquery,
+              upResponse);
+      assertTrue(pdbResponse.getNumberOfItemsFound() > 0);
+      FTSRestResponse joinedResp = tdbquery.joinResponses(upResponse,
+              pdbResponse);
+      assertEquals(upResponse.getNumberOfItemsFound(),
+              joinedResp.getNumberOfItemsFound());
+    } catch (Exception x)
+    {
+      // fail through TestNG and keep the cause attached to the failure
+      org.testng.Assert.fail("Unexpected Exception", x);
+    }
+    // result unused; call retained in case it has side effects or can throw
+    // NOTE(review): confirm whether an assertion on the result was intended
+    StructureChooserQuerySource.getQuerySourceFor(new SequenceI[]
+    { testUpSeq });
+  }
+
+ @DataProvider(name = "testUpSeqs")
+ public Object[][] testUpSeqs() throws Exception
+ {
+ setUp();
+ return new Object[][] { {upSeq},{upSeq_insulin}};
+ }
+
+ /**
+   * Checks PDBStructureChooserQuerySource.sanitizeSeqName against four names:
+   * one that must come back unchanged and three that must be cleaned.
+   */
+  @Test(groups = { "Functional" })
+  public void sanitizeSeqNameTest()
+  {
+    // letters, digits, '|' and '_' only: expected back unchanged
+    String cleanName = "ab_cdEF|fwxyz012349";
+    String sanitized = PDBStructureChooserQuerySource
+            .sanitizeSeqName(cleanName);
+    AssertJUnit.assertEquals(cleanName, sanitized);
+
+    // remove a [nn] substring
+    String bracketedDigits = "abcde12[345]fg";
+    sanitized = PDBStructureChooserQuerySource
+            .sanitizeSeqName(bracketedDigits);
+    AssertJUnit.assertEquals("abcde12fg", sanitized);
+
+    // remove characters other than a-zA-Z0-9 | or _
+    String punctuated = "ab[cd],.\t£$*!- \\\"@:e";
+    sanitized = PDBStructureChooserQuerySource.sanitizeSeqName(punctuated);
+    AssertJUnit.assertEquals("abcde", sanitized);
+
+    // bracketed text that is not all digits: only the brackets are dropped
+    String bracketedMixed = "abcde12[345a]fg";
+    sanitized = PDBStructureChooserQuerySource
+            .sanitizeSeqName(bracketedMixed);
+    AssertJUnit.assertEquals("abcde12345afg", sanitized);
+  }
+}