JAL-3829 minimal test of additional TDB filters - not at all robust
[jalview.git] / test / jalview / gui / structurechooser / StructureChooserQuerySourceTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.gui.structurechooser;
22
23
24 import static org.testng.Assert.assertEquals;
25 import static org.testng.Assert.assertTrue;
26
27 import java.util.Collection;
28 import java.util.List;
29 import java.util.Vector;
30
31 import org.junit.Assert;
32 import org.testng.AssertJUnit;
33 import org.testng.annotations.AfterMethod;
34 import org.testng.annotations.BeforeClass;
35 import org.testng.annotations.BeforeMethod;
36 import org.testng.annotations.Test;
37
38 import jalview.datamodel.DBRefEntry;
39 import jalview.datamodel.DBRefSource;
40 import jalview.datamodel.PDBEntry;
41 import jalview.datamodel.Sequence;
42 import jalview.datamodel.SequenceI;
43 import jalview.fts.api.FTSData;
44 import jalview.fts.core.FTSRestRequest;
45 import jalview.fts.core.FTSRestResponse;
46 import jalview.fts.service.pdb.PDBFTSRestClient;
47 import jalview.fts.threedbeacons.TDBeaconsFTSRestClientTest;
48 import jalview.gui.JvOptionPane;
49 import jalview.gui.StructureChooser;
50 import jalview.jbgui.FilterOption;
51
52 public class StructureChooserQuerySourceTest
53 {
54
55   @BeforeClass(alwaysRun = true)
56   public void setUpJvOptionPane()
57   {
58     JvOptionPane.setInteractiveMode(false);
59     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
60   }
61
62   Sequence seq,upSeq;
63
64   // same set up as for structurechooser test
65   
66 @BeforeMethod(alwaysRun = true)
67   public void setUp() throws Exception
68   {
69     seq = new Sequence("PDB|4kqy|4KQY|A", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1,
70             26);
71     seq.createDatasetSequence();
72     for (int x = 1; x < 5; x++)
73     {
74       DBRefEntry dbRef = new DBRefEntry();
75       dbRef.setAccessionId("XYZ_" + x);
76       seq.addDBRef(dbRef);
77     }
78
79     PDBEntry dbRef = new PDBEntry();
80     dbRef.setId("1tim");
81
82     Vector<PDBEntry> pdbIds = new Vector<>();
83     pdbIds.add(dbRef);
84
85     seq.setPDBId(pdbIds);
86     
87     // Uniprot sequence for 3D-Beacons mocks
88     upSeq = new Sequence("P38398", 
89             "MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQCPLCKNDITKRS\n"
90             + "LQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEVSIIQSMGYRNRAKRLLQSEPEN\n"
91             + "PSLQETSLSVQLSNLGTVRTLRTKQRIQPQKTSVYIELGSDSSEDTVNKATYCSVGDQELLQITPQGTRDEI\n"
92             + "SLDSAKKAACEFSETDVTNTEHHQPSNNDLNTTEKRAAERHPEKYQGSSVSNLHVEPCGTNTHASSLQHENS\n"
93             + "SLLLTKDRMNVEKAEFCNKSKQPGLARSQHNRWAGSKETCNDRRTPSTEKKVDLNADPLCERKEWNKQKLPC\n"
94             + "SENPRDTEDVPWITLNSSIQKVNEWFSRSDELLGSDDSHDGESESNAKVADVLDVLNEVDEYSGSSEKIDLL\n"
95             + "ASDPHEALICKSERVHSKSVESNIEDKIFGKTYRKKASLPNLSHVTENLIIGAFVTEPQIIQERPLTNKLKR\n"
96             + "KRRPTSGLHPEDFIKKADLAVQKTPEMINQGTNQTEQNGQVMNITNSGHENKTKGDSIQNEKNPNPIESLEK\n"
97             + "ESAFKTKAEPISSSISNMELELNIHNSKAPKKNRLRRKSSTRHIHALELVVSRNLSPPNCTELQIDSCSSSE\n"
98             + "EIKKKKYNQMPVRHSRNLQLMEGKEPATGAKKSNKPNEQTSKRHDSDTFPELKLTNAPGSFTKCSNTSELKE\n"
99             + "FVNPSLPREEKEEKLETVKVSNNAEDPKDLMLSGERVLQTERSVESSSISLVPGTDYGTQESISLLEVSTLG\n"
100             + "KAKTEPNKCVSQCAAFENPKGLIHGCSKDNRNDTEGFKYPLGHEVNHSRETSIEMEESELDAQYLQNTFKVS\n"
101             + "KRQSFAPFSNPGNAEEECATFSAHSGSLKKQSPKVTFECEQKEENQGKNESNIKPVQTVNITAGFPVVGQKD\n"
102             + "KPVDNAKCSIKGGSRFCLSSQFRGNETGLITPNKHGLLQNPYRIPPLFPIKSFVKTKCKKNLLEENFEEHSM\n"
103             + "SPEREMGNENIPSTVSTISRNNIRENVFKEASSSNINEVGSSTNEVGSSINEIGSSDENIQAELGRNRGPKL\n"
104             + "NAMLRLGVLQPEVYKQSLPGSNCKHPEIKKQEYEEVVQTVNTDFSPYLISDNLEQPMGSSHASQVCSETPDD\n"
105             + "LLDDGEIKEDTSFAENDIKESSAVFSKSVQKGELSRSPSPFTHTHLAQGYRRGAKKLESSEENLSSEDEELP\n"
106             + "CFQHLLFGKVNNIPSQSTRHSTVATECLSKNTEENLLSLKNSLNDCSNQVILAKASQEHHLSEETKCSASLF\n"
107             + "SSQCSELEDLTANTNTQDPFLIGSSKQMRHQSESQGVGLSDKELVSDDEERGTGLEENNQEEQSMDSNLGEA\n"
108             + "ASGCESETSVSEDCSGLSSQSDILTTQQRDTMQHNLIKLQQEMAELEAVLEQHGSQPSNSYPSIISDSSALE\n"
109             + "DLRNPEQSTSEKAVLTSQKSSEYPISQNPEGLSADKFEVSADSSTSKNKEPGVERSSPSKCPSLDDRWYMHS\n"
110             + "CSGSLQNRNYPSQEELIKVVDVEEQQLEESGPHDLTETSYLPRQDLEGTPYLESGISLFSDDPESDPSEDRA\n"
111             + "PESARVGNIPSSTSALKVPQLKVAESAQSPAAAHTTDTAGYNAMEESVSREKPELTASTERVNKRMSMVVSG\n"
112             + "LTPEEFMLVYKFARKHHITLTNLITEETTHVVMKTDAEFVCERTLKYFLGIAGGKWVVSYFWVTQSIKERKM\n"
113             + "LNEHDFEVRGDVVNGRNHQGPKRARESQDRKIFRGLEICCYGPFTNMPTDQLEWMVQLCGASVVKELSSFTL\n"
114             + "GTGVHPIVVVQPDAWTEDNGFHAIGQMCEAPVVTREWVLDSVALYQCQELDTYLIPQIPHSHY\n"
115             + "", 1,
116 1863);
117     upSeq.createDatasetSequence();
118     upSeq.setDescription("Breast cancer type 1 susceptibility protein");
119     upSeq.addDBRef(new DBRefEntry("UNIPROT","0","P38398",null,true));
120   }
121
122 @AfterMethod(alwaysRun = true)
123   public void tearDown() throws Exception
124   {
125     seq = null;
126     upSeq=null;
127   }
128
129   @SuppressWarnings("deprecation")
130   @Test(groups = { "Functional" })
131   public void buildPDBQueryTest()
132   {
133     System.out.println("seq >>>> " + seq);
134     
135     StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { seq});
136     AssertJUnit.assertTrue(scquery instanceof PDBStructureChooserQuerySource);
137     String query = scquery.buildQuery(seq);
138     AssertJUnit.assertEquals("pdb_id:1tim", query);
139     seq.getAllPDBEntries().clear();
140     query = scquery.buildQuery(seq);
141     AssertJUnit.assertEquals(
142             "text:XYZ_1 OR text:XYZ_2 OR text:XYZ_3 OR text:XYZ_4 OR text:4kqy",
143             query);
144     seq.setDBRefs(null);
145     query = scquery.buildQuery(seq);
146     System.out.println(query);
147     AssertJUnit.assertEquals("text:4kqy", query);
148
149     DBRefEntry uniprotDBRef = new DBRefEntry();
150     uniprotDBRef.setAccessionId("P12345");
151     uniprotDBRef.setSource(DBRefSource.UNIPROT);
152     seq.addDBRef(uniprotDBRef);
153
154     DBRefEntry pdbDBRef = new DBRefEntry();
155     pdbDBRef.setAccessionId("1XYZ");
156     pdbDBRef.setSource(DBRefSource.PDB);
157     seq.addDBRef(pdbDBRef);
158
159     for (int x = 1; x < 5; x++)
160     {
161       DBRefEntry dbRef = new DBRefEntry();
162       dbRef.setAccessionId("XYZ_" + x);
163       seq.addDBRef(dbRef);
164     }
165     System.out.println("");
166     System.out.println(seq.getDBRefs());
167     System.out.println(query);
168     query = scquery.buildQuery(seq);
169     AssertJUnit.assertEquals(
170             "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
171             query);
172   }
173
174   @SuppressWarnings("deprecation")
175   @Test(groups = { "Functional" })
176   public void buildThreeDBQueryTest()
177   {
178     System.out.println("seq >>>> " + upSeq);
179     TDBeaconsFTSRestClientTest.setMock();
180     PDBFTSRestClient.setMock();
181     StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { upSeq});
182     // gets the lightweight proxy rather than the ThreeDBStructureChooserQuerySource
183     AssertJUnit.assertTrue(scquery instanceof ThreeDBStructureChooserQuerySource);
184     String query = scquery.buildQuery(upSeq);
185     AssertJUnit.assertEquals("P38398", query);
186     
187     // query shouldn't change regardless of additional entries
188     // because 3DBeacons requires canonical entries.
189     upSeq.getAllPDBEntries().clear();
190     query = scquery.buildQuery(upSeq);
191     AssertJUnit.assertEquals("P38398", query);
192     upSeq.setDBRefs(null);
193     query = scquery.buildQuery(upSeq);
194     /*
195      * legacy projects/datasets will not have canonical flags set for uniprot dbrefs
196      * graceful behaviour would be to
197      *  - pick one ? not possible
198      *  - iterate through all until a 200 is obtained ?
199      *  ---> ideal but could be costly
200      *  ---> better to do a direct retrieval from uniprot to work out which is the canonical identifier..
201      *  ----> need a test to check that accessions can be promoted to canonical!
202      */
203     //FIXME - need to be able to use ID to query here ?
204     AssertJUnit.assertEquals(null, query);
205
206     
207     
208     // TODO: 
209     /**
210      * set of sequences:
211      * - no protein -> TDB not applicable, query PDBe only (consider RNA or DNA - specific query adapter ?)
212      * - protein but no uniprot -> first consider trying to get uniprot refs (need a mark to say none are available)
213      * - protein and uniprot - no canonicals -> resolve to uniprot automatically to get canonicals
214      * - query uniprot against 3DBeacons
215      * --> decorate experimental structures with additional data from PDBe
216      * - query remaining against PDBe
217      * Ranking
218      * - 3D Beacons
219      *  --> in memory ranking - no need to query twice
220      *  Rank by
221      *  - experimental > AlphaFold -> Model
222      *  - start > end
223      *  -> filters for 
224      *  -> experimental only
225      *  -> experimental plus best models for other regions
226      *  -> "best cover" 
227      *  -> need to be able to select correct reference (the longest one that covers all) for superposition
228      */
229 //    
230 //    DBRefEntry uniprotDBRef = new DBRefEntry();
231 //    uniprotDBRef.setAccessionId("P12345");
232 //    uniprotDBRef.setSource(DBRefSource.UNIPROT);
233 //    upSeq.addDBRef(uniprotDBRef);
234 //
235 //    DBRefEntry pdbDBRef = new DBRefEntry();
236 //    pdbDBRef.setAccessionId("1XYZ");
237 //    pdbDBRef.setSource(DBRefSource.PDB);
238 //    upSeq.addDBRef(pdbDBRef);
239 //
240 //    for (int x = 1; x < 5; x++)
241 //    {
242 //      DBRefEntry dbRef = new DBRefEntry();
243 //      dbRef.setAccessionId("XYZ_" + x);
244 //      seq.addDBRef(dbRef);
245 //    }
246 //    System.out.println("");
247 //    System.out.println(seq.getDBRefs());
248 //    System.out.println(query);
249 //    query = scquery.buildQuery(seq);
250 //    assertEquals(
251 //            "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
252 //            query);
253   }
254   @Test(groups= {"Functional"})
255   public void cascadingThreeDBandPDBQuerys()
256   {
257     TDBeaconsFTSRestClientTest.setMock();
258     PDBFTSRestClient.setMock();
259     ThreeDBStructureChooserQuerySource tdbquery = new ThreeDBStructureChooserQuerySource();
260     PDBStructureChooserQuerySource pdbquery  = new PDBStructureChooserQuerySource();
261             
262
263     
264     FTSRestResponse upResponse = null;
265     FTSRestResponse pdbResponse = null;
266     // TODO test available options
267     // Best coverage
268     // Best Alphafold Model
269     // Best model (by confidence score)
270     // Will also need to develop a more sophisticated filtering system
271     List<FilterOption> opts = tdbquery.getAvailableFilterOptions(StructureChooser.VIEWS_FILTER);
272     FilterOption opt_singlebest = opts.get(0);
273     FilterOption opt_manybest = opts.get(1);
274     assertEquals(opt_singlebest.getValue(), ThreeDBStructureChooserQuerySource.FILTER_FIRST_BEST_COVERAGE);
275     assertEquals(opt_manybest.getValue(), ThreeDBStructureChooserQuerySource.FILTER_TDBEACONS_COVERAGE);
276     
277     try {
278       upResponse = tdbquery.fetchStructuresMetaData(upSeq, tdbquery.getDocFieldPrefs().getStructureSummaryFields(),  opt_singlebest, false);
279       tdbquery.updateAvailableFilterOptions(StructureChooser.VIEWS_FILTER,opts,upResponse.getSearchSummary());
280       // test ranking without additional PDBe data
281       FTSRestResponse firstRanked = tdbquery.selectFirstRankedQuery(upSeq, upResponse.getSearchSummary(), tdbquery.getDocFieldPrefs().getStructureSummaryFields(), opt_singlebest.getValue(), false);
282       assertEquals(firstRanked.getNumberOfItemsFound(),1);
283       // many best response
284       upResponse = tdbquery.fetchStructuresMetaData(upSeq, tdbquery.getDocFieldPrefs().getStructureSummaryFields(),  opt_manybest, false);
285       assertTrue(firstRanked.getNumberOfItemsFound()<upResponse.getNumberOfItemsFound());
286       // NB Could have race condition here 
287       String pdb_Query = tdbquery.buildPDBFTSQueryFor(upResponse);
288       assertTrue(pdb_Query.trim().length()>0);
289       pdbResponse = tdbquery.fetchStructuresMetaDataFor(pdbquery, upResponse);
290       assertTrue(pdbResponse.getNumberOfItemsFound()>0);
291       FTSRestResponse joinedResp = tdbquery.joinResponses(upResponse, pdbResponse);
292       assertEquals(upResponse.getNumberOfItemsFound(),joinedResp.getNumberOfItemsFound());
293       
294       
295     } catch (Exception x)
296     {
297       x.printStackTrace();
298       Assert.fail("Unexpected Exception");
299     }
300     StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { upSeq});
301
302   }
303   
304   @Test(groups = { "Functional" })
305   public void sanitizeSeqNameTest()
306   {
307     String name = "ab_cdEF|fwxyz012349";
308     AssertJUnit.assertEquals(name, PDBStructureChooserQuerySource.sanitizeSeqName(name));
309
310     // remove a [nn] substring
311     name = "abcde12[345]fg";
312     AssertJUnit.assertEquals("abcde12fg", PDBStructureChooserQuerySource.sanitizeSeqName(name));
313
314     // remove characters other than a-zA-Z0-9 | or _
315     name = "ab[cd],.\t£$*!- \\\"@:e";
316     AssertJUnit.assertEquals("abcde", PDBStructureChooserQuerySource.sanitizeSeqName(name));
317
318     name = "abcde12[345a]fg";
319     AssertJUnit.assertEquals("abcde12345afg", PDBStructureChooserQuerySource.sanitizeSeqName(name));
320   }
321 }