1912f146bb8cb06cdcb10cc61463a7eacd2d949b
[jalview.git] / test / jalview / gui / structurechooser / StructureChooserQuerySourceTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.gui.structurechooser;
22
23
24 import static org.testng.Assert.assertEquals;
25 import static org.testng.Assert.assertTrue;
26
27 import java.util.Collection;
28 import java.util.List;
29 import java.util.Vector;
30
31 import org.junit.Assert;
32 import org.testng.AssertJUnit;
33 import org.testng.annotations.AfterMethod;
34 import org.testng.annotations.BeforeClass;
35 import org.testng.annotations.BeforeMethod;
36 import org.testng.annotations.DataProvider;
37 import org.testng.annotations.Test;
38
39 import jalview.datamodel.DBRefEntry;
40 import jalview.datamodel.DBRefSource;
41 import jalview.datamodel.PDBEntry;
42 import jalview.datamodel.Sequence;
43 import jalview.datamodel.SequenceI;
44 import jalview.fts.api.FTSData;
45 import jalview.fts.core.FTSRestRequest;
46 import jalview.fts.core.FTSRestResponse;
47 import jalview.fts.service.pdb.PDBFTSRestClient;
48 import jalview.fts.service.pdb.PDBFTSRestClientTest;
49 import jalview.fts.threedbeacons.TDBeaconsFTSRestClientTest;
50 import jalview.gui.JvOptionPane;
51 import jalview.gui.StructureChooser;
52 import jalview.jbgui.FilterOption;
53
54 public class StructureChooserQuerySourceTest
55 {
56
57   @BeforeClass(alwaysRun = true)
58   public void setUpJvOptionPane()
59   {
60     JvOptionPane.setInteractiveMode(false);
61     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
62   }
63
64   Sequence seq,upSeq,upSeq_insulin;
65
66   // same set up as for structurechooser test
67   
68 @BeforeMethod(alwaysRun = true)
69   public void setUp() throws Exception
70   {
71     seq = new Sequence("PDB|4kqy|4KQY|A", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1,
72             26);
73     seq.createDatasetSequence();
74     for (int x = 1; x < 5; x++)
75     {
76       DBRefEntry dbRef = new DBRefEntry();
77       dbRef.setAccessionId("XYZ_" + x);
78       seq.addDBRef(dbRef);
79     }
80
81     PDBEntry dbRef = new PDBEntry();
82     dbRef.setId("1tim");
83
84     Vector<PDBEntry> pdbIds = new Vector<>();
85     pdbIds.add(dbRef);
86
87     seq.setPDBId(pdbIds);
88     
89     // Uniprot sequence for 3D-Beacons mocks
90     upSeq = new Sequence("P38398", 
91             "MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQCPLCKNDITKRS\n"
92             + "LQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEVSIIQSMGYRNRAKRLLQSEPEN\n"
93             + "PSLQETSLSVQLSNLGTVRTLRTKQRIQPQKTSVYIELGSDSSEDTVNKATYCSVGDQELLQITPQGTRDEI\n"
94             + "SLDSAKKAACEFSETDVTNTEHHQPSNNDLNTTEKRAAERHPEKYQGSSVSNLHVEPCGTNTHASSLQHENS\n"
95             + "SLLLTKDRMNVEKAEFCNKSKQPGLARSQHNRWAGSKETCNDRRTPSTEKKVDLNADPLCERKEWNKQKLPC\n"
96             + "SENPRDTEDVPWITLNSSIQKVNEWFSRSDELLGSDDSHDGESESNAKVADVLDVLNEVDEYSGSSEKIDLL\n"
97             + "ASDPHEALICKSERVHSKSVESNIEDKIFGKTYRKKASLPNLSHVTENLIIGAFVTEPQIIQERPLTNKLKR\n"
98             + "KRRPTSGLHPEDFIKKADLAVQKTPEMINQGTNQTEQNGQVMNITNSGHENKTKGDSIQNEKNPNPIESLEK\n"
99             + "ESAFKTKAEPISSSISNMELELNIHNSKAPKKNRLRRKSSTRHIHALELVVSRNLSPPNCTELQIDSCSSSE\n"
100             + "EIKKKKYNQMPVRHSRNLQLMEGKEPATGAKKSNKPNEQTSKRHDSDTFPELKLTNAPGSFTKCSNTSELKE\n"
101             + "FVNPSLPREEKEEKLETVKVSNNAEDPKDLMLSGERVLQTERSVESSSISLVPGTDYGTQESISLLEVSTLG\n"
102             + "KAKTEPNKCVSQCAAFENPKGLIHGCSKDNRNDTEGFKYPLGHEVNHSRETSIEMEESELDAQYLQNTFKVS\n"
103             + "KRQSFAPFSNPGNAEEECATFSAHSGSLKKQSPKVTFECEQKEENQGKNESNIKPVQTVNITAGFPVVGQKD\n"
104             + "KPVDNAKCSIKGGSRFCLSSQFRGNETGLITPNKHGLLQNPYRIPPLFPIKSFVKTKCKKNLLEENFEEHSM\n"
105             + "SPEREMGNENIPSTVSTISRNNIRENVFKEASSSNINEVGSSTNEVGSSINEIGSSDENIQAELGRNRGPKL\n"
106             + "NAMLRLGVLQPEVYKQSLPGSNCKHPEIKKQEYEEVVQTVNTDFSPYLISDNLEQPMGSSHASQVCSETPDD\n"
107             + "LLDDGEIKEDTSFAENDIKESSAVFSKSVQKGELSRSPSPFTHTHLAQGYRRGAKKLESSEENLSSEDEELP\n"
108             + "CFQHLLFGKVNNIPSQSTRHSTVATECLSKNTEENLLSLKNSLNDCSNQVILAKASQEHHLSEETKCSASLF\n"
109             + "SSQCSELEDLTANTNTQDPFLIGSSKQMRHQSESQGVGLSDKELVSDDEERGTGLEENNQEEQSMDSNLGEA\n"
110             + "ASGCESETSVSEDCSGLSSQSDILTTQQRDTMQHNLIKLQQEMAELEAVLEQHGSQPSNSYPSIISDSSALE\n"
111             + "DLRNPEQSTSEKAVLTSQKSSEYPISQNPEGLSADKFEVSADSSTSKNKEPGVERSSPSKCPSLDDRWYMHS\n"
112             + "CSGSLQNRNYPSQEELIKVVDVEEQQLEESGPHDLTETSYLPRQDLEGTPYLESGISLFSDDPESDPSEDRA\n"
113             + "PESARVGNIPSSTSALKVPQLKVAESAQSPAAAHTTDTAGYNAMEESVSREKPELTASTERVNKRMSMVVSG\n"
114             + "LTPEEFMLVYKFARKHHITLTNLITEETTHVVMKTDAEFVCERTLKYFLGIAGGKWVVSYFWVTQSIKERKM\n"
115             + "LNEHDFEVRGDVVNGRNHQGPKRARESQDRKIFRGLEICCYGPFTNMPTDQLEWMVQLCGASVVKELSSFTL\n"
116             + "GTGVHPIVVVQPDAWTEDNGFHAIGQMCEAPVVTREWVLDSVALYQCQELDTYLIPQIPHSHY\n"
117             + "", 1,
118 1863);
119     upSeq.createDatasetSequence();
120     upSeq.setDescription("Breast cancer type 1 susceptibility protein");
121     upSeq.addDBRef(new DBRefEntry("UNIPROT","0","P38398",null,true));
122     
123     upSeq_insulin=new Sequence("INS_HUMAN",
124             "MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGP"
125             + "GAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN");
126     upSeq_insulin.createDatasetSequence();
127     upSeq_insulin.setDescription("Insulin");
128     upSeq_insulin.addDBRef(new DBRefEntry("UNIPROT","0","P01308",null,true));
129   }
130
131 @AfterMethod(alwaysRun = true)
132   public void tearDown() throws Exception
133   {
134     seq = null;
135     upSeq=null;
136   }
137
138   @SuppressWarnings("deprecation")
139   @Test(groups = { "Functional" })
140   public void buildPDBQueryTest()
141   {
142     System.out.println("seq >>>> " + seq);
143     
144     StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { seq});
145     AssertJUnit.assertTrue(scquery instanceof PDBStructureChooserQuerySource);
146     String query = scquery.buildQuery(seq);
147     AssertJUnit.assertEquals("pdb_id:1tim", query);
148     seq.getAllPDBEntries().clear();
149     query = scquery.buildQuery(seq);
150     AssertJUnit.assertEquals(
151             "text:XYZ_1 OR text:XYZ_2 OR text:XYZ_3 OR text:XYZ_4 OR text:4kqy",
152             query);
153     seq.setDBRefs(null);
154     query = scquery.buildQuery(seq);
155     System.out.println(query);
156     AssertJUnit.assertEquals("text:4kqy", query);
157
158     DBRefEntry uniprotDBRef = new DBRefEntry();
159     uniprotDBRef.setAccessionId("P12345");
160     uniprotDBRef.setSource(DBRefSource.UNIPROT);
161     seq.addDBRef(uniprotDBRef);
162
163     DBRefEntry pdbDBRef = new DBRefEntry();
164     pdbDBRef.setAccessionId("1XYZ");
165     pdbDBRef.setSource(DBRefSource.PDB);
166     seq.addDBRef(pdbDBRef);
167
168     for (int x = 1; x < 5; x++)
169     {
170       DBRefEntry dbRef = new DBRefEntry();
171       dbRef.setAccessionId("XYZ_" + x);
172       seq.addDBRef(dbRef);
173     }
174     System.out.println("");
175     System.out.println(seq.getDBRefs());
176     System.out.println(query);
177     query = scquery.buildQuery(seq);
178     AssertJUnit.assertEquals(
179             "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
180             query);
181   }
182
183   @SuppressWarnings("deprecation")
184   @Test(groups = { "Functional" })
185   public void buildThreeDBQueryTest()
186   {
187     System.out.println("seq >>>> " + upSeq);
188     TDBeaconsFTSRestClientTest.setMock();
189     PDBFTSRestClientTest.setMock();
190     StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { upSeq});
191     // gets the lightweight proxy rather than the ThreeDBStructureChooserQuerySource
192     AssertJUnit.assertTrue(scquery instanceof ThreeDBStructureChooserQuerySource);
193     String query = scquery.buildQuery(upSeq);
194     AssertJUnit.assertEquals("P38398", query);
195     
196     // query shouldn't change regardless of additional entries
197     // because 3DBeacons requires canonical entries.
198     upSeq.getAllPDBEntries().clear();
199     query = scquery.buildQuery(upSeq);
200     AssertJUnit.assertEquals("P38398", query);
201     upSeq.setDBRefs(null);
202     query = scquery.buildQuery(upSeq);
203     /*
204      * legacy projects/datasets will not have canonical flags set for uniprot dbrefs
205      * graceful behaviour would be to
206      *  - pick one ? not possible
207      *  - iterate through all until a 200 is obtained ?
208      *  ---> ideal but could be costly
209      *  ---> better to do a direct retrieval from uniprot to work out which is the canonical identifier..
210      *  ----> need a test to check that accessions can be promoted to canonical!
211      */
212     //FIXME - need to be able to use ID to query here ?
213     AssertJUnit.assertEquals(null, query);
214
215     
216     
217     // TODO: 
218     /**
219      * set of sequences:
220      * - no protein -> TDB not applicable, query PDBe only (consider RNA or DNA - specific query adapter ?)
221      * - protein but no uniprot -> first consider trying to get uniprot refs (need a mark to say none are available)
222      * - protein and uniprot - no canonicals -> resolve to uniprot automatically to get canonicals
223      * - query uniprot against 3DBeacons
224      * --> decorate experimental structures with additional data from PDBe
225      * - query remaining against PDBe
226      * Ranking
227      * - 3D Beacons
228      *  --> in memory ranking - no need to query twice
229      *  Rank by
230      *  - experimental > AlphaFold -> Model
231      *  - start > end
232      *  -> filters for 
233      *  -> experimental only
234      *  -> experimental plus best models for other regions
235      *  -> "best cover" 
236      *  -> need to be able to select correct reference (the longest one that covers all) for superposition
237      */
238 //    
239 //    DBRefEntry uniprotDBRef = new DBRefEntry();
240 //    uniprotDBRef.setAccessionId("P12345");
241 //    uniprotDBRef.setSource(DBRefSource.UNIPROT);
242 //    upSeq.addDBRef(uniprotDBRef);
243 //
244 //    DBRefEntry pdbDBRef = new DBRefEntry();
245 //    pdbDBRef.setAccessionId("1XYZ");
246 //    pdbDBRef.setSource(DBRefSource.PDB);
247 //    upSeq.addDBRef(pdbDBRef);
248 //
249 //    for (int x = 1; x < 5; x++)
250 //    {
251 //      DBRefEntry dbRef = new DBRefEntry();
252 //      dbRef.setAccessionId("XYZ_" + x);
253 //      seq.addDBRef(dbRef);
254 //    }
255 //    System.out.println("");
256 //    System.out.println(seq.getDBRefs());
257 //    System.out.println(query);
258 //    query = scquery.buildQuery(seq);
259 //    assertEquals(
260 //            "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
261 //            query);
262   }
263   @Test(groups= {"Functional"},dataProvider = "testUpSeqs")
264   public void cascadingThreeDBandPDBQuerys(SequenceI testUpSeq)
265   {
266     TDBeaconsFTSRestClientTest.setMock();
267     PDBFTSRestClientTest.setMock();
268     ThreeDBStructureChooserQuerySource tdbquery = new ThreeDBStructureChooserQuerySource();
269     PDBStructureChooserQuerySource pdbquery  = new PDBStructureChooserQuerySource();
270             
271
272     
273     FTSRestResponse upResponse = null;
274     FTSRestResponse pdbResponse = null;
275     // TODO test available options
276     // Best coverage
277     // Best Alphafold Model
278     // Best model (by confidence score)
279     // Will also need to develop a more sophisticated filtering system
280     List<FilterOption> opts = tdbquery.getAvailableFilterOptions(StructureChooser.VIEWS_FILTER);
281     FilterOption opt_singlebest = opts.get(0);
282     FilterOption opt_manybest = opts.get(1);
283     assertEquals(opt_singlebest.getValue(), ThreeDBStructureChooserQuerySource.FILTER_FIRST_BEST_COVERAGE);
284     assertEquals(opt_manybest.getValue(), ThreeDBStructureChooserQuerySource.FILTER_TDBEACONS_COVERAGE);
285     
286     try {
287       upResponse = tdbquery.fetchStructuresMetaData(testUpSeq, tdbquery.getDocFieldPrefs().getStructureSummaryFields(),  opt_singlebest, false);
288       tdbquery.updateAvailableFilterOptions(StructureChooser.VIEWS_FILTER,opts,upResponse.getSearchSummary());
289       // test ranking without additional PDBe data
290       FTSRestResponse firstRanked = tdbquery.selectFirstRankedQuery(testUpSeq, upResponse.getSearchSummary(), tdbquery.getDocFieldPrefs().getStructureSummaryFields(), opt_singlebest.getValue(), false);
291       assertEquals(firstRanked.getNumberOfItemsFound(),1);
292       // many best response
293       upResponse = tdbquery.fetchStructuresMetaData(testUpSeq, tdbquery.getDocFieldPrefs().getStructureSummaryFields(),  opt_manybest, false);
294       assertTrue(firstRanked.getSearchSummary().size()<upResponse.getSearchSummary().size());
295       // NB Could have race condition here 
296       String pdb_Query = tdbquery.buildPDBFTSQueryFor(upResponse);
297       assertTrue(pdb_Query.trim().length()>0);
298       pdbResponse = tdbquery.fetchStructuresMetaDataFor(pdbquery, upResponse);
299       assertTrue(pdbResponse.getNumberOfItemsFound()>0);
300       FTSRestResponse joinedResp = tdbquery.joinResponses(upResponse, pdbResponse);
301       assertEquals(upResponse.getNumberOfItemsFound(),joinedResp.getNumberOfItemsFound());
302       
303       
304     } catch (Exception x)
305     {
306       x.printStackTrace();
307       Assert.fail("Unexpected Exception");
308     }
309     StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { testUpSeq});
310
311   }
312   
313   @DataProvider(name = "testUpSeqs")
314   public Object[][] testUpSeqs() throws Exception
315   {
316     setUp();
317     return new Object[][] { {upSeq},{upSeq_insulin}};
318   }
319   
320   @Test(groups = { "Functional" })
321   public void sanitizeSeqNameTest()
322   {
323     String name = "ab_cdEF|fwxyz012349";
324     AssertJUnit.assertEquals(name, PDBStructureChooserQuerySource.sanitizeSeqName(name));
325
326     // remove a [nn] substring
327     name = "abcde12[345]fg";
328     AssertJUnit.assertEquals("abcde12fg", PDBStructureChooserQuerySource.sanitizeSeqName(name));
329
330     // remove characters other than a-zA-Z0-9 | or _
331     name = "ab[cd],.\t£$*!- \\\"@:e";
332     AssertJUnit.assertEquals("abcde", PDBStructureChooserQuerySource.sanitizeSeqName(name));
333
334     name = "abcde12[345a]fg";
335     AssertJUnit.assertEquals("abcde12345afg", PDBStructureChooserQuerySource.sanitizeSeqName(name));
336   }
337 }