6e858866b4cc5e282f7b0f0caf6b85a5f9345e9a
[jalview.git] / test / jalview / gui / structurechooser / StructureChooserQuerySourceTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.gui.structurechooser;
22
23 import static org.testng.Assert.assertEquals;
24 import static org.testng.Assert.assertTrue;
25
26 import java.util.Collection;
27 import java.util.List;
28 import java.util.Vector;
29
30 import org.junit.Assert;
31 import org.testng.AssertJUnit;
32 import org.testng.annotations.AfterMethod;
33 import org.testng.annotations.BeforeClass;
34 import org.testng.annotations.BeforeMethod;
35 import org.testng.annotations.DataProvider;
36 import org.testng.annotations.Test;
37
38 import jalview.datamodel.DBRefEntry;
39 import jalview.datamodel.DBRefSource;
40 import jalview.datamodel.PDBEntry;
41 import jalview.datamodel.Sequence;
42 import jalview.datamodel.SequenceI;
43 import jalview.fts.api.FTSData;
44 import jalview.fts.core.FTSRestRequest;
45 import jalview.fts.core.FTSRestResponse;
46 import jalview.fts.service.pdb.PDBFTSRestClient;
47 import jalview.fts.service.pdb.PDBFTSRestClientTest;
48 import jalview.fts.threedbeacons.TDBeaconsFTSRestClientTest;
49 import jalview.gui.JvOptionPane;
50 import jalview.gui.StructureChooser;
51 import jalview.jbgui.FilterOption;
52
53 public class StructureChooserQuerySourceTest
54 {
55
56   @BeforeClass(alwaysRun = true)
57   public void setUpJvOptionPane()
58   {
59     JvOptionPane.setInteractiveMode(false);
60     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
61   }
62
63   Sequence seq, upSeq, upSeq_insulin;
64
65   // same set up as for structurechooser test
66
67   @BeforeMethod(alwaysRun = true)
68   public void setUp() throws Exception
69   {
70     seq = new Sequence("PDB|4kqy|4KQY|A", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1,
71             26);
72     seq.createDatasetSequence();
73     for (int x = 1; x < 5; x++)
74     {
75       DBRefEntry dbRef = new DBRefEntry();
76       dbRef.setAccessionId("XYZ_" + x);
77       seq.addDBRef(dbRef);
78     }
79
80     PDBEntry dbRef = new PDBEntry();
81     dbRef.setId("1tim");
82
83     Vector<PDBEntry> pdbIds = new Vector<>();
84     pdbIds.add(dbRef);
85
86     seq.setPDBId(pdbIds);
87
88     // Uniprot sequence for 3D-Beacons mocks
89     upSeq = new Sequence("P38398",
90             "MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQCPLCKNDITKRS\n"
91                     + "LQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEVSIIQSMGYRNRAKRLLQSEPEN\n"
92                     + "PSLQETSLSVQLSNLGTVRTLRTKQRIQPQKTSVYIELGSDSSEDTVNKATYCSVGDQELLQITPQGTRDEI\n"
93                     + "SLDSAKKAACEFSETDVTNTEHHQPSNNDLNTTEKRAAERHPEKYQGSSVSNLHVEPCGTNTHASSLQHENS\n"
94                     + "SLLLTKDRMNVEKAEFCNKSKQPGLARSQHNRWAGSKETCNDRRTPSTEKKVDLNADPLCERKEWNKQKLPC\n"
95                     + "SENPRDTEDVPWITLNSSIQKVNEWFSRSDELLGSDDSHDGESESNAKVADVLDVLNEVDEYSGSSEKIDLL\n"
96                     + "ASDPHEALICKSERVHSKSVESNIEDKIFGKTYRKKASLPNLSHVTENLIIGAFVTEPQIIQERPLTNKLKR\n"
97                     + "KRRPTSGLHPEDFIKKADLAVQKTPEMINQGTNQTEQNGQVMNITNSGHENKTKGDSIQNEKNPNPIESLEK\n"
98                     + "ESAFKTKAEPISSSISNMELELNIHNSKAPKKNRLRRKSSTRHIHALELVVSRNLSPPNCTELQIDSCSSSE\n"
99                     + "EIKKKKYNQMPVRHSRNLQLMEGKEPATGAKKSNKPNEQTSKRHDSDTFPELKLTNAPGSFTKCSNTSELKE\n"
100                     + "FVNPSLPREEKEEKLETVKVSNNAEDPKDLMLSGERVLQTERSVESSSISLVPGTDYGTQESISLLEVSTLG\n"
101                     + "KAKTEPNKCVSQCAAFENPKGLIHGCSKDNRNDTEGFKYPLGHEVNHSRETSIEMEESELDAQYLQNTFKVS\n"
102                     + "KRQSFAPFSNPGNAEEECATFSAHSGSLKKQSPKVTFECEQKEENQGKNESNIKPVQTVNITAGFPVVGQKD\n"
103                     + "KPVDNAKCSIKGGSRFCLSSQFRGNETGLITPNKHGLLQNPYRIPPLFPIKSFVKTKCKKNLLEENFEEHSM\n"
104                     + "SPEREMGNENIPSTVSTISRNNIRENVFKEASSSNINEVGSSTNEVGSSINEIGSSDENIQAELGRNRGPKL\n"
105                     + "NAMLRLGVLQPEVYKQSLPGSNCKHPEIKKQEYEEVVQTVNTDFSPYLISDNLEQPMGSSHASQVCSETPDD\n"
106                     + "LLDDGEIKEDTSFAENDIKESSAVFSKSVQKGELSRSPSPFTHTHLAQGYRRGAKKLESSEENLSSEDEELP\n"
107                     + "CFQHLLFGKVNNIPSQSTRHSTVATECLSKNTEENLLSLKNSLNDCSNQVILAKASQEHHLSEETKCSASLF\n"
108                     + "SSQCSELEDLTANTNTQDPFLIGSSKQMRHQSESQGVGLSDKELVSDDEERGTGLEENNQEEQSMDSNLGEA\n"
109                     + "ASGCESETSVSEDCSGLSSQSDILTTQQRDTMQHNLIKLQQEMAELEAVLEQHGSQPSNSYPSIISDSSALE\n"
110                     + "DLRNPEQSTSEKAVLTSQKSSEYPISQNPEGLSADKFEVSADSSTSKNKEPGVERSSPSKCPSLDDRWYMHS\n"
111                     + "CSGSLQNRNYPSQEELIKVVDVEEQQLEESGPHDLTETSYLPRQDLEGTPYLESGISLFSDDPESDPSEDRA\n"
112                     + "PESARVGNIPSSTSALKVPQLKVAESAQSPAAAHTTDTAGYNAMEESVSREKPELTASTERVNKRMSMVVSG\n"
113                     + "LTPEEFMLVYKFARKHHITLTNLITEETTHVVMKTDAEFVCERTLKYFLGIAGGKWVVSYFWVTQSIKERKM\n"
114                     + "LNEHDFEVRGDVVNGRNHQGPKRARESQDRKIFRGLEICCYGPFTNMPTDQLEWMVQLCGASVVKELSSFTL\n"
115                     + "GTGVHPIVVVQPDAWTEDNGFHAIGQMCEAPVVTREWVLDSVALYQCQELDTYLIPQIPHSHY\n"
116                     + "",
117             1, 1863);
118     upSeq.createDatasetSequence();
119     upSeq.setDescription("Breast cancer type 1 susceptibility protein");
120     upSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "P38398", null, true));
121
122     upSeq_insulin = new Sequence("INS_HUMAN",
123             "MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGP"
124                     + "GAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN");
125     upSeq_insulin.createDatasetSequence();
126     upSeq_insulin.setDescription("Insulin");
127     upSeq_insulin
128             .addDBRef(new DBRefEntry("UNIPROT", "0", "P01308", null, true));
129   }
130
131   @AfterMethod(alwaysRun = true)
132   public void tearDown() throws Exception
133   {
134     seq = null;
135     upSeq = null;
136   }
137
138   @SuppressWarnings("deprecation")
139   @Test(groups = { "Functional" })
140   public void buildPDBQueryTest()
141   {
142     System.out.println("seq >>>> " + seq);
143
144     StructureChooserQuerySource scquery = StructureChooserQuerySource
145             .getQuerySourceFor(new SequenceI[]
146             { seq });
147     AssertJUnit
148             .assertTrue(scquery instanceof PDBStructureChooserQuerySource);
149     String query = scquery.buildQuery(seq);
150     AssertJUnit.assertEquals("pdb_id:1tim", query);
151     seq.getAllPDBEntries().clear();
152     query = scquery.buildQuery(seq);
153     AssertJUnit.assertEquals(
154             "text:XYZ_1 OR text:XYZ_2 OR text:XYZ_3 OR text:XYZ_4 OR text:4kqy",
155             query);
156     seq.setDBRefs(null);
157     query = scquery.buildQuery(seq);
158     System.out.println(query);
159     AssertJUnit.assertEquals("text:4kqy", query);
160
161     DBRefEntry uniprotDBRef = new DBRefEntry();
162     uniprotDBRef.setAccessionId("P12345");
163     uniprotDBRef.setSource(DBRefSource.UNIPROT);
164     seq.addDBRef(uniprotDBRef);
165
166     DBRefEntry pdbDBRef = new DBRefEntry();
167     pdbDBRef.setAccessionId("1XYZ");
168     pdbDBRef.setSource(DBRefSource.PDB);
169     seq.addDBRef(pdbDBRef);
170
171     for (int x = 1; x < 5; x++)
172     {
173       DBRefEntry dbRef = new DBRefEntry();
174       dbRef.setAccessionId("XYZ_" + x);
175       seq.addDBRef(dbRef);
176     }
177     System.out.println("");
178     System.out.println(seq.getDBRefs());
179     System.out.println(query);
180     query = scquery.buildQuery(seq);
181     AssertJUnit.assertEquals(
182             "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
183             query);
184   }
185
186   @SuppressWarnings("deprecation")
187   @Test(groups = { "Functional" })
188   public void buildThreeDBQueryTest()
189   {
190     System.out.println("seq >>>> " + upSeq);
191     TDBeaconsFTSRestClientTest.setMock();
192     PDBFTSRestClientTest.setMock();
193     StructureChooserQuerySource scquery = StructureChooserQuerySource
194             .getQuerySourceFor(new SequenceI[]
195             { upSeq });
196     // gets the lightweight proxy rather than the
197     // ThreeDBStructureChooserQuerySource
198     AssertJUnit.assertTrue(
199             scquery instanceof ThreeDBStructureChooserQuerySource);
200     String query = scquery.buildQuery(upSeq);
201     AssertJUnit.assertEquals("P38398", query);
202
203     // query shouldn't change regardless of additional entries
204     // because 3DBeacons requires canonical entries.
205     upSeq.getAllPDBEntries().clear();
206     query = scquery.buildQuery(upSeq);
207     AssertJUnit.assertEquals("P38398", query);
208     upSeq.setDBRefs(null);
209     query = scquery.buildQuery(upSeq);
210     /*
211      * legacy projects/datasets will not have canonical flags set for uniprot dbrefs
212      * graceful behaviour would be to
213      *  - pick one ? not possible
214      *  - iterate through all until a 200 is obtained ?
215      *  ---> ideal but could be costly
216      *  ---> better to do a direct retrieval from uniprot to work out which is the canonical identifier..
217      *  ----> need a test to check that accessions can be promoted to canonical!
218      */
219     // FIXME - need to be able to use ID to query here ?
220     AssertJUnit.assertEquals(null, query);
221
222     // TODO:
223     /**
224      * set of sequences: - no protein -> TDB not applicable, query PDBe only
225      * (consider RNA or DNA - specific query adapter ?) - protein but no uniprot
226      * -> first consider trying to get uniprot refs (need a mark to say none are
227      * available) - protein and uniprot - no canonicals -> resolve to uniprot
228      * automatically to get canonicals - query uniprot against 3DBeacons -->
229      * decorate experimental structures with additional data from PDBe - query
230      * remaining against PDBe Ranking - 3D Beacons --> in memory ranking - no
231      * need to query twice Rank by - experimental > AlphaFold -> Model - start >
232      * end -> filters for -> experimental only -> experimental plus best models
233      * for other regions -> "best cover" -> need to be able to select correct
234      * reference (the longest one that covers all) for superposition
235      */
236     //
237     // DBRefEntry uniprotDBRef = new DBRefEntry();
238     // uniprotDBRef.setAccessionId("P12345");
239     // uniprotDBRef.setSource(DBRefSource.UNIPROT);
240     // upSeq.addDBRef(uniprotDBRef);
241     //
242     // DBRefEntry pdbDBRef = new DBRefEntry();
243     // pdbDBRef.setAccessionId("1XYZ");
244     // pdbDBRef.setSource(DBRefSource.PDB);
245     // upSeq.addDBRef(pdbDBRef);
246     //
247     // for (int x = 1; x < 5; x++)
248     // {
249     // DBRefEntry dbRef = new DBRefEntry();
250     // dbRef.setAccessionId("XYZ_" + x);
251     // seq.addDBRef(dbRef);
252     // }
253     // System.out.println("");
254     // System.out.println(seq.getDBRefs());
255     // System.out.println(query);
256     // query = scquery.buildQuery(seq);
257     // assertEquals(
258     // "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
259     // query);
260   }
261
262   @Test(groups = { "Functional" }, dataProvider = "testUpSeqs")
263   public void cascadingThreeDBandPDBQuerys(SequenceI testUpSeq)
264   {
265     TDBeaconsFTSRestClientTest.setMock();
266     PDBFTSRestClientTest.setMock();
267     ThreeDBStructureChooserQuerySource tdbquery = new ThreeDBStructureChooserQuerySource();
268     PDBStructureChooserQuerySource pdbquery = new PDBStructureChooserQuerySource();
269
270     FTSRestResponse upResponse = null;
271     FTSRestResponse pdbResponse = null;
272     // TODO test available options
273     // Best coverage
274     // Best Alphafold Model
275     // Best model (by confidence score)
276     // Will also need to develop a more sophisticated filtering system
277     List<FilterOption> opts = tdbquery
278             .getAvailableFilterOptions(StructureChooser.VIEWS_FILTER);
279     FilterOption opt_singlebest = opts.get(0);
280     FilterOption opt_manybest = opts.get(1);
281     assertEquals(opt_singlebest.getValue(),
282             ThreeDBStructureChooserQuerySource.FILTER_FIRST_BEST_COVERAGE);
283     assertEquals(opt_manybest.getValue(),
284             ThreeDBStructureChooserQuerySource.FILTER_TDBEACONS_COVERAGE);
285
286     try
287     {
288       upResponse = tdbquery.fetchStructuresMetaData(testUpSeq,
289               tdbquery.getDocFieldPrefs().getStructureSummaryFields(),
290               opt_singlebest, false);
291       tdbquery.updateAvailableFilterOptions(StructureChooser.VIEWS_FILTER,
292               opts, upResponse.getSearchSummary());
293       // test ranking without additional PDBe data
294       FTSRestResponse firstRanked = tdbquery.selectFirstRankedQuery(
295               testUpSeq, upResponse.getSearchSummary(),
296               tdbquery.getDocFieldPrefs().getStructureSummaryFields(),
297               opt_singlebest.getValue(), false);
298       assertEquals(firstRanked.getNumberOfItemsFound(), 1);
299       // many best response
300       upResponse = tdbquery.fetchStructuresMetaData(testUpSeq,
301               tdbquery.getDocFieldPrefs().getStructureSummaryFields(),
302               opt_manybest, false);
303       assertTrue(firstRanked.getSearchSummary().size() < upResponse
304               .getSearchSummary().size());
305       // NB Could have race condition here
306       String pdb_Query = tdbquery.buildPDBFTSQueryFor(upResponse);
307       assertTrue(pdb_Query.trim().length() > 0);
308       pdbResponse = tdbquery.fetchStructuresMetaDataFor(pdbquery,
309               upResponse);
310       assertTrue(pdbResponse.getNumberOfItemsFound() > 0);
311       FTSRestResponse joinedResp = tdbquery.joinResponses(upResponse,
312               pdbResponse);
313       assertEquals(upResponse.getNumberOfItemsFound(),
314               joinedResp.getNumberOfItemsFound());
315
316     } catch (Exception x)
317     {
318       x.printStackTrace();
319       Assert.fail("Unexpected Exception");
320     }
321     StructureChooserQuerySource scquery = StructureChooserQuerySource
322             .getQuerySourceFor(new SequenceI[]
323             { testUpSeq });
324
325   }
326
327   @DataProvider(name = "testUpSeqs")
328   public Object[][] testUpSeqs() throws Exception
329   {
330     setUp();
331     return new Object[][] { { upSeq }, { upSeq_insulin } };
332   }
333
334   @Test(groups = { "Functional" })
335   public void sanitizeSeqNameTest()
336   {
337     String name = "ab_cdEF|fwxyz012349";
338     AssertJUnit.assertEquals(name,
339             PDBStructureChooserQuerySource.sanitizeSeqName(name));
340
341     // remove a [nn] substring
342     name = "abcde12[345]fg";
343     AssertJUnit.assertEquals("abcde12fg",
344             PDBStructureChooserQuerySource.sanitizeSeqName(name));
345
346     // remove characters other than a-zA-Z0-9 | or _
347     name = "ab[cd],.\t£$*!- \\\"@:e";
348     AssertJUnit.assertEquals("abcde",
349             PDBStructureChooserQuerySource.sanitizeSeqName(name));
350
351     name = "abcde12[345a]fg";
352     AssertJUnit.assertEquals("abcde12345afg",
353             PDBStructureChooserQuerySource.sanitizeSeqName(name));
354   }
355 }