JAL-3829 work in progress - look up additional metadata from PDBe for structures...
[jalview.git] / test / jalview / gui / structurechooser / StructureChooserQuerySourceTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.gui.structurechooser;
22
23
24 import static org.testng.Assert.assertEquals;
25 import static org.testng.Assert.assertTrue;
26
27 import java.util.Vector;
28
29 import org.junit.Assert;
30 import org.testng.AssertJUnit;
31 import org.testng.annotations.AfterMethod;
32 import org.testng.annotations.BeforeClass;
33 import org.testng.annotations.BeforeMethod;
34 import org.testng.annotations.Test;
35
36 import jalview.datamodel.DBRefEntry;
37 import jalview.datamodel.DBRefSource;
38 import jalview.datamodel.PDBEntry;
39 import jalview.datamodel.Sequence;
40 import jalview.datamodel.SequenceI;
41 import jalview.fts.core.FTSRestRequest;
42 import jalview.fts.core.FTSRestResponse;
43 import jalview.fts.service.pdb.PDBFTSRestClient;
44 import jalview.fts.threedbeacons.TDBeaconsFTSRestClientTest;
45 import jalview.gui.JvOptionPane;
46 import jalview.jbgui.FilterOption;
47
48 public class StructureChooserQuerySourceTest
49 {
50
51   @BeforeClass(alwaysRun = true)
52   public void setUpJvOptionPane()
53   {
54     JvOptionPane.setInteractiveMode(false);
55     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
56   }
57
58   Sequence seq,upSeq;
59
60   // same set up as for structurechooser test
61   
62 @BeforeMethod(alwaysRun = true)
63   public void setUp() throws Exception
64   {
65     seq = new Sequence("PDB|4kqy|4KQY|A", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1,
66             26);
67     seq.createDatasetSequence();
68     for (int x = 1; x < 5; x++)
69     {
70       DBRefEntry dbRef = new DBRefEntry();
71       dbRef.setAccessionId("XYZ_" + x);
72       seq.addDBRef(dbRef);
73     }
74
75     PDBEntry dbRef = new PDBEntry();
76     dbRef.setId("1tim");
77
78     Vector<PDBEntry> pdbIds = new Vector<>();
79     pdbIds.add(dbRef);
80
81     seq.setPDBId(pdbIds);
82     
83     // Uniprot sequence for 3D-Beacons mocks
84     upSeq = new Sequence("P38398", 
85             "MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQCPLCKNDITKRS\n"
86             + "LQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEVSIIQSMGYRNRAKRLLQSEPEN\n"
87             + "PSLQETSLSVQLSNLGTVRTLRTKQRIQPQKTSVYIELGSDSSEDTVNKATYCSVGDQELLQITPQGTRDEI\n"
88             + "SLDSAKKAACEFSETDVTNTEHHQPSNNDLNTTEKRAAERHPEKYQGSSVSNLHVEPCGTNTHASSLQHENS\n"
89             + "SLLLTKDRMNVEKAEFCNKSKQPGLARSQHNRWAGSKETCNDRRTPSTEKKVDLNADPLCERKEWNKQKLPC\n"
90             + "SENPRDTEDVPWITLNSSIQKVNEWFSRSDELLGSDDSHDGESESNAKVADVLDVLNEVDEYSGSSEKIDLL\n"
91             + "ASDPHEALICKSERVHSKSVESNIEDKIFGKTYRKKASLPNLSHVTENLIIGAFVTEPQIIQERPLTNKLKR\n"
92             + "KRRPTSGLHPEDFIKKADLAVQKTPEMINQGTNQTEQNGQVMNITNSGHENKTKGDSIQNEKNPNPIESLEK\n"
93             + "ESAFKTKAEPISSSISNMELELNIHNSKAPKKNRLRRKSSTRHIHALELVVSRNLSPPNCTELQIDSCSSSE\n"
94             + "EIKKKKYNQMPVRHSRNLQLMEGKEPATGAKKSNKPNEQTSKRHDSDTFPELKLTNAPGSFTKCSNTSELKE\n"
95             + "FVNPSLPREEKEEKLETVKVSNNAEDPKDLMLSGERVLQTERSVESSSISLVPGTDYGTQESISLLEVSTLG\n"
96             + "KAKTEPNKCVSQCAAFENPKGLIHGCSKDNRNDTEGFKYPLGHEVNHSRETSIEMEESELDAQYLQNTFKVS\n"
97             + "KRQSFAPFSNPGNAEEECATFSAHSGSLKKQSPKVTFECEQKEENQGKNESNIKPVQTVNITAGFPVVGQKD\n"
98             + "KPVDNAKCSIKGGSRFCLSSQFRGNETGLITPNKHGLLQNPYRIPPLFPIKSFVKTKCKKNLLEENFEEHSM\n"
99             + "SPEREMGNENIPSTVSTISRNNIRENVFKEASSSNINEVGSSTNEVGSSINEIGSSDENIQAELGRNRGPKL\n"
100             + "NAMLRLGVLQPEVYKQSLPGSNCKHPEIKKQEYEEVVQTVNTDFSPYLISDNLEQPMGSSHASQVCSETPDD\n"
101             + "LLDDGEIKEDTSFAENDIKESSAVFSKSVQKGELSRSPSPFTHTHLAQGYRRGAKKLESSEENLSSEDEELP\n"
102             + "CFQHLLFGKVNNIPSQSTRHSTVATECLSKNTEENLLSLKNSLNDCSNQVILAKASQEHHLSEETKCSASLF\n"
103             + "SSQCSELEDLTANTNTQDPFLIGSSKQMRHQSESQGVGLSDKELVSDDEERGTGLEENNQEEQSMDSNLGEA\n"
104             + "ASGCESETSVSEDCSGLSSQSDILTTQQRDTMQHNLIKLQQEMAELEAVLEQHGSQPSNSYPSIISDSSALE\n"
105             + "DLRNPEQSTSEKAVLTSQKSSEYPISQNPEGLSADKFEVSADSSTSKNKEPGVERSSPSKCPSLDDRWYMHS\n"
106             + "CSGSLQNRNYPSQEELIKVVDVEEQQLEESGPHDLTETSYLPRQDLEGTPYLESGISLFSDDPESDPSEDRA\n"
107             + "PESARVGNIPSSTSALKVPQLKVAESAQSPAAAHTTDTAGYNAMEESVSREKPELTASTERVNKRMSMVVSG\n"
108             + "LTPEEFMLVYKFARKHHITLTNLITEETTHVVMKTDAEFVCERTLKYFLGIAGGKWVVSYFWVTQSIKERKM\n"
109             + "LNEHDFEVRGDVVNGRNHQGPKRARESQDRKIFRGLEICCYGPFTNMPTDQLEWMVQLCGASVVKELSSFTL\n"
110             + "GTGVHPIVVVQPDAWTEDNGFHAIGQMCEAPVVTREWVLDSVALYQCQELDTYLIPQIPHSHY\n"
111             + "", 1,
112 1863);
113     upSeq.createDatasetSequence();
114     upSeq.setDescription("Breast cancer type 1 susceptibility protein");
115     upSeq.addDBRef(new DBRefEntry("UNIPROT","0","P38398",null,true));
116   }
117
118 @AfterMethod(alwaysRun = true)
119   public void tearDown() throws Exception
120   {
121     seq = null;
122     upSeq=null;
123   }
124
125   @SuppressWarnings("deprecation")
126   @Test(groups = { "Functional" })
127   public void buildPDBQueryTest()
128   {
129     System.out.println("seq >>>> " + seq);
130     
131     StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { seq});
132     AssertJUnit.assertTrue(scquery instanceof PDBStructureChooserQuerySource);
133     String query = scquery.buildQuery(seq);
134     AssertJUnit.assertEquals("pdb_id:1tim", query);
135     seq.getAllPDBEntries().clear();
136     query = scquery.buildQuery(seq);
137     AssertJUnit.assertEquals(
138             "text:XYZ_1 OR text:XYZ_2 OR text:XYZ_3 OR text:XYZ_4 OR text:4kqy",
139             query);
140     seq.setDBRefs(null);
141     query = scquery.buildQuery(seq);
142     System.out.println(query);
143     AssertJUnit.assertEquals("text:4kqy", query);
144
145     DBRefEntry uniprotDBRef = new DBRefEntry();
146     uniprotDBRef.setAccessionId("P12345");
147     uniprotDBRef.setSource(DBRefSource.UNIPROT);
148     seq.addDBRef(uniprotDBRef);
149
150     DBRefEntry pdbDBRef = new DBRefEntry();
151     pdbDBRef.setAccessionId("1XYZ");
152     pdbDBRef.setSource(DBRefSource.PDB);
153     seq.addDBRef(pdbDBRef);
154
155     for (int x = 1; x < 5; x++)
156     {
157       DBRefEntry dbRef = new DBRefEntry();
158       dbRef.setAccessionId("XYZ_" + x);
159       seq.addDBRef(dbRef);
160     }
161     System.out.println("");
162     System.out.println(seq.getDBRefs());
163     System.out.println(query);
164     query = scquery.buildQuery(seq);
165     AssertJUnit.assertEquals(
166             "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
167             query);
168   }
169
170   @SuppressWarnings("deprecation")
171   @Test(groups = { "Functional" })
172   public void buildThreeDBQueryTest()
173   {
174     System.out.println("seq >>>> " + upSeq);
175     TDBeaconsFTSRestClientTest.setMock();
176     StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { upSeq});
177     AssertJUnit.assertTrue(scquery instanceof ThreeDBStructureChooserQuerySource);
178     String query = scquery.buildQuery(upSeq);
179     AssertJUnit.assertEquals("P38398", query);
180     
181     // query shouldn't change regardless of additional entries
182     // because 3DBeacons requires canonical entries.
183     upSeq.getAllPDBEntries().clear();
184     query = scquery.buildQuery(upSeq);
185     AssertJUnit.assertEquals("P38398", query);
186     upSeq.setDBRefs(null);
187     query = scquery.buildQuery(upSeq);
188     /*
189      * legacy projects/datasets will not have canonical flags set for uniprot dbrefs
190      * graceful behaviour would be to
191      *  - pick one ? not possible
192      *  - iterate through all until a 200 is obtained ?
193      *  ---> ideal but could be costly
194      *  ---> better to do a direct retrieval from uniprot to work out which is the canonical identifier..
195      *  ----> need a test to check that accessions can be promoted to canonical!
196      */
197     //FIXME - need to be able to use ID to query here ?
198     AssertJUnit.assertEquals(null, query);
199
200     
201     
202     // TODO: 
203     /**
204      * set of sequences:
205      * - no protein -> TDB not applicable, query PDBe only (consider RNA or DNA - specific query adapter ?)
206      * - protein but no uniprot -> first consider trying to get uniprot refs (need a mark to say none are available)
207      * - protein and uniprot - no canonicals -> resolve to uniprot automatically to get canonicals
208      * - query uniprot against 3DBeacons
209      * --> decorate experimental structures with additional data from PDBe
210      * - query remaining against PDBe
211      * Ranking
212      * - 3D Beacons
213      *  --> in memory ranking - no need to query twice
214      *  Rank by
215      *  - experimental > AlphaFold -> Model
216      *  - start > end
217      *  -> filters for 
218      *  -> experimental only
219      *  -> experimental plus best models for other regions
220      *  -> "best cover" 
221      *  -> need to be able to select correct reference (the longest one that covers all) for superposition
222      */
223 //    
224 //    DBRefEntry uniprotDBRef = new DBRefEntry();
225 //    uniprotDBRef.setAccessionId("P12345");
226 //    uniprotDBRef.setSource(DBRefSource.UNIPROT);
227 //    upSeq.addDBRef(uniprotDBRef);
228 //
229 //    DBRefEntry pdbDBRef = new DBRefEntry();
230 //    pdbDBRef.setAccessionId("1XYZ");
231 //    pdbDBRef.setSource(DBRefSource.PDB);
232 //    upSeq.addDBRef(pdbDBRef);
233 //
234 //    for (int x = 1; x < 5; x++)
235 //    {
236 //      DBRefEntry dbRef = new DBRefEntry();
237 //      dbRef.setAccessionId("XYZ_" + x);
238 //      seq.addDBRef(dbRef);
239 //    }
240 //    System.out.println("");
241 //    System.out.println(seq.getDBRefs());
242 //    System.out.println(query);
243 //    query = scquery.buildQuery(seq);
244 //    assertEquals(
245 //            "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
246 //            query);
247   }
248   @Test(groups= {"Functional"})
249   public void cascadingThreeDBandPDBQuerys()
250   {
251     TDBeaconsFTSRestClientTest.setMock();
252     PDBFTSRestClient.setMock();
253     ThreeDBStructureChooserQuerySource tdbquery = new ThreeDBStructureChooserQuerySource();
254     PDBStructureChooserQuerySource pdbquery  = new PDBStructureChooserQuerySource();
255             
256
257     
258     FTSRestResponse upResponse = null;
259     FTSRestResponse pdbResponse = null;
260     
261     try {
262       upResponse = tdbquery.fetchStructuresMetaData(upSeq, tdbquery.getDocFieldPrefs().getStructureSummaryFields(),  null, false);
263       // NB Could have race condition here 
264       String pdb_Query = tdbquery.buildPDBFTSQueryFor(upResponse);
265       assertTrue(pdb_Query.trim().length()>0);
266       pdbResponse = tdbquery.fetchStructuresMetaDataFor(pdbquery, upResponse);
267       assertTrue(pdbResponse.getNumberOfItemsFound()>0);
268       FTSRestResponse joinedResp = tdbquery.joinResponses(upResponse, pdbResponse);
269       assertEquals(upResponse.getNumberOfItemsFound(),joinedResp.getNumberOfItemsFound());
270       
271     } catch (Exception x)
272     {
273       x.printStackTrace();
274       Assert.fail("Unexpected Exception");
275     }
276     StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { upSeq});
277
278   }
279   
280   @Test(groups = { "Functional" })
281   public void sanitizeSeqNameTest()
282   {
283     String name = "ab_cdEF|fwxyz012349";
284     AssertJUnit.assertEquals(name, PDBStructureChooserQuerySource.sanitizeSeqName(name));
285
286     // remove a [nn] substring
287     name = "abcde12[345]fg";
288     AssertJUnit.assertEquals("abcde12fg", PDBStructureChooserQuerySource.sanitizeSeqName(name));
289
290     // remove characters other than a-zA-Z0-9 | or _
291     name = "ab[cd],.\t£$*!- \\\"@:e";
292     AssertJUnit.assertEquals("abcde", PDBStructureChooserQuerySource.sanitizeSeqName(name));
293
294     name = "abcde12[345a]fg";
295     AssertJUnit.assertEquals("abcde12345afg", PDBStructureChooserQuerySource.sanitizeSeqName(name));
296   }
297 }