JAL-3829 pull out structure chooser query source tests to new class
[jalview.git] / test / jalview / gui / structurechooser / StructureChooserQuerySourceTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.gui.structurechooser;
22
23
24 import java.util.Vector;
25
26 import org.testng.AssertJUnit;
27 import org.testng.annotations.AfterMethod;
28 import org.testng.annotations.BeforeClass;
29 import org.testng.annotations.BeforeMethod;
30 import org.testng.annotations.Test;
31
32 import jalview.datamodel.DBRefEntry;
33 import jalview.datamodel.DBRefSource;
34 import jalview.datamodel.PDBEntry;
35 import jalview.datamodel.Sequence;
36 import jalview.datamodel.SequenceI;
37 import jalview.fts.core.FTSRestRequest;
38 import jalview.fts.threedbeacons.TDBeaconsFTSRestClientTest;
39 import jalview.gui.JvOptionPane;
40
41 public class StructureChooserQuerySourceTest
42 {
43
44   @BeforeClass(alwaysRun = true)
45   public void setUpJvOptionPane()
46   {
47     JvOptionPane.setInteractiveMode(false);
48     JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
49   }
50
51   Sequence seq,upSeq;
52
53   // same set up as for structurechooser test
54   
55 @BeforeMethod(alwaysRun = true)
56   public void setUp() throws Exception
57   {
58     seq = new Sequence("PDB|4kqy|4KQY|A", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1,
59             26);
60     seq.createDatasetSequence();
61     for (int x = 1; x < 5; x++)
62     {
63       DBRefEntry dbRef = new DBRefEntry();
64       dbRef.setAccessionId("XYZ_" + x);
65       seq.addDBRef(dbRef);
66     }
67
68     PDBEntry dbRef = new PDBEntry();
69     dbRef.setId("1tim");
70
71     Vector<PDBEntry> pdbIds = new Vector<>();
72     pdbIds.add(dbRef);
73
74     seq.setPDBId(pdbIds);
75     
76     // Uniprot sequence for 3D-Beacons mocks
77     upSeq = new Sequence("P38398", 
78             "MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQCPLCKNDITKRS\n"
79             + "LQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEVSIIQSMGYRNRAKRLLQSEPEN\n"
80             + "PSLQETSLSVQLSNLGTVRTLRTKQRIQPQKTSVYIELGSDSSEDTVNKATYCSVGDQELLQITPQGTRDEI\n"
81             + "SLDSAKKAACEFSETDVTNTEHHQPSNNDLNTTEKRAAERHPEKYQGSSVSNLHVEPCGTNTHASSLQHENS\n"
82             + "SLLLTKDRMNVEKAEFCNKSKQPGLARSQHNRWAGSKETCNDRRTPSTEKKVDLNADPLCERKEWNKQKLPC\n"
83             + "SENPRDTEDVPWITLNSSIQKVNEWFSRSDELLGSDDSHDGESESNAKVADVLDVLNEVDEYSGSSEKIDLL\n"
84             + "ASDPHEALICKSERVHSKSVESNIEDKIFGKTYRKKASLPNLSHVTENLIIGAFVTEPQIIQERPLTNKLKR\n"
85             + "KRRPTSGLHPEDFIKKADLAVQKTPEMINQGTNQTEQNGQVMNITNSGHENKTKGDSIQNEKNPNPIESLEK\n"
86             + "ESAFKTKAEPISSSISNMELELNIHNSKAPKKNRLRRKSSTRHIHALELVVSRNLSPPNCTELQIDSCSSSE\n"
87             + "EIKKKKYNQMPVRHSRNLQLMEGKEPATGAKKSNKPNEQTSKRHDSDTFPELKLTNAPGSFTKCSNTSELKE\n"
88             + "FVNPSLPREEKEEKLETVKVSNNAEDPKDLMLSGERVLQTERSVESSSISLVPGTDYGTQESISLLEVSTLG\n"
89             + "KAKTEPNKCVSQCAAFENPKGLIHGCSKDNRNDTEGFKYPLGHEVNHSRETSIEMEESELDAQYLQNTFKVS\n"
90             + "KRQSFAPFSNPGNAEEECATFSAHSGSLKKQSPKVTFECEQKEENQGKNESNIKPVQTVNITAGFPVVGQKD\n"
91             + "KPVDNAKCSIKGGSRFCLSSQFRGNETGLITPNKHGLLQNPYRIPPLFPIKSFVKTKCKKNLLEENFEEHSM\n"
92             + "SPEREMGNENIPSTVSTISRNNIRENVFKEASSSNINEVGSSTNEVGSSINEIGSSDENIQAELGRNRGPKL\n"
93             + "NAMLRLGVLQPEVYKQSLPGSNCKHPEIKKQEYEEVVQTVNTDFSPYLISDNLEQPMGSSHASQVCSETPDD\n"
94             + "LLDDGEIKEDTSFAENDIKESSAVFSKSVQKGELSRSPSPFTHTHLAQGYRRGAKKLESSEENLSSEDEELP\n"
95             + "CFQHLLFGKVNNIPSQSTRHSTVATECLSKNTEENLLSLKNSLNDCSNQVILAKASQEHHLSEETKCSASLF\n"
96             + "SSQCSELEDLTANTNTQDPFLIGSSKQMRHQSESQGVGLSDKELVSDDEERGTGLEENNQEEQSMDSNLGEA\n"
97             + "ASGCESETSVSEDCSGLSSQSDILTTQQRDTMQHNLIKLQQEMAELEAVLEQHGSQPSNSYPSIISDSSALE\n"
98             + "DLRNPEQSTSEKAVLTSQKSSEYPISQNPEGLSADKFEVSADSSTSKNKEPGVERSSPSKCPSLDDRWYMHS\n"
99             + "CSGSLQNRNYPSQEELIKVVDVEEQQLEESGPHDLTETSYLPRQDLEGTPYLESGISLFSDDPESDPSEDRA\n"
100             + "PESARVGNIPSSTSALKVPQLKVAESAQSPAAAHTTDTAGYNAMEESVSREKPELTASTERVNKRMSMVVSG\n"
101             + "LTPEEFMLVYKFARKHHITLTNLITEETTHVVMKTDAEFVCERTLKYFLGIAGGKWVVSYFWVTQSIKERKM\n"
102             + "LNEHDFEVRGDVVNGRNHQGPKRARESQDRKIFRGLEICCYGPFTNMPTDQLEWMVQLCGASVVKELSSFTL\n"
103             + "GTGVHPIVVVQPDAWTEDNGFHAIGQMCEAPVVTREWVLDSVALYQCQELDTYLIPQIPHSHY\n"
104             + "", 1,
105 1863);
106     upSeq.createDatasetSequence();
107     upSeq.setDescription("Breast cancer type 1 susceptibility protein");
108     upSeq.addDBRef(new DBRefEntry("UNIPROT","0","P38398",null,true));
109   }
110
111 @AfterMethod(alwaysRun = true)
112   public void tearDown() throws Exception
113   {
114     seq = null;
115     upSeq=null;
116   }
117
118   @SuppressWarnings("deprecation")
119   @Test(groups = { "Functional" })
120   public void buildPDBQueryTest()
121   {
122     System.out.println("seq >>>> " + seq);
123     
124     StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { seq});
125     AssertJUnit.assertTrue(scquery instanceof PDBStructureChooserQuerySource);
126     String query = scquery.buildQuery(seq);
127     AssertJUnit.assertEquals("pdb_id:1tim", query);
128     seq.getAllPDBEntries().clear();
129     query = scquery.buildQuery(seq);
130     AssertJUnit.assertEquals(
131             "text:XYZ_1 OR text:XYZ_2 OR text:XYZ_3 OR text:XYZ_4 OR text:4kqy",
132             query);
133     seq.setDBRefs(null);
134     query = scquery.buildQuery(seq);
135     System.out.println(query);
136     AssertJUnit.assertEquals("text:4kqy", query);
137
138     DBRefEntry uniprotDBRef = new DBRefEntry();
139     uniprotDBRef.setAccessionId("P12345");
140     uniprotDBRef.setSource(DBRefSource.UNIPROT);
141     seq.addDBRef(uniprotDBRef);
142
143     DBRefEntry pdbDBRef = new DBRefEntry();
144     pdbDBRef.setAccessionId("1XYZ");
145     pdbDBRef.setSource(DBRefSource.PDB);
146     seq.addDBRef(pdbDBRef);
147
148     for (int x = 1; x < 5; x++)
149     {
150       DBRefEntry dbRef = new DBRefEntry();
151       dbRef.setAccessionId("XYZ_" + x);
152       seq.addDBRef(dbRef);
153     }
154     System.out.println("");
155     System.out.println(seq.getDBRefs());
156     System.out.println(query);
157     query = scquery.buildQuery(seq);
158     AssertJUnit.assertEquals(
159             "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
160             query);
161   }
162
163   @SuppressWarnings("deprecation")
164   @Test(groups = { "Functional" })
165   public void buildThreeDBQueryTest()
166   {
167     System.out.println("seq >>>> " + upSeq);
168     TDBeaconsFTSRestClientTest.setMock();
169     StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { upSeq});
170     AssertJUnit.assertTrue(scquery instanceof ThreeDBStructureChooserQuerySource);
171     String query = scquery.buildQuery(upSeq);
172     AssertJUnit.assertEquals("P38398", query);
173     
174     // query shouldn't change regardless of additional entries
175     // because 3DBeacons requires canonical entries.
176     upSeq.getAllPDBEntries().clear();
177     query = scquery.buildQuery(upSeq);
178     AssertJUnit.assertEquals("P38398", query);
179     upSeq.setDBRefs(null);
180     query = scquery.buildQuery(upSeq);
181     /*
182      * legacy projects/datasets will not have canonical flags set for uniprot dbrefs
183      * graceful behaviour would be to
184      *  - pick one ? not possible
185      *  - iterate through all until a 200 is obtained ?
186      *  ---> ideal but could be costly
187      *  ---> better to do a direct retrieval from uniprot to work out which is the canonical identifier..
188      *  ----> need a test to check that accessions can be promoted to canonical!
189      */
190     //FIXME - need to be able to use ID to query here ?
191     AssertJUnit.assertEquals(null, query);
192
193     
194     
195     // TODO: 
196     /**
197      * set of sequences:
198      * - no protein -> TDB not applicable, query PDBe only (consider RNA or DNA - specific query adapter ?)
199      * - protein but no uniprot -> first consider trying to get uniprot refs (need a mark to say none are available)
200      * - protein and uniprot - no canonicals -> resolve to uniprot automatically to get canonicals
201      * - query uniprot against 3DBeacons
202      * --> decorate experimental structures with additional data from PDBe
203      * - query remaining against PDBe
204      * Ranking
205      * - 3D Beacons
206      *  --> in memory ranking - no need to query twice
207      *  Rank by
208      *  - experimental > AlphaFold -> Model
209      *  - start > end
210      *  -> filters for 
211      *  -> experimental only
212      *  -> experimental plus best models for other regions
213      *  -> "best cover" 
214      *  -> need to be able to select correct reference (the longest one that covers all) for superposition
215      */
216 //    
217 //    DBRefEntry uniprotDBRef = new DBRefEntry();
218 //    uniprotDBRef.setAccessionId("P12345");
219 //    uniprotDBRef.setSource(DBRefSource.UNIPROT);
220 //    upSeq.addDBRef(uniprotDBRef);
221 //
222 //    DBRefEntry pdbDBRef = new DBRefEntry();
223 //    pdbDBRef.setAccessionId("1XYZ");
224 //    pdbDBRef.setSource(DBRefSource.PDB);
225 //    upSeq.addDBRef(pdbDBRef);
226 //
227 //    for (int x = 1; x < 5; x++)
228 //    {
229 //      DBRefEntry dbRef = new DBRefEntry();
230 //      dbRef.setAccessionId("XYZ_" + x);
231 //      seq.addDBRef(dbRef);
232 //    }
233 //    System.out.println("");
234 //    System.out.println(seq.getDBRefs());
235 //    System.out.println(query);
236 //    query = scquery.buildQuery(seq);
237 //    assertEquals(
238 //            "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
239 //            query);
240   }
241   @Test(groups= {"Functional"})
242   public void cascadingThreeDBandPDBQuerys()
243   {
244     TDBeaconsFTSRestClientTest.setMock();
245     
246     StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { upSeq});
247
248     // query TDB for doc
249     // query PDBe for PDB entry metadata
250     // Combine
251     
252     FTSRestRequest tdbQuery = new FTSRestRequest();
253     tdbQuery.setResponseSize(100);
254     tdbQuery.setFieldToSearchBy("");
255     tdbQuery.setSearchTerm(scquery.buildQuery(upSeq));
256     tdbQuery.setWantedFields(scquery.getDocFieldPrefs().getStructureSummaryFields());
257     //scquery.fetchStructuresMetaData(upSeq, null, null, false);
258     String secondaryPdbQuery;
259     //secondaryPdbQuery = ((ThreeDBStructureChooserQuerySource)scquery).buildPDBFTSquery();
260
261   }
262   
263   @Test(groups = { "Functional" })
264   public void sanitizeSeqNameTest()
265   {
266     String name = "ab_cdEF|fwxyz012349";
267     AssertJUnit.assertEquals(name, PDBStructureChooserQuerySource.sanitizeSeqName(name));
268
269     // remove a [nn] substring
270     name = "abcde12[345]fg";
271     AssertJUnit.assertEquals("abcde12fg", PDBStructureChooserQuerySource.sanitizeSeqName(name));
272
273     // remove characters other than a-zA-Z0-9 | or _
274     name = "ab[cd],.\t£$*!- \\\"@:e";
275     AssertJUnit.assertEquals("abcde", PDBStructureChooserQuerySource.sanitizeSeqName(name));
276
277     name = "abcde12[345a]fg";
278     AssertJUnit.assertEquals("abcde12345afg", PDBStructureChooserQuerySource.sanitizeSeqName(name));
279   }
280 }