2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.gui.structurechooser;
24 import static org.testng.Assert.assertEquals;
25 import static org.testng.Assert.assertTrue;
27 import java.util.Collection;
28 import java.util.List;
29 import java.util.Vector;
31 import org.junit.Assert;
32 import org.testng.AssertJUnit;
33 import org.testng.annotations.AfterMethod;
34 import org.testng.annotations.BeforeClass;
35 import org.testng.annotations.BeforeMethod;
36 import org.testng.annotations.DataProvider;
37 import org.testng.annotations.Test;
39 import jalview.datamodel.DBRefEntry;
40 import jalview.datamodel.DBRefSource;
41 import jalview.datamodel.PDBEntry;
42 import jalview.datamodel.Sequence;
43 import jalview.datamodel.SequenceI;
44 import jalview.fts.api.FTSData;
45 import jalview.fts.core.FTSRestRequest;
46 import jalview.fts.core.FTSRestResponse;
47 import jalview.fts.service.pdb.PDBFTSRestClient;
48 import jalview.fts.service.pdb.PDBFTSRestClientTest;
49 import jalview.fts.threedbeacons.TDBeaconsFTSRestClientTest;
50 import jalview.gui.JvOptionPane;
51 import jalview.gui.StructureChooser;
52 import jalview.jbgui.FilterOption;
54 public class StructureChooserQuerySourceTest
// Class-level set-up (alwaysRun): turns JvOptionPane's interactive mode off and
// sets CANCEL_OPTION as its mock response.
// Presumably this keeps any dialog raised during the tests from waiting on a
// user - confirm against JvOptionPane.
57 @BeforeClass(alwaysRun = true)
58 public void setUpJvOptionPane()
60 JvOptionPane.setInteractiveMode(false);
61 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
// Test fixtures, (re)assigned by setUp() before every test method:
//   seq           - sequence named "PDB|4kqy|4KQY|A"
//   upSeq         - sequence named "P38398" carrying a UNIPROT DBRef for P38398
//   upSeq_insulin - sequence named "INS_HUMAN" carrying a UNIPROT DBRef for P01308
64 Sequence seq,upSeq,upSeq_insulin;
66 // same set-up as for the StructureChooser test
// Per-test-method set-up (alwaysRun): rebuilds the three fixture sequences
// seq, upSeq and upSeq_insulin from scratch so each test starts from the same state.
68 @BeforeMethod(alwaysRun = true)
69 public void setUp() throws Exception
// seq: a 26-residue dummy sequence (the alphabet) whose name embeds the PDB id 4kqy.
71 seq = new Sequence("PDB|4kqy|4KQY|A", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1,
73 seq.createDatasetSequence();
// DBRefEntry objects with accessions XYZ_1 .. XYZ_4 (x runs from 1 to 4); no source is set on them here.
74 for (int x = 1; x < 5; x++)
76 DBRefEntry dbRef = new DBRefEntry();
77 dbRef.setAccessionId("XYZ_" + x);
// PDB entry plus the Vector used to hold it.
// NOTE(review): presumably this is the entry behind the "pdb_id:1tim" expectation
// in buildPDBQueryTest - confirm.
81 PDBEntry dbRef = new PDBEntry();
84 Vector<PDBEntry> pdbIds = new Vector<>();
89 // Uniprot sequence for 3D-Beacons mocks
90 upSeq = new Sequence("P38398",
91 "MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQCPLCKNDITKRS\n"
92 + "LQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEVSIIQSMGYRNRAKRLLQSEPEN\n"
93 + "PSLQETSLSVQLSNLGTVRTLRTKQRIQPQKTSVYIELGSDSSEDTVNKATYCSVGDQELLQITPQGTRDEI\n"
94 + "SLDSAKKAACEFSETDVTNTEHHQPSNNDLNTTEKRAAERHPEKYQGSSVSNLHVEPCGTNTHASSLQHENS\n"
95 + "SLLLTKDRMNVEKAEFCNKSKQPGLARSQHNRWAGSKETCNDRRTPSTEKKVDLNADPLCERKEWNKQKLPC\n"
96 + "SENPRDTEDVPWITLNSSIQKVNEWFSRSDELLGSDDSHDGESESNAKVADVLDVLNEVDEYSGSSEKIDLL\n"
97 + "ASDPHEALICKSERVHSKSVESNIEDKIFGKTYRKKASLPNLSHVTENLIIGAFVTEPQIIQERPLTNKLKR\n"
98 + "KRRPTSGLHPEDFIKKADLAVQKTPEMINQGTNQTEQNGQVMNITNSGHENKTKGDSIQNEKNPNPIESLEK\n"
99 + "ESAFKTKAEPISSSISNMELELNIHNSKAPKKNRLRRKSSTRHIHALELVVSRNLSPPNCTELQIDSCSSSE\n"
100 + "EIKKKKYNQMPVRHSRNLQLMEGKEPATGAKKSNKPNEQTSKRHDSDTFPELKLTNAPGSFTKCSNTSELKE\n"
101 + "FVNPSLPREEKEEKLETVKVSNNAEDPKDLMLSGERVLQTERSVESSSISLVPGTDYGTQESISLLEVSTLG\n"
102 + "KAKTEPNKCVSQCAAFENPKGLIHGCSKDNRNDTEGFKYPLGHEVNHSRETSIEMEESELDAQYLQNTFKVS\n"
103 + "KRQSFAPFSNPGNAEEECATFSAHSGSLKKQSPKVTFECEQKEENQGKNESNIKPVQTVNITAGFPVVGQKD\n"
104 + "KPVDNAKCSIKGGSRFCLSSQFRGNETGLITPNKHGLLQNPYRIPPLFPIKSFVKTKCKKNLLEENFEEHSM\n"
105 + "SPEREMGNENIPSTVSTISRNNIRENVFKEASSSNINEVGSSTNEVGSSINEIGSSDENIQAELGRNRGPKL\n"
106 + "NAMLRLGVLQPEVYKQSLPGSNCKHPEIKKQEYEEVVQTVNTDFSPYLISDNLEQPMGSSHASQVCSETPDD\n"
107 + "LLDDGEIKEDTSFAENDIKESSAVFSKSVQKGELSRSPSPFTHTHLAQGYRRGAKKLESSEENLSSEDEELP\n"
108 + "CFQHLLFGKVNNIPSQSTRHSTVATECLSKNTEENLLSLKNSLNDCSNQVILAKASQEHHLSEETKCSASLF\n"
109 + "SSQCSELEDLTANTNTQDPFLIGSSKQMRHQSESQGVGLSDKELVSDDEERGTGLEENNQEEQSMDSNLGEA\n"
110 + "ASGCESETSVSEDCSGLSSQSDILTTQQRDTMQHNLIKLQQEMAELEAVLEQHGSQPSNSYPSIISDSSALE\n"
111 + "DLRNPEQSTSEKAVLTSQKSSEYPISQNPEGLSADKFEVSADSSTSKNKEPGVERSSPSKCPSLDDRWYMHS\n"
112 + "CSGSLQNRNYPSQEELIKVVDVEEQQLEESGPHDLTETSYLPRQDLEGTPYLESGISLFSDDPESDPSEDRA\n"
113 + "PESARVGNIPSSTSALKVPQLKVAESAQSPAAAHTTDTAGYNAMEESVSREKPELTASTERVNKRMSMVVSG\n"
114 + "LTPEEFMLVYKFARKHHITLTNLITEETTHVVMKTDAEFVCERTLKYFLGIAGGKWVVSYFWVTQSIKERKM\n"
115 + "LNEHDFEVRGDVVNGRNHQGPKRARESQDRKIFRGLEICCYGPFTNMPTDQLEWMVQLCGASVVKELSSFTL\n"
116 + "GTGVHPIVVVQPDAWTEDNGFHAIGQMCEAPVVTREWVLDSVALYQCQELDTYLIPQIPHSHY\n"
119 upSeq.createDatasetSequence();
120 upSeq.setDescription("Breast cancer type 1 susceptibility protein");
// UNIPROT DBRef for accession P38398; the last constructor argument is true.
// NOTE(review): presumably that is the "canonical" flag the 3D-Beacons query
// depends on - confirm against DBRefEntry.
121 upSeq.addDBRef(new DBRefEntry("UNIPROT","0","P38398",null,true));
// Second Uniprot fixture: INS_HUMAN with UNIPROT accession P01308, built the same way.
123 upSeq_insulin=new Sequence("INS_HUMAN",
124 "MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGP"
125 + "GAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN");
126 upSeq_insulin.createDatasetSequence();
127 upSeq_insulin.setDescription("Insulin");
128 upSeq_insulin.addDBRef(new DBRefEntry("UNIPROT","0","P01308",null,true));
// Per-test-method tear-down hook; alwaysRun = true is set on the annotation.
131 @AfterMethod(alwaysRun = true)
132 public void tearDown() throws Exception
// Checks the query strings that buildQuery() returns for the seq fixture as its
// PDB entries and database references are changed step by step.
138 @SuppressWarnings("deprecation")
139 @Test(groups = { "Functional" })
140 public void buildPDBQueryTest()
142 System.out.println("seq >>>> " + seq);
// seq is expected to be served by the PDB query source.
144 StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { seq});
145 AssertJUnit.assertTrue(scquery instanceof PDBStructureChooserQuerySource);
// Step 1 - fixture as built by setUp(): a PDB-id query for 1tim is expected.
146 String query = scquery.buildQuery(seq);
147 AssertJUnit.assertEquals("pdb_id:1tim", query);
// Step 2 - PDB entries cleared: the expected query is free-text terms for the
// XYZ_n accessions followed by 4kqy.
148 seq.getAllPDBEntries().clear();
149 query = scquery.buildQuery(seq);
150 AssertJUnit.assertEquals(
151 "text:XYZ_1 OR text:XYZ_2 OR text:XYZ_3 OR text:XYZ_4 OR text:4kqy",
// Step 3 - only the 4kqy text term is expected now.
// NOTE(review): presumably the XYZ_n DBRefs have been removed from seq by this point - confirm.
154 query = scquery.buildQuery(seq);
155 System.out.println(query);
156 AssertJUnit.assertEquals("text:4kqy", query);
// Step 4 - add a UNIPROT reference (P12345) and a PDB reference (1XYZ) to seq.
158 DBRefEntry uniprotDBRef = new DBRefEntry();
159 uniprotDBRef.setAccessionId("P12345");
160 uniprotDBRef.setSource(DBRefSource.UNIPROT);
161 seq.addDBRef(uniprotDBRef);
163 DBRefEntry pdbDBRef = new DBRefEntry();
164 pdbDBRef.setAccessionId("1XYZ");
165 pdbDBRef.setSource(DBRefSource.PDB);
166 seq.addDBRef(pdbDBRef);
// XYZ_1 .. XYZ_4 entries are created again; they do not appear in the final expected query.
168 for (int x = 1; x < 5; x++)
170 DBRefEntry dbRef = new DBRefEntry();
171 dbRef.setAccessionId("XYZ_" + x);
174 System.out.println("");
175 System.out.println(seq.getDBRefs());
// NOTE(review): this prints the query left over from step 3; the rebuilt query
// is only assigned on the following statement.
176 System.out.println(query);
177 query = scquery.buildQuery(seq);
// Expected: the Uniprot accession queried as both uniprot_accession and
// uniprot_id, then the PDB id in lower case (1XYZ -> 1xyz).
178 AssertJUnit.assertEquals(
179 "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
// Checks the query string built for the upSeq fixture (UNIPROT P38398): the
// bare accession while the DBRef is present, and null once DBRefs are removed.
183 @SuppressWarnings("deprecation")
184 @Test(groups = { "Functional" })
185 public void buildThreeDBQueryTest()
187 System.out.println("seq >>>> " + upSeq);
// setMock() is called on both FTS client test classes before the query source is obtained.
// NOTE(review): presumably this swaps in canned responses so no live service is contacted - confirm.
188 TDBeaconsFTSRestClientTest.setMock();
189 PDBFTSRestClientTest.setMock();
190 StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { upSeq});
191 // gets the lightweight proxy rather than the ThreeDBStructureChooserQuerySource
// NOTE(review): the comment above mentions a lightweight proxy, but the assertion
// below requires an instance of ThreeDBStructureChooserQuerySource (or a subclass)
// - confirm which is intended.
192 AssertJUnit.assertTrue(scquery instanceof ThreeDBStructureChooserQuerySource);
// The expected query is the bare Uniprot accession.
193 String query = scquery.buildQuery(upSeq);
194 AssertJUnit.assertEquals("P38398", query);
196 // query shouldn't change regardless of additional entries
197 // because 3DBeacons requires canonical entries.
198 upSeq.getAllPDBEntries().clear();
199 query = scquery.buildQuery(upSeq);
200 AssertJUnit.assertEquals("P38398", query);
// With all DBRefs removed, buildQuery() is expected to return null (asserted further down).
201 upSeq.setDBRefs(null);
202 query = scquery.buildQuery(upSeq);
204 * legacy projects/datasets will not have canonical flags set for uniprot dbrefs
205 * graceful behaviour would be to
206 * - pick one ? not possible
207 * - iterate through all until a 200 is obtained ?
208 * ---> ideal but could be costly
209 * ---> better to do a direct retrieval from uniprot to work out which is the canonical identifier..
210 * ----> need a test to check that accessions can be promoted to canonical!
212 //FIXME - need to be able to use ID to query here ?
213 AssertJUnit.assertEquals(null, query);
220 * - no protein -> TDB not applicable, query PDBe only (consider RNA or DNA - specific query adapter ?)
221 * - protein but no uniprot -> first consider trying to get uniprot refs (need a mark to say none are available)
222 * - protein and uniprot - no canonicals -> resolve to uniprot automatically to get canonicals
223 * - query uniprot against 3DBeacons
224 * --> decorate experimental structures with additional data from PDBe
225 * - query remaining against PDBe
228 * --> in memory ranking - no need to query twice
230 * - experimental > AlphaFold -> Model
233 * -> experimental only
234 * -> experimental plus best models for other regions
236 * -> need to be able to select correct reference (the longest one that covers all) for superposition
// The commented-out statements below repeat the UNIPROT/PDB DBRef steps of
// buildPDBQueryTest; they are disabled and partly still refer to seq rather than upSeq.
239 // DBRefEntry uniprotDBRef = new DBRefEntry();
240 // uniprotDBRef.setAccessionId("P12345");
241 // uniprotDBRef.setSource(DBRefSource.UNIPROT);
242 // upSeq.addDBRef(uniprotDBRef);
244 // DBRefEntry pdbDBRef = new DBRefEntry();
245 // pdbDBRef.setAccessionId("1XYZ");
246 // pdbDBRef.setSource(DBRefSource.PDB);
247 // upSeq.addDBRef(pdbDBRef);
249 // for (int x = 1; x < 5; x++)
251 // DBRefEntry dbRef = new DBRefEntry();
252 // dbRef.setAccessionId("XYZ_" + x);
253 // seq.addDBRef(dbRef);
255 // System.out.println("");
256 // System.out.println(seq.getDBRefs());
257 // System.out.println(query);
258 // query = scquery.buildQuery(seq);
260 // "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
// Data-driven test (one run per sequence from the "testUpSeqs" provider).
// For the given sequence it fetches 3D-Beacons structure metadata, checks the
// single-best and many-best filter options, builds and runs the follow-up PDBe
// query, and checks that joining the two responses keeps the 3D-Beacons item count.
263 @Test(groups= {"Functional"},dataProvider = "testUpSeqs")
264 public void cascadingThreeDBandPDBQuerys(SequenceI testUpSeq)
266 TDBeaconsFTSRestClientTest.setMock();
267 PDBFTSRestClientTest.setMock();
268 ThreeDBStructureChooserQuerySource tdbquery = new ThreeDBStructureChooserQuerySource();
269 PDBStructureChooserQuerySource pdbquery = new PDBStructureChooserQuerySource();
273 FTSRestResponse upResponse = null;
274 FTSRestResponse pdbResponse = null;
275 // TODO test available options
277 // Best Alphafold Model
278 // Best model (by confidence score)
279 // Will also need to develop a more sophisticated filtering system
// The first two VIEWS_FILTER options must be, in this order, FILTER_FIRST_BEST_COVERAGE
// and FILTER_TDBEACONS_COVERAGE.
280 List<FilterOption> opts = tdbquery.getAvailableFilterOptions(StructureChooser.VIEWS_FILTER);
281 FilterOption opt_singlebest = opts.get(0);
282 FilterOption opt_manybest = opts.get(1);
283 assertEquals(opt_singlebest.getValue(), ThreeDBStructureChooserQuerySource.FILTER_FIRST_BEST_COVERAGE);
284 assertEquals(opt_manybest.getValue(), ThreeDBStructureChooserQuerySource.FILTER_TDBEACONS_COVERAGE);
// Single-best pass: fetch metadata with the first option, refresh the available
// options from the result, then rank; exactly one item is expected.
287 upResponse = tdbquery.fetchStructuresMetaData(testUpSeq, tdbquery.getDocFieldPrefs().getStructureSummaryFields(), opt_singlebest, false);
288 tdbquery.updateAvailableFilterOptions(StructureChooser.VIEWS_FILTER,opts,upResponse.getSearchSummary());
289 // test ranking without additional PDBe data
290 FTSRestResponse firstRanked = tdbquery.selectFirstRankedQuery(testUpSeq, upResponse.getSearchSummary(), tdbquery.getDocFieldPrefs().getStructureSummaryFields(), opt_singlebest.getValue(), false);
291 assertEquals(firstRanked.getNumberOfItemsFound(),1);
292 // many best response
// The many-best fetch must yield strictly more summary rows than the single-best ranking.
293 upResponse = tdbquery.fetchStructuresMetaData(testUpSeq, tdbquery.getDocFieldPrefs().getStructureSummaryFields(), opt_manybest, false);
294 assertTrue(firstRanked.getSearchSummary().size()<upResponse.getSearchSummary().size());
295 // NB Could have race condition here
// Cascade to PDBe: the query derived from the 3D-Beacons response must be
// non-blank and must find at least one item.
296 String pdb_Query = tdbquery.buildPDBFTSQueryFor(upResponse);
297 assertTrue(pdb_Query.trim().length()>0);
298 pdbResponse = tdbquery.fetchStructuresMetaDataFor(pdbquery, upResponse);
299 assertTrue(pdbResponse.getNumberOfItemsFound()>0);
// Joining must not change the item count relative to the 3D-Beacons response.
300 FTSRestResponse joinedResp = tdbquery.joinResponses(upResponse, pdbResponse);
301 assertEquals(upResponse.getNumberOfItemsFound(),joinedResp.getNumberOfItemsFound());
// NOTE(review): Assert here is org.junit.Assert (per the file's imports) while the
// rest of the class uses TestNG assertions, and the failure message is a fixed
// string that does not mention the caught exception x.
304 } catch (Exception x)
307 Assert.fail("Unexpected Exception");
// NOTE(review): scquery is assigned here but no later use of it is visible - confirm it is needed.
309 StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { testUpSeq});
// Data provider "testUpSeqs": two rows of one argument each, the upSeq fixture
// and then the upSeq_insulin fixture.
313 @DataProvider(name = "testUpSeqs")
314 public Object[][] testUpSeqs() throws Exception
317 return new Object[][] { {upSeq},{upSeq_insulin}};
// Exercises PDBStructureChooserQuerySource.sanitizeSeqName on four sample names.
320 @Test(groups = { "Functional" })
321 public void sanitizeSeqNameTest()
// Only letters, digits, '_' and '|': expected back unchanged.
323 String name = "ab_cdEF|fwxyz012349";
324 AssertJUnit.assertEquals(name, PDBStructureChooserQuerySource.sanitizeSeqName(name));
326 // remove a [nn] substring
327 name = "abcde12[345]fg";
328 AssertJUnit.assertEquals("abcde12fg", PDBStructureChooserQuerySource.sanitizeSeqName(name));
330 // remove characters other than a-zA-Z0-9 | or _
// The letters inside "[cd]" are expected to survive; only the brackets and the
// other punctuation/whitespace are dropped.
331 name = "ab[cd],.\t£$*!- \\\"@:e";
332 AssertJUnit.assertEquals("abcde", PDBStructureChooserQuerySource.sanitizeSeqName(name));
// Bracketed content that is not purely digits ("345a"): expected to be kept,
// with only the brackets removed.
334 name = "abcde12[345a]fg";
335 AssertJUnit.assertEquals("abcde12345afg", PDBStructureChooserQuerySource.sanitizeSeqName(name));