2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.gui.structurechooser;
23 import static org.testng.Assert.assertEquals;
24 import static org.testng.Assert.assertTrue;
26 import java.util.Collection;
27 import java.util.List;
28 import java.util.Vector;
30 import org.junit.Assert;
31 import org.testng.AssertJUnit;
32 import org.testng.annotations.AfterMethod;
33 import org.testng.annotations.BeforeClass;
34 import org.testng.annotations.BeforeMethod;
35 import org.testng.annotations.DataProvider;
36 import org.testng.annotations.Test;
38 import jalview.datamodel.DBRefEntry;
39 import jalview.datamodel.DBRefSource;
40 import jalview.datamodel.PDBEntry;
41 import jalview.datamodel.Sequence;
42 import jalview.datamodel.SequenceI;
43 import jalview.fts.api.FTSData;
44 import jalview.fts.core.FTSRestRequest;
45 import jalview.fts.core.FTSRestResponse;
46 import jalview.fts.service.pdb.PDBFTSRestClient;
47 import jalview.fts.service.pdb.PDBFTSRestClientTest;
48 import jalview.fts.threedbeacons.TDBeaconsFTSRestClientTest;
49 import jalview.gui.JvOptionPane;
50 import jalview.gui.StructureChooser;
51 import jalview.jbgui.FilterOption;
/**
 * TestNG tests for the structure chooser query sources: query construction by
 * the sources returned from StructureChooserQuerySource.getQuerySourceFor, the
 * cascaded 3D-Beacons / PDBe look-up, and
 * PDBStructureChooserQuerySource.sanitizeSeqName.
 */
53 public class StructureChooserQuerySourceTest
/**
 * Class-level set-up (TestNG {@code @BeforeClass}, always run): takes
 * JvOptionPane out of interactive mode and sets its mock response to
 * {@code JvOptionPane.CANCEL_OPTION}.
 */
56 @BeforeClass(alwaysRun = true)
57 public void setUpJvOptionPane()
59 JvOptionPane.setInteractiveMode(false);
60 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
// Fixture sequences shared by the tests and assigned in setUp():
// seq is created with the name "PDB|4kqy|4KQY|A", upSeq with "P38398"
// and upSeq_insulin with "INS_HUMAN".
63 Sequence seq, upSeq, upSeq_insulin;
65 // same set up as for structurechooser test
/**
 * Per-test set-up (TestNG {@code @BeforeMethod}, always run): builds the three
 * fixture sequences {@code seq}, {@code upSeq} and {@code upSeq_insulin} and
 * creates a dataset sequence for each of them.
 * <p>
 * NOTE(review): this listing appears to have lost some short lines - e.g. the
 * tail of the first {@code new Sequence(...)} call, whatever is done with the
 * {@code PDBEntry} / {@code pdbIds} objects, and the receiver of the final
 * {@code .addDBRef(...)} call. Confirm against the full file before editing.
 *
 * @throws Exception
 *           declared by the signature; no throwing statement is visible here
 */
67 @BeforeMethod(alwaysRun = true)
68 public void setUp() throws Exception
// generic test sequence (the alphabet) named "PDB|4kqy|4KQY|A"
70 seq = new Sequence("PDB|4kqy|4KQY|A", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1,
72 seq.createDatasetSequence();
// builds DBRefEntry objects with accession ids XYZ_1 .. XYZ_4
73 for (int x = 1; x < 5; x++)
75 DBRefEntry dbRef = new DBRefEntry();
76 dbRef.setAccessionId("XYZ_" + x);
// a PDB entry and a list to hold it; how they are populated and attached is
// not visible in this listing
80 PDBEntry dbRef = new PDBEntry();
83 Vector<PDBEntry> pdbIds = new Vector<>();
88 // Uniprot sequence for 3D-Beacons mocks
89 upSeq = new Sequence("P38398",
90 "MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQCPLCKNDITKRS\n"
91 + "LQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEVSIIQSMGYRNRAKRLLQSEPEN\n"
92 + "PSLQETSLSVQLSNLGTVRTLRTKQRIQPQKTSVYIELGSDSSEDTVNKATYCSVGDQELLQITPQGTRDEI\n"
93 + "SLDSAKKAACEFSETDVTNTEHHQPSNNDLNTTEKRAAERHPEKYQGSSVSNLHVEPCGTNTHASSLQHENS\n"
94 + "SLLLTKDRMNVEKAEFCNKSKQPGLARSQHNRWAGSKETCNDRRTPSTEKKVDLNADPLCERKEWNKQKLPC\n"
95 + "SENPRDTEDVPWITLNSSIQKVNEWFSRSDELLGSDDSHDGESESNAKVADVLDVLNEVDEYSGSSEKIDLL\n"
96 + "ASDPHEALICKSERVHSKSVESNIEDKIFGKTYRKKASLPNLSHVTENLIIGAFVTEPQIIQERPLTNKLKR\n"
97 + "KRRPTSGLHPEDFIKKADLAVQKTPEMINQGTNQTEQNGQVMNITNSGHENKTKGDSIQNEKNPNPIESLEK\n"
98 + "ESAFKTKAEPISSSISNMELELNIHNSKAPKKNRLRRKSSTRHIHALELVVSRNLSPPNCTELQIDSCSSSE\n"
99 + "EIKKKKYNQMPVRHSRNLQLMEGKEPATGAKKSNKPNEQTSKRHDSDTFPELKLTNAPGSFTKCSNTSELKE\n"
100 + "FVNPSLPREEKEEKLETVKVSNNAEDPKDLMLSGERVLQTERSVESSSISLVPGTDYGTQESISLLEVSTLG\n"
101 + "KAKTEPNKCVSQCAAFENPKGLIHGCSKDNRNDTEGFKYPLGHEVNHSRETSIEMEESELDAQYLQNTFKVS\n"
102 + "KRQSFAPFSNPGNAEEECATFSAHSGSLKKQSPKVTFECEQKEENQGKNESNIKPVQTVNITAGFPVVGQKD\n"
103 + "KPVDNAKCSIKGGSRFCLSSQFRGNETGLITPNKHGLLQNPYRIPPLFPIKSFVKTKCKKNLLEENFEEHSM\n"
104 + "SPEREMGNENIPSTVSTISRNNIRENVFKEASSSNINEVGSSTNEVGSSINEIGSSDENIQAELGRNRGPKL\n"
105 + "NAMLRLGVLQPEVYKQSLPGSNCKHPEIKKQEYEEVVQTVNTDFSPYLISDNLEQPMGSSHASQVCSETPDD\n"
106 + "LLDDGEIKEDTSFAENDIKESSAVFSKSVQKGELSRSPSPFTHTHLAQGYRRGAKKLESSEENLSSEDEELP\n"
107 + "CFQHLLFGKVNNIPSQSTRHSTVATECLSKNTEENLLSLKNSLNDCSNQVILAKASQEHHLSEETKCSASLF\n"
108 + "SSQCSELEDLTANTNTQDPFLIGSSKQMRHQSESQGVGLSDKELVSDDEERGTGLEENNQEEQSMDSNLGEA\n"
109 + "ASGCESETSVSEDCSGLSSQSDILTTQQRDTMQHNLIKLQQEMAELEAVLEQHGSQPSNSYPSIISDSSALE\n"
110 + "DLRNPEQSTSEKAVLTSQKSSEYPISQNPEGLSADKFEVSADSSTSKNKEPGVERSSPSKCPSLDDRWYMHS\n"
111 + "CSGSLQNRNYPSQEELIKVVDVEEQQLEESGPHDLTETSYLPRQDLEGTPYLESGISLFSDDPESDPSEDRA\n"
112 + "PESARVGNIPSSTSALKVPQLKVAESAQSPAAAHTTDTAGYNAMEESVSREKPELTASTERVNKRMSMVVSG\n"
113 + "LTPEEFMLVYKFARKHHITLTNLITEETTHVVMKTDAEFVCERTLKYFLGIAGGKWVVSYFWVTQSIKERKM\n"
114 + "LNEHDFEVRGDVVNGRNHQGPKRARESQDRKIFRGLEICCYGPFTNMPTDQLEWMVQLCGASVVKELSSFTL\n"
115 + "GTGVHPIVVVQPDAWTEDNGFHAIGQMCEAPVVTREWVLDSVALYQCQELDTYLIPQIPHSHY\n"
118 upSeq.createDatasetSequence();
119 upSeq.setDescription("Breast cancer type 1 susceptibility protein");
// UNIPROT db reference for accession P38398
// NOTE(review): presumably the trailing 'true' marks the reference as
// canonical - confirm against the DBRefEntry constructor
120 upSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "P38398", null, true));
// second Uniprot fixture, described as "Insulin", with UNIPROT accession P01308
122 upSeq_insulin = new Sequence("INS_HUMAN",
123 "MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGP"
124 + "GAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN");
125 upSeq_insulin.createDatasetSequence();
126 upSeq_insulin.setDescription("Insulin");
128 .addDBRef(new DBRefEntry("UNIPROT", "0", "P01308", null, true));
/**
 * Per-test tear-down (TestNG {@code @AfterMethod}, always run).
 * <p>
 * NOTE(review): the body of this method is not visible in this listing.
 *
 * @throws Exception
 *           declared by the signature
 */
131 @AfterMethod(alwaysRun = true)
132 public void tearDown() throws Exception
/**
 * Checks the query strings built for {@code seq} by the query source obtained
 * from StructureChooserQuerySource.getQuerySourceFor, which is expected to be
 * a PDBStructureChooserQuerySource. The expected queries are, in order:
 * "pdb_id:1tim"; after the PDB entries are cleared, the "text:XYZ_n ... OR
 * text:4kqy" form; then "text:4kqy"; and finally, once UNIPROT (P12345) and
 * PDB (1XYZ) references are added, the uniprot_accession / uniprot_id / pdb_id
 * form.
 * <p>
 * NOTE(review): several short lines (call arguments, the second argument of
 * some assertions, a statement between the second and third query) appear to
 * be missing from this listing - confirm against the full file.
 */
138 @SuppressWarnings("deprecation")
139 @Test(groups = { "Functional" })
140 public void buildPDBQueryTest()
142 System.out.println("seq >>>> " + seq);
144 StructureChooserQuerySource scquery = StructureChooserQuerySource
145 .getQuerySourceFor(new SequenceI[]
148 .assertTrue(scquery instanceof PDBStructureChooserQuerySource);
149 String query = scquery.buildQuery(seq);
150 AssertJUnit.assertEquals("pdb_id:1tim", query);
// with the PDB entries removed a different query is expected
151 seq.getAllPDBEntries().clear();
152 query = scquery.buildQuery(seq);
153 AssertJUnit.assertEquals(
154 "text:XYZ_1 OR text:XYZ_2 OR text:XYZ_3 OR text:XYZ_4 OR text:4kqy",
// NOTE(review): presumably the db refs are removed before this rebuild (the
// expected query no longer contains the XYZ_n terms) - the line is not visible
157 query = scquery.buildQuery(seq);
158 System.out.println(query);
159 AssertJUnit.assertEquals("text:4kqy", query);
// add a UNIPROT and a PDB reference to the sequence
161 DBRefEntry uniprotDBRef = new DBRefEntry();
162 uniprotDBRef.setAccessionId("P12345");
163 uniprotDBRef.setSource(DBRefSource.UNIPROT);
164 seq.addDBRef(uniprotDBRef);
166 DBRefEntry pdbDBRef = new DBRefEntry();
167 pdbDBRef.setAccessionId("1XYZ");
168 pdbDBRef.setSource(DBRefSource.PDB);
169 seq.addDBRef(pdbDBRef);
// builds DBRefEntry objects with accession ids XYZ_1 .. XYZ_4 again
171 for (int x = 1; x < 5; x++)
173 DBRefEntry dbRef = new DBRefEntry();
174 dbRef.setAccessionId("XYZ_" + x);
177 System.out.println("");
178 System.out.println(seq.getDBRefs());
// NOTE(review): this prints the query from the previous step; the query is
// only rebuilt on the following line
179 System.out.println(query);
180 query = scquery.buildQuery(seq);
181 AssertJUnit.assertEquals(
182 "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
/**
 * Checks the query built for {@code upSeq} once the 3D-Beacons and PDBe mocks
 * have been installed: the query source is expected to be a
 * ThreeDBStructureChooserQuerySource, the query is expected to be "P38398"
 * both before and after the PDB entries are cleared, and {@code null} once
 * the sequence's db refs have been set to null.
 * <p>
 * NOTE(review): comment delimiters and some short lines appear to be missing
 * from this listing, so the block-comment text below shows up as bare lines -
 * confirm against the full file.
 */
186 @SuppressWarnings("deprecation")
187 @Test(groups = { "Functional" })
188 public void buildThreeDBQueryTest()
190 System.out.println("seq >>>> " + upSeq);
// install the mock responses provided by the two REST client test classes
191 TDBeaconsFTSRestClientTest.setMock();
192 PDBFTSRestClientTest.setMock();
193 StructureChooserQuerySource scquery = StructureChooserQuerySource
194 .getQuerySourceFor(new SequenceI[]
// NOTE(review): the remark below mentions a lightweight proxy, yet the
// assertion requires a ThreeDBStructureChooserQuerySource - confirm intent
196 // gets the lightweight proxy rather than the
197 // ThreeDBStructureChooserQuerySource
198 AssertJUnit.assertTrue(
199 scquery instanceof ThreeDBStructureChooserQuerySource);
200 String query = scquery.buildQuery(upSeq);
201 AssertJUnit.assertEquals("P38398", query);
203 // query shouldn't change regardless of additional entries
204 // because 3DBeacons requires canonical entries.
205 upSeq.getAllPDBEntries().clear();
206 query = scquery.buildQuery(upSeq);
207 AssertJUnit.assertEquals("P38398", query);
// without any db refs the query is expected to be null (asserted further down)
208 upSeq.setDBRefs(null);
209 query = scquery.buildQuery(upSeq);
211 * legacy projects/datasets will not have canonical flags set for uniprot dbrefs
212 * graceful behaviour would be to
213 * - pick one ? not possible
214 * - iterate through all until a 200 is obtained ?
215 * ---> ideal but could be costly
216 * ---> better to do a direct retrieval from uniprot to work out which is the canonical identifier..
217 * ----> need a test to check that accessions can be promoted to canonical!
219 // FIXME - need to be able to use ID to query here ?
220 AssertJUnit.assertEquals(null, query);
224 * set of sequences: - no protein -> TDB not applicable, query PDBe only
225 * (consider RNA or DNA - specific query adapter ?) - protein but no uniprot
226 * -> first consider trying to get uniprot refs (need a mark to say none are
227 * available) - protein and uniprot - no canonicals -> resolve to uniprot
228 * automatically to get canonicals - query uniprot against 3DBeacons -->
229 * decorate experimental structures with additional data from PDBe - query
230 * remaining against PDBe Ranking - 3D Beacons --> in memory ranking - no
231 * need to query twice Rank by - experimental > AlphaFold -> Model - start >
232 * end -> filters for -> experimental only -> experimental plus best models
233 * for other regions -> "best cover" -> need to be able to select correct
234 * reference (the longest one that covers all) for superposition
// the remainder is commented-out code mirroring the tail of buildPDBQueryTest
237 // DBRefEntry uniprotDBRef = new DBRefEntry();
238 // uniprotDBRef.setAccessionId("P12345");
239 // uniprotDBRef.setSource(DBRefSource.UNIPROT);
240 // upSeq.addDBRef(uniprotDBRef);
242 // DBRefEntry pdbDBRef = new DBRefEntry();
243 // pdbDBRef.setAccessionId("1XYZ");
244 // pdbDBRef.setSource(DBRefSource.PDB);
245 // upSeq.addDBRef(pdbDBRef);
247 // for (int x = 1; x < 5; x++)
249 // DBRefEntry dbRef = new DBRefEntry();
250 // dbRef.setAccessionId("XYZ_" + x);
251 // seq.addDBRef(dbRef);
253 // System.out.println("");
254 // System.out.println(seq.getDBRefs());
255 // System.out.println(query);
256 // query = scquery.buildQuery(seq);
258 // "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
/**
 * Runs the cascaded look-up against the mocked services for one Uniprot
 * sequence: fetches 3D-Beacons metadata with the "single best" and "many
 * best" filter options, ranks the first result, derives a PDBe query from the
 * 3D-Beacons response, fetches the PDBe metadata and joins the two responses.
 * <p>
 * NOTE(review): the opening of the try block, the continuation lines of some
 * calls and the body of the catch clause appear to be missing from this
 * listing - confirm against the full file.
 *
 * @param testUpSeq
 *          sequence supplied by the "testUpSeqs" data provider
 */
262 @Test(groups = { "Functional" }, dataProvider = "testUpSeqs")
263 public void cascadingThreeDBandPDBQuerys(SequenceI testUpSeq)
265 TDBeaconsFTSRestClientTest.setMock();
266 PDBFTSRestClientTest.setMock();
267 ThreeDBStructureChooserQuerySource tdbquery = new ThreeDBStructureChooserQuerySource();
268 PDBStructureChooserQuerySource pdbquery = new PDBStructureChooserQuerySource();
270 FTSRestResponse upResponse = null;
271 FTSRestResponse pdbResponse = null;
272 // TODO test available options
274 // Best Alphafold Model
275 // Best model (by confidence score)
276 // Will also need to develop a more sophisticated filtering system
// the first two options offered for VIEWS_FILTER are expected to be
// FILTER_FIRST_BEST_COVERAGE and FILTER_TDBEACONS_COVERAGE, in that order
277 List<FilterOption> opts = tdbquery
278 .getAvailableFilterOptions(StructureChooser.VIEWS_FILTER);
279 FilterOption opt_singlebest = opts.get(0);
280 FilterOption opt_manybest = opts.get(1);
281 assertEquals(opt_singlebest.getValue(),
282 ThreeDBStructureChooserQuerySource.FILTER_FIRST_BEST_COVERAGE);
283 assertEquals(opt_manybest.getValue(),
284 ThreeDBStructureChooserQuerySource.FILTER_TDBEACONS_COVERAGE);
// fetch with the single-best option, then refresh the filter options from
// the returned search summary
288 upResponse = tdbquery.fetchStructuresMetaData(testUpSeq,
289 tdbquery.getDocFieldPrefs().getStructureSummaryFields(),
290 opt_singlebest, false);
291 tdbquery.updateAvailableFilterOptions(StructureChooser.VIEWS_FILTER,
292 opts, upResponse.getSearchSummary());
293 // test ranking without additional PDBe data
294 FTSRestResponse firstRanked = tdbquery.selectFirstRankedQuery(
295 testUpSeq, upResponse.getSearchSummary(),
296 tdbquery.getDocFieldPrefs().getStructureSummaryFields(),
297 opt_singlebest.getValue(), false);
// exactly one item is expected from the first-ranked selection
298 assertEquals(firstRanked.getNumberOfItemsFound(), 1);
299 // many best response
300 upResponse = tdbquery.fetchStructuresMetaData(testUpSeq,
301 tdbquery.getDocFieldPrefs().getStructureSummaryFields(),
302 opt_manybest, false);
// the many-best fetch must yield more summary rows than the first-ranked one
303 assertTrue(firstRanked.getSearchSummary().size() < upResponse
304 .getSearchSummary().size());
305 // NB Could have race condition here
// derive a PDBe query from the 3D-Beacons response; it must not be blank
306 String pdb_Query = tdbquery.buildPDBFTSQueryFor(upResponse);
307 assertTrue(pdb_Query.trim().length() > 0);
308 pdbResponse = tdbquery.fetchStructuresMetaDataFor(pdbquery,
310 assertTrue(pdbResponse.getNumberOfItemsFound() > 0);
// the joined response must report as many items as the 3D-Beacons response
311 FTSRestResponse joinedResp = tdbquery.joinResponses(upResponse,
313 assertEquals(upResponse.getNumberOfItemsFound(),
314 joinedResp.getNumberOfItemsFound());
316 } catch (Exception x)
// NOTE(review): Assert here is org.junit.Assert (see the imports) although
// this is a TestNG test, and the failure message does not carry the caught
// exception - consider including it as the cause/message
319 Assert.fail("Unexpected Exception");
// NOTE(review): no use of this scquery is visible in this listing
321 StructureChooserQuerySource scquery = StructureChooserQuerySource
322 .getQuerySourceFor(new SequenceI[]
/**
 * TestNG data provider "testUpSeqs": supplies one row per Uniprot fixture
 * sequence ({@code upSeq}, then {@code upSeq_insulin}).
 * <p>
 * NOTE(review): a line between the method header and the return statement
 * appears to be missing from this listing. The fields returned here are only
 * assigned in setUp(); presumably they must be populated before the provider
 * is evaluated - confirm against the full file.
 *
 * @return a two-row array, each row holding a single sequence
 * @throws Exception
 *           declared by the signature; no throwing statement is visible here
 */
327 @DataProvider(name = "testUpSeqs")
328 public Object[][] testUpSeqs() throws Exception
331 return new Object[][] { { upSeq }, { upSeq_insulin } };
334 @Test(groups = { "Functional" })
335 public void sanitizeSeqNameTest()
337 String name = "ab_cdEF|fwxyz012349";
338 AssertJUnit.assertEquals(name,
339 PDBStructureChooserQuerySource.sanitizeSeqName(name));
341 // remove a [nn] substring
342 name = "abcde12[345]fg";
343 AssertJUnit.assertEquals("abcde12fg",
344 PDBStructureChooserQuerySource.sanitizeSeqName(name));
346 // remove characters other than a-zA-Z0-9 | or _
347 name = "ab[cd],.\t£$*!- \\\"@:e";
348 AssertJUnit.assertEquals("abcde",
349 PDBStructureChooserQuerySource.sanitizeSeqName(name));
351 name = "abcde12[345a]fg";
352 AssertJUnit.assertEquals("abcde12345afg",
353 PDBStructureChooserQuerySource.sanitizeSeqName(name));