2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.gui.structurechooser;
24 import static org.testng.Assert.assertEquals;
25 import static org.testng.Assert.assertTrue;
27 import java.util.Vector;
29 import org.junit.Assert;
30 import org.testng.AssertJUnit;
31 import org.testng.annotations.AfterMethod;
32 import org.testng.annotations.BeforeClass;
33 import org.testng.annotations.BeforeMethod;
34 import org.testng.annotations.Test;
36 import jalview.datamodel.DBRefEntry;
37 import jalview.datamodel.DBRefSource;
38 import jalview.datamodel.PDBEntry;
39 import jalview.datamodel.Sequence;
40 import jalview.datamodel.SequenceI;
41 import jalview.fts.core.FTSRestRequest;
42 import jalview.fts.core.FTSRestResponse;
43 import jalview.fts.service.pdb.PDBFTSRestClient;
44 import jalview.fts.threedbeacons.TDBeaconsFTSRestClientTest;
45 import jalview.gui.JvOptionPane;
46 import jalview.jbgui.FilterOption;
48 public class StructureChooserQuerySourceTest
// Class-level fixture (TestNG @BeforeClass, alwaysRun = true): switches
// JvOptionPane out of interactive mode and sets CANCEL_OPTION as its mock
// response.
// NOTE(review): presumably so that no dialog can block a headless test run
// - confirm against JvOptionPane.
51 @BeforeClass(alwaysRun = true)
52 public void setUpJvOptionPane()
54 JvOptionPane.setInteractiveMode(false);
55 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
60 // same set up as for structurechooser test
// Per-test fixture (TestNG @BeforeMethod, alwaysRun = true): rebuilds the two
// sequences used by the tests in this class, 'seq' and 'upSeq'.
62 @BeforeMethod(alwaysRun = true)
63 public void setUp() throws Exception
// 'seq' is named "PDB|4kqy|4KQY|A" and holds the 26-letter alphabet; the
// remaining constructor arguments are not visible in this view.
65 seq = new Sequence("PDB|4kqy|4KQY|A", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1,
67 seq.createDatasetSequence();
// Creates DBRefEntry objects with accessions XYZ_1 .. XYZ_4.
// NOTE(review): presumably each one is added to 'seq' (buildPDBQueryTest
// expects "text:XYZ_1 OR ..."); the statement doing so is not visible here.
68 for (int x = 1; x < 5; x++)
70 DBRefEntry dbRef = new DBRefEntry();
71 dbRef.setAccessionId("XYZ_" + x);
// A PDBEntry and a Vector<PDBEntry> are created next.
// NOTE(review): presumably the entry carries id "1tim" and is attached to
// 'seq' (buildPDBQueryTest expects "pdb_id:1tim") - confirm, not visible here.
75 PDBEntry dbRef = new PDBEntry();
78 Vector<PDBEntry> pdbIds = new Vector<>();
83 // Uniprot sequence for 3D-Beacons mocks
84 upSeq = new Sequence("P38398",
85 "MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQCPLCKNDITKRS\n"
86 + "LQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEVSIIQSMGYRNRAKRLLQSEPEN\n"
87 + "PSLQETSLSVQLSNLGTVRTLRTKQRIQPQKTSVYIELGSDSSEDTVNKATYCSVGDQELLQITPQGTRDEI\n"
88 + "SLDSAKKAACEFSETDVTNTEHHQPSNNDLNTTEKRAAERHPEKYQGSSVSNLHVEPCGTNTHASSLQHENS\n"
89 + "SLLLTKDRMNVEKAEFCNKSKQPGLARSQHNRWAGSKETCNDRRTPSTEKKVDLNADPLCERKEWNKQKLPC\n"
90 + "SENPRDTEDVPWITLNSSIQKVNEWFSRSDELLGSDDSHDGESESNAKVADVLDVLNEVDEYSGSSEKIDLL\n"
91 + "ASDPHEALICKSERVHSKSVESNIEDKIFGKTYRKKASLPNLSHVTENLIIGAFVTEPQIIQERPLTNKLKR\n"
92 + "KRRPTSGLHPEDFIKKADLAVQKTPEMINQGTNQTEQNGQVMNITNSGHENKTKGDSIQNEKNPNPIESLEK\n"
93 + "ESAFKTKAEPISSSISNMELELNIHNSKAPKKNRLRRKSSTRHIHALELVVSRNLSPPNCTELQIDSCSSSE\n"
94 + "EIKKKKYNQMPVRHSRNLQLMEGKEPATGAKKSNKPNEQTSKRHDSDTFPELKLTNAPGSFTKCSNTSELKE\n"
95 + "FVNPSLPREEKEEKLETVKVSNNAEDPKDLMLSGERVLQTERSVESSSISLVPGTDYGTQESISLLEVSTLG\n"
96 + "KAKTEPNKCVSQCAAFENPKGLIHGCSKDNRNDTEGFKYPLGHEVNHSRETSIEMEESELDAQYLQNTFKVS\n"
97 + "KRQSFAPFSNPGNAEEECATFSAHSGSLKKQSPKVTFECEQKEENQGKNESNIKPVQTVNITAGFPVVGQKD\n"
98 + "KPVDNAKCSIKGGSRFCLSSQFRGNETGLITPNKHGLLQNPYRIPPLFPIKSFVKTKCKKNLLEENFEEHSM\n"
99 + "SPEREMGNENIPSTVSTISRNNIRENVFKEASSSNINEVGSSTNEVGSSINEIGSSDENIQAELGRNRGPKL\n"
100 + "NAMLRLGVLQPEVYKQSLPGSNCKHPEIKKQEYEEVVQTVNTDFSPYLISDNLEQPMGSSHASQVCSETPDD\n"
101 + "LLDDGEIKEDTSFAENDIKESSAVFSKSVQKGELSRSPSPFTHTHLAQGYRRGAKKLESSEENLSSEDEELP\n"
102 + "CFQHLLFGKVNNIPSQSTRHSTVATECLSKNTEENLLSLKNSLNDCSNQVILAKASQEHHLSEETKCSASLF\n"
103 + "SSQCSELEDLTANTNTQDPFLIGSSKQMRHQSESQGVGLSDKELVSDDEERGTGLEENNQEEQSMDSNLGEA\n"
104 + "ASGCESETSVSEDCSGLSSQSDILTTQQRDTMQHNLIKLQQEMAELEAVLEQHGSQPSNSYPSIISDSSALE\n"
105 + "DLRNPEQSTSEKAVLTSQKSSEYPISQNPEGLSADKFEVSADSSTSKNKEPGVERSSPSKCPSLDDRWYMHS\n"
106 + "CSGSLQNRNYPSQEELIKVVDVEEQQLEESGPHDLTETSYLPRQDLEGTPYLESGISLFSDDPESDPSEDRA\n"
107 + "PESARVGNIPSSTSALKVPQLKVAESAQSPAAAHTTDTAGYNAMEESVSREKPELTASTERVNKRMSMVVSG\n"
108 + "LTPEEFMLVYKFARKHHITLTNLITEETTHVVMKTDAEFVCERTLKYFLGIAGGKWVVSYFWVTQSIKERKM\n"
109 + "LNEHDFEVRGDVVNGRNHQGPKRARESQDRKIFRGLEICCYGPFTNMPTDQLEWMVQLCGASVVKELSSFTL\n"
110 + "GTGVHPIVVVQPDAWTEDNGFHAIGQMCEAPVVTREWVLDSVALYQCQELDTYLIPQIPHSHY\n"
// Give 'upSeq' a dataset sequence, a description and one UNIPROT DBRef whose
// accession matches the sequence name (P38398).
// NOTE(review): the trailing 'true' is presumably the "canonical" flag that
// buildThreeDBQueryTest's notes refer to - confirm against DBRefEntry.
113 upSeq.createDatasetSequence();
114 upSeq.setDescription("Breast cancer type 1 susceptibility protein");
115 upSeq.addDBRef(new DBRefEntry("UNIPROT","0","P38398",null,true));
// Per-test cleanup hook (TestNG @AfterMethod, alwaysRun = true).
// NOTE(review): the method body is not visible in this view; presumably it
// releases the fixtures built in setUp() - confirm.
118 @AfterMethod(alwaysRun = true)
119 public void tearDown() throws Exception
// Checks the query string that the PDB query source builds for 'seq' as the
// PDB entries and DBRefs attached to the sequence change.
125 @SuppressWarnings("deprecation")
126 @Test(groups = { "Functional" })
127 public void buildPDBQueryTest()
129 System.out.println("seq >>>> " + seq);
// For the fixture sequence the factory is expected to pick the PDB query
// source, and the first query is expected to be the PDB id alone.
131 StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { seq});
132 AssertJUnit.assertTrue(scquery instanceof PDBStructureChooserQuerySource);
133 String query = scquery.buildQuery(seq);
134 AssertJUnit.assertEquals("pdb_id:1tim", query);
// With the PDB entries cleared, the expected query is a text search over the
// XYZ_n accessions plus "4kqy".
135 seq.getAllPDBEntries().clear();
136 query = scquery.buildQuery(seq);
137 AssertJUnit.assertEquals(
138 "text:XYZ_1 OR text:XYZ_2 OR text:XYZ_3 OR text:XYZ_4 OR text:4kqy",
// NOTE(review): the step that changes 'seq' between the assertion above and
// the "text:4kqy" expectation below is not visible in this view; presumably
// the DBRefs are removed - confirm.
141 query = scquery.buildQuery(seq);
142 System.out.println(query);
143 AssertJUnit.assertEquals("text:4kqy", query);
// Add a UNIPROT reference (P12345) and a PDB reference (1XYZ) to the sequence.
145 DBRefEntry uniprotDBRef = new DBRefEntry();
146 uniprotDBRef.setAccessionId("P12345");
147 uniprotDBRef.setSource(DBRefSource.UNIPROT);
148 seq.addDBRef(uniprotDBRef);
150 DBRefEntry pdbDBRef = new DBRefEntry();
151 pdbDBRef.setAccessionId("1XYZ");
152 pdbDBRef.setSource(DBRefSource.PDB);
153 seq.addDBRef(pdbDBRef);
// Creates further DBRefEntry objects with accessions XYZ_1 .. XYZ_4; no source
// is set on them in the visible lines, and they do not appear in the final
// expected query.
155 for (int x = 1; x < 5; x++)
157 DBRefEntry dbRef = new DBRefEntry();
158 dbRef.setAccessionId("XYZ_" + x);
161 System.out.println("");
162 System.out.println(seq.getDBRefs());
// NOTE(review): 'query' is printed here before it is rebuilt on the next
// line, so this prints the previous value ("text:4kqy"), not the new query.
163 System.out.println(query);
164 query = scquery.buildQuery(seq);
// The expected final query uses the UNIPROT accession (as both
// uniprot_accession and uniprot_id) and the lower-cased PDB id.
165 AssertJUnit.assertEquals(
166 "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
// Checks the query string that the 3D-Beacons query source builds for the
// UNIPROT fixture sequence 'upSeq'. Most of the remaining lines are design
// notes and commented-out steps kept for future work.
170 @SuppressWarnings("deprecation")
171 @Test(groups = { "Functional" })
172 public void buildThreeDBQueryTest()
174 System.out.println("seq >>>> " + upSeq);
// The 3D-Beacons client is switched to its mock before the query source is
// obtained; for 'upSeq' the factory is expected to return the 3D-Beacons
// source, and the query is expected to be the bare accession.
175 TDBeaconsFTSRestClientTest.setMock();
176 StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { upSeq});
177 AssertJUnit.assertTrue(scquery instanceof ThreeDBStructureChooserQuerySource);
178 String query = scquery.buildQuery(upSeq);
179 AssertJUnit.assertEquals("P38398", query);
181 // query shouldn't change regardless of additional entries
182 // because 3DBeacons requires canonical entries.
183 upSeq.getAllPDBEntries().clear();
184 query = scquery.buildQuery(upSeq);
185 AssertJUnit.assertEquals("P38398", query);
// With all DBRefs removed the query is expected to be null (asserted after
// the notes below).
186 upSeq.setDBRefs(null);
187 query = scquery.buildQuery(upSeq);
189 * legacy projects/datasets will not have canonical flags set for uniprot dbrefs
190 * graceful behaviour would be to
191 * - pick one ? not possible
192 * - iterate through all until a 200 is obtained ?
193 * ---> ideal but could be costly
194 * ---> better to do a direct retrieval from uniprot to work out which is the canonical identifier..
195 * ----> need a test to check that accessions can be promoted to canonical!
197 //FIXME - need to be able to use ID to query here ?
198 AssertJUnit.assertEquals(null, query);
205 * - no protein -> TDB not applicable, query PDBe only (consider RNA or DNA - specific query adapter ?)
206 * - protein but no uniprot -> first consider trying to get uniprot refs (need a mark to say none are available)
207 * - protein and uniprot - no canonicals -> resolve to uniprot automatically to get canonicals
208 * - query uniprot against 3DBeacons
209 * --> decorate experimental structures with additional data from PDBe
210 * - query remaining against PDBe
213 * --> in memory ranking - no need to query twice
215 * - experimental > AlphaFold -> Model
218 * -> experimental only
219 * -> experimental plus best models for other regions
221 * -> need to be able to select correct reference (the longest one that covers all) for superposition
// The commented-out steps below repeat the tail of buildPDBQueryTest.
// NOTE(review): some of them target 'seq' rather than 'upSeq'; adjust before
// re-enabling.
224 // DBRefEntry uniprotDBRef = new DBRefEntry();
225 // uniprotDBRef.setAccessionId("P12345");
226 // uniprotDBRef.setSource(DBRefSource.UNIPROT);
227 // upSeq.addDBRef(uniprotDBRef);
229 // DBRefEntry pdbDBRef = new DBRefEntry();
230 // pdbDBRef.setAccessionId("1XYZ");
231 // pdbDBRef.setSource(DBRefSource.PDB);
232 // upSeq.addDBRef(pdbDBRef);
234 // for (int x = 1; x < 5; x++)
236 // DBRefEntry dbRef = new DBRefEntry();
237 // dbRef.setAccessionId("XYZ_" + x);
238 // seq.addDBRef(dbRef);
240 // System.out.println("");
241 // System.out.println(seq.getDBRefs());
242 // System.out.println(query);
243 // query = scquery.buildQuery(seq);
245 // "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
// Exercises the cascade from a 3D-Beacons query to a PDB query, with both
// REST clients switched to their mocks: fetch structure metadata for 'upSeq',
// build a PDB query from that response, run it, then join the two responses.
248 @Test(groups= {"Functional"})
249 public void cascadingThreeDBandPDBQuerys()
251 TDBeaconsFTSRestClientTest.setMock();
252 PDBFTSRestClient.setMock();
253 ThreeDBStructureChooserQuerySource tdbquery = new ThreeDBStructureChooserQuerySource();
254 PDBStructureChooserQuerySource pdbquery = new PDBStructureChooserQuerySource();
258 FTSRestResponse upResponse = null;
259 FTSRestResponse pdbResponse = null;
262 upResponse = tdbquery.fetchStructuresMetaData(upSeq, tdbquery.getDocFieldPrefs().getStructureSummaryFields(), null, false);
263 // NB Could have race condition here
// The PDB query derived from the 3D-Beacons response must be non-empty, and
// the PDB lookup must find at least one item.
264 String pdb_Query = tdbquery.buildPDBFTSQueryFor(upResponse);
265 assertTrue(pdb_Query.trim().length()>0);
266 pdbResponse = tdbquery.fetchStructuresMetaDataFor(pdbquery, upResponse);
267 assertTrue(pdbResponse.getNumberOfItemsFound()>0);
// The joined response must report the same item count as the 3D-Beacons
// response.
268 FTSRestResponse joinedResp = tdbquery.joinResponses(upResponse, pdbResponse);
269 assertEquals(upResponse.getNumberOfItemsFound(),joinedResp.getNumberOfItemsFound());
// NOTE(review): the failure below uses org.junit.Assert while every other
// assertion in this class is TestNG, and its message does not include the
// caught exception 'x'.
271 } catch (Exception x)
274 Assert.fail("Unexpected Exception");
// NOTE(review): 'scquery' has no visible use after this assignment.
276 StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { upSeq});
// Checks PDBStructureChooserQuerySource.sanitizeSeqName against four inputs.
280 @Test(groups = { "Functional" })
281 public void sanitizeSeqNameTest()
// A name made only of letters, digits, '|' and '_' is returned unchanged.
283 String name = "ab_cdEF|fwxyz012349";
284 AssertJUnit.assertEquals(name, PDBStructureChooserQuerySource.sanitizeSeqName(name));
286 // remove a [nn] substring
287 name = "abcde12[345]fg";
288 AssertJUnit.assertEquals("abcde12fg", PDBStructureChooserQuerySource.sanitizeSeqName(name));
290 // remove characters other than a-zA-Z0-9 | or _
291 name = "ab[cd],.\t£$*!- \\\"@:e";
292 AssertJUnit.assertEquals("abcde", PDBStructureChooserQuerySource.sanitizeSeqName(name));
// A bracketed group that contains a letter keeps its contents; only the
// brackets themselves are removed.
294 name = "abcde12[345a]fg";
295 AssertJUnit.assertEquals("abcde12345afg", PDBStructureChooserQuerySource.sanitizeSeqName(name));