2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.gui.structurechooser;
24 import java.util.Vector;
26 import org.testng.AssertJUnit;
27 import org.testng.annotations.AfterMethod;
28 import org.testng.annotations.BeforeClass;
29 import org.testng.annotations.BeforeMethod;
30 import org.testng.annotations.Test;
32 import jalview.datamodel.DBRefEntry;
33 import jalview.datamodel.DBRefSource;
34 import jalview.datamodel.PDBEntry;
35 import jalview.datamodel.Sequence;
36 import jalview.datamodel.SequenceI;
37 import jalview.fts.core.FTSRestRequest;
38 import jalview.fts.threedbeacons.TDBeaconsFTSRestClientTest;
39 import jalview.gui.JvOptionPane;
41 public class StructureChooserQuerySourceTest
// Class-level TestNG configuration method: puts JvOptionPane into
// non-interactive mode and registers CANCEL_OPTION as its mock response.
// NOTE(review): presumably this keeps any dialog raised during the tests from
// blocking on user input - confirm against JvOptionPane.
44 @BeforeClass(alwaysRun = true)
45 public void setUpJvOptionPane()
47 JvOptionPane.setInteractiveMode(false);
48 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
53 // Same fixture set-up as used for the structure chooser test.
// Per-test fixture: rebuilds the two sequences used by the tests in this
// class, 'seq' (PDB-style name "PDB|4kqy|4KQY|A") and 'upSeq' (named "P38398").
55 @BeforeMethod(alwaysRun = true)
56 public void setUp() throws Exception
// Short alphabet sequence whose name embeds the PDB id 4kqy.
58 seq = new Sequence("PDB|4kqy|4KQY|A", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1,
60 seq.createDatasetSequence();
// Creates DBRefEntry objects with accessions XYZ_1 .. XYZ_4.
// NOTE(review): presumably each one is added to seq - confirm in the full file.
61 for (int x = 1; x < 5; x++)
63 DBRefEntry dbRef = new DBRefEntry();
64 dbRef.setAccessionId("XYZ_" + x);
// A PDBEntry and a Vector of PDB entries are created here.
// NOTE(review): presumably they are attached to seq - confirm in the full file.
68 PDBEntry dbRef = new PDBEntry();
71 Vector<PDBEntry> pdbIds = new Vector<>();
76 // Uniprot sequence for 3D-Beacons mocks
77 upSeq = new Sequence("P38398",
78 "MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQCPLCKNDITKRS\n"
79 + "LQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEVSIIQSMGYRNRAKRLLQSEPEN\n"
80 + "PSLQETSLSVQLSNLGTVRTLRTKQRIQPQKTSVYIELGSDSSEDTVNKATYCSVGDQELLQITPQGTRDEI\n"
81 + "SLDSAKKAACEFSETDVTNTEHHQPSNNDLNTTEKRAAERHPEKYQGSSVSNLHVEPCGTNTHASSLQHENS\n"
82 + "SLLLTKDRMNVEKAEFCNKSKQPGLARSQHNRWAGSKETCNDRRTPSTEKKVDLNADPLCERKEWNKQKLPC\n"
83 + "SENPRDTEDVPWITLNSSIQKVNEWFSRSDELLGSDDSHDGESESNAKVADVLDVLNEVDEYSGSSEKIDLL\n"
84 + "ASDPHEALICKSERVHSKSVESNIEDKIFGKTYRKKASLPNLSHVTENLIIGAFVTEPQIIQERPLTNKLKR\n"
85 + "KRRPTSGLHPEDFIKKADLAVQKTPEMINQGTNQTEQNGQVMNITNSGHENKTKGDSIQNEKNPNPIESLEK\n"
86 + "ESAFKTKAEPISSSISNMELELNIHNSKAPKKNRLRRKSSTRHIHALELVVSRNLSPPNCTELQIDSCSSSE\n"
87 + "EIKKKKYNQMPVRHSRNLQLMEGKEPATGAKKSNKPNEQTSKRHDSDTFPELKLTNAPGSFTKCSNTSELKE\n"
88 + "FVNPSLPREEKEEKLETVKVSNNAEDPKDLMLSGERVLQTERSVESSSISLVPGTDYGTQESISLLEVSTLG\n"
89 + "KAKTEPNKCVSQCAAFENPKGLIHGCSKDNRNDTEGFKYPLGHEVNHSRETSIEMEESELDAQYLQNTFKVS\n"
90 + "KRQSFAPFSNPGNAEEECATFSAHSGSLKKQSPKVTFECEQKEENQGKNESNIKPVQTVNITAGFPVVGQKD\n"
91 + "KPVDNAKCSIKGGSRFCLSSQFRGNETGLITPNKHGLLQNPYRIPPLFPIKSFVKTKCKKNLLEENFEEHSM\n"
92 + "SPEREMGNENIPSTVSTISRNNIRENVFKEASSSNINEVGSSTNEVGSSINEIGSSDENIQAELGRNRGPKL\n"
93 + "NAMLRLGVLQPEVYKQSLPGSNCKHPEIKKQEYEEVVQTVNTDFSPYLISDNLEQPMGSSHASQVCSETPDD\n"
94 + "LLDDGEIKEDTSFAENDIKESSAVFSKSVQKGELSRSPSPFTHTHLAQGYRRGAKKLESSEENLSSEDEELP\n"
95 + "CFQHLLFGKVNNIPSQSTRHSTVATECLSKNTEENLLSLKNSLNDCSNQVILAKASQEHHLSEETKCSASLF\n"
96 + "SSQCSELEDLTANTNTQDPFLIGSSKQMRHQSESQGVGLSDKELVSDDEERGTGLEENNQEEQSMDSNLGEA\n"
97 + "ASGCESETSVSEDCSGLSSQSDILTTQQRDTMQHNLIKLQQEMAELEAVLEQHGSQPSNSYPSIISDSSALE\n"
98 + "DLRNPEQSTSEKAVLTSQKSSEYPISQNPEGLSADKFEVSADSSTSKNKEPGVERSSPSKCPSLDDRWYMHS\n"
99 + "CSGSLQNRNYPSQEELIKVVDVEEQQLEESGPHDLTETSYLPRQDLEGTPYLESGISLFSDDPESDPSEDRA\n"
100 + "PESARVGNIPSSTSALKVPQLKVAESAQSPAAAHTTDTAGYNAMEESVSREKPELTASTERVNKRMSMVVSG\n"
101 + "LTPEEFMLVYKFARKHHITLTNLITEETTHVVMKTDAEFVCERTLKYFLGIAGGKWVVSYFWVTQSIKERKM\n"
102 + "LNEHDFEVRGDVVNGRNHQGPKRARESQDRKIFRGLEICCYGPFTNMPTDQLEWMVQLCGASVVKELSSFTL\n"
103 + "GTGVHPIVVVQPDAWTEDNGFHAIGQMCEAPVVTREWVLDSVALYQCQELDTYLIPQIPHSHY\n"
106 upSeq.createDatasetSequence();
107 upSeq.setDescription("Breast cancer type 1 susceptibility protein");
// Attaches a UNIPROT cross-reference for accession P38398.
// NOTE(review): the trailing 'true' presumably flags the reference as
// canonical - confirm against the DBRefEntry constructor.
108 upSeq.addDBRef(new DBRefEntry("UNIPROT","0","P38398",null,true));
// Per-test cleanup hook, run by TestNG after each test method
// (declared with alwaysRun = true).
111 @AfterMethod(alwaysRun = true)
112 public void tearDown() throws Exception
// Checks the query strings that the query source selected for 'seq' builds
// as the sequence's PDB entries and database references are changed.
118 @SuppressWarnings("deprecation")
119 @Test(groups = { "Functional" })
120 public void buildPDBQueryTest()
122 System.out.println("seq >>>> " + seq);
// The source chosen for 'seq' is expected to be the PDB implementation.
124 StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { seq});
125 AssertJUnit.assertTrue(scquery instanceof PDBStructureChooserQuerySource);
// Case 1: fixture as built by setUp - a single pdb_id term is expected.
126 String query = scquery.buildQuery(seq);
127 AssertJUnit.assertEquals("pdb_id:1tim", query);
// Case 2: after clearing the PDB entries, a text: query over the XYZ_n
// accessions plus the id 4kqy is expected.
128 seq.getAllPDBEntries().clear();
129 query = scquery.buildQuery(seq);
130 AssertJUnit.assertEquals(
131 "text:XYZ_1 OR text:XYZ_2 OR text:XYZ_3 OR text:XYZ_4 OR text:4kqy",
// Case 3: only the text:4kqy term is expected here.
// NOTE(review): presumably the XYZ_n references were removed just before this
// rebuild - confirm in the full file.
134 query = scquery.buildQuery(seq);
135 System.out.println(query);
136 AssertJUnit.assertEquals("text:4kqy", query);
// Case 4: add a UNIPROT reference (P12345) and a PDB reference (1XYZ).
138 DBRefEntry uniprotDBRef = new DBRefEntry();
139 uniprotDBRef.setAccessionId("P12345");
140 uniprotDBRef.setSource(DBRefSource.UNIPROT);
141 seq.addDBRef(uniprotDBRef);
143 DBRefEntry pdbDBRef = new DBRefEntry();
144 pdbDBRef.setAccessionId("1XYZ");
145 pdbDBRef.setSource(DBRefSource.PDB);
146 seq.addDBRef(pdbDBRef);
// Further references with accessions XYZ_1 .. XYZ_4 are created; the expected
// query below contains only the uniprot and pdb terms.
148 for (int x = 1; x < 5; x++)
150 DBRefEntry dbRef = new DBRefEntry();
151 dbRef.setAccessionId("XYZ_" + x);
154 System.out.println("");
155 System.out.println(seq.getDBRefs());
// NOTE(review): 'query' is printed before it is rebuilt on the next
// statement, so this shows the value from the previous case.
156 System.out.println(query);
157 query = scquery.buildQuery(seq);
158 AssertJUnit.assertEquals(
159 "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
// Checks the query built for the Uniprot fixture 'upSeq' once the 3D-Beacons
// mock has been installed: the query source is expected to be the ThreeDB
// implementation and the query is expected to be the accession P38398.
163 @SuppressWarnings("deprecation")
164 @Test(groups = { "Functional" })
165 public void buildThreeDBQueryTest()
167 System.out.println("seq >>>> " + upSeq);
// Install the mock before asking for a query source.
168 TDBeaconsFTSRestClientTest.setMock();
169 StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { upSeq});
170 AssertJUnit.assertTrue(scquery instanceof ThreeDBStructureChooserQuerySource);
171 String query = scquery.buildQuery(upSeq);
172 AssertJUnit.assertEquals("P38398", query);
174 // query shouldn't change regardless of additional entries
175 // because 3DBeacons requires canonical entries.
176 upSeq.getAllPDBEntries().clear();
177 query = scquery.buildQuery(upSeq);
178 AssertJUnit.assertEquals("P38398", query);
// With every database reference removed, the query is rebuilt; the assertion
// further down expects it to be null.
179 upSeq.setDBRefs(null);
180 query = scquery.buildQuery(upSeq);
182 * legacy projects/datasets will not have canonical flags set for uniprot dbrefs
183 * graceful behaviour would be to
184 * - pick one ? not possible
185 * - iterate through all until a 200 is obtained ?
186 * ---> ideal but could be costly
187 * ---> better to do a direct retrieval from uniprot to work out which is the canonical identifier..
188 * ----> need a test to check that accessions can be promoted to canonical!
190 //FIXME - need to be able to use ID to query here ?
191 AssertJUnit.assertEquals(null, query);
198 * - no protein -> TDB not applicable, query PDBe only (consider RNA or DNA - specific query adapter ?)
199 * - protein but no uniprot -> first consider trying to get uniprot refs (need a mark to say none are available)
200 * - protein and uniprot - no canonicals -> resolve to uniprot automatically to get canonicals
201 * - query uniprot against 3DBeacons
202 * --> decorate experimental structures with additional data from PDBe
203 * - query remaining against PDBe
206 * --> in memory ranking - no need to query twice
208 * - experimental > AlphaFold -> Model
211 * -> experimental only
212 * -> experimental plus best models for other regions
214 * -> need to be able to select correct reference (the longest one that covers all) for superposition
// The remaining lines are disabled code mirroring the mixed UNIPROT/PDB
// reference case of buildPDBQueryTest; nothing below is executed.
217 // DBRefEntry uniprotDBRef = new DBRefEntry();
218 // uniprotDBRef.setAccessionId("P12345");
219 // uniprotDBRef.setSource(DBRefSource.UNIPROT);
220 // upSeq.addDBRef(uniprotDBRef);
222 // DBRefEntry pdbDBRef = new DBRefEntry();
223 // pdbDBRef.setAccessionId("1XYZ");
224 // pdbDBRef.setSource(DBRefSource.PDB);
225 // upSeq.addDBRef(pdbDBRef);
227 // for (int x = 1; x < 5; x++)
229 // DBRefEntry dbRef = new DBRefEntry();
230 // dbRef.setAccessionId("XYZ_" + x);
231 // seq.addDBRef(dbRef);
233 // System.out.println("");
234 // System.out.println(seq.getDBRefs());
235 // System.out.println(query);
236 // query = scquery.buildQuery(seq);
238 // "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
// Work-in-progress test for chaining a 3D-Beacons query with a follow-up PDBe
// query. It installs the 3D-Beacons mock, obtains the query source for
// 'upSeq' and populates an FTSRestRequest from it.
// NOTE(review): the visible lines contain no assertion and the request is
// never executed; the follow-up steps are still commented out.
241 @Test(groups= {"Functional"})
242 public void cascadingThreeDBandPDBQuerys()
244 TDBeaconsFTSRestClientTest.setMock();
246 StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { upSeq});
249 // query PDBe for PDB entry metadata
// Request for up to 100 responses, with an empty search field, the query
// built for upSeq as the search term, and the structure summary fields from
// the source's document field preferences as the wanted fields.
252 FTSRestRequest tdbQuery = new FTSRestRequest();
253 tdbQuery.setResponseSize(100);
254 tdbQuery.setFieldToSearchBy("");
255 tdbQuery.setSearchTerm(scquery.buildQuery(upSeq));
256 tdbQuery.setWantedFields(scquery.getDocFieldPrefs().getStructureSummaryFields());
257 //scquery.fetchStructuresMetaData(upSeq, null, null, false);
// Declared for the follow-up PDB query but never assigned, because the only
// assignment is commented out below.
258 String secondaryPdbQuery;
259 //secondaryPdbQuery = ((ThreeDBStructureChooserQuerySource)scquery).buildPDBFTSquery();
// Exercises PDBStructureChooserQuerySource.sanitizeSeqName with four
// representative sequence names and checks the cleaned-up result of each.
263 @Test(groups = { "Functional" })
264 public void sanitizeSeqNameTest()
// A name made only of letters, digits, '_' and '|' is expected back unchanged.
266 String name = "ab_cdEF|fwxyz012349";
267 AssertJUnit.assertEquals(name, PDBStructureChooserQuerySource.sanitizeSeqName(name));
269 // remove a [nn] substring
270 name = "abcde12[345]fg";
271 AssertJUnit.assertEquals("abcde12fg", PDBStructureChooserQuerySource.sanitizeSeqName(name));
273 // remove characters other than a-zA-Z0-9 | or _
274 name = "ab[cd],.\t£$*!- \\\"@:e";
275 AssertJUnit.assertEquals("abcde", PDBStructureChooserQuerySource.sanitizeSeqName(name));
// Brackets whose content is not purely digits: only the bracket characters
// are expected to go, while the enclosed "345a" is kept.
277 name = "abcde12[345a]fg";
278 AssertJUnit.assertEquals("abcde12345afg", PDBStructureChooserQuerySource.sanitizeSeqName(name));