Merge branch 'bug/JAL-4313_cme_in_tests' into patch/JAL-4311_import_pLDDT_scores_from...
[jalview.git] / test / jalview / gui / structurechooser / StructureChooserQuerySourceTest.java
1 /*
2  * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3  * Copyright (C) $$Year-Rel$$ The Jalview Authors
4  * 
5  * This file is part of Jalview.
6  * 
7  * Jalview is free software: you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License 
9  * as published by the Free Software Foundation, either version 3
10  * of the License, or (at your option) any later version.
11  *  
12  * Jalview is distributed in the hope that it will be useful, but 
13  * WITHOUT ANY WARRANTY; without even the implied warranty 
14  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
15  * PURPOSE.  See the GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
19  * The Jalview Authors are detailed in the 'AUTHORS' file.
20  */
21 package jalview.gui.structurechooser;
22
23 import static org.testng.Assert.assertEquals;
24 import static org.testng.Assert.assertTrue;
25 import static org.testng.Assert.fail;
26
27 import java.util.ArrayList;
28 import java.util.List;
29 import java.util.Vector;
30
31 import org.junit.Assert;
32 import org.testng.AssertJUnit;
33 import org.testng.annotations.AfterMethod;
34 import org.testng.annotations.BeforeClass;
35 import org.testng.annotations.BeforeMethod;
36 import org.testng.annotations.DataProvider;
37 import org.testng.annotations.Test;
38
39 import jalview.datamodel.DBRefEntry;
40 import jalview.datamodel.DBRefSource;
41 import jalview.datamodel.PDBEntry;
42 import jalview.datamodel.Sequence;
43 import jalview.datamodel.SequenceI;
44 import jalview.fts.api.FTSData;
45 import jalview.fts.api.FTSDataColumnI;
46 import jalview.fts.core.FTSRestRequest;
47 import jalview.fts.core.FTSRestResponse;
48 import jalview.fts.service.pdb.PDBFTSRestClientTest;
49 import jalview.fts.service.threedbeacons.TDB_FTSData;
50 import jalview.fts.service.threedbeacons.TDBeaconsFTSRestClient;
51 import jalview.fts.threedbeacons.TDBeaconsFTSRestClientTest;
52 import jalview.gui.JvOptionPane;
53 import jalview.gui.StructureChooser;
54 import jalview.jbgui.FilterOption;
55
56 public class StructureChooserQuerySourceTest
57 {
58
  /**
   * One-off class set-up: switches JvOptionPane out of interactive mode and
   * registers CANCEL_OPTION as its mock response.
   * 
   * NOTE(review): presumably this stops dialogs from blocking a headless test
   * run - confirm against JvOptionPane.
   */
  @BeforeClass(alwaysRun = true)
  public void setUpJvOptionPane()
  {
    JvOptionPane.setInteractiveMode(false);
    JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
  }
65
  // Sequence fixtures, rebuilt by setUp() before every test method.
  // seq is the "PDB|4kqy|4KQY|A" mock; each upSeq* carries a UNIPROT DBRefEntry.
  Sequence seq, upSeq, upSeq_insulin, upSeq_r1ab;

  // FER1_MAIZE fixture (UNIPROT accession P27787)
  private Sequence upSeq_fer1_maize;

  // EPAS1_HUMAN fixture (UNIPROT accession Q99814)
  private Sequence upSeq_epas1_human;
71
72   // same set up as for structurechooser test
73
74   @BeforeMethod(alwaysRun = true)
75   public void setUp() throws Exception
76   {
77     seq = new Sequence("PDB|4kqy|4KQY|A", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1,
78             26);
79     seq.createDatasetSequence();
80     for (int x = 1; x < 5; x++)
81     {
82       DBRefEntry dbRef = new DBRefEntry();
83       dbRef.setAccessionId("XYZ_" + x);
84       seq.addDBRef(dbRef);
85     }
86
87     PDBEntry dbRef = new PDBEntry();
88     dbRef.setId("1tim");
89
90     Vector<PDBEntry> pdbIds = new Vector<>();
91     pdbIds.add(dbRef);
92
93     seq.setPDBId(pdbIds);
94
95     // Uniprot sequence for 3D-Beacons mocks
96     upSeq = new Sequence("P38398",
97             "MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQCPLCKNDITKRS\n"
98                     + "LQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEVSIIQSMGYRNRAKRLLQSEPEN\n"
99                     + "PSLQETSLSVQLSNLGTVRTLRTKQRIQPQKTSVYIELGSDSSEDTVNKATYCSVGDQELLQITPQGTRDEI\n"
100                     + "SLDSAKKAACEFSETDVTNTEHHQPSNNDLNTTEKRAAERHPEKYQGSSVSNLHVEPCGTNTHASSLQHENS\n"
101                     + "SLLLTKDRMNVEKAEFCNKSKQPGLARSQHNRWAGSKETCNDRRTPSTEKKVDLNADPLCERKEWNKQKLPC\n"
102                     + "SENPRDTEDVPWITLNSSIQKVNEWFSRSDELLGSDDSHDGESESNAKVADVLDVLNEVDEYSGSSEKIDLL\n"
103                     + "ASDPHEALICKSERVHSKSVESNIEDKIFGKTYRKKASLPNLSHVTENLIIGAFVTEPQIIQERPLTNKLKR\n"
104                     + "KRRPTSGLHPEDFIKKADLAVQKTPEMINQGTNQTEQNGQVMNITNSGHENKTKGDSIQNEKNPNPIESLEK\n"
105                     + "ESAFKTKAEPISSSISNMELELNIHNSKAPKKNRLRRKSSTRHIHALELVVSRNLSPPNCTELQIDSCSSSE\n"
106                     + "EIKKKKYNQMPVRHSRNLQLMEGKEPATGAKKSNKPNEQTSKRHDSDTFPELKLTNAPGSFTKCSNTSELKE\n"
107                     + "FVNPSLPREEKEEKLETVKVSNNAEDPKDLMLSGERVLQTERSVESSSISLVPGTDYGTQESISLLEVSTLG\n"
108                     + "KAKTEPNKCVSQCAAFENPKGLIHGCSKDNRNDTEGFKYPLGHEVNHSRETSIEMEESELDAQYLQNTFKVS\n"
109                     + "KRQSFAPFSNPGNAEEECATFSAHSGSLKKQSPKVTFECEQKEENQGKNESNIKPVQTVNITAGFPVVGQKD\n"
110                     + "KPVDNAKCSIKGGSRFCLSSQFRGNETGLITPNKHGLLQNPYRIPPLFPIKSFVKTKCKKNLLEENFEEHSM\n"
111                     + "SPEREMGNENIPSTVSTISRNNIRENVFKEASSSNINEVGSSTNEVGSSINEIGSSDENIQAELGRNRGPKL\n"
112                     + "NAMLRLGVLQPEVYKQSLPGSNCKHPEIKKQEYEEVVQTVNTDFSPYLISDNLEQPMGSSHASQVCSETPDD\n"
113                     + "LLDDGEIKEDTSFAENDIKESSAVFSKSVQKGELSRSPSPFTHTHLAQGYRRGAKKLESSEENLSSEDEELP\n"
114                     + "CFQHLLFGKVNNIPSQSTRHSTVATECLSKNTEENLLSLKNSLNDCSNQVILAKASQEHHLSEETKCSASLF\n"
115                     + "SSQCSELEDLTANTNTQDPFLIGSSKQMRHQSESQGVGLSDKELVSDDEERGTGLEENNQEEQSMDSNLGEA\n"
116                     + "ASGCESETSVSEDCSGLSSQSDILTTQQRDTMQHNLIKLQQEMAELEAVLEQHGSQPSNSYPSIISDSSALE\n"
117                     + "DLRNPEQSTSEKAVLTSQKSSEYPISQNPEGLSADKFEVSADSSTSKNKEPGVERSSPSKCPSLDDRWYMHS\n"
118                     + "CSGSLQNRNYPSQEELIKVVDVEEQQLEESGPHDLTETSYLPRQDLEGTPYLESGISLFSDDPESDPSEDRA\n"
119                     + "PESARVGNIPSSTSALKVPQLKVAESAQSPAAAHTTDTAGYNAMEESVSREKPELTASTERVNKRMSMVVSG\n"
120                     + "LTPEEFMLVYKFARKHHITLTNLITEETTHVVMKTDAEFVCERTLKYFLGIAGGKWVVSYFWVTQSIKERKM\n"
121                     + "LNEHDFEVRGDVVNGRNHQGPKRARESQDRKIFRGLEICCYGPFTNMPTDQLEWMVQLCGASVVKELSSFTL\n"
122                     + "GTGVHPIVVVQPDAWTEDNGFHAIGQMCEAPVVTREWVLDSVALYQCQELDTYLIPQIPHSHY\n"
123                     + "",
124             1, 1863);
125     upSeq.createDatasetSequence();
126     upSeq.setDescription("Breast cancer type 1 susceptibility protein");
127     upSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "P38398", null, true));
128
129     upSeq_insulin = new Sequence("INS_HUMAN",
130             "MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGP"
131                     + "GAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN");
132     upSeq_insulin.createDatasetSequence();
133     upSeq_insulin.setDescription("Insulin");
134     upSeq_insulin
135             .addDBRef(new DBRefEntry("UNIPROT", "0", "P01308", null, true));
136
137     upSeq_r1ab = new Sequence("R1AB_SARS2",
138             "MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGTCGLVEVEKGVLPQLEQPYVFIK\n"
139                     + "RSDARTAPHGHVMVELVAELEGIQYGRSGETLGVLVPHVGEIPVAYRKVLLRKNGNKGAGGHSYGADLKSFD\n"
140                     + "LGDELGTDPYEDFQENWNTKHSSGVTRELMRELNGGAYTRYVDNNFCGPDGYPLECIKDLLARAGKASCTLS\n"
141                     + "EQLDFIDTKRGVYCCREHEHEIAWYTERSEKSYELQTPFEIKLAKKFDTFNGECPNFVFPLNSIIKTIQPRV\n"
142                     + "EKKKLDGFMGRIRSVYPVASPNECNQMCLSTLMKCDHCGETSWQTGDFVKATCEFCGTENLTKEGATTCGYL\n"
143                     + "PQNAVVKIYCPACHNSEVGPEHSLAEYHNESGLKTILRKGGRTIAFGGCVFSYVGCHNKCAYWVPRASANIG\n"
144                     + "CNHTGVVGEGSEGLNDNLLEILQKEKVNINIVGDFKLNEEIAIILASFSASTSAFVETVKGLDYKAFKQIVE\n"
145                     + "SCGNFKVTKGKAKKGAWNIGEQKSILSPLYAFASEAARVVRSIFSRTLETAQNSVRVLQKAAITILDGISQY\n"
146                     + "SLRLIDAMMFTSDLATNNLVVMAYITGGVVQLTSQWLTNIFGTVYEKLKPVLDWLEEKFKEGVEFLRDGWEI\n"
147                     + "VKFISTCACEIVGGQIVTCAKEIKESVQTFFKLVNKFLALCADSIIIGGAKLKALNLGETFVTHSKGLYRKC\n"
148                     + "VKSREETGLLMPLKAPKEIIFLEGETLPTEVLTEEVVLKTGDLQPLEQPTSEAVEAPLVGTPVCINGLMLLE\n"
149                     + "IKDTEKYCALAPNMMVTNNTFTLKGGAPTKVTFGDDTVIEVQGYKSVNITFELDERIDKVLNEKCSAYTVEL\n"
150                     + "GTEVNEFACVVADAVIKTLQPVSELLTPLGIDLDEWSMATYYLFDESGEFKLASHMYCSFYPPDEDEEEGDC\n"
151                     + "EEEEFEPSTQYEYGTEDDYQGKPLEFGATSAALQPEEEQEEDWLDDDSQQTVGQQDGSEDNQTTTIQTIVEV\n"
152                     + "QPQLEMELTPVVQTIEVNSFSGYLKLTDNVYIKNADIVEEAKKVKPTVVVNAANVYLKHGGGVAGALNKATN\n"
153                     + "NAMQVESDDYIATNGPLKVGGSCVLSGHNLAKHCLHVVGPNVNKGEDIQLLKSAYENFNQHEVLLAPLLSAG\n"
154                     + "IFGADPIHSLRVCVDTVRTNVYLAVFDKNLYDKLVSSFLEMKSEKQVEQKIAEIPKEEVKPFITESKPSVEQ\n"
155                     + "RKQDDKKIKACVEEVTTTLEETKFLTENLLLYIDINGNLHPDSATLVSDIDITFLKKDAPYIVGDVVQEGVL\n"
156                     + "TAVVIPTKKAGGTTEMLAKALRKVPTDNYITTYPGQGLNGYTVEEAKTVLKKCKSAFYILPSIISNEKQEIL\n"
157                     + "GTVSWNLREMLAHAEETRKLMPVCVETKAIVSTIQRKYKGIKIQEGVVDYGARFYFYTSKTTVASLINTLND\n"
158                     + "LNETLVTMPLGYVTHGLNLEEAARYMRSLKVPATVSVSSPDAVTAYNGYLTSSSKTPEEHFIETISLAGSYK\n"
159                     + "DWSYSGQSTQLGIEFLKRGDKSVYYTSNPTTFHLDGEVITFDNLKTLLSLREVRTIKVFTTVDNINLHTQVV\n"
160                     + "DMSMTYGQQFGPTYLDGADVTKIKPHNSHEGKTFYVLPNDDTLRVEAFEYYHTTDPSFLGRYMSALNHTKKW\n"
161                     + "KYPQVNGLTSIKWADNNCYLATALLTLQQIELKFNPPALQDAYYRARAGEAANFCALILAYCNKTVGELGDV\n"
162                     + "RETMSYLFQHANLDSCKRVLNVVCKTCGQQQTTLKGVEAVMYMGTLSYEQFKKGVQIPCTCGKQATKYLVQQ\n"
163                     + "ESPFVMMSAPPAQYELKHGTFTCASEYTGNYQCGHYKHITSKETLYCIDGALLTKSSEYKGPITDVFYKENS\n"
164                     + "YTTTIKPVTYKLDGVVCTEIDPKLDNYYKKDNSYFTEQPIDLVPNQPYPNASFDNFKFVCDNIKFADDLNQL\n"
165                     + "TGYKKPASRELKVTFFPDLNGDVVAIDYKHYTPSFKKGAKLLHKPIVWHVNNATNKATYKPNTWCIRCLWST\n"
166                     + "KPVETSNSFDVLKSEDAQGMDNLACEDLKPVSEEVVENPTIQKDVLECNVKTTEVVGDIILKPANNSLKITE\n"
167                     + "EVGHTDLMAAYVDNSSLTIKKPNELSRVLGLKTLATHGLAAVNSVPWDTIANYAKPFLNKVVSTTTNIVTRC\n"
168                     + "LNRVCTNYMPYFFTLLLQLCTFTRSTNSRIKASMPTTIAKNTVKSVGKFCLEASFNYLKSPNFSKLINIIIW\n"
169                     + "FLLLSVCLGSLIYSTAALGVLMSNLGMPSYCTGYREGYLNSTNVTIATYCTGSIPCSVCLSGLDSLDTYPSL\n"
170                     + "ETIQITISSFKWDLTAFGLVAEWFLAYILFTRFFYVLGLAAIMQLFFSYFAVHFISNSWLMWLIINLVQMAP\n"
171                     + "ISAMVRMYIFFASFYYVWKSYVHVVDGCNSSTCMMCYKRNRATRVECTTIVNGVRRSFYVYANGGKGFCKLH\n"
172                     + "NWNCVNCDTFCAGSTFISDEVARDLSLQFKRPINPTDQSSYIVDSVTVKNGSIHLYFDKAGQKTYERHSLSH\n"
173                     + "FVNLDNLRANNTKGSLPINVIVFDGKSKCEESSAKSASVYYSQLMCQPILLLDQALVSDVGDSAEVAVKMFD\n"
174                     + "AYVNTFSSTFNVPMEKLKTLVATAEAELAKNVSLDNVLSTFISAARQGFVDSDVETKDVVECLKLSHQSDIE\n"
175                     + "VTGDSCNNYMLTYNKVENMTPRDLGACIDCSARHINAQVAKSHNIALIWNVKDFMSLSEQLRKQIRSAAKKN\n"
176                     + "NLPFKLTCATTRQVVNVVTTKIALKGGKIVNNWLKQLIKVTLVFLFVAAIFYLITPVHVMSKHTDFSSEIIG\n"
177                     + "YKAIDGGVTRDIASTDTCFANKHADFDTWFSQRGGSYTNDKACPLIAAVITREVGFVVPGLPGTILRTTNGD\n"
178                     + "FLHFLPRVFSAVGNICYTPSKLIEYTDFATSACVLAAECTIFKDASGKPVPYCYDTNVLEGSVAYESLRPDT\n"
179                     + "RYVLMDGSIIQFPNTYLEGSVRVVTTFDSEYCRHGTCERSEAGVCVSTSGRWVLNNDYYRSLPGVFCGVDAV\n"
180                     + "NLLTNMFTPLIQPIGALDISASIVAGGIVAIVVTCLAYYFMRFRRAFGEYSHVVAFNTLLFLMSFTVLCLTP\n"
181                     + "VYSFLPGVYSVIYLYLTFYLTNDVSFLAHIQWMVMFTPLVPFWITIAYIICISTKHFYWFFSNYLKRRVVFN\n"
182                     + "GVSFSTFEEAALCTFLLNKEMYLKLRSDVLLPLTQYNRYLALYNKYKYFSGAMDTTSYREAACCHLAKALND\n"
183                     + "FSNSGSDVLYQPPQTSITSAVLQSGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDM\n"
184                     + "LNPNYEDLLIRKSNHNFLVQAGNVQLRVIGHSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGS\n"
185                     + "PSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFCYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQA\n"
186                     + "AGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYEPLTQDHVDILGPLSAQTGIAVLDMC\n"
187                     + "ASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQSAVKRTIKGTHHWLLLTILTSLLVLVQSTQW\n"
188                     + "SLFFFLYENAFLPFAMGIIAMSAFAMMFVKHKHAFLCLFLLPSLATVAYFNMVYMPASWVMRIMTWLDMVDT\n"
189                     + "SLSGFKLKDCVMYASAVVLLILMTARTVYDDGARRVWTLMNVLTLVYKVYYGNALDQAISMWALIISVTSNY\n"
190                     + "SGVVTTVMFLARGIVFMCVEYCPIFFITGNTLQCIMLVYCFLGYFCTCYFGLFCLLNRYFRLTLGVYDYLVS\n"
191                     + "TQEFRYMNSQGLLPPKNSIDAFKLNIKLLGVGGKPCIKVATVQSKMSDVKCTSVVLLSVLQQLRVESSSKLW\n"
192                     + "AQCVQLHNDILLAKDTTEAFEKMVSLLSVLLSMQGAVDINKLCEEMLDNRATLQAIASEFSSLPSYAAFATA\n"
193                     + "QEAYEQAVANGDSEVVLKKLKKSLNVAKSEFDRDAAMQRKLEKMADQAMTQMYKQARSEDKRAKVTSAMQTM\n"
194                     + "LFTMLRKLDNDALNNIINNARDGCVPLNIIPLTTAAKLMVVIPDYNTYKNTCDGTTFTYASALWEIQQVVDA\n"
195                     + "DSKIVQLSEISMDNSPNLAWPLIVTALRANSAVKLQNNELSPVALRQMSCAAGTTQTACTDDNALAYYNTTK\n"
196                     + "GGRFVLALLSDLQDLKWARFPKSDGTGTIYTELEPPCRFVTDTPKGPKVKYLYFIKGLNNLNRGMVLGSLAA\n"
197                     + "TVRLQAGNATEVPANSTVLSFCAFAVDAAKAYKDYLASGGQPITNCVKMLCTHTGTGQAITVTPEANMDQES\n"
198                     + "FGGASCCLYCRCHIDHPNPKGFCDLKGKYVQIPTTCANDPVGFTLKNTVCTVCGMWKGYGCSCDQLREPMLQ\n"
199                     + "SADAQSFLNRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKTNCCRFQEKDEDDNLIDSYFVV\n"
200                     + "KRHTFSNYQHEETIYNLLKDCPAVAKHDFFKFRIDGDMVPHISRQRLTKYTMADLVYALRHFDEGNCDTLKE\n"
201                     + "ILVTYNCCDDDYFNKKDWYDFVENPDILRVYANLGERVRQALLKTVQFCDAMRNAGIVGVLTLDNQDLNGNW\n"
202                     + "YDFGDFIQTTPGSGVPVVDSYYSLLMPILTLTRALTAESHVDTDLTKPYIKWDLLKYDFTEERLKLFDRYFK\n"
203                     + "YWDQTYHPNCVNCLDDRCILHCANFNVLFSTVFPPTSFGPLVRKIFVDGVPFVVSTGYHFRELGVVHNQDVN\n"
204                     + "LHSSRLSFKELLVYAADPAMHAASGNLLLDKRTTCFSVAALTNNVAFQTVKPGNFNKDFYDFAVSKGFFKEG\n"
205                     + "SSVELKHFFFAQDGNAAISDYDYYRYNLPTMCDIRQLLFVVEVVDKYFDCYDGGCINANQVIVNNLDKSAGF\n"
206                     + "PFNKWGKARLYYDSMSYEDQDALFAYTKRNVIPTITQMNLKYAISAKNRARTVAGVSICSTMTNRQFHQKLL\n"
207                     + "KSIAATRGATVVIGTSKFYGGWHNMLKTVYSDVENPHLMGWDYPKCDRAMPNMLRIMASLVLARKHTTCCSL\n"
208                     + "SHRFYRLANECAQVLSEMVMCGGSLYVKPGGTSSGDATTAYANSVFNICQAVTANVNALLSTDGNKIADKYV\n"
209                     + "RNLQHRLYECLYRNRDVDTDFVNEFYAYLRKHFSMMILSDDAVVCFNSTYASQGLVASIKNFKSVLYYQNNV\n"
210                     + "FMSEAKCWTETDLTKGPHEFCSQHTMLVKQGDDYVYLPYPDPSRILGAGCFVDDIVKTDGTLMIERFVSLAI\n"
211                     + "DAYPLTKHPNQEYADVFHLYLQYIRKLHDELTGHMLDMYSVMLTNDNTSRYWEPEFYEAMYTPHTVLQAVGA\n"
212                     + "CVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHK\n"
213                     + "PPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLS\n"
214                     + "YGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLN\n"
215                     + "VGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFA\n"
216                     + "IGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALP\n"
217                     + "ETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDM\n"
218                     + "FLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRK\n"
219                     + "AVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRD\n"
220                     + "LYDKLQFTSLEIPRRNVATLQAENVTGLFKDCSKVITGLHPTQAPTHLSVDTKFKTEGLCVDIPGIPKDMTY\n"
221                     + "RRLISMMGFKMNYQVNGYPNMFITREEAIRHVRAWIGFDVEGCHATREAVGTNLPLQLGFSTGVNLVAVPTG\n"
222                     + "YVDTPNNTDFSRVSAKPPPGDQFKHLIPLMYKGLPWNVVRIKIVQMLSDTLKNLSDRVVFVLWAHGFELTSM\n"
223                     + "KYFVKIGPERTCCLCDRRATCFSTASDTYACWHHSIGFDYVYNPFMIDVQQWGFTGNLQSNHDLYCQVHGNA\n"
224                     + "HVASCDAIMTRCLAVHECFVKRVDWTIEYPIIGDELKINAACRKVQHMVVKAALLADKFPVLHDIGNPKAIK\n"
225                     + "CVPQADVEWKFYDAQPCSDKAYKIEELFYSYATHSDKFTDGVCLFWNCNVDRYPANSIVCRFDTRVLSNLNL\n"
226                     + "PGCDGGSLYVNKHAFHTPAFDKSAFVNLKQLPFFYYSDSPCESHGKQVVSDIDYVPLKSATCITRCNLGGAV\n"
227                     + "CRHHANEYRLYLDAYNMMISAGFSLWVYKQFDTYNLWNTFTRLQSLENVAFNVVNKGHFDGQQGEVPVSIIN\n"
228                     + "NTVYTKVDGVDVELFENKTTLPVNVAFELWAKRNIKPVPEVKILNNLGVDIAANTVIWDYKRDAPAHISTIG\n"
229                     + "VCSMTDIAKKPTETICAPLTVFFDGRVDGQVDLFRNARNGVLITEGSVKGLQPSVGPKQASLNGVTLIGEAV\n"
230                     + "KTQFNYYKKVDGVVQQLPETYFTQSRNLQEFKPRSQMEIDFLELAMDEFIERYKLEGYAFEHIVYGDFSHSQ\n"
231                     + "LGGLHLLIGLAKRFKESPFELEDFIPMDSTVKNYFITDAQTGSSKCVCSVIDLLLDDFVEIIKSQDLSVVSK\n"
232                     + "VVKVTIDYTEISFMLWCKDGHVETFYPKLQSSQAWQPGVAMPNLYKMQRMLLEKCDLQNYGDSATLPKGIMM\n"
233                     + "NVAKYTQLCQYLNTLTLAVPYNMRVIHFGAGSDKGVAPGTAVLRQWLPTGTLLVDSDLNDFVSDADSTLIGD\n"
234                     + "CATVHTANKWDLIISDMYDPKTKNVTKENDSKEGFFTYICGFIQQKLALGGSVAIKITEHSWNADLYKLMGH\n"
235                     + "FAWWTAFVTNVNASSSEAFLIGCNYLGKPREQIDGYVMHANYIFWRNTNPIQLSSYSLFDMSKFPLKLRGTA\n"
236                     + "VMSLKEGQINDMILSLLSKGRLIIRENNRVVISSDVLVNN");
237     upSeq_r1ab.setDescription("sars2 r1ab polyprotein");
238     upSeq_r1ab
239             .addDBRef(new DBRefEntry("UNIPROT", "0", "P0DTD1", null, true));
240     upSeq_r1ab.createDatasetSequence();
241     upSeq_fer1_maize = new Sequence("FER1_MAIZE",
242             "MATVLGSPRAPAFFFSSSSLRAAPAPTAVALPAAKVGIMGRSASSRRRLRAQATYNVKLITPEGE"
243                     + "VELQVPDDVYILDQAEEDGIDLPYSCRAGSCSSCAGKVVSGSVDQSDQSYLDDGQIADGWVLTCHAYPTSDV"
244                     + "VIETHKEEELTGA");
245     upSeq_fer1_maize.setDescription("Feredoxin 1 Maize");
246     upSeq_fer1_maize
247             .addDBRef(new DBRefEntry("UNIPROT", "0", "P27787", null, true));
248     upSeq_fer1_maize.createDatasetSequence();
249     
250     upSeq_epas1_human = new Sequence("EPAS1_HUMAN","MTADKEKKRSSSERRKEKSRDAARCRRSKETEVFYELAHELPLPHSVSSHLDKASIMRLAISFLRTHKLLSS\n"
251             + "VCSENESEAEADQQMDNLYLKALEGFIAVVTQDGDMIFLSENISKFMGLTQVELTGHSIFDFTHPCDHEEIR\n"
252             + "ENLSLKNGSGFGKKSKDMSTERDFFMRMKCTVTNRGRTVNLKSATWKVLHCTGQVKVYNNCPPHNSLCGYKE\n"
253             + "PLLSCLIIMCEPIQHPSHMDIPLDSKTFLSRHSMDMKFTYCDDRITELIGYHPEELLGRSAYEFYHALDSEN\n"
254             + "MTKSHQNLCTKGQVVSGQYRMLAKHGGYVWLETQGTVIYNPRNLQPQCIMCVNYVLSEIEKNDVVFSMDQTE\n"
255             + "SLFKPHLMAMNSIFDSSGKGAVSEKSNFLFTKLKEEPEELAQLAPTPGDAIISLDFGNQNFEESSAYGKAIL\n"
256             + "PPSQPWATELRSHSTQSEAGSLPAFTVPQAAAPGSTTPSATSSSSSCSTPNSPEDYYTSLDNDLKIEVIEKL\n"
257             + "FAMDTEAKDQCSTQTDFNELDLETLAPYIPMDGEDFQLSPICPEERLLAENPQSTPQHCFSAMTNIFQPLAP\n"
258             + "VAPHSPFLLDKFQQQLESKKTEPEHRPMSSIFFDAGSKASLPPCCGQASTPLSSMGGRSNTQWPPDPPLHFG\n"
259             + "PTKWAVGDQRTEFLGAAPLGPPVSPPHVSTFKTRSAKGFGARGPDVLSPAMVALSNKLKLKRQLEYEEQAFQ\n"
260             + "DLSGGDPPGGSTSHLMWKRMKNLRGGSCPLMPDKPLSANVPNDKFTQNPMRGLGHPLRHLPLPQPPSAISPG\n"
261             + "ENSKSRFPPQCYATQYQDYSLSSAHKVSGMASRLLGPSFESYLLPELTRYDCEVNVPVLGSSTLLQGGDLLR\n"
262             + "ALDQAT");
263     upSeq_epas1_human.setDescription("Endothelial PAS domain-containing protein 1");
264     upSeq_epas1_human
265     .addDBRef(new DBRefEntry("UNIPROT", "0", "Q99814", null, true));
266     upSeq_epas1_human.createDatasetSequence();
267   }
268
269   @AfterMethod(alwaysRun = true)
270   public void tearDown() throws Exception
271   {
272     seq = null;
273     upSeq = null;
274     upSeq_r1ab = null;
275     upSeq_fer1_maize = null;
276     upSeq_epas1_human=null;
277   }
278
279   @SuppressWarnings("deprecation")
280   @Test(groups = { "Functional" })
281   public void buildPDBQueryTest()
282   {
283     System.out.println("seq >>>> " + seq);
284
285     StructureChooserQuerySource scquery = StructureChooserQuerySource
286             .getQuerySourceFor(new SequenceI[]
287             { seq });
288     AssertJUnit
289             .assertTrue(scquery instanceof PDBStructureChooserQuerySource);
290     String query = scquery.buildQuery(seq);
291     AssertJUnit.assertEquals("pdb_id:1tim", query);
292     seq.getAllPDBEntries().clear();
293     query = scquery.buildQuery(seq);
294     AssertJUnit.assertEquals(
295             "text:XYZ_1 OR text:XYZ_2 OR text:XYZ_3 OR text:XYZ_4 OR text:4kqy",
296             query);
297     seq.setDBRefs(null);
298     query = scquery.buildQuery(seq);
299     System.out.println(query);
300     AssertJUnit.assertEquals("text:4kqy", query);
301
302     DBRefEntry uniprotDBRef = new DBRefEntry();
303     uniprotDBRef.setAccessionId("P12345");
304     uniprotDBRef.setSource(DBRefSource.UNIPROT);
305     seq.addDBRef(uniprotDBRef);
306
307     DBRefEntry pdbDBRef = new DBRefEntry();
308     pdbDBRef.setAccessionId("1XYZ");
309     pdbDBRef.setSource(DBRefSource.PDB);
310     seq.addDBRef(pdbDBRef);
311
312     for (int x = 1; x < 5; x++)
313     {
314       DBRefEntry dbRef = new DBRefEntry();
315       dbRef.setAccessionId("XYZ_" + x);
316       seq.addDBRef(dbRef);
317     }
318     System.out.println("");
319     System.out.println(seq.getDBRefs());
320     System.out.println(query);
321     query = scquery.buildQuery(seq);
322     AssertJUnit.assertEquals(
323             "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
324             query);
325   }
326
327   @SuppressWarnings("deprecation")
328   @Test(groups = { "Functional" })
329   public void buildThreeDBQueryTest()
330   {
331     System.out.println("seq >>>> " + upSeq);
332     TDBeaconsFTSRestClientTest.setMock();
333     PDBFTSRestClientTest.setMock();
334     StructureChooserQuerySource scquery = StructureChooserQuerySource
335             .getQuerySourceFor(new SequenceI[]
336             { upSeq });
337     // gets the lightweight proxy rather than the
338     // ThreeDBStructureChooserQuerySource
339     AssertJUnit.assertTrue(
340             scquery instanceof ThreeDBStructureChooserQuerySource);
341     String query = scquery.buildQuery(upSeq);
342     AssertJUnit.assertEquals("P38398", query);
343
344     // query shouldn't change regardless of additional entries
345     // because 3DBeacons requires canonical entries.
346     upSeq.getAllPDBEntries().clear();
347     query = scquery.buildQuery(upSeq);
348     AssertJUnit.assertEquals("P38398", query);
349     upSeq.setDBRefs(null);
350     query = scquery.buildQuery(upSeq);
351     /*
352      * legacy projects/datasets will not have canonical flags set for uniprot dbrefs
353      * graceful behaviour would be to
354      *  - pick one ? not possible
355      *  - iterate through all until a 200 is obtained ?
356      *  ---> ideal but could be costly
357      *  ---> better to do a direct retrieval from uniprot to work out which is the canonical identifier..
358      *  ----> need a test to check that accessions can be promoted to canonical!
359      */
360     // FIXME - need to be able to use ID to query here ?
361     AssertJUnit.assertEquals(null, query);
362
363     // TODO:
364     /**
365      * set of sequences: - no protein -> TDB not applicable, query PDBe only
366      * (consider RNA or DNA - specific query adapter ?) - protein but no uniprot
367      * -> first consider trying to get uniprot refs (need a mark to say none are
368      * available) - protein and uniprot - no canonicals -> resolve to uniprot
369      * automatically to get canonicals - query uniprot against 3DBeacons -->
370      * decorate experimental structures with additional data from PDBe - query
371      * remaining against PDBe Ranking - 3D Beacons --> in memory ranking - no
372      * need to query twice Rank by - experimental > AlphaFold -> Model - start >
373      * end -> filters for -> experimental only -> experimental plus best models
374      * for other regions -> "best cover" -> need to be able to select correct
375      * reference (the longest one that covers all) for superposition
376      */
377     //
378     // DBRefEntry uniprotDBRef = new DBRefEntry();
379     // uniprotDBRef.setAccessionId("P12345");
380     // uniprotDBRef.setSource(DBRefSource.UNIPROT);
381     // upSeq.addDBRef(uniprotDBRef);
382     //
383     // DBRefEntry pdbDBRef = new DBRefEntry();
384     // pdbDBRef.setAccessionId("1XYZ");
385     // pdbDBRef.setSource(DBRefSource.PDB);
386     // upSeq.addDBRef(pdbDBRef);
387     //
388     // for (int x = 1; x < 5; x++)
389     // {
390     // DBRefEntry dbRef = new DBRefEntry();
391     // dbRef.setAccessionId("XYZ_" + x);
392     // seq.addDBRef(dbRef);
393     // }
394     // System.out.println("");
395     // System.out.println(seq.getDBRefs());
396     // System.out.println(query);
397     // query = scquery.buildQuery(seq);
398     // assertEquals(
399     // "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
400     // query);
401   }
402
403   @Test(groups = { "Functional" }, dataProvider = "testUpSeqs")
404   public void cascadingThreeDBandPDBQuerys(SequenceI testUpSeq)
405   {
406     TDBeaconsFTSRestClientTest.setMock();
407     PDBFTSRestClientTest.setMock();
408     ThreeDBStructureChooserQuerySource tdbquery = new ThreeDBStructureChooserQuerySource();
409     PDBStructureChooserQuerySource pdbquery = new PDBStructureChooserQuerySource();
410
411     FTSRestResponse upResponse = null;
412     List<FTSRestResponse> pdbResponse = null;
413     // TODO test available options
414     // Best coverage
415     // Best Alphafold Model
416     // Best model (by confidence score)
417     // Will also need to develop a more sophisticated filtering system
418     List<FilterOption> opts = tdbquery
419             .getAvailableFilterOptions(StructureChooser.VIEWS_FILTER);
420     FilterOption opt_singlebest = opts.get(0);
421     FilterOption opt_manybest = opts.get(1);
422     assertEquals(opt_singlebest.getValue(),
423             ThreeDBStructureChooserQuerySource.FILTER_FIRST_BEST_COVERAGE);
424     assertEquals(opt_manybest.getValue(),
425             ThreeDBStructureChooserQuerySource.FILTER_TDBEACONS_COVERAGE);
426
427     try
428     {
429       upResponse = tdbquery.fetchStructuresMetaData(testUpSeq,
430               tdbquery.getDocFieldPrefs().getStructureSummaryFields(),
431               opt_singlebest, false);
432       tdbquery.updateAvailableFilterOptions(StructureChooser.VIEWS_FILTER,
433               opts, upResponse.getSearchSummary());
434       // test ranking without additional PDBe data
435       FTSRestResponse firstRanked = tdbquery.selectFirstRankedQuery(
436               testUpSeq, upResponse.getSearchSummary(),
437               tdbquery.getDocFieldPrefs().getStructureSummaryFields(),
438               opt_singlebest.getValue(), false);
439       assertEquals(firstRanked.getNumberOfItemsFound(), 1);
440       // many best response
441       upResponse = tdbquery.fetchStructuresMetaData(testUpSeq,
442               tdbquery.getDocFieldPrefs().getStructureSummaryFields(),
443               opt_manybest, false);
444       assertTrue(firstRanked.getSearchSummary().size() < upResponse
445               .getSearchSummary().size());
446       // NB Could have race condition here
447       List<String> pdb_Queries = tdbquery.buildPDBFTSQueryFor(upResponse);
448       assertTrue(pdb_Queries.size() > 0);
449       for (String pdb_Query : pdb_Queries)
450       {
451         assertTrue(pdb_Query.trim().length() > 0);
452       }
453
454       pdbResponse = tdbquery.fetchStructuresMetaDataFor(pdbquery,
455               upResponse);
456       // check all queries resulted in a response
457       assertEquals(pdbResponse.size(), pdb_Queries.size());
458       for (FTSRestResponse pdbr : pdbResponse)
459       {
460         assertTrue(pdbr.getNumberOfItemsFound() > 0);
461       }
462
463       // and finally that join works
464       FTSRestResponse joinedResp = tdbquery.joinResponses(upResponse,
465               pdbResponse);
466       assertEquals(upResponse.getNumberOfItemsFound(),
467               joinedResp.getNumberOfItemsFound());
468       
469       // Special data test case
470       if (testUpSeq.getDisplayId(true)
471               .equals(upSeq_epas1_human.getDisplayId(true)))
472       {
473
474         TDBResultAnalyser tDBResultAnalyz = new TDBResultAnalyser(testUpSeq,
475                 joinedResp.getSearchSummary(), tdbquery.lastTdbRequest,
476                 ThreeDBStructureChooserQuerySource.FILTER_FIRST_BEST_COVERAGE,
477                 tdbquery.remove_prefix(
478                         ThreeDBStructureChooserQuerySource.FILTER_FIRST_BEST_COVERAGE));
479         List<FTSData> ordered = tDBResultAnalyz.getFilteredResponse();
480         List<FTSData> selected = tDBResultAnalyz.selectStructures(ordered);
481         assertEquals(((TDB_FTSData) selected.get(0)).getProvider(),
482                 "AlphaFold DB");
483         // to be sufficient, should also
484         // test that adjacent ordered structure in ordered is levyLab
485         // TDB_FTSData first = (TDB_FTSData) ordered.get(0),
486         // second = (TDB_FTSData) ordered.get(1),
487         // third = (TDB_FTSData) ordered.get(2);
488         // Assert.assertEquals("pLDDT", first.getConfidenceScoreType());
489         // Assert.assertTrue(first.getConfidenceScoreType()
490         // .equals(second.getConfidenceScoreType())); // pLDDT first and
491         // // second
492       }
493
494     } catch (
495
496     Exception x)
497     {
498       x.printStackTrace();
499       Assert.fail("Unexpected Exception");
500     }
501
502     StructureChooserQuerySource scquery = StructureChooserQuerySource
503             .getQuerySourceFor(new SequenceI[]
504             { testUpSeq });
505
506   }
507
508   @DataProvider(name = "testUpSeqs")
509   public Object[][] testUpSeqs() throws Exception
510   {
511     setUp();
512     return new Object[][] { { upSeq }, { upSeq_insulin }, { upSeq_r1ab },
513         { upSeq_fer1_maize },{upSeq_epas1_human} };
514   }
515
516   @Test(groups = { "Functional" })
517   public void sanitizeSeqNameTest()
518   {
519     String name = "ab_cdEF|fwxyz012349";
520     AssertJUnit.assertEquals(name,
521             PDBStructureChooserQuerySource.sanitizeSeqName(name));
522
523     // remove a [nn] substring
524     name = "abcde12[345]fg";
525     AssertJUnit.assertEquals("abcde12fg",
526             PDBStructureChooserQuerySource.sanitizeSeqName(name));
527
528     // remove characters other than a-zA-Z0-9 | or _
529     name = "ab[cd],.\t£$*!- \\\"@:e";
530     AssertJUnit.assertEquals("abcde",
531             PDBStructureChooserQuerySource.sanitizeSeqName(name));
532
533     name = "abcde12[345a]fg";
534     AssertJUnit.assertEquals("abcde12345afg",
535             PDBStructureChooserQuerySource.sanitizeSeqName(name));
536   }  
537 }