/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see <http://www.gnu.org/licenses/>.
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.gui.structurechooser;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
import java.util.Collection;
import java.util.List;
import java.util.Vector;
import org.junit.Assert;
import org.testng.AssertJUnit;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.PDBEntry;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
import jalview.fts.api.FTSData;
import jalview.fts.core.FTSRestRequest;
import jalview.fts.core.FTSRestResponse;
import jalview.fts.service.pdb.PDBFTSRestClient;
import jalview.fts.service.pdb.PDBFTSRestClientTest;
import jalview.fts.threedbeacons.TDBeaconsFTSRestClientTest;
import jalview.gui.JvOptionPane;
import jalview.gui.StructureChooser;
import jalview.jbgui.FilterOption;
public class StructureChooserQuerySourceTest
{
/**
 * One-off class set up: switches JvOptionPane out of interactive mode and
 * registers CANCEL_OPTION as its mock response.
 * 
 * NOTE(review): presumably this stops option dialogs from blocking the test
 * run - confirm against JvOptionPane.
 */
@BeforeClass(alwaysRun = true)
public void setUpJvOptionPane()
{
JvOptionPane.setInteractiveMode(false);
JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
}
// Sequence fixtures shared by the tests in this class; (re)assigned by setUp()
Sequence seq, upSeq, upSeq_insulin;
/**
 * Rebuilds the sequence fixtures before every test method (same set up as
 * for the structurechooser test).
 * <ul>
 * <li>{@code seq} - "PDB|4kqy|4KQY|A", with four db refs XYZ_1..XYZ_4 and a
 * single PDB entry "1tim"</li>
 * <li>{@code upSeq} - "P38398", with a UNIPROT db ref for P38398, used with
 * the 3D-Beacons mocks</li>
 * <li>{@code upSeq_insulin} - "INS_HUMAN", with a UNIPROT db ref for
 * P01308</li>
 * </ul>
 * 
 * @throws Exception
 *           declared for the benefit of callers; nothing in the visible body
 *           throws a checked exception
 */
@BeforeMethod(alwaysRun = true)
public void setUp() throws Exception
{
  seq = new Sequence("PDB|4kqy|4KQY|A", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1,
          26);
  seq.createDatasetSequence();
  for (int x = 1; x < 5; x++)
  {
    DBRefEntry dbRef = new DBRefEntry();
    dbRef.setAccessionId("XYZ_" + x);
    seq.addDBRef(dbRef);
  }

  PDBEntry pdbEntry = new PDBEntry();
  pdbEntry.setId("1tim");
  // typed (not raw) so the compiler checks what is handed to setPDBId
  Vector<PDBEntry> pdbIds = new Vector<>();
  pdbIds.add(pdbEntry);
  seq.setPDBId(pdbIds);

  // Uniprot sequence for 3D-Beacons mocks
  // NOTE(review): the literal below embeds "\n" characters in the sequence
  // string; left exactly as found - confirm this is intended
  upSeq = new Sequence("P38398",
          "MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQCPLCKNDITKRS\n"
                  + "LQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEVSIIQSMGYRNRAKRLLQSEPEN\n"
                  + "PSLQETSLSVQLSNLGTVRTLRTKQRIQPQKTSVYIELGSDSSEDTVNKATYCSVGDQELLQITPQGTRDEI\n"
                  + "SLDSAKKAACEFSETDVTNTEHHQPSNNDLNTTEKRAAERHPEKYQGSSVSNLHVEPCGTNTHASSLQHENS\n"
                  + "SLLLTKDRMNVEKAEFCNKSKQPGLARSQHNRWAGSKETCNDRRTPSTEKKVDLNADPLCERKEWNKQKLPC\n"
                  + "SENPRDTEDVPWITLNSSIQKVNEWFSRSDELLGSDDSHDGESESNAKVADVLDVLNEVDEYSGSSEKIDLL\n"
                  + "ASDPHEALICKSERVHSKSVESNIEDKIFGKTYRKKASLPNLSHVTENLIIGAFVTEPQIIQERPLTNKLKR\n"
                  + "KRRPTSGLHPEDFIKKADLAVQKTPEMINQGTNQTEQNGQVMNITNSGHENKTKGDSIQNEKNPNPIESLEK\n"
                  + "ESAFKTKAEPISSSISNMELELNIHNSKAPKKNRLRRKSSTRHIHALELVVSRNLSPPNCTELQIDSCSSSE\n"
                  + "EIKKKKYNQMPVRHSRNLQLMEGKEPATGAKKSNKPNEQTSKRHDSDTFPELKLTNAPGSFTKCSNTSELKE\n"
                  + "FVNPSLPREEKEEKLETVKVSNNAEDPKDLMLSGERVLQTERSVESSSISLVPGTDYGTQESISLLEVSTLG\n"
                  + "KAKTEPNKCVSQCAAFENPKGLIHGCSKDNRNDTEGFKYPLGHEVNHSRETSIEMEESELDAQYLQNTFKVS\n"
                  + "KRQSFAPFSNPGNAEEECATFSAHSGSLKKQSPKVTFECEQKEENQGKNESNIKPVQTVNITAGFPVVGQKD\n"
                  + "KPVDNAKCSIKGGSRFCLSSQFRGNETGLITPNKHGLLQNPYRIPPLFPIKSFVKTKCKKNLLEENFEEHSM\n"
                  + "SPEREMGNENIPSTVSTISRNNIRENVFKEASSSNINEVGSSTNEVGSSINEIGSSDENIQAELGRNRGPKL\n"
                  + "NAMLRLGVLQPEVYKQSLPGSNCKHPEIKKQEYEEVVQTVNTDFSPYLISDNLEQPMGSSHASQVCSETPDD\n"
                  + "LLDDGEIKEDTSFAENDIKESSAVFSKSVQKGELSRSPSPFTHTHLAQGYRRGAKKLESSEENLSSEDEELP\n"
                  + "CFQHLLFGKVNNIPSQSTRHSTVATECLSKNTEENLLSLKNSLNDCSNQVILAKASQEHHLSEETKCSASLF\n"
                  + "SSQCSELEDLTANTNTQDPFLIGSSKQMRHQSESQGVGLSDKELVSDDEERGTGLEENNQEEQSMDSNLGEA\n"
                  + "ASGCESETSVSEDCSGLSSQSDILTTQQRDTMQHNLIKLQQEMAELEAVLEQHGSQPSNSYPSIISDSSALE\n"
                  + "DLRNPEQSTSEKAVLTSQKSSEYPISQNPEGLSADKFEVSADSSTSKNKEPGVERSSPSKCPSLDDRWYMHS\n"
                  + "CSGSLQNRNYPSQEELIKVVDVEEQQLEESGPHDLTETSYLPRQDLEGTPYLESGISLFSDDPESDPSEDRA\n"
                  + "PESARVGNIPSSTSALKVPQLKVAESAQSPAAAHTTDTAGYNAMEESVSREKPELTASTERVNKRMSMVVSG\n"
                  + "LTPEEFMLVYKFARKHHITLTNLITEETTHVVMKTDAEFVCERTLKYFLGIAGGKWVVSYFWVTQSIKERKM\n"
                  + "LNEHDFEVRGDVVNGRNHQGPKRARESQDRKIFRGLEICCYGPFTNMPTDQLEWMVQLCGASVVKELSSFTL\n"
                  + "GTGVHPIVVVQPDAWTEDNGFHAIGQMCEAPVVTREWVLDSVALYQCQELDTYLIPQIPHSHY\n"
                  + "",
          1, 1863);
  upSeq.createDatasetSequence();
  upSeq.setDescription("Breast cancer type 1 susceptibility protein");
  upSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "P38398", null, true));

  upSeq_insulin = new Sequence("INS_HUMAN",
          "MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGP"
                  + "GAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN");
  upSeq_insulin.createDatasetSequence();
  upSeq_insulin.setDescription("Insulin");
  upSeq_insulin
          .addDBRef(new DBRefEntry("UNIPROT", "0", "P01308", null, true));
}
/**
 * Drops the references to all sequence fixtures after each test method, so
 * no state is carried over to the next test.
 * 
 * @throws Exception
 *           declared for the benefit of callers; nothing in the body throws
 */
@AfterMethod(alwaysRun = true)
public void tearDown() throws Exception
{
  seq = null;
  upSeq = null;
  // previously left set, unlike the other two fixtures assigned by setUp()
  upSeq_insulin = null;
}
/**
 * Checks the query string built for {@code seq} by the query source that
 * {@link StructureChooserQuerySource#getQuerySourceFor} selects for it, as
 * the sequence's PDB entries and database references are changed:
 * <ol>
 * <li>with a PDB entry: the PDB id only</li>
 * <li>PDB entries cleared: the db ref accessions plus the id taken from the
 * sequence name</li>
 * <li>db refs removed: the id taken from the sequence name only</li>
 * <li>UNIPROT and PDB db refs added alongside unsourced ones: only the
 * UNIPROT and PDB accessions</li>
 * </ol>
 */
@SuppressWarnings("deprecation")
@Test(groups = { "Functional" })
public void buildPDBQueryTest()
{
  System.out.println("seq >>>> " + seq);

  StructureChooserQuerySource scquery = StructureChooserQuerySource
          .getQuerySourceFor(new SequenceI[]
          { seq });
  AssertJUnit
          .assertTrue(scquery instanceof PDBStructureChooserQuerySource);

  // 1. the PDB entry attached in setUp()
  String query = scquery.buildQuery(seq);
  AssertJUnit.assertEquals("pdb_id:1tim", query);

  // 2. no PDB entries left
  seq.getAllPDBEntries().clear();
  query = scquery.buildQuery(seq);
  AssertJUnit.assertEquals(
          "text:XYZ_1 OR text:XYZ_2 OR text:XYZ_3 OR text:XYZ_4 OR text:4kqy",
          query);

  // 3. no db refs either
  seq.setDBRefs(null);
  query = scquery.buildQuery(seq);
  System.out.println(query);
  AssertJUnit.assertEquals("text:4kqy", query);

  // 4. sourced (UNIPROT, PDB) refs followed by refs with no source set
  DBRefEntry uniprotDBRef = new DBRefEntry();
  uniprotDBRef.setAccessionId("P12345");
  uniprotDBRef.setSource(DBRefSource.UNIPROT);
  seq.addDBRef(uniprotDBRef);

  DBRefEntry pdbDBRef = new DBRefEntry();
  pdbDBRef.setAccessionId("1XYZ");
  pdbDBRef.setSource(DBRefSource.PDB);
  seq.addDBRef(pdbDBRef);

  for (int x = 1; x < 5; x++)
  {
    DBRefEntry dbRef = new DBRefEntry();
    dbRef.setAccessionId("XYZ_" + x);
    seq.addDBRef(dbRef);
  }
  System.out.println("");
  System.out.println(seq.getDBRefs());
  // rebuild before printing, so the log shows the query that is asserted on
  // (this used to print the stale value from step 3)
  query = scquery.buildQuery(seq);
  System.out.println(query);
  AssertJUnit.assertEquals(
          "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
          query);
}
/**
 * Checks the query built for the Uniprot fixture {@code upSeq} when the
 * 3D-Beacons and PDBe clients are mocked: the accession P38398 while the
 * sequence still has its db refs, and null once they are removed.
 */
@SuppressWarnings("deprecation")
@Test(groups = { "Functional" })
public void buildThreeDBQueryTest()
{
  System.out.println("seq >>>> " + upSeq);
  TDBeaconsFTSRestClientTest.setMock();
  PDBFTSRestClientTest.setMock();

  final SequenceI[] selection = { upSeq };
  final StructureChooserQuerySource source = StructureChooserQuerySource
          .getQuerySourceFor(selection);
  // gets the lightweight proxy rather than the
  // ThreeDBStructureChooserQuerySource
  AssertJUnit.assertTrue(
          source instanceof ThreeDBStructureChooserQuerySource);

  AssertJUnit.assertEquals("P38398", source.buildQuery(upSeq));

  // query shouldn't change regardless of additional entries
  // because 3DBeacons requires canonical entries.
  upSeq.getAllPDBEntries().clear();
  AssertJUnit.assertEquals("P38398", source.buildQuery(upSeq));

  upSeq.setDBRefs(null);
  final String withoutDbRefs = source.buildQuery(upSeq);
  /*
   * legacy projects/datasets will not have canonical flags set for uniprot dbrefs
   * graceful behaviour would be to
   * - pick one ? not possible
   * - iterate through all until a 200 is obtained ?
   * ---> ideal but could be costly
   * ---> better to do a direct retrieval from uniprot to work out which is the canonical identifier..
   * ----> need a test to check that accessions can be promoted to canonical!
   */
  // FIXME - need to be able to use ID to query here ?
  AssertJUnit.assertEquals(null, withoutDbRefs);

  // TODO:
  /*
   * set of sequences:
   * - no protein -> TDB not applicable, query PDBe only
   *   (consider RNA or DNA - specific query adapter ?)
   * - protein but no uniprot -> first consider trying to get uniprot refs
   *   (need a mark to say none are available)
   * - protein and uniprot
   *   - no canonicals -> resolve to uniprot automatically to get canonicals
   *   - query uniprot against 3DBeacons
   *     --> decorate experimental structures with additional data from PDBe
   *   - query remaining against PDBe
   * Ranking
   * - 3D Beacons --> in memory ranking - no need to query twice
   * Rank by
   * - experimental > AlphaFold -> Model
   * - start > end
   * -> filters for
   *    -> experimental only
   *    -> experimental plus best models for other regions
   *    -> "best cover"
   * -> need to be able to select correct reference (the longest one that
   *    covers all) for superposition
   */
  //
  // DBRefEntry uniprotDBRef = new DBRefEntry();
  // uniprotDBRef.setAccessionId("P12345");
  // uniprotDBRef.setSource(DBRefSource.UNIPROT);
  // upSeq.addDBRef(uniprotDBRef);
  //
  // DBRefEntry pdbDBRef = new DBRefEntry();
  // pdbDBRef.setAccessionId("1XYZ");
  // pdbDBRef.setSource(DBRefSource.PDB);
  // upSeq.addDBRef(pdbDBRef);
  //
  // for (int x = 1; x < 5; x++)
  // {
  // DBRefEntry dbRef = new DBRefEntry();
  // dbRef.setAccessionId("XYZ_" + x);
  // seq.addDBRef(dbRef);
  // }
  // System.out.println("");
  // System.out.println(seq.getDBRefs());
  // System.out.println(query);
  // query = scquery.buildQuery(seq);
  // assertEquals(
  // "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
  // query);
}
/**
 * Exercises the cascade from a 3D-Beacons query to a PDBe query for one
 * Uniprot sequence, with both clients mocked:
 * <ol>
 * <li>the first two VIEWS_FILTER options are the "first best coverage" and
 * "3D-Beacons coverage" filters</li>
 * <li>selecting the first ranked structure yields exactly one item</li>
 * <li>the "many best" response holds more summaries than the first-ranked
 * one</li>
 * <li>a non-empty PDBe query can be built from the 3D-Beacons response, it
 * finds at least one item, and joining the two responses keeps the
 * 3D-Beacons item count</li>
 * </ol>
 * 
 * @param testUpSeq
 *          sequence supplied by the "testUpSeqs" data provider
 */
@Test(groups = { "Functional" }, dataProvider = "testUpSeqs")
public void cascadingThreeDBandPDBQuerys(SequenceI testUpSeq)
{
  TDBeaconsFTSRestClientTest.setMock();
  PDBFTSRestClientTest.setMock();
  ThreeDBStructureChooserQuerySource tdbquery = new ThreeDBStructureChooserQuerySource();
  PDBStructureChooserQuerySource pdbquery = new PDBStructureChooserQuerySource();

  // TODO test available options
  // Best coverage
  // Best Alphafold Model
  // Best model (by confidence score)
  // Will also need to develop a more sophisticated filtering system

  // typed list: with a raw List, get(int) yields Object and the two
  // FilterOption assignments below do not compile
  List<FilterOption> opts = tdbquery
          .getAvailableFilterOptions(StructureChooser.VIEWS_FILTER);
  FilterOption opt_singlebest = opts.get(0);
  FilterOption opt_manybest = opts.get(1);
  assertEquals(opt_singlebest.getValue(),
          ThreeDBStructureChooserQuerySource.FILTER_FIRST_BEST_COVERAGE);
  assertEquals(opt_manybest.getValue(),
          ThreeDBStructureChooserQuerySource.FILTER_TDBEACONS_COVERAGE);

  try
  {
    FTSRestResponse upResponse = tdbquery.fetchStructuresMetaData(
            testUpSeq,
            tdbquery.getDocFieldPrefs().getStructureSummaryFields(),
            opt_singlebest, false);
    tdbquery.updateAvailableFilterOptions(StructureChooser.VIEWS_FILTER,
            opts, upResponse.getSearchSummary());

    // test ranking without additional PDBe data
    FTSRestResponse firstRanked = tdbquery.selectFirstRankedQuery(
            testUpSeq, upResponse.getSearchSummary(),
            tdbquery.getDocFieldPrefs().getStructureSummaryFields(),
            opt_singlebest.getValue(), false);
    assertEquals(firstRanked.getNumberOfItemsFound(), 1);

    // many best response
    upResponse = tdbquery.fetchStructuresMetaData(testUpSeq,
            tdbquery.getDocFieldPrefs().getStructureSummaryFields(),
            opt_manybest, false);
    assertTrue(firstRanked.getSearchSummary().size() < upResponse
            .getSearchSummary().size());

    // NB Could have race condition here
    String pdb_Query = tdbquery.buildPDBFTSQueryFor(upResponse);
    assertTrue(pdb_Query.trim().length() > 0);

    FTSRestResponse pdbResponse = tdbquery
            .fetchStructuresMetaDataFor(pdbquery, upResponse);
    assertTrue(pdbResponse.getNumberOfItemsFound() > 0);

    FTSRestResponse joinedResp = tdbquery.joinResponses(upResponse,
            pdbResponse);
    assertEquals(upResponse.getNumberOfItemsFound(),
            joinedResp.getNumberOfItemsFound());
  } catch (Exception x)
  {
    // fail the test, keeping the original exception as the cause so that its
    // stack trace appears in the test report (assertion failures are Errors
    // and are not caught here)
    throw new AssertionError("Unexpected Exception", x);
  }

  // the returned query source is not inspected; this call only has to
  // complete without throwing
  StructureChooserQuerySource.getQuerySourceFor(new SequenceI[]
  { testUpSeq });
}
/**
 * Data provider "testUpSeqs": one row per Uniprot fixture sequence, each row
 * holding the single sequence argument for the test method.
 * 
 * @return two rows: {@code upSeq} then {@code upSeq_insulin}
 * @throws Exception
 *           if {@link #setUp()} fails
 */
@DataProvider(name = "testUpSeqs")
public Object[][] testUpSeqs() throws Exception
{
  // the fixtures are only assigned by setUp(), so build them before
  // handing them out
  setUp();
  final Object[][] rows = new Object[2][];
  rows[0] = new Object[] { upSeq };
  rows[1] = new Object[] { upSeq_insulin };
  return rows;
}
/**
 * Checks {@code PDBStructureChooserQuerySource.sanitizeSeqName} against a
 * table of raw names and the sanitized names expected for them.
 */
@Test(groups = { "Functional" })
public void sanitizeSeqNameTest()
{
  // each row is { raw name, expected result }; rows are checked in order
  final String[][] expectations = {
      // nothing to strip: returned as given
      { "ab_cdEF|fwxyz012349", "ab_cdEF|fwxyz012349" },
      // remove a [nn] substring
      { "abcde12[345]fg", "abcde12fg" },
      // remove characters other than a-zA-Z0-9 | or _
      { "ab[cd],.\t£$*!- \\\"@:e", "abcde" },
      // bracketed text that is not all digits: only the brackets go
      { "abcde12[345a]fg", "abcde12345afg" } };

  for (String[] row : expectations)
  {
    final String rawName = row[0];
    final String expected = row[1];
    AssertJUnit.assertEquals(expected,
            PDBStructureChooserQuerySource.sanitizeSeqName(rawName));
  }
}
}