JAL-3829 pull out structure chooser query source tests to new class
[jalview.git] / test / jalview / gui / structurechooser / StructureChooserQuerySourceTest.java
diff --git a/test/jalview/gui/structurechooser/StructureChooserQuerySourceTest.java b/test/jalview/gui/structurechooser/StructureChooserQuerySourceTest.java
new file mode 100644 (file)
index 0000000..b6b4615
--- /dev/null
@@ -0,0 +1,280 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.gui.structurechooser;
+
+
+import java.util.Vector;
+
+import org.testng.AssertJUnit;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.PDBEntry;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceI;
+import jalview.fts.core.FTSRestRequest;
+import jalview.fts.threedbeacons.TDBeaconsFTSRestClientTest;
+import jalview.gui.JvOptionPane;
+
+public class StructureChooserQuerySourceTest
+{
+
+  @BeforeClass(alwaysRun = true)
+  public void setUpJvOptionPane()
+  {
+    JvOptionPane.setInteractiveMode(false);
+    JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
+  }
+
+  // fixture named "PDB|4kqy|4KQY|A"; assigned in setUp(), nulled in tearDown()
+  Sequence seq;
+
+  // fixture named "P38398" (Uniprot); assigned in setUp(), nulled in tearDown()
+  Sequence upSeq;
+
+  // same set up as for structurechooser test
+  
+@BeforeMethod(alwaysRun = true)
+  public void setUp() throws Exception
+  {
+    seq = new Sequence("PDB|4kqy|4KQY|A", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1,
+            26);
+    seq.createDatasetSequence();
+    for (int x = 1; x < 5; x++)
+    {
+      DBRefEntry dbRef = new DBRefEntry();
+      dbRef.setAccessionId("XYZ_" + x);
+      seq.addDBRef(dbRef);
+    }
+
+    PDBEntry dbRef = new PDBEntry();
+    dbRef.setId("1tim");
+
+    Vector<PDBEntry> pdbIds = new Vector<>();
+    pdbIds.add(dbRef);
+
+    seq.setPDBId(pdbIds);
+    
+    // Uniprot sequence for 3D-Beacons mocks
+    upSeq = new Sequence("P38398", 
+            "MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQCPLCKNDITKRS\n"
+            + "LQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEVSIIQSMGYRNRAKRLLQSEPEN\n"
+            + "PSLQETSLSVQLSNLGTVRTLRTKQRIQPQKTSVYIELGSDSSEDTVNKATYCSVGDQELLQITPQGTRDEI\n"
+            + "SLDSAKKAACEFSETDVTNTEHHQPSNNDLNTTEKRAAERHPEKYQGSSVSNLHVEPCGTNTHASSLQHENS\n"
+            + "SLLLTKDRMNVEKAEFCNKSKQPGLARSQHNRWAGSKETCNDRRTPSTEKKVDLNADPLCERKEWNKQKLPC\n"
+            + "SENPRDTEDVPWITLNSSIQKVNEWFSRSDELLGSDDSHDGESESNAKVADVLDVLNEVDEYSGSSEKIDLL\n"
+            + "ASDPHEALICKSERVHSKSVESNIEDKIFGKTYRKKASLPNLSHVTENLIIGAFVTEPQIIQERPLTNKLKR\n"
+            + "KRRPTSGLHPEDFIKKADLAVQKTPEMINQGTNQTEQNGQVMNITNSGHENKTKGDSIQNEKNPNPIESLEK\n"
+            + "ESAFKTKAEPISSSISNMELELNIHNSKAPKKNRLRRKSSTRHIHALELVVSRNLSPPNCTELQIDSCSSSE\n"
+            + "EIKKKKYNQMPVRHSRNLQLMEGKEPATGAKKSNKPNEQTSKRHDSDTFPELKLTNAPGSFTKCSNTSELKE\n"
+            + "FVNPSLPREEKEEKLETVKVSNNAEDPKDLMLSGERVLQTERSVESSSISLVPGTDYGTQESISLLEVSTLG\n"
+            + "KAKTEPNKCVSQCAAFENPKGLIHGCSKDNRNDTEGFKYPLGHEVNHSRETSIEMEESELDAQYLQNTFKVS\n"
+            + "KRQSFAPFSNPGNAEEECATFSAHSGSLKKQSPKVTFECEQKEENQGKNESNIKPVQTVNITAGFPVVGQKD\n"
+            + "KPVDNAKCSIKGGSRFCLSSQFRGNETGLITPNKHGLLQNPYRIPPLFPIKSFVKTKCKKNLLEENFEEHSM\n"
+            + "SPEREMGNENIPSTVSTISRNNIRENVFKEASSSNINEVGSSTNEVGSSINEIGSSDENIQAELGRNRGPKL\n"
+            + "NAMLRLGVLQPEVYKQSLPGSNCKHPEIKKQEYEEVVQTVNTDFSPYLISDNLEQPMGSSHASQVCSETPDD\n"
+            + "LLDDGEIKEDTSFAENDIKESSAVFSKSVQKGELSRSPSPFTHTHLAQGYRRGAKKLESSEENLSSEDEELP\n"
+            + "CFQHLLFGKVNNIPSQSTRHSTVATECLSKNTEENLLSLKNSLNDCSNQVILAKASQEHHLSEETKCSASLF\n"
+            + "SSQCSELEDLTANTNTQDPFLIGSSKQMRHQSESQGVGLSDKELVSDDEERGTGLEENNQEEQSMDSNLGEA\n"
+            + "ASGCESETSVSEDCSGLSSQSDILTTQQRDTMQHNLIKLQQEMAELEAVLEQHGSQPSNSYPSIISDSSALE\n"
+            + "DLRNPEQSTSEKAVLTSQKSSEYPISQNPEGLSADKFEVSADSSTSKNKEPGVERSSPSKCPSLDDRWYMHS\n"
+            + "CSGSLQNRNYPSQEELIKVVDVEEQQLEESGPHDLTETSYLPRQDLEGTPYLESGISLFSDDPESDPSEDRA\n"
+            + "PESARVGNIPSSTSALKVPQLKVAESAQSPAAAHTTDTAGYNAMEESVSREKPELTASTERVNKRMSMVVSG\n"
+            + "LTPEEFMLVYKFARKHHITLTNLITEETTHVVMKTDAEFVCERTLKYFLGIAGGKWVVSYFWVTQSIKERKM\n"
+            + "LNEHDFEVRGDVVNGRNHQGPKRARESQDRKIFRGLEICCYGPFTNMPTDQLEWMVQLCGASVVKELSSFTL\n"
+            + "GTGVHPIVVVQPDAWTEDNGFHAIGQMCEAPVVTREWVLDSVALYQCQELDTYLIPQIPHSHY\n"
+            + "", 1,
+1863);
+    upSeq.createDatasetSequence();
+    upSeq.setDescription("Breast cancer type 1 susceptibility protein");
+    upSeq.addDBRef(new DBRefEntry("UNIPROT","0","P38398",null,true));
+  }
+
+@AfterMethod(alwaysRun = true)
+  public void tearDown() throws Exception
+  {
+    seq = null;
+    upSeq=null;
+  }
+
+  @SuppressWarnings("deprecation")
+  @Test(groups = { "Functional" })
+  public void buildPDBQueryTest()
+  {
+    System.out.println("seq >>>> " + seq);
+    
+    StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { seq});
+    AssertJUnit.assertTrue(scquery instanceof PDBStructureChooserQuerySource);
+    String query = scquery.buildQuery(seq);
+    AssertJUnit.assertEquals("pdb_id:1tim", query);
+    seq.getAllPDBEntries().clear();
+    query = scquery.buildQuery(seq);
+    AssertJUnit.assertEquals(
+            "text:XYZ_1 OR text:XYZ_2 OR text:XYZ_3 OR text:XYZ_4 OR text:4kqy",
+            query);
+    seq.setDBRefs(null);
+    query = scquery.buildQuery(seq);
+    System.out.println(query);
+    AssertJUnit.assertEquals("text:4kqy", query);
+
+    DBRefEntry uniprotDBRef = new DBRefEntry();
+    uniprotDBRef.setAccessionId("P12345");
+    uniprotDBRef.setSource(DBRefSource.UNIPROT);
+    seq.addDBRef(uniprotDBRef);
+
+    DBRefEntry pdbDBRef = new DBRefEntry();
+    pdbDBRef.setAccessionId("1XYZ");
+    pdbDBRef.setSource(DBRefSource.PDB);
+    seq.addDBRef(pdbDBRef);
+
+    for (int x = 1; x < 5; x++)
+    {
+      DBRefEntry dbRef = new DBRefEntry();
+      dbRef.setAccessionId("XYZ_" + x);
+      seq.addDBRef(dbRef);
+    }
+    System.out.println("");
+    System.out.println(seq.getDBRefs());
+    System.out.println(query);
+    query = scquery.buildQuery(seq);
+    AssertJUnit.assertEquals(
+            "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
+            query);
+  }
+
+  @SuppressWarnings("deprecation")
+  @Test(groups = { "Functional" })
+  public void buildThreeDBQueryTest()
+  {
+    System.out.println("seq >>>> " + upSeq);
+    TDBeaconsFTSRestClientTest.setMock();
+    StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { upSeq});
+    AssertJUnit.assertTrue(scquery instanceof ThreeDBStructureChooserQuerySource);
+    String query = scquery.buildQuery(upSeq);
+    AssertJUnit.assertEquals("P38398", query);
+    
+    // query shouldn't change regardless of additional entries
+    // because 3DBeacons requires canonical entries.
+    upSeq.getAllPDBEntries().clear();
+    query = scquery.buildQuery(upSeq);
+    AssertJUnit.assertEquals("P38398", query);
+    upSeq.setDBRefs(null);
+    query = scquery.buildQuery(upSeq);
+    /*
+     * legacy projects/datasets will not have canonical flags set for uniprot dbrefs
+     * graceful behaviour would be to
+     *  - pick one ? not possible
+     *  - iterate through all until a 200 is obtained ?
+     *  ---> ideal but could be costly
+     *  ---> better to do a direct retrieval from uniprot to work out which is the canonical identifier..
+     *  ----> need a test to check that accessions can be promoted to canonical!
+     */
+    //FIXME - need to be able to use ID to query here ?
+    AssertJUnit.assertEquals(null, query);
+
+    
+    
+    // TODO: 
+    /**
+     * set of sequences:
+     * - no protein -> TDB not applicable, query PDBe only (consider RNA or DNA - specific query adapter ?)
+     * - protein but no uniprot -> first consider trying to get uniprot refs (need a mark to say none are available)
+     * - protein and uniprot - no canonicals -> resolve to uniprot automatically to get canonicals
+     * - query uniprot against 3DBeacons
+     * --> decorate experimental structures with additional data from PDBe
+     * - query remaining against PDBe
+     * Ranking
+     * - 3D Beacons
+     *  --> in memory ranking - no need to query twice
+     *  Rank by
+     *  - experimental > AlphaFold -> Model
+     *  - start > end
+     *  -> filters for 
+     *  -> experimental only
+     *  -> experimental plus best models for other regions
+     *  -> "best cover" 
+     *  -> need to be able to select correct reference (the longest one that covers all) for superposition
+     */
+//    
+//    DBRefEntry uniprotDBRef = new DBRefEntry();
+//    uniprotDBRef.setAccessionId("P12345");
+//    uniprotDBRef.setSource(DBRefSource.UNIPROT);
+//    upSeq.addDBRef(uniprotDBRef);
+//
+//    DBRefEntry pdbDBRef = new DBRefEntry();
+//    pdbDBRef.setAccessionId("1XYZ");
+//    pdbDBRef.setSource(DBRefSource.PDB);
+//    upSeq.addDBRef(pdbDBRef);
+//
+//    for (int x = 1; x < 5; x++)
+//    {
+//      DBRefEntry dbRef = new DBRefEntry();
+//      dbRef.setAccessionId("XYZ_" + x);
+//      seq.addDBRef(dbRef);
+//    }
+//    System.out.println("");
+//    System.out.println(seq.getDBRefs());
+//    System.out.println(query);
+//    query = scquery.buildQuery(seq);
+//    assertEquals(
+//            "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
+//            query);
+  }
+  @Test(groups= {"Functional"})
+  public void cascadingThreeDBandPDBQuerys()
+  {
+    TDBeaconsFTSRestClientTest.setMock();
+    
+    StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { upSeq});
+
+    // query TDB for doc
+    // query PDBe for PDB entry metadata
+    // Combine
+    
+    FTSRestRequest tdbQuery = new FTSRestRequest();
+    tdbQuery.setResponseSize(100);
+    tdbQuery.setFieldToSearchBy("");
+    tdbQuery.setSearchTerm(scquery.buildQuery(upSeq));
+    tdbQuery.setWantedFields(scquery.getDocFieldPrefs().getStructureSummaryFields());
+    //scquery.fetchStructuresMetaData(upSeq, null, null, false);
+    String secondaryPdbQuery;
+    //secondaryPdbQuery = ((ThreeDBStructureChooserQuerySource)scquery).buildPDBFTSquery();
+
+  }
+  
+  @Test(groups = { "Functional" })
+  public void sanitizeSeqNameTest()
+  {
+    String name = "ab_cdEF|fwxyz012349";
+    AssertJUnit.assertEquals(name, PDBStructureChooserQuerySource.sanitizeSeqName(name));
+
+    // remove a [nn] substring
+    name = "abcde12[345]fg";
+    AssertJUnit.assertEquals("abcde12fg", PDBStructureChooserQuerySource.sanitizeSeqName(name));
+
+    // remove characters other than a-zA-Z0-9 | or _
+    name = "ab[cd],.\t£$*!- \\\"@:e";
+    AssertJUnit.assertEquals("abcde", PDBStructureChooserQuerySource.sanitizeSeqName(name));
+
+    name = "abcde12[345a]fg";
+    AssertJUnit.assertEquals("abcde12345afg", PDBStructureChooserQuerySource.sanitizeSeqName(name));
+  }
+}