JAL-3829 pull out structure chooser query source tests to new class
author Jim Procter <j.procter@dundee.ac.uk>
Mon, 6 Sep 2021 09:52:49 +0000 (10:52 +0100)
committer Jim Procter <j.procter@dundee.ac.uk>
Mon, 6 Sep 2021 09:53:07 +0000 (10:53 +0100)
test/jalview/gui/StructureChooserTest.java
test/jalview/gui/structurechooser/StructureChooserQuerySourceTest.java [new file with mode: 0644]

index 309b5ad..685e1bc 100644 (file)
@@ -30,6 +30,8 @@ import jalview.datamodel.PDBEntry;
 import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceI;
 import jalview.fts.api.FTSData;
+import jalview.fts.core.FTSRestRequest;
+import jalview.fts.service.pdb.PDBFTSRestClient;
 import jalview.fts.service.threedbeacons.TDBeaconsFTSRestClient;
 import jalview.fts.threedbeacons.TDBeaconsFTSRestClientTest;
 import jalview.gui.structurechooser.PDBStructureChooserQuerySource;
@@ -41,6 +43,7 @@ import jalview.ws.params.InvalidArgumentException;
 import java.util.Collection;
 import java.util.Vector;
 
+import org.junit.Assert;
 import org.testng.annotations.AfterMethod;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.BeforeMethod;
@@ -123,108 +126,6 @@ public class StructureChooserTest
     upSeq=null;
   }
 
-  @SuppressWarnings("deprecation")
-  @Test(groups = { "Functional" })
-  public void buildQueryTest()
-  {
-    System.out.println("seq >>>> " + seq);
-    
-    StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { seq});
-    assertTrue(scquery instanceof PDBStructureChooserQuerySource);
-    String query = scquery.buildQuery(seq);
-    assertEquals("pdb_id:1tim", query);
-    seq.getAllPDBEntries().clear();
-    query = scquery.buildQuery(seq);
-    assertEquals(
-            "text:XYZ_1 OR text:XYZ_2 OR text:XYZ_3 OR text:XYZ_4 OR text:4kqy",
-            query);
-    seq.setDBRefs(null);
-    query = scquery.buildQuery(seq);
-    System.out.println(query);
-    assertEquals("text:4kqy", query);
-
-    DBRefEntry uniprotDBRef = new DBRefEntry();
-    uniprotDBRef.setAccessionId("P12345");
-    uniprotDBRef.setSource(DBRefSource.UNIPROT);
-    seq.addDBRef(uniprotDBRef);
-
-    DBRefEntry pdbDBRef = new DBRefEntry();
-    pdbDBRef.setAccessionId("1XYZ");
-    pdbDBRef.setSource(DBRefSource.PDB);
-    seq.addDBRef(pdbDBRef);
-
-    for (int x = 1; x < 5; x++)
-    {
-      DBRefEntry dbRef = new DBRefEntry();
-      dbRef.setAccessionId("XYZ_" + x);
-      seq.addDBRef(dbRef);
-    }
-    System.out.println("");
-    System.out.println(seq.getDBRefs());
-    System.out.println(query);
-    query = scquery.buildQuery(seq);
-    assertEquals(
-            "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
-            query);
-  }
-
-  @SuppressWarnings("deprecation")
-  @Test(groups = { "Functional" })
-  public void buildThreeDBQueryTest()
-  {
-    System.out.println("seq >>>> " + upSeq);
-    TDBeaconsFTSRestClientTest.setMock();
-    StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { upSeq});
-    assertTrue(scquery instanceof ThreeDBStructureChooserQuerySource);
-    String query = scquery.buildQuery(upSeq);
-    assertEquals("P38398", query);
-    
-    // query shouldn't change regardless of additional entries
-    // because 3DBeacons requires canonical entries.
-    upSeq.getAllPDBEntries().clear();
-    query = scquery.buildQuery(upSeq);
-    assertEquals("P38398", query);
-    upSeq.setDBRefs(null);
-    query = scquery.buildQuery(upSeq);
-    /*
-     * legacy projects/datasets will not have canonical flags set for uniprot dbrefs
-     * graceful behaviour would be to
-     *  - pick one ? not possible
-     *  - iterate through all until a 200 is obtained ?
-     *  ---> ideal but could be costly
-     *  ---> better to do a direct retrieval from uniprot to work out which is the canonical identifier..
-     *  ----> need a test to check that accessions can be promoted to canonical!
-     */
-    assertEquals(null, query);
-
-    // TODO: 
-//    
-//    DBRefEntry uniprotDBRef = new DBRefEntry();
-//    uniprotDBRef.setAccessionId("P12345");
-//    uniprotDBRef.setSource(DBRefSource.UNIPROT);
-//    upSeq.addDBRef(uniprotDBRef);
-//
-//    DBRefEntry pdbDBRef = new DBRefEntry();
-//    pdbDBRef.setAccessionId("1XYZ");
-//    pdbDBRef.setSource(DBRefSource.PDB);
-//    upSeq.addDBRef(pdbDBRef);
-//
-//    for (int x = 1; x < 5; x++)
-//    {
-//      DBRefEntry dbRef = new DBRefEntry();
-//      dbRef.setAccessionId("XYZ_" + x);
-//      seq.addDBRef(dbRef);
-//    }
-//    System.out.println("");
-//    System.out.println(seq.getDBRefs());
-//    System.out.println(query);
-//    query = scquery.buildQuery(seq);
-//    assertEquals(
-//            "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
-//            query);
-  }
-  
-
   @Test(groups = { "Functional" })
   public void populateFilterComboBoxTest() throws InterruptedException
   {
@@ -257,7 +158,22 @@ public class StructureChooserTest
             "discoveredStructuresSet");
     assertNotNull(ss);
     assertTrue(ss.size() > 0);
+  }
 
+  @Test(groups = { "Functional" })
+  public void fetchStructuresInfoMockedTest()
+  {
+    Assert.fail("Not yet implemented"); // placeholder: fails here, so none of the steps below execute yet
+    TDBeaconsFTSRestClientTest.setMock();
+    // NOTE(review): the PDBe mock below is still switched off - presumably pending a PDBFTSRestClient mock
+    // PDBFTSRestClient.setMock();
+    SequenceI[] selectedSeqs = new SequenceI[] { upSeq };
+    StructureChooser sc = new StructureChooser(selectedSeqs, seq, null);
+    sc.fetchStructuresMetaData();
+    Collection<FTSData> ss = (Collection<FTSData>) PA.getValue(sc,
+            "discoveredStructuresSet");
+    assertNotNull(ss);
+    assertTrue(ss.size() > 0);
   }
 
   @Test(groups = { "Functional" })
diff --git a/test/jalview/gui/structurechooser/StructureChooserQuerySourceTest.java b/test/jalview/gui/structurechooser/StructureChooserQuerySourceTest.java
new file mode 100644 (file)
index 0000000..b6b4615
--- /dev/null
@@ -0,0 +1,280 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * 
+ * This file is part of Jalview.
+ * 
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
+ * Jalview is distributed in the hope that it will be useful, but 
+ * WITHOUT ANY WARRANTY; without even the implied warranty 
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+ * PURPOSE.  See the GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.gui.structurechooser;
+
+
+import java.util.Vector;
+
+import org.testng.AssertJUnit;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.PDBEntry;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceI;
+import jalview.fts.core.FTSRestRequest;
+import jalview.fts.threedbeacons.TDBeaconsFTSRestClientTest;
+import jalview.gui.JvOptionPane;
+
+public class StructureChooserQuerySourceTest
+{
+  /** Class-level set up: puts JvOptionPane in non-interactive mode with CANCEL_OPTION as the mock response. */
+  @BeforeClass(alwaysRun = true)
+  public void setUpJvOptionPane()
+  {
+    JvOptionPane.setInteractiveMode(false);
+    JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
+  }
+
+  Sequence seq,upSeq;
+
+  /** Rebuilds both fixtures before each test (same set up as for StructureChooserTest): seq is "PDB|4kqy|4KQY|A"
+   *  with source-less dbrefs XYZ_1..XYZ_4 and PDB entry 1tim; upSeq is P38398 with a UNIPROT dbref. */
+@BeforeMethod(alwaysRun = true)
+  public void setUp() throws Exception
+  {
+    seq = new Sequence("PDB|4kqy|4KQY|A", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1,
+            26);
+    seq.createDatasetSequence();
+    for (int x = 1; x < 5; x++)
+    {
+      DBRefEntry dbRef = new DBRefEntry();
+      dbRef.setAccessionId("XYZ_" + x);
+      seq.addDBRef(dbRef);
+    }
+
+    PDBEntry dbRef = new PDBEntry();
+    dbRef.setId("1tim");
+
+    Vector<PDBEntry> pdbIds = new Vector<>();
+    pdbIds.add(dbRef);
+
+    seq.setPDBId(pdbIds);
+    // NOTE(review): the residue literal below embeds "\n" line breaks - confirm Sequence copes with them (end is given as 1863)
+    // Uniprot sequence for 3D-Beacons mocks
+    upSeq = new Sequence("P38398", 
+            "MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQCPLCKNDITKRS\n"
+            + "LQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEVSIIQSMGYRNRAKRLLQSEPEN\n"
+            + "PSLQETSLSVQLSNLGTVRTLRTKQRIQPQKTSVYIELGSDSSEDTVNKATYCSVGDQELLQITPQGTRDEI\n"
+            + "SLDSAKKAACEFSETDVTNTEHHQPSNNDLNTTEKRAAERHPEKYQGSSVSNLHVEPCGTNTHASSLQHENS\n"
+            + "SLLLTKDRMNVEKAEFCNKSKQPGLARSQHNRWAGSKETCNDRRTPSTEKKVDLNADPLCERKEWNKQKLPC\n"
+            + "SENPRDTEDVPWITLNSSIQKVNEWFSRSDELLGSDDSHDGESESNAKVADVLDVLNEVDEYSGSSEKIDLL\n"
+            + "ASDPHEALICKSERVHSKSVESNIEDKIFGKTYRKKASLPNLSHVTENLIIGAFVTEPQIIQERPLTNKLKR\n"
+            + "KRRPTSGLHPEDFIKKADLAVQKTPEMINQGTNQTEQNGQVMNITNSGHENKTKGDSIQNEKNPNPIESLEK\n"
+            + "ESAFKTKAEPISSSISNMELELNIHNSKAPKKNRLRRKSSTRHIHALELVVSRNLSPPNCTELQIDSCSSSE\n"
+            + "EIKKKKYNQMPVRHSRNLQLMEGKEPATGAKKSNKPNEQTSKRHDSDTFPELKLTNAPGSFTKCSNTSELKE\n"
+            + "FVNPSLPREEKEEKLETVKVSNNAEDPKDLMLSGERVLQTERSVESSSISLVPGTDYGTQESISLLEVSTLG\n"
+            + "KAKTEPNKCVSQCAAFENPKGLIHGCSKDNRNDTEGFKYPLGHEVNHSRETSIEMEESELDAQYLQNTFKVS\n"
+            + "KRQSFAPFSNPGNAEEECATFSAHSGSLKKQSPKVTFECEQKEENQGKNESNIKPVQTVNITAGFPVVGQKD\n"
+            + "KPVDNAKCSIKGGSRFCLSSQFRGNETGLITPNKHGLLQNPYRIPPLFPIKSFVKTKCKKNLLEENFEEHSM\n"
+            + "SPEREMGNENIPSTVSTISRNNIRENVFKEASSSNINEVGSSTNEVGSSINEIGSSDENIQAELGRNRGPKL\n"
+            + "NAMLRLGVLQPEVYKQSLPGSNCKHPEIKKQEYEEVVQTVNTDFSPYLISDNLEQPMGSSHASQVCSETPDD\n"
+            + "LLDDGEIKEDTSFAENDIKESSAVFSKSVQKGELSRSPSPFTHTHLAQGYRRGAKKLESSEENLSSEDEELP\n"
+            + "CFQHLLFGKVNNIPSQSTRHSTVATECLSKNTEENLLSLKNSLNDCSNQVILAKASQEHHLSEETKCSASLF\n"
+            + "SSQCSELEDLTANTNTQDPFLIGSSKQMRHQSESQGVGLSDKELVSDDEERGTGLEENNQEEQSMDSNLGEA\n"
+            + "ASGCESETSVSEDCSGLSSQSDILTTQQRDTMQHNLIKLQQEMAELEAVLEQHGSQPSNSYPSIISDSSALE\n"
+            + "DLRNPEQSTSEKAVLTSQKSSEYPISQNPEGLSADKFEVSADSSTSKNKEPGVERSSPSKCPSLDDRWYMHS\n"
+            + "CSGSLQNRNYPSQEELIKVVDVEEQQLEESGPHDLTETSYLPRQDLEGTPYLESGISLFSDDPESDPSEDRA\n"
+            + "PESARVGNIPSSTSALKVPQLKVAESAQSPAAAHTTDTAGYNAMEESVSREKPELTASTERVNKRMSMVVSG\n"
+            + "LTPEEFMLVYKFARKHHITLTNLITEETTHVVMKTDAEFVCERTLKYFLGIAGGKWVVSYFWVTQSIKERKM\n"
+            + "LNEHDFEVRGDVVNGRNHQGPKRARESQDRKIFRGLEICCYGPFTNMPTDQLEWMVQLCGASVVKELSSFTL\n"
+            + "GTGVHPIVVVQPDAWTEDNGFHAIGQMCEAPVVTREWVLDSVALYQCQELDTYLIPQIPHSHY\n"
+            + "", 1,
+1863);
+    upSeq.createDatasetSequence();
+    upSeq.setDescription("Breast cancer type 1 susceptibility protein");
+    upSeq.addDBRef(new DBRefEntry("UNIPROT","0","P38398",null,true));
+  }
+  /** Clears both sequence fixtures after each test. */
+  @AfterMethod(alwaysRun = true)
+  public void tearDown() throws Exception
+  {
+    upSeq = null;
+    seq = null;
+  }
+  /** Steps the PDB query source's buildQuery through four states of seq and checks the expected query string for each. */
+  @SuppressWarnings("deprecation")
+  @Test(groups = { "Functional" })
+  public void buildPDBQueryTest()
+  {
+    System.out.println("seq >>>> " + seq);
+
+    final SequenceI[] selection = { seq };
+    StructureChooserQuerySource querySource = StructureChooserQuerySource.getQuerySourceFor(selection);
+    AssertJUnit.assertTrue(querySource instanceof PDBStructureChooserQuerySource);
+
+    // 1) with the PDB entry from setUp in place, only its id is expected in the query
+    String built = querySource.buildQuery(seq);
+    AssertJUnit.assertEquals("pdb_id:1tim", built);
+    // 2) PDB entries cleared: the XYZ_n accessions plus 4kqy are expected as OR-ed text terms
+    seq.getAllPDBEntries().clear();
+    built = querySource.buildQuery(seq);
+    AssertJUnit.assertEquals("text:XYZ_1 OR text:XYZ_2 OR text:XYZ_3 OR text:XYZ_4 OR text:4kqy", built);
+    // 3) dbrefs removed as well: only the 4kqy term is expected
+    seq.setDBRefs(null);
+    built = querySource.buildQuery(seq);
+    System.out.println(built);
+    AssertJUnit.assertEquals("text:4kqy", built);
+
+    // 4) UNIPROT and PDB dbrefs added next to source-less ones: only the former are expected in the query
+    DBRefEntry uniprotRef = new DBRefEntry();
+    uniprotRef.setAccessionId("P12345");
+    uniprotRef.setSource(DBRefSource.UNIPROT);
+    seq.addDBRef(uniprotRef);
+    DBRefEntry pdbRef = new DBRefEntry();
+    pdbRef.setAccessionId("1XYZ");
+    pdbRef.setSource(DBRefSource.PDB);
+    seq.addDBRef(pdbRef);
+    for (int n = 1; n <= 4; n++)
+    {
+      DBRefEntry sourcelessRef = new DBRefEntry();
+      sourcelessRef.setAccessionId("XYZ_" + n);
+      seq.addDBRef(sourcelessRef);
+    }
+    System.out.println("");
+    System.out.println(seq.getDBRefs());
+    System.out.println(built);
+    built = querySource.buildQuery(seq);
+    AssertJUnit.assertEquals("uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz", built);
+  }
+  /** Checks the 3D-Beacons query for upSeq: "P38398" while the dbref from setUp is present, null once dbrefs are removed. */
+  @SuppressWarnings("deprecation")
+  @Test(groups = { "Functional" })
+  public void buildThreeDBQueryTest()
+  {
+    System.out.println("seq >>>> " + upSeq);
+    TDBeaconsFTSRestClientTest.setMock();
+    StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { upSeq});
+    AssertJUnit.assertTrue(scquery instanceof ThreeDBStructureChooserQuerySource);
+    String query = scquery.buildQuery(upSeq);
+    AssertJUnit.assertEquals("P38398", query);
+    
+    // query shouldn't change regardless of additional entries
+    // because 3DBeacons requires canonical entries.
+    upSeq.getAllPDBEntries().clear();
+    query = scquery.buildQuery(upSeq);
+    AssertJUnit.assertEquals("P38398", query);
+    upSeq.setDBRefs(null);
+    query = scquery.buildQuery(upSeq);
+    /*
+     * legacy projects/datasets will not have canonical flags set for uniprot dbrefs
+     * graceful behaviour would be to
+     *  - pick one ? not possible
+     *  - iterate through all until a 200 is obtained ?
+     *  ---> ideal but could be costly
+     *  ---> better to do a direct retrieval from uniprot to work out which is the canonical identifier..
+     *  ----> need a test to check that accessions can be promoted to canonical!
+     */
+    //FIXME - need to be able to use ID to query here ?
+    AssertJUnit.assertEquals(null, query);
+
+    
+    
+    // TODO: 
+    /**
+     * set of sequences:
+     * - no protein -> TDB not applicable, query PDBe only (consider RNA or DNA - specific query adapter ?)
+     * - protein but no uniprot -> first consider trying to get uniprot refs (need a mark to say none are available)
+     * - protein and uniprot - no canonicals -> resolve to uniprot automatically to get canonicals
+     * - query uniprot against 3DBeacons
+     * --> decorate experimental structures with additional data from PDBe
+     * - query remaining against PDBe
+     * Ranking
+     * - 3D Beacons
+     *  --> in memory ranking - no need to query twice
+     *  Rank by
+     *  - experimental > AlphaFold -> Model
+     *  - start > end
+     *  -> filters for 
+     *  -> experimental only
+     *  -> experimental plus best models for other regions
+     *  -> "best cover" 
+     *  -> need to be able to select correct reference (the longest one that covers all) for superposition
+     */
+//    NOTE(review): the parked code below mixes seq and upSeq and calls a bare assertEquals that this file does not import
+//    DBRefEntry uniprotDBRef = new DBRefEntry();
+//    uniprotDBRef.setAccessionId("P12345");
+//    uniprotDBRef.setSource(DBRefSource.UNIPROT);
+//    upSeq.addDBRef(uniprotDBRef);
+//
+//    DBRefEntry pdbDBRef = new DBRefEntry();
+//    pdbDBRef.setAccessionId("1XYZ");
+//    pdbDBRef.setSource(DBRefSource.PDB);
+//    upSeq.addDBRef(pdbDBRef);
+//
+//    for (int x = 1; x < 5; x++)
+//    {
+//      DBRefEntry dbRef = new DBRefEntry();
+//      dbRef.setAccessionId("XYZ_" + x);
+//      seq.addDBRef(dbRef);
+//    }
+//    System.out.println("");
+//    System.out.println(seq.getDBRefs());
+//    System.out.println(query);
+//    query = scquery.buildQuery(seq);
+//    assertEquals(
+//            "uniprot_accession:P12345 OR uniprot_id:P12345 OR pdb_id:1xyz",
+//            query);
+  }
+  /** Work in progress: prepares the 3D-Beacons request for upSeq; the follow-up PDBe query is not wired in yet. */
+  @Test(groups= {"Functional"})
+  public void cascadingThreeDBandPDBQuerys()
+  {
+    TDBeaconsFTSRestClientTest.setMock();
+
+    StructureChooserQuerySource scquery = StructureChooserQuerySource.getQuerySourceFor(new SequenceI[] { upSeq});
+    // the parked cast at the end of this test only works if the 3D-Beacons source was chosen, so check it
+    AssertJUnit.assertTrue(scquery instanceof ThreeDBStructureChooserQuerySource);
+
+    // query TDB for doc
+    // query PDBe for PDB entry metadata
+    // Combine
+    FTSRestRequest tdbQuery = new FTSRestRequest();
+    tdbQuery.setResponseSize(100);
+    tdbQuery.setFieldToSearchBy("");
+    tdbQuery.setSearchTerm(scquery.buildQuery(upSeq));
+    tdbQuery.setWantedFields(scquery.getDocFieldPrefs().getStructureSummaryFields());
+    //scquery.fetchStructuresMetaData(upSeq, null, null, false);
+    // TODO: String secondaryPdbQuery = ((ThreeDBStructureChooserQuerySource)scquery).buildPDBFTSquery();
+  }
+  /** Checks PDBStructureChooserQuerySource.sanitizeSeqName against a table of { input, expected } pairs. */
+  @Test(groups = { "Functional" })
+  public void sanitizeSeqNameTest()
+  {
+    final String[][] inputAndExpected = {
+        // nothing to strip: expected back unchanged
+        { "ab_cdEF|fwxyz012349", "ab_cdEF|fwxyz012349" },
+        // remove a [nn] substring
+        { "abcde12[345]fg", "abcde12fg" },
+        // remove characters other than a-zA-Z0-9 | or _
+        { "ab[cd],.\t£$*!- \\\"@:e", "abcde" },
+        // bracketed text that is not all digits: only the brackets are expected to go
+        { "abcde12[345a]fg", "abcde12345afg" } };
+    for (String[] pair : inputAndExpected)
+    {
+      AssertJUnit.assertEquals(pair[1], PDBStructureChooserQuerySource.sanitizeSeqName(pair[0]));
+    }
+  }
+}