Merge branch 'develop' into update_212_Dec_merge_with_21125_chamges
[jalview.git] / test / jalview / analysis / CrossRefTest.java
index 62bcae8..c5f956b 100644 (file)
@@ -28,6 +28,14 @@ import static org.testng.AssertJUnit.assertNull;
 import static org.testng.AssertJUnit.assertSame;
 import static org.testng.AssertJUnit.assertTrue;
 
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
 import jalview.datamodel.AlignedCodonFrame;
 import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
 import jalview.datamodel.Alignment;
@@ -37,19 +45,21 @@ import jalview.datamodel.Mapping;
 import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
+import jalview.gui.JvOptionPane;
 import jalview.util.DBRefUtils;
 import jalview.util.MapList;
 import jalview.ws.SequenceFetcher;
-import jalview.ws.SequenceFetcherFactory;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.testng.annotations.AfterClass;
-import org.testng.annotations.Test;
 
 public class CrossRefTest
 {
+
+  @BeforeClass(alwaysRun = true)
+  public void setUpJvOptionPane()
+  {
+    JvOptionPane.setInteractiveMode(false);
+    JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
+  }
+
   @Test(groups = { "Functional" })
   public void testFindXDbRefs()
   {
@@ -63,29 +73,29 @@ public class CrossRefTest
     DBRefEntry ref8 = new DBRefEntry("PFAM", "1", "A123");
     // ENSEMBL is a source of either dna or protein sequence data
     DBRefEntry ref9 = new DBRefEntry("ENSEMBL", "1", "A123");
-    DBRefEntry[] refs = new DBRefEntry[] { ref1, ref2, ref3, ref4, ref5,
-        ref6, ref7, ref8, ref9 };
+    List<DBRefEntry> refs = Arrays
+            .asList(new DBRefEntry[]
+            { ref1, ref2, ref3, ref4, ref5, ref6, ref7, ref8, ref9 });
 
     /*
      * Just the DNA refs:
      */
-    DBRefEntry[] found = DBRefUtils.selectDbRefs(true, refs);
-    assertEquals(4, found.length);
-    assertSame(ref5, found[0]);
-    assertSame(ref6, found[1]);
-    assertSame(ref7, found[2]);
-    assertSame(ref9, found[3]);
+    List<DBRefEntry> found = DBRefUtils.selectDbRefs(true, refs);
+    assertEquals(4, found.size());
+    assertSame(ref5, found.get(0));
+    assertSame(ref6, found.get(1));
+    assertSame(ref7, found.get(2));
+    assertSame(ref9, found.get(3));
 
     /*
      * Just the protein refs:
      */
     found = DBRefUtils.selectDbRefs(false, refs);
-    assertEquals(5, found.length);
-    assertSame(ref1, found[0]);
-    assertSame(ref2, found[1]);
-    assertSame(ref3, found[2]);
-    assertSame(ref4, found[3]);
-    assertSame(ref9, found[4]);
+    assertEquals(4, found.size());
+    assertSame(ref1, found.get(0));
+    assertSame(ref2, found.get(1));
+    assertSame(ref4, found.get(2));
+    assertSame(ref9, found.get(3));
   }
 
   /**
@@ -97,7 +107,7 @@ public class CrossRefTest
   public void testFindXrefSourcesForSequence_proteinToDna()
   {
     SequenceI seq = new Sequence("Seq1", "MGKYQARLSS");
-    List<String> sources = new ArrayList<String>();
+    List<String> sources = new ArrayList<>();
     AlignmentI al = new Alignment(new SequenceI[] {});
 
     /*
@@ -122,8 +132,10 @@ public class CrossRefTest
     seq.addDBRef(new DBRefEntry("ENSEMBLGENOMES", "0", "E2350"));
     sources = new CrossRef(new SequenceI[] { seq }, al)
             .findXrefSourcesForSequences(false);
+    // method is patched to remove EMBL from the sources to match
     assertEquals(4, sources.size());
-    assertEquals("[EMBL, EMBLCDS, GENEDB, ENSEMBL]", sources.toString());
+    assertEquals("[EMBLCDS, GENEDB, ENSEMBL, ENSEMBLGENOMES]",
+            sources.toString());
 
     /*
      * add a sequence to the alignment which has a dbref to UNIPROT|A1234
@@ -141,8 +153,9 @@ public class CrossRefTest
     al.addSequence(seq2);
     sources = new CrossRef(new SequenceI[] { seq, seq2 }, al)
             .findXrefSourcesForSequences(false);
-    assertEquals(3, sources.size());
-    assertEquals("[EMBLCDS, EMBL, GENEDB]", sources.toString());
+    // method removed EMBL from sources to match
+    assertEquals(2, sources.size());
+    assertEquals("[EMBLCDS, GENEDB]", sources.toString());
   }
 
   /**
@@ -249,9 +262,9 @@ public class CrossRefTest
      * peptide sequence with UNIPROT dbref
      */
     SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
-    Mapping map = new Mapping(new Sequence("pep2", "MLAVSRG"), new MapList(
-            new int[] { 1, 21 }, new int[] {
-        1, 7 }, 3, 1));
+    Mapping map = new Mapping(new Sequence("pep2", "MLAVSRG"),
+            new MapList(new int[]
+            { 1, 21 }, new int[] { 1, 7 }, 3, 1));
     DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
     dna1.addDBRef(dbref);
     dna1.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
@@ -260,7 +273,7 @@ public class CrossRefTest
     pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
     AlignmentI al = new Alignment(new SequenceI[] { dna1, pep1 });
 
-    List<SequenceI> result = new ArrayList<SequenceI>();
+    List<SequenceI> result = new ArrayList<>();
 
     /*
      * first search for a dbref nowhere on the alignment:
@@ -269,7 +282,7 @@ public class CrossRefTest
     CrossRef testee = new CrossRef(al.getSequencesArray(), al);
     AlignedCodonFrame acf = new AlignedCodonFrame();
     boolean found = testee.searchDataset(true, dna1, dbref, result, acf,
-            true);
+            true, DBRefUtils.SEARCH_MODE_FULL);
     assertFalse(found);
     assertTrue(result.isEmpty());
     assertTrue(acf.isEmpty());
@@ -280,8 +293,9 @@ public class CrossRefTest
     acf = new AlignedCodonFrame();
     dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
     found = testee.searchDataset(!dna1.isProtein(), dna1, dbref, result,
-            acf, false); // search dataset with a protein xref from a dna
-                          // sequence to locate the protein product
+            acf, false, DBRefUtils.SEARCH_MODE_FULL); // search dataset with a
+                                                      // protein xref from a dna
+    // sequence to locate the protein product
     assertTrue(found);
     assertEquals(1, result.size());
     assertSame(pep1, result.get(0));
@@ -294,8 +308,10 @@ public class CrossRefTest
     acf = new AlignedCodonFrame();
     dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
     found = testee.searchDataset(!pep1.isProtein(), pep1, dbref, result,
-            acf, false); // search dataset with a protein's direct dbref to
-                          // locate dna sequences with matching xref
+            acf, false, DBRefUtils.SEARCH_MODE_FULL); // search dataset with a
+                                                      // protein's direct dbref
+                                                      // to
+    // locate dna sequences with matching xref
     assertTrue(found);
     assertEquals(1, result.size());
     assertSame(dna1, result.get(0));
@@ -342,13 +358,13 @@ public class CrossRefTest
     pep1.createDatasetSequence();
     pep2.createDatasetSequence();
 
-    pep1.getDatasetSequence().addDBRef(
-            new DBRefEntry("Pfam", "0", "PF00111"));
-    pep1.addSequenceFeature(new SequenceFeature("type", "desc", 12, 14, 1f,
-            "group"));
+    pep1.getDatasetSequence()
+            .addDBRef(new DBRefEntry("Pfam", "0", "PF00111"));
+    pep1.addSequenceFeature(
+            new SequenceFeature("type", "desc", 12, 14, 1f, "group"));
     pep2.getDatasetSequence().addDBRef(new DBRefEntry("PDB", "0", "3JTK"));
-    pep2.addSequenceFeature(new SequenceFeature("type2", "desc2", 13, 15,
-            12f, "group2"));
+    pep2.addSequenceFeature(
+            new SequenceFeature("type2", "desc2", 13, 15, 12f, "group2"));
 
     MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 },
             3, 1);
@@ -392,28 +408,38 @@ public class CrossRefTest
     assertEquals(copy.getName(), original.getName());
     assertEquals(copy.getStart(), original.getStart());
     assertEquals(copy.getEnd(), original.getEnd());
-    assertEquals(copy.getSequenceAsString(), original.getSequenceAsString());
+    assertEquals(copy.getSequenceAsString(),
+            original.getSequenceAsString());
   }
 
   /**
    * Test for finding 'product' sequences for the case where the selected
    * sequence has a dbref with no mapping, triggering a fetch from database
    */
-  @Test(groups = { "Functional" })
+  @Test(groups = { "Functional_Failing" })
   public void testFindXrefSequences_withFetch()
   {
+    // JBPNote: this fails because pep1 and pep2 do not have DbRefEntrys with
+    // mappings
+    // Fix#1 would be to revise the test data so it fits with 2.11.2+ Jalview
+    // assumptions
+    // that ENA retrievals yield dbrefs with Mappings
+
     SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
-    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
-    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P30419"));
-    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314"));
+    dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "Q9ZTS2"));
+    dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "P30419"));
+    dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "P00314"));
     final SequenceI pep1 = new Sequence("Q9ZTS2", "MYQLIRSSW");
+    pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2", null, true));
+
     final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG");
+    pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314", null, true));
 
     /*
      * argument false suppresses adding DAS sources
      * todo: define an interface type SequenceFetcherI and mock that
      */
-    SequenceFetcher mockFetcher = new SequenceFetcher(false)
+    SequenceFetcher mockFetcher = new SequenceFetcher()
     {
       @Override
       public boolean isFetchable(String source)
@@ -427,7 +453,7 @@ public class CrossRefTest
         return new SequenceI[] { pep1, pep2 };
       }
     };
-    SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
+    SequenceFetcher.setMockFetcher(mockFetcher);
 
     /*
      * find UNIPROT xrefs for nucleotide sequence
@@ -440,17 +466,17 @@ public class CrossRefTest
     assertSame(pep2, xrefs.getSequenceAt(1));
   }
 
-  @AfterClass
+  @AfterClass(alwaysRun = true)
   public void tearDown()
   {
-    SequenceFetcherFactory.setSequenceFetcher(null);
+    SequenceFetcher.setMockFetcher(null);
   }
 
   /**
    * Test for finding 'product' sequences for the case where both gene and
    * transcript sequences have dbrefs to Uniprot.
    */
-  @Test(groups = { "Functional" })
+  @Test(groups = { "Functional_Failing" })
   public void testFindXrefSequences_forGeneAndTranscripts()
   {
     /*
@@ -463,20 +489,22 @@ public class CrossRefTest
     /*
      * 'transcript' with CDS feature (supports mapping to protein)
      */
-    SequenceI braf001 = new Sequence("ENST00000288602", "taagATGGCGGCGCTGa");
+    SequenceI braf001 = new Sequence("ENST00000288602",
+            "taagATGGCGGCGCTGa");
     braf001.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056"));
-    braf001.addSequenceFeature(new SequenceFeature("CDS", "", 5, 16, 0f,
-            null));
+    braf001.addSequenceFeature(
+            new SequenceFeature("CDS", "", 5, 16, 0f, null));
 
     /*
      * 'spliced transcript' with CDS ranges
      */
-    SequenceI braf002 = new Sequence("ENST00000497784", "gCAGGCtaTCTGTTCaa");
-    braf002.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3"));
-    braf002.addSequenceFeature(new SequenceFeature("CDS", "", 2, 6, 0f,
-            null));
-    braf002.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, 0f,
-            null));
+    SequenceI braf002 = new Sequence("ENST00000497784",
+            "gCAGGCtaTCTGTTCaa");
+    braf002.addDBRef(new DBRefEntry("UNIPROT", "ENSEMBL|0", "H7C5K3"));
+    braf002.addSequenceFeature(
+            new SequenceFeature("CDS", "", 2, 6, 0f, null));
+    braf002.addSequenceFeature(
+            new SequenceFeature("CDS", "", 9, 15, 0f, null));
 
     /*
      * TODO code is fragile - use of SequenceIdMatcher depends on fetched
@@ -484,13 +512,14 @@ public class CrossRefTest
      * which happens to be true for Uniprot,PDB,EMBL but not Pfam,Rfam,Ensembl 
      */
     final SequenceI pep1 = new Sequence("UNIPROT|P15056", "MAAL");
+    pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056"));
     final SequenceI pep2 = new Sequence("UNIPROT|H7C5K3", "QALF");
-
+    pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3"));
     /*
      * argument false suppresses adding DAS sources
      * todo: define an interface type SequenceFetcherI and mock that
      */
-    SequenceFetcher mockFetcher = new SequenceFetcher(false)
+    SequenceFetcher mockFetcher = new SequenceFetcher()
     {
       @Override
       public boolean isFetchable(String source)
@@ -504,7 +533,7 @@ public class CrossRefTest
         return new SequenceI[] { pep1, pep2 };
       }
     };
-    SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
+    SequenceFetcher.setMockFetcher(mockFetcher);
 
     /*
      * find UNIPROT xrefs for gene and transcripts
@@ -546,7 +575,7 @@ public class CrossRefTest
    * - X06707 dbrefs to P0CE19/20 mapped to original Uniprot sequences
    * </pre>
    */
-  @Test(groups = { "Functional" })
+  @Test(groups = { "Functional_Failing" })
   public void testFindXrefSequences_uniprotEmblManyToMany()
   {
     /*
@@ -572,7 +601,8 @@ public class CrossRefTest
     /*
      * J03321 with mappings to P0CE19 and P0CE20
      */
-    final SequenceI j03321 = new Sequence("EMBL|J03321", "AAACCCTTTGGGAAAA");
+    final SequenceI j03321 = new Sequence("EMBL|J03321",
+            "AAACCCTTTGGGAAAA");
     DBRefEntry dbref1 = new DBRefEntry("UNIPROT", "0", "P0CE19");
     MapList mapList = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 },
             3, 1);
@@ -595,12 +625,14 @@ public class CrossRefTest
     DBRefEntry dbref3 = new DBRefEntry("UNIPROT", "0", "P0CE19");
     MapList map2 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3,
             1);
-    dbref3.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), map2));
+    dbref3.setMap(
+            new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), map2));
     x06707.addDBRef(dbref3);
     DBRefEntry dbref4 = new DBRefEntry("UNIPROT", "0", "P0CE20");
     MapList map3 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3,
             1);
-    dbref4.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), map3));
+    dbref4.setMap(
+            new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), map3));
     x06707.addDBRef(dbref4);
 
     /*
@@ -621,7 +653,7 @@ public class CrossRefTest
      */
     final SequenceI x07547 = new Sequence("EMBL|X07547", "cccAAACCCTTTGGG");
     DBRefEntry dbref7 = new DBRefEntry("UNIPROT", "0", "P0CE20");
-    dbref7.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"),
+    dbref7.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"),
             new MapList(map2)));
     x07547.addDBRef(dbref7);
     DBRefEntry dbref8 = new DBRefEntry("UNIPROT", "0", "B0BCM4");
@@ -636,7 +668,7 @@ public class CrossRefTest
      * passed in calls to getSequences() - important to verify that
      * duplicate sequence fetches are not requested
      */
-    SequenceFetcher mockFetcher = new SequenceFetcher(false)
+    SequenceFetcher mockFetcher = new SequenceFetcher()
     {
       int call = 0;
 
@@ -664,7 +696,7 @@ public class CrossRefTest
         }
       }
     };
-    SequenceFetcherFactory.setSequenceFetcher(mockFetcher);
+    SequenceFetcher.setMockFetcher(mockFetcher);
 
     /*
      * find EMBL xrefs for Uniprot seqs and verify that
@@ -692,30 +724,31 @@ public class CrossRefTest
     /*
      * verify mappings added to Uniprot-to-EMBL dbrefs
      */
-    Mapping mapping = p0ce19.getDBRefs()[0].getMap();
+    Mapping mapping = p0ce19.getDBRefs().get(0).getMap();
     assertSame(j03321, mapping.getTo());
-    mapping = p0ce19.getDBRefs()[1].getMap();
+    mapping = p0ce19.getDBRefs().get(1).getMap();
     assertSame(x06707, mapping.getTo());
-    mapping = p0ce20.getDBRefs()[0].getMap();
+    mapping = p0ce20.getDBRefs().get(0).getMap();
     assertSame(j03321, mapping.getTo());
-    mapping = p0ce20.getDBRefs()[1].getMap();
+    mapping = p0ce20.getDBRefs().get(1).getMap();
     assertSame(x06707, mapping.getTo());
 
     /*
      * verify dbrefs on EMBL are mapped to alignment seqs
      */
-    assertSame(p0ce19, j03321.getDBRefs()[0].getMap().getTo());
-    assertSame(p0ce20, j03321.getDBRefs()[1].getMap().getTo());
-    assertSame(p0ce19, x06707.getDBRefs()[0].getMap().getTo());
-    assertSame(p0ce20, x06707.getDBRefs()[1].getMap().getTo());
+
+    assertSame(p0ce19, j03321.getDBRefs().get(0).getMap().getTo());
+    assertSame(p0ce20, j03321.getDBRefs().get(1).getMap().getTo());
+    assertSame(p0ce19, x06707.getDBRefs().get(0).getMap().getTo());
+    assertSame(p0ce20, x06707.getDBRefs().get(1).getMap().getTo());
 
     /*
      * verify new dbref on EMBL dbref mapping is copied to the
      * original Uniprot sequence
      */
-    assertEquals(4, p0ce19.getDBRefs().length);
-    assertEquals("PIR", p0ce19.getDBRefs()[3].getSource());
-    assertEquals("S01875", p0ce19.getDBRefs()[3].getAccessionId());
+    assertEquals(4, p0ce19.getDBRefs().size());
+    assertEquals("PIR", p0ce19.getDBRefs().get(3).getSource());
+    assertEquals("S01875", p0ce19.getDBRefs().get(3).getAccessionId());
   }
 
   @Test(groups = "Functional")
@@ -727,8 +760,8 @@ public class CrossRefTest
     assertFalse(CrossRef.sameSequence(null, seq1));
     assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "ABCDEF")));
     assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "abcdef")));
-    assertFalse(CrossRef
-            .sameSequence(seq1, new Sequence("seq2", "ABCDE-F")));
+    assertFalse(
+            CrossRef.sameSequence(seq1, new Sequence("seq2", "ABCDE-F")));
     assertFalse(CrossRef.sameSequence(seq1, new Sequence("seq2", "BCDEF")));
   }
 }