JAL-2110 bug fix / test for searchDataset mappings created
author gmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 4 Jul 2016 14:21:17 +0000 (15:21 +0100)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 4 Jul 2016 14:21:17 +0000 (15:21 +0100)
src/jalview/analysis/CrossRef.java
test/jalview/analysis/CrossRefTest.java

index e73912d..2b5a0e2 100644 (file)
@@ -145,18 +145,18 @@ public class CrossRef
        * find sequence's direct (dna-to-dna, peptide-to-peptide) xrefs
        */
       DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(fromDna, seq.getDBRefs());
-      List<SequenceI> rseqs = new ArrayList<SequenceI>();
+      List<SequenceI> foundSeqs = new ArrayList<SequenceI>();
 
       /*
        * find sequences in the alignment which xref one of these DBRefs
        * i.e. is xref-ed to a common sequence identifier
        */
-      searchDatasetXrefs(fromDna, seq, lrfs, rseqs, null);
+      searchDatasetXrefs(fromDna, seq, lrfs, foundSeqs, null);
 
       /*
        * add those sequences' (dna-to-peptide or peptide-to-dna) dbref sources
        */
-      for (SequenceI rs : rseqs)
+      for (SequenceI rs : foundSeqs)
       {
         DBRefEntry[] xrs = DBRefUtils
                 .selectDbRefs(!fromDna, rs.getDBRefs());
@@ -728,11 +728,11 @@ public class CrossRef
    *          context was searching from Protein sequences
    * @param sequenceI
    * @param lrfs
-   * @param rseqs
+   * @param foundSeqs
    * @return true if matches were found.
    */
   private boolean searchDatasetXrefs(boolean fromDna, SequenceI sequenceI,
-          DBRefEntry[] lrfs, List<SequenceI> rseqs, AlignedCodonFrame cf)
+          DBRefEntry[] lrfs, List<SequenceI> foundSeqs, AlignedCodonFrame cf)
   {
     boolean found = false;
     if (lrfs == null)
@@ -745,7 +745,7 @@ public class CrossRef
       // add in wildcards
       xref.setVersion(null);
       xref.setMap(null);
-      found |= searchDataset(fromDna, sequenceI, xref, rseqs, cf, false);
+      found |= searchDataset(fromDna, sequenceI, xref, foundSeqs, cf, false);
     }
     return found;
   }
@@ -757,13 +757,13 @@ public class CrossRef
    * @param fromDna
    *          true if context was searching for refs *from* dna sequence, false
    *          if context was searching for refs *from* protein sequence
-   * @param sequenceI
+   * @param fromSeq
    *          a sequence to ignore (start point of search)
    * @param xrf
    *          a cross-reference to try to match
-   * @param rseqs
+   * @param foundSeqs
    *          result list to add to
-   * @param cf
+   * @param mappings
    *          a set of sequence mappings to add to
    * @param direct
    *          - indicates the type of relationship between returned sequences,
@@ -778,8 +778,8 @@ public class CrossRef
    *          </ul>
    * @return true if relationship found and sequence added.
    */
-  boolean searchDataset(boolean fromDna, SequenceI sequenceI,
-          DBRefEntry xrf, List<SequenceI> rseqs, AlignedCodonFrame cf,
+  boolean searchDataset(boolean fromDna, SequenceI fromSeq,
+          DBRefEntry xrf, List<SequenceI> foundSeqs, AlignedCodonFrame mappings,
           boolean direct)
   {
     boolean found = false;
@@ -808,7 +808,7 @@ public class CrossRef
                             + nxt.getDatasetSequence().getDisplayId(true)
                             + ")");
           }
-          if (nxt == sequenceI || nxt == sequenceI.getDatasetSequence())
+          if (nxt == fromSeq || nxt == fromSeq.getDatasetSequence())
           {
             continue;
           }
@@ -840,13 +840,17 @@ public class CrossRef
           // }
           if (!cands.isEmpty())
           {
-            if (!rseqs.contains(nxt))
+            if (!foundSeqs.contains(nxt))
             {
               found = true;
-              rseqs.add(nxt);
-              if (cf != null)
+              foundSeqs.add(nxt);
+              if (mappings != null && !direct)
               {
-                // don't search if we aren't given a codon map object
+                /*
+                 * if the matched sequence has mapped dbrefs to
+                 * protein product / cdna, add equivalent mappings to
+                 * our source sequence
+                 */
                 for (DBRefEntry candidate : cands)
                 {
                   Mapping mapping = candidate.getMap();
@@ -856,23 +860,21 @@ public class CrossRef
                     if (mapping.getTo() != null
                             && map.getFromRatio() != map.getToRatio())
                     {
-                      // get sense of map correct for adding to product
-                      // alignment.
-                      if (fromDna)
+                      /*
+                       * add a mapping, as from dna to peptide sequence
+                       */
+                      if (map.getFromRatio() == 3)
                       {
-                        // map is from dna seq to a protein product
-                        cf.addMap(sequenceI, nxt, map);
+                        mappings.addMap(nxt, fromSeq, map);
                       }
                       else
                       {
-                        // map should be from protein seq to its coding dna
-                        cf.addMap(nxt, sequenceI, map.getInverse());
+                        mappings.addMap(nxt, fromSeq, map.getInverse());
                       }
                     }
                   }
                 }
               }
-              // TODO: add mapping between sequences if necessary
             }
           }
         }
index 0c3e4d5..62bcae8 100644 (file)
@@ -27,8 +27,9 @@ import static org.testng.AssertJUnit.assertNotSame;
 import static org.testng.AssertJUnit.assertNull;
 import static org.testng.AssertJUnit.assertSame;
 import static org.testng.AssertJUnit.assertTrue;
-import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
 
+import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping;
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.DBRefEntry;
@@ -248,9 +249,14 @@ public class CrossRefTest
      * peptide sequence with UNIPROT dbref
      */
     SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
-    dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+    Mapping map = new Mapping(new Sequence("pep2", "MLAVSRG"), new MapList(
+            new int[] { 1, 21 }, new int[] {
+        1, 7 }, 3, 1));
+    DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
+    dna1.addDBRef(dbref);
     dna1.addDBRef(new DBRefEntry("EMBL", "0", "AF039662"));
     SequenceI pep1 = new Sequence("Q9ZTS2", "MLAVSRGQ");
+    dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
     pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
     AlignmentI al = new Alignment(new SequenceI[] { dna1, pep1 });
 
@@ -259,76 +265,100 @@ public class CrossRefTest
     /*
      * first search for a dbref nowhere on the alignment:
      */
-    DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "P30419");
+    dbref = new DBRefEntry("UNIPROT", "0", "P30419");
     CrossRef testee = new CrossRef(al.getSequencesArray(), al);
-    boolean found = testee.searchDataset(true, dna1, dbref, result, null,
+    AlignedCodonFrame acf = new AlignedCodonFrame();
+    boolean found = testee.searchDataset(true, dna1, dbref, result, acf,
             true);
     assertFalse(found);
     assertTrue(result.isEmpty());
-
-    // TODO we are setting direct=true here but it is set to
-    // false in Jalview code...
+    assertTrue(acf.isEmpty());
 
     /*
      * search for a protein sequence with dbref UNIPROT:Q9ZTS2
      */
+    acf = new AlignedCodonFrame();
     dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
     found = testee.searchDataset(!dna1.isProtein(), dna1, dbref, result,
-            null, false); // search dataset with a protein xref from a dna
+            acf, false); // search dataset with a protein xref from a dna
                           // sequence to locate the protein product
     assertTrue(found);
     assertEquals(1, result.size());
     assertSame(pep1, result.get(0));
+    assertTrue(acf.isEmpty());
 
     /*
      * search for a nucleotide sequence with dbref UNIPROT:Q9ZTS2
      */
     result.clear();
+    acf = new AlignedCodonFrame();
     dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2");
     found = testee.searchDataset(!pep1.isProtein(), pep1, dbref, result,
-            null, false); // search dataset with a protein's direct dbref to
+            acf, false); // search dataset with a protein's direct dbref to
                           // locate dna sequences with matching xref
     assertTrue(found);
     assertEquals(1, result.size());
     assertSame(dna1, result.get(0));
+    // should now have a mapping from dna to pep1
+    List<SequenceToSequenceMapping> mappings = acf.getMappings();
+    assertEquals(1, mappings.size());
+    SequenceToSequenceMapping mapping = mappings.get(0);
+    assertSame(dna1, mapping.getFromSeq());
+    assertSame(pep1, mapping.getMapping().getTo());
+    MapList mapList = mapping.getMapping().getMap();
+    assertEquals(1, mapList.getToRatio());
+    assertEquals(3, mapList.getFromRatio());
+    assertEquals(1, mapList.getFromRanges().size());
+    assertEquals(1, mapList.getFromRanges().get(0)[0]);
+    assertEquals(21, mapList.getFromRanges().get(0)[1]);
+    assertEquals(1, mapList.getToRanges().size());
+    assertEquals(1, mapList.getToRanges().get(0)[0]);
+    assertEquals(7, mapList.getToRanges().get(0)[1]);
   }
 
   /**
    * Test for finding 'product' sequences for the case where the selected
-   * sequence has a dbref with a mapping to a sequence
+   * sequence has a dbref with a mapping to a sequence. This represents the case
+   * where either
+   * <ul>
+   * <li>a fetched sequence is already decorated with its cross-reference (e.g.
+   * EMBL + translation), or</li>
+   * <li>Get Cross-References has been done once resulting in instantiated
+   * cross-reference mappings</li>
+   * </ul>
    */
   @Test(groups = { "Functional" })
   public void testFindXrefSequences_fromDbRefMap()
   {
     /*
-     * two peptide sequences each with a DBRef and SequenceFeature
+     * scenario: nucleotide sequence AF039662
+     *   with dbref + mapping to Q9ZTS2 and P30419
+     *     which themselves each have a dbref and feature
      */
+    SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
     SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV");
-    pep1.addDBRef(new DBRefEntry("Pfam", "0", "PF00111"));
+    SequenceI pep2 = new Sequence("P30419", "MTRRSQIF");
+    dna1.createDatasetSequence();
+    pep1.createDatasetSequence();
+    pep2.createDatasetSequence();
+
+    pep1.getDatasetSequence().addDBRef(
+            new DBRefEntry("Pfam", "0", "PF00111"));
     pep1.addSequenceFeature(new SequenceFeature("type", "desc", 12, 14, 1f,
             "group"));
-    SequenceI pep2 = new Sequence("P30419", "MTRRSQIF");
-    pep2.addDBRef(new DBRefEntry("PDB", "0", "3JTK"));
+    pep2.getDatasetSequence().addDBRef(new DBRefEntry("PDB", "0", "3JTK"));
     pep2.addSequenceFeature(new SequenceFeature("type2", "desc2", 13, 15,
             12f, "group2"));
 
-    /*
-     * nucleotide sequence (to go in the alignment)
-     */
-    SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
-
-    /*
-     * add DBRefEntry's to dna1 with mappings from dna to both peptides
-     */
     MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 },
             3, 1);
     Mapping map = new Mapping(pep1, mapList);
     DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map);
-    dna1.addDBRef(dbRef1);
+    dna1.getDatasetSequence().addDBRef(dbRef1);
     mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1);
     map = new Mapping(pep2, mapList);
     DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map);
-    dna1.addDBRef(dbRef2);
+    dna1.getDatasetSequence().addDBRef(dbRef2);
 
     /*
      * find UNIPROT xrefs for nucleotide sequence - it should pick up 
@@ -348,39 +378,21 @@ public class CrossRefTest
   }
 
   /**
-   * Helper method to assert seq1 looks like a copy of seq2
+   * Helper method that verifies that 'copy' has the same name, start, end,
+   * sequence and dataset sequence object as 'original' (but is not the same
+   * object)
    * 
-   * @param seq1
-   * @param seq2
+   * @param copy
+   * @param original
    */
-  private void checkCopySequence(SequenceI seq1, SequenceI seq2)
+  private void checkCopySequence(SequenceI copy, SequenceI original)
   {
-    assertNotSame(seq1, seq2);
-    assertEquals(seq1.getName(), seq2.getName());
-    assertEquals(seq1.getStart(), seq2.getStart());
-    assertEquals(seq1.getEnd(), seq2.getEnd());
-    assertEquals(seq1.getSequenceAsString(), seq2.getSequenceAsString());
-
-    /*
-     * compare dbrefs
-     */
-    assertArrayEquals(seq1.getDBRefs(), seq2.getDBRefs());
-    // check one to verify a copy, not the same object
-    if (seq1.getDBRefs().length > 0)
-    {
-      assertNotSame(seq1.getDBRefs()[0], seq2.getDBRefs()[0]);
-    }
-
-    /*
-     * compare features
-     */
-    assertArrayEquals(seq1.getSequenceFeatures(),
-            seq2.getSequenceFeatures());
-    if (seq1.getSequenceFeatures().length > 0)
-    {
-      assertNotSame(seq1.getSequenceFeatures()[0],
-              seq2.getSequenceFeatures()[0]);
-    }
+    assertNotSame(copy, original);
+    assertSame(copy.getDatasetSequence(), original.getDatasetSequence());
+    assertEquals(copy.getName(), original.getName());
+    assertEquals(copy.getStart(), original.getStart());
+    assertEquals(copy.getEnd(), original.getEnd());
+    assertEquals(copy.getSequenceAsString(), original.getSequenceAsString());
   }
 
   /**