JAL-1270 JUnit to TestNG refactoring
[jalview.git] / test / jalview / analysis / AlignmentUtilsTests.java
index 71b1bcb..d32181f 100644 (file)
  */
 package jalview.analysis;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertSame;
-import static org.junit.Assert.assertTrue;
-
+import static org.testng.AssertJUnit.assertSame;
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.AssertJUnit.assertNull;
+import org.testng.annotations.Test;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
-
-import org.junit.Test;
+import java.util.Set;
 
 import jalview.datamodel.AlignedCodonFrame;
 import jalview.datamodel.Alignment;
@@ -41,11 +44,14 @@ import jalview.datamodel.AlignmentI;
 import jalview.datamodel.Annotation;
 import jalview.datamodel.DBRefEntry;
 import jalview.datamodel.Mapping;
+import jalview.datamodel.SearchResults;
+import jalview.datamodel.SearchResults.Match;
 import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceI;
 import jalview.io.AppletFormatAdapter;
 import jalview.io.FormatAdapter;
 import jalview.util.MapList;
+import jalview.util.MappingUtils;
 
 public class AlignmentUtilsTests 
 {
@@ -82,13 +88,16 @@ public class AlignmentUtilsTests
           "GGGTCAGGCAGT\n";
   // @formatter:on
 
-  public static Sequence ts=new Sequence("short","ASDASDASDASDASDASDASDASDASDASDASDASDASD");
+  // public static Sequence ts=new
+  // Sequence("short","ASDASDASDASDASDASDASDASDASDASDASDASDASD");
+  public static Sequence ts = new Sequence("short",
+          "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm");
 
   @Test
-  public void testExpandFlanks()
+  public void testExpandContext()
   {
     AlignmentI al = new Alignment(new Sequence[] {});
-    for (int i=4;i<14;i+=3)
+    for (int i = 4; i < 14; i += 2)
     {
       SequenceI s1=ts.deriveSequence().getSubSequence(i, i+7);
       al.addSequence(s1);
@@ -96,18 +105,132 @@ public class AlignmentUtilsTests
     System.out.println(new AppletFormatAdapter().formatSequences("Clustal", al, true));
     for (int flnk=-1;flnk<25; flnk++)
     {
-      AlignmentI exp;
-      System.out.println("\nFlank size: "+flnk);
-      System.out.println(new AppletFormatAdapter().formatSequences("Clustal", exp=AlignmentUtils.expandContext(al, flnk), true));
-      if (flnk==-1) {
-        for (SequenceI sq:exp.getSequences())
+      AlignmentI exp = AlignmentUtils.expandContext(al, flnk);
+      System.out.println("\nFlank size: " + flnk);
+      System.out.println(new AppletFormatAdapter().formatSequences(
+              "Clustal", exp, true));
+      if (flnk == -1)
       {
+        /*
+         * Full expansion to complete sequences
+         */
+        for (SequenceI sq : exp.getSequences())
+        {
           String ung = sq.getSequenceAsString().replaceAll("-+", "");
-          assertTrue("Flanking sequence not the same as original dataset sequence.\n"+ung+"\n"+sq.getDatasetSequence().getSequenceAsString(),ung.equalsIgnoreCase(sq.getDatasetSequence().getSequenceAsString()));
+          final String errorMsg = "Flanking sequence not the same as original dataset sequence.\n"
+                  + ung
+                  + "\n"
+                  + sq.getDatasetSequence().getSequenceAsString();
+          assertTrue(errorMsg, ung.equalsIgnoreCase(sq.getDatasetSequence()
+                  .getSequenceAsString()));
+        }
       }
+      else if (flnk == 24)
+      {
+        /*
+         * Last sequence is fully expanded, others have leading gaps to match
+         */
+        assertTrue(exp.getSequenceAt(4).getSequenceAsString()
+                .startsWith("abc"));
+        assertTrue(exp.getSequenceAt(3).getSequenceAsString()
+                .startsWith("--abc"));
+        assertTrue(exp.getSequenceAt(2).getSequenceAsString()
+                .startsWith("----abc"));
+        assertTrue(exp.getSequenceAt(1).getSequenceAsString()
+                .startsWith("------abc"));
+        assertTrue(exp.getSequenceAt(0).getSequenceAsString()
+                .startsWith("--------abc"));
       }
     }
-    }    
+  }
+
+  /**
+   * Test that sequence annotation is correctly re-aligned by expandContext
+   */
+  @Test
+  public void testExpandContext_annotation()
+  {
+    AlignmentI al = new Alignment(new Sequence[]
+    {});
+    SequenceI ds = new Sequence("Seq1", "ABCDEFGHI");
+    // subsequence DEF (sequence positions 4-6 of ABCDEFGHI):
+    SequenceI seq1 = ds.deriveSequence().getSubSequence(3, 6);
+    al.addSequence(seq1);
+
+    /*
+     * Annotate D/E/F with values 4/5/6 (the same as their sequence positions)
+     */
+    Annotation[] anns = new Annotation[]
+    { new Annotation(4), new Annotation(5), new Annotation(6) };
+    AlignmentAnnotation ann = new AlignmentAnnotation("SS",
+            "secondary structure", anns);
+    seq1.addAlignmentAnnotation(ann);
+
+    /*
+     * Before expansion the annotations array has one entry per aligned column
+     */
+    assertEquals(3, ann.annotations.length);
+    assertEquals(4, ann.annotations[0].value, 0.001);
+    assertEquals(5, ann.annotations[1].value, 0.001);
+    assertEquals(6, ann.annotations[2].value, 0.001);
+
+    /*
+     * Check annotation to sequence position mappings before expanding the
+     * sequence; these are set up in Sequence.addAlignmentAnnotation ->
+     * Annotation.setSequenceRef -> createSequenceMappings
+     */
+    assertNull(ann.getAnnotationForPosition(1));
+    assertNull(ann.getAnnotationForPosition(2));
+    assertNull(ann.getAnnotationForPosition(3));
+    assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
+    assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
+    assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
+    assertNull(ann.getAnnotationForPosition(7));
+    assertNull(ann.getAnnotationForPosition(8));
+    assertNull(ann.getAnnotationForPosition(9));
+
+    /*
+     * Expand the subsequence to the full sequence abcDEFghi (flanks lower case)
+     */
+    AlignmentI expanded = AlignmentUtils.expandContext(al, -1);
+    assertEquals("abcDEFghi", expanded.getSequenceAt(0)
+            .getSequenceAsString());
+
+    /*
+     * Confirm the alignment and sequence have the same SS annotation,
+     * referencing the expanded sequence
+     */
+    ann = expanded.getSequenceAt(0).getAnnotation()[0];
+    assertSame(ann, expanded.getAlignmentAnnotation()[0]);
+    assertSame(expanded.getSequenceAt(0), ann.sequenceRef);
+
+    /*
+     * The annotations array should now cover all 9 columns, holding null
+     * except at the annotated columns (array indices 3-5)
+     */
+    assertNull(ann.annotations[0]);
+    assertNull(ann.annotations[1]);
+    assertNull(ann.annotations[2]);
+    assertEquals(4, ann.annotations[3].value, 0.001);
+    assertEquals(5, ann.annotations[4].value, 0.001);
+    assertEquals(6, ann.annotations[5].value, 0.001);
+    assertNull(ann.annotations[6]);
+    assertNull(ann.annotations[7]);
+    assertNull(ann.annotations[8]);
+
+    /*
+     * Sequence position mappings should be unchanged by the expansion
+     */
+    assertNull(ann.getAnnotationForPosition(1));
+    assertNull(ann.getAnnotationForPosition(2));
+    assertNull(ann.getAnnotationForPosition(3));
+    assertEquals(4, ann.getAnnotationForPosition(4).value, 0.001);
+    assertEquals(5, ann.getAnnotationForPosition(5).value, 0.001);
+    assertEquals(6, ann.getAnnotationForPosition(6).value, 0.001);
+    assertNull(ann.getAnnotationForPosition(7));
+    assertNull(ann.getAnnotationForPosition(8));
+    assertNull(ann.getAnnotationForPosition(9));
+  }
 
   /**
    * Test method that returns a map of lists of sequences by sequence name.
@@ -869,4 +992,276 @@ public class AlignmentUtilsTests
     assertTrue(AlignmentUtils.haveCrossRef(seq1, seq2));
     assertTrue(AlignmentUtils.haveCrossRef(seq2, seq1));
   }
+
+  /**
+   * Test makeExonAlignment: exon-only dna is extracted and mappings updated.
+   */
+  @Test
+  public void testMakeExonAlignment()
+  {
+    SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
+    SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC");
+    SequenceI pep1 = new Sequence("pep1", "GF");
+    SequenceI pep2 = new Sequence("pep2", "GFP");
+    dna1.createDatasetSequence();
+    dna2.createDatasetSequence();
+    pep1.createDatasetSequence();
+    pep2.createDatasetSequence();
+
+    Set<AlignedCodonFrame> mappings = new HashSet<AlignedCodonFrame>();
+    MapList map = new MapList(new int[]
+    { 4, 6, 10, 12 }, new int[]
+    { 1, 2 }, 3, 1);
+    AlignedCodonFrame acf = new AlignedCodonFrame();
+    acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
+    mappings.add(acf);
+    map = new MapList(new int[]
+    { 1, 3, 7, 9, 13, 15 }, new int[]
+    { 1, 3 }, 3, 1);
+    acf = new AlignedCodonFrame();
+    acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
+    mappings.add(acf);
+    
+    AlignmentI exons = AlignmentUtils.makeExonAlignment(new SequenceI[]
+    { dna1, dna2 }, mappings);
+    assertEquals(2, exons.getSequences().size());
+    assertEquals("GGGTTT", exons.getSequenceAt(0).getSequenceAsString());
+    assertEquals("GGGTTTCCC", exons.getSequenceAt(1).getSequenceAsString());
+
+    /*
+     * Verify updated mappings (still one per dna/peptide pair)
+     */
+    assertEquals(2, mappings.size());
+
+    /*
+     * Mapping from pep1 to GGGTTT in first new exon sequence
+     */
+    List<AlignedCodonFrame> pep1Mapping = MappingUtils
+            .findMappingsForSequence(pep1, mappings);
+    assertEquals(1, pep1Mapping.size());
+    // G (pep1 position 1) should map to GGG at exon positions 1-3
+    SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
+    assertEquals(1, sr.getResults().size());
+    Match m = sr.getResults().get(0);
+    assertEquals(exons.getSequenceAt(0).getDatasetSequence(),
+            m.getSequence());
+    assertEquals(1, m.getStart());
+    assertEquals(3, m.getEnd());
+    // F (pep1 position 2) should map to TTT at exon positions 4-6
+    sr = MappingUtils.buildSearchResults(pep1, 2, mappings);
+    m = sr.getResults().get(0);
+    assertEquals(exons.getSequenceAt(0).getDatasetSequence(),
+            m.getSequence());
+    assertEquals(4, m.getStart());
+    assertEquals(6, m.getEnd());
+
+    /*
+     * Mapping from pep2 to GGGTTTCCC in second new exon sequence
+     */
+    List<AlignedCodonFrame> pep2Mapping = MappingUtils
+            .findMappingsForSequence(pep2, mappings);
+    assertEquals(1, pep2Mapping.size());
+    // G (pep2 position 1) should map to GGG at exon positions 1-3
+    sr = MappingUtils.buildSearchResults(pep2, 1, mappings);
+    assertEquals(1, sr.getResults().size());
+    m = sr.getResults().get(0);
+    assertEquals(exons.getSequenceAt(1).getDatasetSequence(),
+            m.getSequence());
+    assertEquals(1, m.getStart());
+    assertEquals(3, m.getEnd());
+    // F (pep2 position 2) should map to TTT at exon positions 4-6
+    sr = MappingUtils.buildSearchResults(pep2, 2, mappings);
+    m = sr.getResults().get(0);
+    assertEquals(exons.getSequenceAt(1).getDatasetSequence(),
+            m.getSequence());
+    assertEquals(4, m.getStart());
+    assertEquals(6, m.getEnd());
+    // P (pep2 position 3) should map to CCC at exon positions 7-9
+    sr = MappingUtils.buildSearchResults(pep2, 3, mappings);
+    m = sr.getResults().get(0);
+    assertEquals(exons.getSequenceAt(1).getDatasetSequence(),
+            m.getSequence());
+    assertEquals(7, m.getStart());
+    assertEquals(9, m.getEnd());
+  }
+
+  /**
+   * Test the method that makes an exon-only sequence from a DNA sequence and
+   * its product mapping. Test includes the expected case that the DNA sequence
+   * already has a protein product (Uniprot translation) which in turn has a
+   * cross-reference (DBRef) to the EMBLCDS record.
+   */
+  @Test
+  public void testMakeExonSequences()
+  {
+    SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
+    SequenceI pep1 = new Sequence("pep1", "GF");
+    dna1.createDatasetSequence();
+    pep1.createDatasetSequence();
+    pep1.getDatasetSequence().addDBRef(
+            new DBRefEntry("EMBLCDS", "2", "A12345"));
+
+    /*
+     * Make the mapping from dna to protein. The protein's EMBLCDS|A12345 DBRef
+     * is expected to appear on the exon and in its name (asserted below).
+     */
+    Set<AlignedCodonFrame> mappings = new HashSet<AlignedCodonFrame>();
+    MapList map = new MapList(new int[]
+    { 4, 6, 10, 12 }, new int[]
+    { 1, 2 }, 3, 1);
+    AlignedCodonFrame acf = new AlignedCodonFrame();
+    acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
+    mappings.add(acf);
+
+    AlignedCodonFrame newMapping = new AlignedCodonFrame();
+    List<SequenceI> exons = AlignmentUtils.makeExonSequences(dna1, acf,
+            newMapping);
+    assertEquals(1, exons.size());
+    SequenceI exon = exons.get(0);
+
+    assertEquals("GGGTTT", exon.getSequenceAsString());
+    assertEquals("dna1|A12345", exon.getName());
+    assertEquals(1, exon.getDBRef().length);
+    DBRefEntry cdsRef = exon.getDBRef()[0];
+    assertEquals("EMBLCDS", cdsRef.getSource());
+    assertEquals("2", cdsRef.getVersion());
+    assertEquals("A12345", cdsRef.getAccessionId());
+  }
+
+  /**
+   * Test the method that makes an exon-only alignment from a DNA sequence and
+   * its product mappings, for the case where there are multiple exon mappings
+   * to different protein products.
+   */
+  @Test
+  public void testMakeExonAlignment_multipleProteins()
+  {
+    SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa");
+    SequenceI pep1 = new Sequence("pep1", "GF"); // GGGTTT
+    SequenceI pep2 = new Sequence("pep2", "KP"); // aaaccc
+    SequenceI pep3 = new Sequence("pep3", "KF"); // aaaTTT
+    dna1.createDatasetSequence();
+    pep1.createDatasetSequence();
+    pep2.createDatasetSequence();
+    pep3.createDatasetSequence();
+    pep1.getDatasetSequence().addDBRef(
+            new DBRefEntry("EMBLCDS", "2", "A12345"));
+    pep2.getDatasetSequence().addDBRef(
+            new DBRefEntry("EMBLCDS", "3", "A12346"));
+    pep3.getDatasetSequence().addDBRef(
+            new DBRefEntry("EMBLCDS", "4", "A12347"));
+
+    /*
+     * Make the mappings from dna to protein. Using LinkedHashSet is a
+     * convenience so results are in the input order. It is not asserted that
+     * exon sequences are generated in any particular order in general.
+     */
+    Set<AlignedCodonFrame> mappings = new LinkedHashSet<AlignedCodonFrame>();
+    // map ...GGG...TTT (dna positions 4-6, 10-12) to GF
+    MapList map = new MapList(new int[]
+    { 4, 6, 10, 12 }, new int[]
+    { 1, 2 }, 3, 1);
+    AlignedCodonFrame acf = new AlignedCodonFrame();
+    acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map);
+    mappings.add(acf);
+
+    // map aaa...ccc (dna positions 1-3, 7-9) to KP
+    map = new MapList(new int[]
+    { 1, 3, 7, 9 }, new int[]
+    { 1, 2 }, 3, 1);
+    acf = new AlignedCodonFrame();
+    acf.addMap(dna1.getDatasetSequence(), pep2.getDatasetSequence(), map);
+    mappings.add(acf);
+
+    // map aaa......TTT (dna positions 1-3, 10-12) to KF
+    map = new MapList(new int[]
+    { 1, 3, 10, 12 }, new int[]
+    { 1, 2 }, 3, 1);
+    acf = new AlignedCodonFrame();
+    acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map);
+    mappings.add(acf);
+
+    /*
+     * Create the Exon alignment; also replaces the dna-to-protein mappings with
+     * exon-to-protein and exon-to-dna mappings
+     */
+    AlignmentI exal = AlignmentUtils.makeExonAlignment(new SequenceI[]
+    { dna1 }, mappings);
+
+    /*
+     * Verify we have 3 exon sequences, mapped to pep1/2/3 respectively
+     */
+    List<SequenceI> exons = exal.getSequences();
+    assertEquals(3, exons.size());
+
+    SequenceI exon = exons.get(0);
+    assertEquals("GGGTTT", exon.getSequenceAsString());
+    assertEquals("dna1|A12345", exon.getName());
+    assertEquals(1, exon.getDBRef().length);
+    DBRefEntry cdsRef = exon.getDBRef()[0];
+    assertEquals("EMBLCDS", cdsRef.getSource());
+    assertEquals("2", cdsRef.getVersion());
+    assertEquals("A12345", cdsRef.getAccessionId());
+
+    exon = exons.get(1);
+    assertEquals("aaaccc", exon.getSequenceAsString());
+    assertEquals("dna1|A12346", exon.getName());
+    assertEquals(1, exon.getDBRef().length);
+    cdsRef = exon.getDBRef()[0];
+    assertEquals("EMBLCDS", cdsRef.getSource());
+    assertEquals("3", cdsRef.getVersion());
+    assertEquals("A12346", cdsRef.getAccessionId());
+
+    exon = exons.get(2);
+    assertEquals("aaaTTT", exon.getSequenceAsString());
+    assertEquals("dna1|A12347", exon.getName());
+    assertEquals(1, exon.getDBRef().length);
+    cdsRef = exon.getDBRef()[0];
+    assertEquals("EMBLCDS", cdsRef.getSource());
+    assertEquals("4", cdsRef.getVersion());
+    assertEquals("A12347", cdsRef.getAccessionId());
+
+    /*
+     * Verify there are mappings from each exon sequence to its protein product
+     * and also to its dna source (iterated in the input order of the mappings)
+     */
+    Iterator<AlignedCodonFrame> newMappingsIterator = mappings.iterator();
+
+    // first mapping: dna1 - exon1 (GGGTTT) - pep1
+    AlignedCodonFrame exonMapping = newMappingsIterator.next();
+    List<Mapping> dnaMappings = exonMapping.getMappingsForSequence(dna1);
+    assertEquals(1, dnaMappings.size());
+    assertSame(exons.get(0).getDatasetSequence(), dnaMappings.get(0)
+            .getTo());
+    assertEquals("G(1) in CDS should map to G(4) in DNA", 4, dnaMappings
+            .get(0).getMap().getToPosition(1));
+    List<Mapping> peptideMappings = exonMapping
+            .getMappingsForSequence(pep1);
+    assertEquals(1, peptideMappings.size());
+    assertSame(pep1.getDatasetSequence(), peptideMappings.get(0).getTo());
+
+    // second mapping: dna1 - exon2 (aaaccc) - pep2
+    exonMapping = newMappingsIterator.next();
+    dnaMappings = exonMapping.getMappingsForSequence(dna1);
+    assertEquals(1, dnaMappings.size());
+    assertSame(exons.get(1).getDatasetSequence(), dnaMappings.get(0)
+            .getTo());
+    assertEquals("c(4) in CDS should map to c(7) in DNA", 7, dnaMappings
+            .get(0).getMap().getToPosition(4));
+    peptideMappings = exonMapping.getMappingsForSequence(pep2);
+    assertEquals(1, peptideMappings.size());
+    assertSame(pep2.getDatasetSequence(), peptideMappings.get(0).getTo());
+
+    // third mapping: dna1 - exon3 (aaaTTT) - pep3
+    exonMapping = newMappingsIterator.next();
+    dnaMappings = exonMapping.getMappingsForSequence(dna1);
+    assertEquals(1, dnaMappings.size());
+    assertSame(exons.get(2).getDatasetSequence(), dnaMappings.get(0)
+            .getTo());
+    assertEquals("T(4) in CDS should map to T(10) in DNA", 10, dnaMappings
+            .get(0).getMap().getToPosition(4));
+    peptideMappings = exonMapping.getMappingsForSequence(pep3);
+    assertEquals(1, peptideMappings.size());
+    assertSame(pep3.getDatasetSequence(), peptideMappings.get(0).getTo());
+  }
 }