JAL-2738 JAL-2154 add DBRef from transcript to synthesized CDS and vice versa
[jalview.git] / test / jalview / analysis / AlignmentUtilsTests.java
index 1e71c41..7c64193 100644 (file)
@@ -1,6 +1,6 @@
 /*
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.9.0b2)
- * Copyright (C) 2015 The Jalview Authors
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
  * 
  * This file is part of Jalview.
  * 
@@ -35,12 +35,17 @@ import jalview.datamodel.AlignmentI;
 import jalview.datamodel.Annotation;
 import jalview.datamodel.DBRefEntry;
 import jalview.datamodel.Mapping;
-import jalview.datamodel.SearchResults;
-import jalview.datamodel.SearchResults.Match;
+import jalview.datamodel.SearchResultMatchI;
+import jalview.datamodel.SearchResultsI;
 import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
+import jalview.datamodel.features.SequenceFeatures;
+import jalview.gui.JvOptionPane;
 import jalview.io.AppletFormatAdapter;
+import jalview.io.DataSourceType;
+import jalview.io.FileFormat;
+import jalview.io.FileFormatI;
 import jalview.io.FormatAdapter;
 import jalview.util.MapList;
 import jalview.util.MappingUtils;
@@ -53,10 +58,19 @@ import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 
+import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 public class AlignmentUtilsTests
 {
+
+  @BeforeClass(alwaysRun = true)
+  public void setUpJvOptionPane()
+  {
+    JvOptionPane.setInteractiveMode(false);
+    JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
+  }
+
   public static Sequence ts = new Sequence("short",
           "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm");
 
@@ -69,14 +83,15 @@ public class AlignmentUtilsTests
       SequenceI s1 = ts.deriveSequence().getSubSequence(i, i + 7);
       al.addSequence(s1);
     }
-    System.out.println(new AppletFormatAdapter().formatSequences("Clustal",
+    System.out.println(new AppletFormatAdapter().formatSequences(
+            FileFormat.Clustal,
             al, true));
     for (int flnk = -1; flnk < 25; flnk++)
     {
       AlignmentI exp = AlignmentUtils.expandContext(al, flnk);
       System.out.println("\nFlank size: " + flnk);
       System.out.println(new AppletFormatAdapter().formatSequences(
-              "Clustal", exp, true));
+              FileFormat.Clustal, exp, true));
       if (flnk == -1)
       {
         /*
@@ -209,7 +224,7 @@ public class AlignmentUtilsTests
   {
     final String data = ">Seq1Name\nKQYL\n" + ">Seq2Name\nRFPW\n"
             + ">Seq1Name\nABCD\n";
-    AlignmentI al = loadAlignment(data, "FASTA");
+    AlignmentI al = loadAlignment(data, FileFormat.Fasta);
     Map<String, List<SequenceI>> map = AlignmentUtils
             .getSequencesByName(al);
     assertEquals(2, map.keySet().size());
@@ -229,11 +244,11 @@ public class AlignmentUtilsTests
    * @return
    * @throws IOException
    */
-  protected AlignmentI loadAlignment(final String data, String format)
+  protected AlignmentI loadAlignment(final String data, FileFormatI format)
           throws IOException
   {
     AlignmentI a = new FormatAdapter().readFile(data,
-            AppletFormatAdapter.PASTE, format);
+            DataSourceType.PASTE, format);
     a.setDataset(null);
     return a;
   }
@@ -1029,14 +1044,18 @@ public class AlignmentUtilsTests
     dna.addCodonFrame(acf);
 
     /*
-     * In this case, mappings originally came from matching Uniprot accessions - so need an xref on dna involving those regions. These are normally constructed from CDS annotation
+     * In this case, mappings originally came from matching Uniprot accessions 
+     * - so need an xref on dna involving those regions. 
+     * These are normally constructed from CDS annotation
      */
     DBRefEntry dna1xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep1",
             new Mapping(mapfordna1));
-    dna1.getDatasetSequence().addDBRef(dna1xref);
+    dna1.addDBRef(dna1xref);
+    assertEquals(2, dna1.getDBRefs().length); // to self and to pep1
     DBRefEntry dna2xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep2",
             new Mapping(mapfordna2));
-    dna2.getDatasetSequence().addDBRef(dna2xref);
+    dna2.addDBRef(dna2xref);
+    assertEquals(2, dna2.getDBRefs().length); // to self and to pep2
 
     /*
      * execute method under test:
@@ -1091,6 +1110,38 @@ public class AlignmentUtilsTests
     assertEquals(cdsMapping.getInverse(), dbref.getMap().getMap());
 
     /*
+     * verify cDNA has added a dbref with mapping to CDS
+     */
+    assertEquals(3, dna1.getDBRefs().length);
+    DBRefEntry dbRefEntry = dna1.getDBRefs()[2];
+    assertSame(cds1Dss, dbRefEntry.getMap().getTo());
+    MapList dnaToCdsMapping = new MapList(new int[] { 4, 6, 10, 12 },
+            new int[] { 1, 6 }, 1, 1);
+    assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap());
+    assertEquals(3, dna2.getDBRefs().length);
+    dbRefEntry = dna2.getDBRefs()[2];
+    assertSame(cds2Dss, dbRefEntry.getMap().getTo());
+    dnaToCdsMapping = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },
+            new int[] { 1, 9 }, 1, 1);
+    assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap());
+
+    /*
+     * verify CDS has added a dbref with mapping to cDNA
+     */
+    assertEquals(2, cds1Dss.getDBRefs().length);
+    dbRefEntry = cds1Dss.getDBRefs()[1];
+    assertSame(dna1.getDatasetSequence(), dbRefEntry.getMap().getTo());
+    MapList cdsToDnaMapping = new MapList(new int[] { 1, 6 }, new int[] {
+        4, 6, 10, 12 }, 1, 1);
+    assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap());
+    assertEquals(2, cds2Dss.getDBRefs().length);
+    dbRefEntry = cds2Dss.getDBRefs()[1];
+    assertSame(dna2.getDatasetSequence(), dbRefEntry.getMap().getTo());
+    cdsToDnaMapping = new MapList(new int[] { 1, 9 }, new int[] { 1, 3, 7,
+        9, 13, 15 }, 1, 1);
+    assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap());
+
+    /*
      * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
      * the mappings are on the shared alignment dataset
      * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep) 
@@ -1119,9 +1170,9 @@ public class AlignmentUtilsTests
     assertEquals(1, mappings.size());
 
     // map G to GGG
-    SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
+    SearchResultsI sr = MappingUtils.buildSearchResults(pep1, 1, mappings);
     assertEquals(1, sr.getResults().size());
-    Match m = sr.getResults().get(0);
+    SearchResultMatchI m = sr.getResults().get(0);
     assertSame(cds1Dss, m.getSequence());
     assertEquals(1, m.getStart());
     assertEquals(3, m.getEnd());
@@ -1165,12 +1216,12 @@ public class AlignmentUtilsTests
     /*
      * check cds2 acquired a variant feature in position 5
      */
-    SequenceFeature[] sfs = cds2Dss.getSequenceFeatures();
+    List<SequenceFeature> sfs = cds2Dss.getSequenceFeatures();
     assertNotNull(sfs);
-    assertEquals(1, sfs.length);
-    assertEquals("variant", sfs[0].type);
-    assertEquals(5, sfs[0].begin);
-    assertEquals(5, sfs[0].end);
+    assertEquals(1, sfs.size());
+    assertEquals("variant", sfs.get(0).type);
+    assertEquals(5, sfs.get(0).begin);
+    assertEquals(5, sfs.get(0).end);
   }
 
   /**
@@ -1475,39 +1526,39 @@ public class AlignmentUtilsTests
      * that partially overlap 5' or 3' (start or end) of target sequence
      */
     AlignmentUtils.transferFeatures(dna, cds, map, null);
-    SequenceFeature[] sfs = cds.getSequenceFeatures();
-    assertEquals(6, sfs.length);
+    List<SequenceFeature> sfs = cds.getSequenceFeatures();
+    assertEquals(6, sfs.size());
 
-    SequenceFeature sf = sfs[0];
+    SequenceFeature sf = sfs.get(0);
     assertEquals("type2", sf.getType());
     assertEquals("desc2", sf.getDescription());
     assertEquals(2f, sf.getScore());
     assertEquals(1, sf.getBegin());
     assertEquals(1, sf.getEnd());
 
-    sf = sfs[1];
+    sf = sfs.get(1);
     assertEquals("type3", sf.getType());
     assertEquals("desc3", sf.getDescription());
     assertEquals(3f, sf.getScore());
     assertEquals(1, sf.getBegin());
     assertEquals(3, sf.getEnd());
 
-    sf = sfs[2];
+    sf = sfs.get(2);
     assertEquals("type4", sf.getType());
     assertEquals(2, sf.getBegin());
     assertEquals(5, sf.getEnd());
 
-    sf = sfs[3];
+    sf = sfs.get(3);
     assertEquals("type5", sf.getType());
     assertEquals(1, sf.getBegin());
     assertEquals(6, sf.getEnd());
 
-    sf = sfs[4];
+    sf = sfs.get(4);
     assertEquals("type8", sf.getType());
     assertEquals(6, sf.getBegin());
     assertEquals(6, sf.getEnd());
 
-    sf = sfs[5];
+    sf = sfs.get(5);
     assertEquals("type9", sf.getType());
     assertEquals(6, sf.getBegin());
     assertEquals(6, sf.getEnd());
@@ -1537,10 +1588,10 @@ public class AlignmentUtilsTests
 
     // desc4 and desc8 are the 'omit these' varargs
     AlignmentUtils.transferFeatures(dna, cds, map, null, "type4", "type8");
-    SequenceFeature[] sfs = cds.getSequenceFeatures();
-    assertEquals(1, sfs.length);
+    List<SequenceFeature> sfs = cds.getSequenceFeatures();
+    assertEquals(1, sfs.size());
 
-    SequenceFeature sf = sfs[0];
+    SequenceFeature sf = sfs.get(0);
     assertEquals("type5", sf.getType());
     assertEquals(1, sf.getBegin());
     assertEquals(6, sf.getEnd());
@@ -1570,10 +1621,10 @@ public class AlignmentUtilsTests
 
     // "type5" is the 'select this type' argument
     AlignmentUtils.transferFeatures(dna, cds, map, "type5");
-    SequenceFeature[] sfs = cds.getSequenceFeatures();
-    assertEquals(1, sfs.length);
+    List<SequenceFeature> sfs = cds.getSequenceFeatures();
+    assertEquals(1, sfs.size());
 
-    SequenceFeature sf = sfs[0];
+    SequenceFeature sf = sfs.get(0);
     assertEquals("type5", sf.getType());
     assertEquals(1, sf.getBegin());
     assertEquals(6, sf.getEnd());
@@ -1650,10 +1701,10 @@ public class AlignmentUtilsTests
     List<AlignedCodonFrame> pep1CdsMappings = MappingUtils
             .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings);
     assertEquals(1, pep1CdsMappings.size());
-    SearchResults sr = MappingUtils.buildSearchResults(pep1, 1,
+    SearchResultsI sr = MappingUtils.buildSearchResults(pep1, 1,
             pep1CdsMappings);
     assertEquals(1, sr.getResults().size());
-    Match m = sr.getResults().get(0);
+    SearchResultMatchI m = sr.getResults().get(0);
     assertEquals(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
     assertEquals(1, m.getStart());
     assertEquals(3, m.getEnd());
@@ -2064,24 +2115,29 @@ public class AlignmentUtilsTests
      * var6 P -> H COSMIC
      * var6 P -> R COSMIC
      */
-    SequenceFeature[] sfs = peptide.getSequenceFeatures();
-    assertEquals(5, sfs.length);
+    List<SequenceFeature> sfs = peptide.getSequenceFeatures();
+    SequenceFeatures.sortFeatures(sfs, true);
+    assertEquals(5, sfs.size());
 
-    SequenceFeature sf = sfs[0];
+    /*
+     * features are sorted by start position ascending, but in no
+     * particular order where start positions match; asserts here
+     * simply match the data returned (the order is not important)
+     */
+    SequenceFeature sf = sfs.get(0);
     assertEquals(1, sf.getBegin());
     assertEquals(1, sf.getEnd());
-    assertEquals("p.Lys1Glu", sf.getDescription());
-    assertEquals("var1.125A>G", sf.getValue("ID"));
-    assertNull(sf.getValue("clinical_significance"));
-    assertEquals("ID=var1.125A>G", sf.getAttributes());
+    assertEquals("p.Lys1Asn", sf.getDescription());
+    assertEquals("var4", sf.getValue("ID"));
+    assertEquals("Benign", sf.getValue("clinical_significance"));
+    assertEquals("ID=var4;clinical_significance=Benign", sf.getAttributes());
     assertEquals(1, sf.links.size());
-    // link to variation is urlencoded
     assertEquals(
-            "p.Lys1Glu var1.125A>G|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var1.125A%3EG",
+            "p.Lys1Asn var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4",
             sf.links.get(0));
     assertEquals(ensembl, sf.getFeatureGroup());
 
-    sf = sfs[1];
+    sf = sfs.get(1);
     assertEquals(1, sf.getBegin());
     assertEquals(1, sf.getEnd());
     assertEquals("p.Lys1Gln", sf.getDescription());
@@ -2094,43 +2150,44 @@ public class AlignmentUtilsTests
             sf.links.get(0));
     assertEquals(dbSnp, sf.getFeatureGroup());
 
-    sf = sfs[2];
+    sf = sfs.get(2);
     assertEquals(1, sf.getBegin());
     assertEquals(1, sf.getEnd());
-    assertEquals("p.Lys1Asn", sf.getDescription());
-    assertEquals("var4", sf.getValue("ID"));
-    assertEquals("Benign", sf.getValue("clinical_significance"));
-    assertEquals("ID=var4;clinical_significance=Benign", sf.getAttributes());
+    assertEquals("p.Lys1Glu", sf.getDescription());
+    assertEquals("var1.125A>G", sf.getValue("ID"));
+    assertNull(sf.getValue("clinical_significance"));
+    assertEquals("ID=var1.125A>G", sf.getAttributes());
     assertEquals(1, sf.links.size());
+    // link to variation is urlencoded
     assertEquals(
-            "p.Lys1Asn var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4",
+            "p.Lys1Glu var1.125A>G|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var1.125A%3EG",
             sf.links.get(0));
     assertEquals(ensembl, sf.getFeatureGroup());
 
-    // var5 generates two distinct protein variant features
-    sf = sfs[3];
+    sf = sfs.get(3);
     assertEquals(3, sf.getBegin());
     assertEquals(3, sf.getEnd());
-    assertEquals("p.Pro3His", sf.getDescription());
+    assertEquals("p.Pro3Arg", sf.getDescription());
     assertEquals("var6", sf.getValue("ID"));
     assertEquals("Good", sf.getValue("clinical_significance"));
     assertEquals("ID=var6;clinical_significance=Good", sf.getAttributes());
     assertEquals(1, sf.links.size());
     assertEquals(
-            "p.Pro3His var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6",
+            "p.Pro3Arg var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6",
             sf.links.get(0));
     assertEquals(cosmic, sf.getFeatureGroup());
 
-    sf = sfs[4];
+    // var5 generates two distinct protein variant features
+    sf = sfs.get(4);
     assertEquals(3, sf.getBegin());
     assertEquals(3, sf.getEnd());
-    assertEquals("p.Pro3Arg", sf.getDescription());
+    assertEquals("p.Pro3His", sf.getDescription());
     assertEquals("var6", sf.getValue("ID"));
     assertEquals("Good", sf.getValue("clinical_significance"));
     assertEquals("ID=var6;clinical_significance=Good", sf.getAttributes());
     assertEquals(1, sf.links.size());
     assertEquals(
-            "p.Pro3Arg var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6",
+            "p.Pro3His var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6",
             sf.links.get(0));
     assertEquals(cosmic, sf.getFeatureGroup());
   }
@@ -2388,9 +2445,9 @@ public class AlignmentUtilsTests
     assertEquals(1, mappings.size());
 
     // map G to GGG
-    SearchResults sr = MappingUtils.buildSearchResults(pep3, 1, mappings);
+    SearchResultsI sr = MappingUtils.buildSearchResults(pep3, 1, mappings);
     assertEquals(1, sr.getResults().size());
-    Match m = sr.getResults().get(0);
+    SearchResultMatchI m = sr.getResults().get(0);
     assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence());
     assertEquals(1, m.getStart());
     assertEquals(3, m.getEnd());