JAL-2738 JAL-2154 add DBRef from transcript to synthesized CDS and vice
[jalview.git] / test / jalview / analysis / AlignmentUtilsTests.java
index 8a667b3..7c64193 100644 (file)
@@ -40,8 +40,12 @@ import jalview.datamodel.SearchResultsI;
 import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
+import jalview.datamodel.features.SequenceFeatures;
 import jalview.gui.JvOptionPane;
 import jalview.io.AppletFormatAdapter;
+import jalview.io.DataSourceType;
+import jalview.io.FileFormat;
+import jalview.io.FileFormatI;
 import jalview.io.FormatAdapter;
 import jalview.util.MapList;
 import jalview.util.MappingUtils;
@@ -79,14 +83,15 @@ public class AlignmentUtilsTests
       SequenceI s1 = ts.deriveSequence().getSubSequence(i, i + 7);
       al.addSequence(s1);
     }
-    System.out.println(new AppletFormatAdapter().formatSequences("Clustal",
+    System.out.println(new AppletFormatAdapter().formatSequences(
+            FileFormat.Clustal,
             al, true));
     for (int flnk = -1; flnk < 25; flnk++)
     {
       AlignmentI exp = AlignmentUtils.expandContext(al, flnk);
       System.out.println("\nFlank size: " + flnk);
       System.out.println(new AppletFormatAdapter().formatSequences(
-              "Clustal", exp, true));
+              FileFormat.Clustal, exp, true));
       if (flnk == -1)
       {
         /*
@@ -219,7 +224,7 @@ public class AlignmentUtilsTests
   {
     final String data = ">Seq1Name\nKQYL\n" + ">Seq2Name\nRFPW\n"
             + ">Seq1Name\nABCD\n";
-    AlignmentI al = loadAlignment(data, "FASTA");
+    AlignmentI al = loadAlignment(data, FileFormat.Fasta);
     Map<String, List<SequenceI>> map = AlignmentUtils
             .getSequencesByName(al);
     assertEquals(2, map.keySet().size());
@@ -239,11 +244,11 @@ public class AlignmentUtilsTests
    * @return
    * @throws IOException
    */
-  protected AlignmentI loadAlignment(final String data, String format)
+  protected AlignmentI loadAlignment(final String data, FileFormatI format)
           throws IOException
   {
     AlignmentI a = new FormatAdapter().readFile(data,
-            AppletFormatAdapter.PASTE, format);
+            DataSourceType.PASTE, format);
     a.setDataset(null);
     return a;
   }
@@ -1039,14 +1044,18 @@ public class AlignmentUtilsTests
     dna.addCodonFrame(acf);
 
     /*
-     * In this case, mappings originally came from matching Uniprot accessions - so need an xref on dna involving those regions. These are normally constructed from CDS annotation
+     * In this case, mappings originally came from matching Uniprot accessions 
+     * - so need an xref on dna involving those regions. 
+     * These are normally constructed from CDS annotation
      */
     DBRefEntry dna1xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep1",
             new Mapping(mapfordna1));
-    dna1.getDatasetSequence().addDBRef(dna1xref);
+    dna1.addDBRef(dna1xref);
+    assertEquals(2, dna1.getDBRefs().length); // to self and to pep1
     DBRefEntry dna2xref = new DBRefEntry("UNIPROT", "ENSEMBL", "pep2",
             new Mapping(mapfordna2));
-    dna2.getDatasetSequence().addDBRef(dna2xref);
+    dna2.addDBRef(dna2xref);
+    assertEquals(2, dna2.getDBRefs().length); // to self and to pep2
 
     /*
      * execute method under test:
@@ -1101,6 +1110,38 @@ public class AlignmentUtilsTests
     assertEquals(cdsMapping.getInverse(), dbref.getMap().getMap());
 
     /*
+     * verify cDNA has added a dbref with mapping to CDS
+     */
+    assertEquals(3, dna1.getDBRefs().length);
+    DBRefEntry dbRefEntry = dna1.getDBRefs()[2];
+    assertSame(cds1Dss, dbRefEntry.getMap().getTo());
+    MapList dnaToCdsMapping = new MapList(new int[] { 4, 6, 10, 12 },
+            new int[] { 1, 6 }, 1, 1);
+    assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap());
+    assertEquals(3, dna2.getDBRefs().length);
+    dbRefEntry = dna2.getDBRefs()[2];
+    assertSame(cds2Dss, dbRefEntry.getMap().getTo());
+    dnaToCdsMapping = new MapList(new int[] { 1, 3, 7, 9, 13, 15 },
+            new int[] { 1, 9 }, 1, 1);
+    assertEquals(dnaToCdsMapping, dbRefEntry.getMap().getMap());
+
+    /*
+     * verify CDS has added a dbref with mapping to cDNA
+     */
+    assertEquals(2, cds1Dss.getDBRefs().length);
+    dbRefEntry = cds1Dss.getDBRefs()[1];
+    assertSame(dna1.getDatasetSequence(), dbRefEntry.getMap().getTo());
+    MapList cdsToDnaMapping = new MapList(new int[] { 1, 6 }, new int[] {
+        4, 6, 10, 12 }, 1, 1);
+    assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap());
+    assertEquals(2, cds2Dss.getDBRefs().length);
+    dbRefEntry = cds2Dss.getDBRefs()[1];
+    assertSame(dna2.getDatasetSequence(), dbRefEntry.getMap().getTo());
+    cdsToDnaMapping = new MapList(new int[] { 1, 9 }, new int[] { 1, 3, 7,
+        9, 13, 15 }, 1, 1);
+    assertEquals(cdsToDnaMapping, dbRefEntry.getMap().getMap());
+
+    /*
      * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide
      * the mappings are on the shared alignment dataset
      * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep) 
@@ -1175,12 +1216,12 @@ public class AlignmentUtilsTests
     /*
      * check cds2 acquired a variant feature in position 5
      */
-    SequenceFeature[] sfs = cds2Dss.getSequenceFeatures();
+    List<SequenceFeature> sfs = cds2Dss.getSequenceFeatures();
     assertNotNull(sfs);
-    assertEquals(1, sfs.length);
-    assertEquals("variant", sfs[0].type);
-    assertEquals(5, sfs[0].begin);
-    assertEquals(5, sfs[0].end);
+    assertEquals(1, sfs.size());
+    assertEquals("variant", sfs.get(0).type);
+    assertEquals(5, sfs.get(0).begin);
+    assertEquals(5, sfs.get(0).end);
   }
 
   /**
@@ -1485,39 +1526,39 @@ public class AlignmentUtilsTests
      * that partially overlap 5' or 3' (start or end) of target sequence
      */
     AlignmentUtils.transferFeatures(dna, cds, map, null);
-    SequenceFeature[] sfs = cds.getSequenceFeatures();
-    assertEquals(6, sfs.length);
+    List<SequenceFeature> sfs = cds.getSequenceFeatures();
+    assertEquals(6, sfs.size());
 
-    SequenceFeature sf = sfs[0];
+    SequenceFeature sf = sfs.get(0);
     assertEquals("type2", sf.getType());
     assertEquals("desc2", sf.getDescription());
     assertEquals(2f, sf.getScore());
     assertEquals(1, sf.getBegin());
     assertEquals(1, sf.getEnd());
 
-    sf = sfs[1];
+    sf = sfs.get(1);
     assertEquals("type3", sf.getType());
     assertEquals("desc3", sf.getDescription());
     assertEquals(3f, sf.getScore());
     assertEquals(1, sf.getBegin());
     assertEquals(3, sf.getEnd());
 
-    sf = sfs[2];
+    sf = sfs.get(2);
     assertEquals("type4", sf.getType());
     assertEquals(2, sf.getBegin());
     assertEquals(5, sf.getEnd());
 
-    sf = sfs[3];
+    sf = sfs.get(3);
     assertEquals("type5", sf.getType());
     assertEquals(1, sf.getBegin());
     assertEquals(6, sf.getEnd());
 
-    sf = sfs[4];
+    sf = sfs.get(4);
     assertEquals("type8", sf.getType());
     assertEquals(6, sf.getBegin());
     assertEquals(6, sf.getEnd());
 
-    sf = sfs[5];
+    sf = sfs.get(5);
     assertEquals("type9", sf.getType());
     assertEquals(6, sf.getBegin());
     assertEquals(6, sf.getEnd());
@@ -1547,10 +1588,10 @@ public class AlignmentUtilsTests
 
     // desc4 and desc8 are the 'omit these' varargs
     AlignmentUtils.transferFeatures(dna, cds, map, null, "type4", "type8");
-    SequenceFeature[] sfs = cds.getSequenceFeatures();
-    assertEquals(1, sfs.length);
+    List<SequenceFeature> sfs = cds.getSequenceFeatures();
+    assertEquals(1, sfs.size());
 
-    SequenceFeature sf = sfs[0];
+    SequenceFeature sf = sfs.get(0);
     assertEquals("type5", sf.getType());
     assertEquals(1, sf.getBegin());
     assertEquals(6, sf.getEnd());
@@ -1580,10 +1621,10 @@ public class AlignmentUtilsTests
 
     // "type5" is the 'select this type' argument
     AlignmentUtils.transferFeatures(dna, cds, map, "type5");
-    SequenceFeature[] sfs = cds.getSequenceFeatures();
-    assertEquals(1, sfs.length);
+    List<SequenceFeature> sfs = cds.getSequenceFeatures();
+    assertEquals(1, sfs.size());
 
-    SequenceFeature sf = sfs[0];
+    SequenceFeature sf = sfs.get(0);
     assertEquals("type5", sf.getType());
     assertEquals(1, sf.getBegin());
     assertEquals(6, sf.getEnd());
@@ -2074,24 +2115,29 @@ public class AlignmentUtilsTests
      * var6 P -> H COSMIC
      * var6 P -> R COSMIC
      */
-    SequenceFeature[] sfs = peptide.getSequenceFeatures();
-    assertEquals(5, sfs.length);
+    List<SequenceFeature> sfs = peptide.getSequenceFeatures();
+    SequenceFeatures.sortFeatures(sfs, true);
+    assertEquals(5, sfs.size());
 
-    SequenceFeature sf = sfs[0];
+    /*
+     * features are sorted by start position ascending, but in no
+     * particular order where start positions match; asserts here
+     * simply match the data returned (the order is not important)
+     */
+    SequenceFeature sf = sfs.get(0);
     assertEquals(1, sf.getBegin());
     assertEquals(1, sf.getEnd());
-    assertEquals("p.Lys1Glu", sf.getDescription());
-    assertEquals("var1.125A>G", sf.getValue("ID"));
-    assertNull(sf.getValue("clinical_significance"));
-    assertEquals("ID=var1.125A>G", sf.getAttributes());
+    assertEquals("p.Lys1Asn", sf.getDescription());
+    assertEquals("var4", sf.getValue("ID"));
+    assertEquals("Benign", sf.getValue("clinical_significance"));
+    assertEquals("ID=var4;clinical_significance=Benign", sf.getAttributes());
     assertEquals(1, sf.links.size());
-    // link to variation is urlencoded
     assertEquals(
-            "p.Lys1Glu var1.125A>G|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var1.125A%3EG",
+            "p.Lys1Asn var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4",
             sf.links.get(0));
     assertEquals(ensembl, sf.getFeatureGroup());
 
-    sf = sfs[1];
+    sf = sfs.get(1);
     assertEquals(1, sf.getBegin());
     assertEquals(1, sf.getEnd());
     assertEquals("p.Lys1Gln", sf.getDescription());
@@ -2104,43 +2150,44 @@ public class AlignmentUtilsTests
             sf.links.get(0));
     assertEquals(dbSnp, sf.getFeatureGroup());
 
-    sf = sfs[2];
+    sf = sfs.get(2);
     assertEquals(1, sf.getBegin());
     assertEquals(1, sf.getEnd());
-    assertEquals("p.Lys1Asn", sf.getDescription());
-    assertEquals("var4", sf.getValue("ID"));
-    assertEquals("Benign", sf.getValue("clinical_significance"));
-    assertEquals("ID=var4;clinical_significance=Benign", sf.getAttributes());
+    assertEquals("p.Lys1Glu", sf.getDescription());
+    assertEquals("var1.125A>G", sf.getValue("ID"));
+    assertNull(sf.getValue("clinical_significance"));
+    assertEquals("ID=var1.125A>G", sf.getAttributes());
     assertEquals(1, sf.links.size());
+    // link to variation is urlencoded
     assertEquals(
-            "p.Lys1Asn var4|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var4",
+            "p.Lys1Glu var1.125A>G|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var1.125A%3EG",
             sf.links.get(0));
     assertEquals(ensembl, sf.getFeatureGroup());
 
-    // var5 generates two distinct protein variant features
-    sf = sfs[3];
+    sf = sfs.get(3);
     assertEquals(3, sf.getBegin());
     assertEquals(3, sf.getEnd());
-    assertEquals("p.Pro3His", sf.getDescription());
+    assertEquals("p.Pro3Arg", sf.getDescription());
     assertEquals("var6", sf.getValue("ID"));
     assertEquals("Good", sf.getValue("clinical_significance"));
     assertEquals("ID=var6;clinical_significance=Good", sf.getAttributes());
     assertEquals(1, sf.links.size());
     assertEquals(
-            "p.Pro3His var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6",
+            "p.Pro3Arg var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6",
             sf.links.get(0));
     assertEquals(cosmic, sf.getFeatureGroup());
 
-    sf = sfs[4];
+    // var5 generates two distinct protein variant features
+    sf = sfs.get(4);
     assertEquals(3, sf.getBegin());
     assertEquals(3, sf.getEnd());
-    assertEquals("p.Pro3Arg", sf.getDescription());
+    assertEquals("p.Pro3His", sf.getDescription());
     assertEquals("var6", sf.getValue("ID"));
     assertEquals("Good", sf.getValue("clinical_significance"));
     assertEquals("ID=var6;clinical_significance=Good", sf.getAttributes());
     assertEquals(1, sf.links.size());
     assertEquals(
-            "p.Pro3Arg var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6",
+            "p.Pro3His var6|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=var6",
             sf.links.get(0));
     assertEquals(cosmic, sf.getFeatureGroup());
   }