JAL-2023 CDS sequences added to / share alignment dataset
author gmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 7 Mar 2016 13:25:06 +0000 (13:25 +0000)
committer gmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 7 Mar 2016 13:25:06 +0000 (13:25 +0000)
src/jalview/analysis/AlignmentUtils.java
src/jalview/gui/AlignFrame.java
src/jalview/gui/Jalview2XML.java
src/jalview/viewmodel/AlignmentViewport.java
test/jalview/analysis/AlignmentUtilsTests.java

index 2f9fcb2..450ae27 100644 (file)
@@ -1480,12 +1480,12 @@ public class AlignmentUtils
    *          aligned dna sequences
    * @param mappings
    *          from dna to protein; these are replaced with new mappings
-   * @param gapChar
+   * @param al the alignment supplying the gap character and the dataset shared by the new alignment
    * @return an alignment whose sequences are the cds-only parts of the dna
    *         sequences (or null if no mappings are found)
    */
   public static AlignmentI makeCdsAlignment(SequenceI[] dna,
-          List<AlignedCodonFrame> mappings, char gapChar)
+          List<AlignedCodonFrame> mappings, AlignmentI al)
   {
     List<int[]> cdsColumns = findCdsColumns(dna);
 
@@ -1495,6 +1495,7 @@ public class AlignmentUtils
      */
     List<AlignedCodonFrame> newMappings = new ArrayList<AlignedCodonFrame>();
     List<SequenceI> cdsSequences = new ArrayList<SequenceI>();
+    char gap = al.getGapCharacter();
 
     for (SequenceI dnaSeq : dna)
     {
@@ -1505,7 +1506,7 @@ public class AlignmentUtils
       {
         AlignedCodonFrame newMapping = new AlignedCodonFrame();
         final List<SequenceI> mappedCds = makeCdsSequences(dnaSeq, acf,
-                cdsColumns, newMapping, gapChar);
+                cdsColumns, newMapping, gap);
         if (!mappedCds.isEmpty())
         {
           cdsSequences.addAll(mappedCds);
@@ -1513,10 +1514,21 @@ public class AlignmentUtils
         }
       }
     }
-    AlignmentI al = new Alignment(
+    AlignmentI newAl = new Alignment(
             cdsSequences.toArray(new SequenceI[cdsSequences.size()]));
-    al.setGapCharacter(gapChar);
-    al.setDataset(null);
+
+    /*
+     * add new sequences to the shared dataset, set it on the new alignment
+     */
+    List<SequenceI> dsseqs = al.getDataset().getSequences();
+    for (SequenceI seq : newAl.getSequences())
+    {
+      if (!dsseqs.contains(seq.getDatasetSequence()))
+      {
+        dsseqs.add(seq.getDatasetSequence());
+      }
+    }
+    newAl.setDataset(al.getDataset());
 
     /*
      * Replace the old mappings with the new ones
@@ -1524,7 +1536,7 @@ public class AlignmentUtils
     mappings.clear();
     mappings.addAll(newMappings);
 
-    return al;
+    return newAl;
   }
 
   /**
@@ -1682,6 +1694,7 @@ public class AlignmentUtils
     {
       SequenceI cds = makeCdsSequence(dnaSeq, seqMapping,
               ungappedCdsColumns, gapChar);
+      cds.createDatasetSequence();
       cdsSequences.add(cds);
 
       /*
index 8d54f08..433afba 100644 (file)
@@ -4780,13 +4780,11 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
               AlignmentI copyAlignment = null;
               final SequenceI[] sequenceSelection = AlignFrame.this.viewport
                       .getSequenceSelection();
-              final char gapChar = AlignFrame.this.viewport
-                      .getGapCharacter();
               List<AlignedCodonFrame> cf = xrefs.getCodonFrames();
               if (dna)
               {
                 copyAlignment = AlignmentUtils.makeCdsAlignment(
-                        sequenceSelection, cf, gapChar);
+                        sequenceSelection, cf, alignment);
                 al.getCodonFrames().clear();
                 al.getCodonFrames().addAll(cf);
               }
@@ -4795,8 +4793,9 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
                 copyAlignment = new Alignment(new Alignment(
                         sequenceSelection));
                 copyAlignment.getCodonFrames().addAll(cf);
-                copyAlignment.setGapCharacter(gapChar);
               }
+              copyAlignment.setGapCharacter(AlignFrame.this.viewport
+                      .getGapCharacter());
               StructureSelectionManager ssm = StructureSelectionManager
                       .getStructureSelectionManager(Desktop.instance);
               ssm.registerMappings(cf);
@@ -4890,17 +4889,21 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
       }
 
       /**
-       * @param alignment
-       * @param prods
+       * Makes an alignment containing the given sequences; the sequences are
+       * added to the given alignment dataset, and the dataset is set on (shared
+       * by) the new alignment
+       * 
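+       * @param dataset the alignment dataset that the sequences are added to
+       * @param seqs the sequences to include in the new alignment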
        * @return
        */
       protected Alignment makeCrossReferencesAlignment(Alignment dataset,
-              Alignment prods)
+              Alignment seqs)
       {
-        SequenceI[] sprods = new SequenceI[prods.getHeight()];
+        SequenceI[] sprods = new SequenceI[seqs.getHeight()];
         for (int s = 0; s < sprods.length; s++)
         {
-          sprods[s] = (prods.getSequenceAt(s)).deriveSequence();
+          sprods[s] = (seqs.getSequenceAt(s)).deriveSequence();
           if (dataset.getSequences() == null
                   || !dataset.getSequences().contains(
                           sprods[s].getDatasetSequence()))
index 3f3e8f6..2799a7e 100644 (file)
@@ -163,7 +163,7 @@ public class Jalview2XML
    */
   Map<String, SequenceI> seqRefIds = null;
 
-  Vector frefedSequence = null;
+  Vector<Object[]> frefedSequence = null;
 
   boolean raiseGUI = true; // whether errors are raised in dialog boxes or not
 
@@ -258,7 +258,7 @@ public class Jalview2XML
       int r = 0, rSize = frefedSequence.size();
       while (r < rSize)
       {
-        Object[] ref = (Object[]) frefedSequence.elementAt(r);
+        Object[] ref = frefedSequence.elementAt(r);
         if (ref != null)
         {
           String sref = (String) ref[0];
@@ -2254,7 +2254,7 @@ public class Jalview2XML
     }
     if (frefedSequence == null)
     {
-      frefedSequence = new Vector();
+      frefedSequence = new Vector<Object[]>();
     }
 
     AlignFrame af = null, _af = null;
@@ -2850,7 +2850,7 @@ public class Jalview2XML
             {
               mapping = addMapping(maps[m].getMapping());
             }
-            if (dnaseq != null)
+            if (dnaseq != null && mapping.getTo() != null)
             {
               cf.addMap(dnaseq, mapping.getTo(), mapping.getMap());
             }
index 07cb689..6322243 100644 (file)
@@ -49,6 +49,7 @@ import jalview.structure.CommandListener;
 import jalview.structure.StructureSelectionManager;
 import jalview.structure.VamsasSource;
 import jalview.util.Comparison;
+import jalview.util.MapList;
 import jalview.util.MappingUtils;
 import jalview.viewmodel.styles.ViewStyle;
 import jalview.workers.AlignCalcManager;
@@ -848,7 +849,9 @@ public abstract class AlignmentViewport implements AlignViewportI,
        */
       AlignedCodonFrame mapping = al.getCodonFrames().iterator().next();
       // TODO hold mapping type e.g. dna-to-protein in AlignedCodonFrame?
-      if (mapping.getdnaToProt()[0].getFromRatio() == 3)
+      MapList[] mapLists = mapping.getdnaToProt();
+      // mapLists can be empty if project load has not finished resolving seqs
+      if (mapLists.length > 0 && mapLists[0].getFromRatio() == 3)
       {
         if (calculator
                 .getRegisteredWorkersOfClass(ComplementConsensusThread.class) == null)
@@ -1797,7 +1800,9 @@ public abstract class AlignmentViewport implements AlignViewportI,
         // fudge: check mappings are not protein-to-protein
         // TODO: nicer
         AlignedCodonFrame mapping = codonMappings.iterator().next();
-        if (mapping.getdnaToProt()[0].getFromRatio() == 3)
+        MapList[] mapLists = mapping.getdnaToProt();
+        // mapLists can be empty if project load has not finished resolving seqs
+        if (mapLists.length > 0 && mapLists[0].getFromRatio() == 3)
         {
           complementConsensus = new AlignmentAnnotation("cDNA Consensus",
                   "PID for cDNA", new Annotation[1], 0f, 100f,
index f9c1a11..abe3f55 100644 (file)
@@ -1024,6 +1024,8 @@ public class AlignmentUtilsTests
             null));
     dna2.addSequenceFeature(new SequenceFeature("CDS", "cds5", 13, 15, 0f,
             null));
+    AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 });
+    dna.setDataset(null);
 
     List<AlignedCodonFrame> mappings = new ArrayList<AlignedCodonFrame>();
     MapList map = new MapList(new int[] { 4, 6, 10, 12 },
@@ -1038,7 +1040,7 @@ public class AlignmentUtilsTests
     mappings.add(acf);
 
     AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
-        dna1, dna2 }, mappings, '-');
+        dna1, dna2 }, mappings, dna);
     assertEquals(2, cds.getSequences().size());
     assertEquals("---GGG---TTT---", cds.getSequenceAt(0)
             .getSequenceAsString());
@@ -1046,6 +1048,15 @@ public class AlignmentUtilsTests
             .getSequenceAsString());
 
     /*
+     * verify shared, extended alignment dataset
+     */
+    assertSame(dna.getDataset(), cds.getDataset());
+    assertTrue(dna.getDataset().getSequences()
+            .contains(cds.getSequenceAt(0).getDatasetSequence()));
+    assertTrue(dna.getDataset().getSequences()
+            .contains(cds.getSequenceAt(1).getDatasetSequence()));
+
+    /*
      * Verify updated mappings
      */
     assertEquals(2, mappings.size());
@@ -1210,8 +1221,10 @@ public class AlignmentUtilsTests
      * Create the Exon alignment; also replaces the dna-to-protein mappings with
      * exon-to-protein and exon-to-dna mappings
      */
+    AlignmentI dna = new Alignment(new SequenceI[] { dna1 });
+    dna.setDataset(null);
     AlignmentI exal = AlignmentUtils.makeCdsAlignment(
-            new SequenceI[] { dna1 }, mappings, '-');
+            new SequenceI[] { dna1 }, mappings, dna);
 
     /*
      * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively
@@ -1219,6 +1232,20 @@ public class AlignmentUtilsTests
     List<SequenceI> cds = exal.getSequences();
     assertEquals(3, cds.size());
 
+    /*
+     * verify shared, extended alignment dataset
+     */
+    assertSame(exal.getDataset(), dna.getDataset());
+    assertTrue(dna.getDataset().getSequences()
+            .contains(cds.get(0).getDatasetSequence()));
+    assertTrue(dna.getDataset().getSequences()
+            .contains(cds.get(1).getDatasetSequence()));
+    assertTrue(dna.getDataset().getSequences()
+            .contains(cds.get(2).getDatasetSequence()));
+
+    /*
+     * verify aligned cds sequences and their xrefs
+     */
     SequenceI cdsSeq = cds.get(0);
     assertEquals("---GGG---TTT", cdsSeq.getSequenceAsString());
     assertEquals("dna1|A12345", cdsSeq.getName());
@@ -1562,13 +1589,25 @@ public class AlignmentUtilsTests
     acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map);
     mappings.add(acf);
   
+    AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 });
+    dna.setDataset(null);
     AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] {
-        dna1, dna2, dna3 }, mappings, '-');
-    assertEquals(2, cds.getSequences().size());
-    assertEquals("GGGCCCTTTGGG", cds.getSequenceAt(0).getSequenceAsString());
-    assertEquals("GGGCC---TGGG", cds.getSequenceAt(1).getSequenceAsString());
+        dna1, dna2, dna3 }, mappings, dna);
+    List<SequenceI> cdsSeqs = cds.getSequences();
+    assertEquals(2, cdsSeqs.size());
+    assertEquals("GGGCCCTTTGGG", cdsSeqs.get(0).getSequenceAsString());
+    assertEquals("GGGCC---TGGG", cdsSeqs.get(1).getSequenceAsString());
   
     /*
+     * verify shared, extended alignment dataset
+     */
+    assertSame(dna.getDataset(), cds.getDataset());
+    assertTrue(dna.getDataset().getSequences()
+            .contains(cdsSeqs.get(0).getDatasetSequence()));
+    assertTrue(dna.getDataset().getSequences()
+            .contains(cdsSeqs.get(1).getDatasetSequence()));
+
+    /*
      * Verify updated mappings
      */
     assertEquals(2, mappings.size());