JAL-1619 load/align cDNA for protein (wip)
authorgmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 12 Jan 2015 17:13:45 +0000 (17:13 +0000)
committergmungoc <g.m.carstairs@dundee.ac.uk>
Mon, 12 Jan 2015 17:13:45 +0000 (17:13 +0000)
resources/lang/Messages.properties
src/jalview/analysis/AlignmentUtils.java
src/jalview/datamodel/AlignedCodonFrame.java
src/jalview/datamodel/Alignment.java
src/jalview/datamodel/AlignmentI.java
src/jalview/gui/AlignFrame.java
src/jalview/jbgui/GAlignFrame.java
src/jalview/schemes/ResidueProperties.java
src/jalview/util/MapList.java
test/jalview/analysis/AlignmentUtilsTests.java
test/jalview/datamodel/AlignmentTest.java

index ef303bc..1652eb7 100644 (file)
@@ -688,8 +688,15 @@ label.load_tree_for_sequence_set = Load a tree for this sequence set
 label.export_image = Export Image
 label.vamsas_store = VAMSAS store
 label.translate_cDNA = Translate cDNA
-label.cDNA = cDNA
-label.associate = Associate
+label.cdna = cDNA
+label.link_cdna = Link cDNA
+label.link_cdna_tip = Link to any compatible cDNA alignments.<br>Sequences are linked that have the same name and compatible lengths.
+label.no_cdna = No compatible cDNA was found
+label.linked_cdna = {0} cDNA alignments linked
+label.cdna_all_linked = All {0} compatible cDNA alignments are already linked
+label.align_cdna = Align linked cDNA
+label.align_cdna_tip = Any linked cDNA sequences will be realigned to match this alignment.
+label.cdna_aligned = {0} sequences in {1} alignments were realigned
 label.align = Align
 label.extract_scores = Extract Scores
 label.get_cross_refs = Get Cross References
@@ -1181,5 +1188,3 @@ label.no_colour_selection_in_scheme = Please, make a colour selection before to
 label.no_colour_selection_warn = Error saving colour scheme
 label.nonstandard_translation = Non-standard translation
 warn.nonstandard_translation = Non-standard translation(s) detected at {0}.<br>Do you wish to proceed?
-label.cdna_realign = Warning
-warn.cdna_realign = cDNA will be realigned if necessary to match the protein alignment.<br>Do you wish to proceed?
index 6385fa7..741e5e4 100644 (file)
  */
 package jalview.analysis;
 
+import jalview.datamodel.AlignedCodonFrame;
 import jalview.datamodel.AlignmentAnnotation;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.SequenceI;
+import jalview.schemes.ResidueProperties;
+import jalview.util.MapList;
 
 import java.util.ArrayList;
+import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Map;
 
 /**
  * grab bag of useful alignment manipulation operations Expect these to be
@@ -159,4 +164,147 @@ public class AlignmentUtils
     }
     return result;
   }
+
+  /**
+   * Groups the sequences of an alignment by their names. Each map entry holds
+   * every sequence sharing that name, and entries are kept in the order in
+   * which names are first met in the alignment. Sequences with a null name are
+   * left out. Useful when relating different alignment views of the same
+   * sequences.
+   * 
+   * @param al
+   *          the alignment whose sequences are to be grouped
+   * @return a map from sequence name to the list of sequences with that name
+   * @see jalview.datamodel.AlignmentI#getSequencesByName()
+   */
+  public static Map<String, List<SequenceI>> getSequencesByName(
+          AlignmentI al)
+  {
+    Map<String, List<SequenceI>> byName = new LinkedHashMap<String, List<SequenceI>>();
+    for (SequenceI sequence : al.getSequences())
+    {
+      String key = sequence.getName();
+      if (key == null)
+      {
+        // unnamed sequences cannot be looked up, so are skipped
+        continue;
+      }
+      List<SequenceI> sameName = byName.get(key);
+      if (sameName == null)
+      {
+        sameName = new ArrayList<SequenceI>();
+        byName.put(key, sameName);
+      }
+      sameName.add(sequence);
+    }
+    return byName;
+  }
+
+  /**
+   * Build mappings from the sequences of a protein alignment to those of a
+   * cDNA alignment. Mappings are made between sequences which have the same
+   * name and compatible lengths. Each protein sequence that gains at least one
+   * mapping has a codon frame holding its mappings added to the protein
+   * alignment; protein sequences with no mapping add nothing.
+   * 
+   * @param proteinAlignment
+   *          the alignment whose sequences are to be mapped, and which
+   *          receives the resulting codon frames
+   * @param cdnaAlignment
+   *          the alignment searched for same-named, length-compatible cDNA
+   * @return true if at least one mapping was made, else false
+   */
+  public static boolean mapProteinToCdna(final AlignmentI proteinAlignment,
+          final AlignmentI cdnaAlignment)
+  {
+    boolean mapped = false;
+    List<SequenceI> thisSeqs = proteinAlignment.getSequences();
+
+    /*
+     * Build a look-up of cDNA sequences by name, for matching purposes.
+     */
+    Map<String, List<SequenceI>> cdnaSeqs = cdnaAlignment
+            .getSequencesByName();
+
+    for (SequenceI aaSeq : thisSeqs)
+    {
+      List<SequenceI> candidates = cdnaSeqs.get(aaSeq.getName());
+      if (candidates == null)
+      {
+        /*
+         * No cDNA sequence with matching name, so no mapping for this protein
+         * sequence
+         */
+        continue;
+      }
+
+      /*
+       * Create the codon frame lazily, so that a protein sequence whose
+       * candidates all fail the length check does not leave an empty frame
+       * on the alignment.
+       */
+      AlignedCodonFrame acf = null;
+      for (SequenceI cdnaSeq : candidates)
+      {
+        MapList map = mapProteinToCdna(aaSeq, cdnaSeq);
+        if (map != null)
+        {
+          if (acf == null)
+          {
+            acf = new AlignedCodonFrame(proteinAlignment.getWidth());
+          }
+          acf.addMap(cdnaSeq, aaSeq, map);
+          mapped = true;
+        }
+      }
+      if (acf != null)
+      {
+        proteinAlignment.addCodonFrame(acf);
+      }
+    }
+    return mapped;
+  }
+
+  /**
+   * Tries to map a protein sequence to a cDNA sequence, comparing their
+   * dataset sequences. A mapping is made when the cDNA length is exactly three
+   * times the protein length, either as it stands or after discounting a
+   * trailing stop codon and then, if still needed, a leading start codon.
+   * Codon matching is case-insensitive.
+   * 
+   * @param proteinSeq
+   *          the protein sequence
+   * @param cdnaSeq
+   *          the candidate cDNA sequence
+   * @return a 3:1 mapping of the cDNA range (base 1) to the whole protein, or
+   *         null if the lengths are not compatible
+   */
+  public static MapList mapProteinToCdna(SequenceI proteinSeq,
+          SequenceI cdnaSeq)
+  {
+    String protein = proteinSeq.getDatasetSequence().getSequenceAsString();
+    String cdna = cdnaSeq.getDatasetSequence().getSequenceAsString();
+    if (protein == null || cdna == null)
+    {
+      return null;
+    }
+
+    final int residueCount = protein.length();
+    final int requiredBases = 3 * residueCount;
+    int firstBase = 1;
+    int lastBase = cdna.length();
+
+    if (lastBase != requiredBases)
+    {
+      final String upperCdna = cdna.toUpperCase();
+
+      /*
+       * Lengths differ: first see if dropping a trailing stop codon helps.
+       */
+      for (Object stopCodon : ResidueProperties.STOP)
+      {
+        if (upperCdna.endsWith((String) stopCodon))
+        {
+          lastBase -= 3;
+          break;
+        }
+      }
+
+      /*
+       * Still different: see if dropping a leading start codon helps.
+       */
+      if (lastBase - firstBase + 1 != requiredBases
+              && upperCdna.startsWith(ResidueProperties.START))
+      {
+        firstBase += 3;
+      }
+    }
+
+    if (lastBase - firstBase + 1 != requiredBases)
+    {
+      return null;
+    }
+    return new MapList(new int[]
+    { firstBase, lastBase }, new int[]
+    { 1, residueCount }, 3, 1);
+  }
 }
index 0740795..4016ee5 100644 (file)
@@ -226,10 +226,24 @@ public class AlignedCodonFrame
   /**
    * 
    * @param sequenceRef
-   * @return null or corresponding aaSeq entry for dnaSeq entry
+   * @return null or corresponding aaSeq dataset sequence for dnaSeq entry
    */
   public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef)
   {
+    return getAaForDnaSeq(dnaSeqRef, true);
+  }
+
+  /**
+   * Return the corresponding aligned or dataset aa sequence for given dna
+   * sequence, null if not found.
+   * 
+   * @param sequenceRef
+   * @param returnDataset
+   *          if true, return the aa dataset, else the aligned sequence
+   * @return
+   */
+  public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef, boolean returnDataset)
+  {
     if (dnaSeqs == null)
     {
       return null;
@@ -239,7 +253,16 @@ public class AlignedCodonFrame
     {
       if (dnaSeqs[ds] == dnaSeqRef || dnaSeqs[ds] == dnads)
       {
-        return dnaToProt[ds].to;
+        if (returnDataset)
+        {
+          return dnaToProt[ds].to;
+        }
+        else
+        {
+          // TODO very fragile - depends on dnaSeqs, dnaToProt, a_aaSeqs moving
+          // in parallel; revise data model to guarantee this
+          return (SequenceI) a_aaSeqs.elementAt(ds);
+        }
       }
     }
     return null;
@@ -329,4 +352,36 @@ public class AlignedCodonFrame
       }
     }
   }
+
+  /**
+   * Returns the DNA codon positions (base 1) for the given position (base 1) in
+   * a mapped protein sequence, or null if no mapping is found.
+   * 
+   * Intended for use in aligning cDNA to match aligned protein. Only the first
+   * mapping found is returned, so not suitable for use if multiple protein
+   * sequences are mapped to the same cDNA (but aligning cDNA as protein is
+   * ill-defined for this case anyway).
+   * 
+   * @param seq
+   *          the DNA dataset sequence
+   * @param aaPos
+   *          residue position (base 1) in a protein sequence
+   * @return the result of locating aaPos in the 'from' range of the first
+   *         mapping held for seq, or null if this frame holds no mappings or
+   *         none for seq
+   */
+  public int[] getDnaPosition(SequenceI seq, int aaPos)
+  {
+    /*
+     * A frame with no mappings added has nothing to search; guard as
+     * getAaForDnaSeq() does rather than fail on a null array.
+     */
+    if (dnaSeqs == null || dnaToProt == null)
+    {
+      return null;
+    }
+
+    /*
+     * Adapted from markMappedRegion().
+     */
+    MapList ml = null;
+    for (int i = 0; i < dnaToProt.length; i++)
+    {
+      if (dnaSeqs[i] == seq)
+      {
+        ml = getdnaToProt()[i];
+        break;
+      }
+    }
+    return ml == null ? null : ml.locateInFrom(aaPos, aaPos);
+  }
 }
index 91108bc..5d11a20 100755 (executable)
@@ -20,6 +20,7 @@
  */
 package jalview.datamodel;
 
+import jalview.analysis.AlignmentUtils;
 import jalview.util.MessageManager;
 
 import java.util.ArrayList;
@@ -157,6 +158,17 @@ public class Alignment implements AlignmentI
   }
 
   /**
+   * Returns a map of lists of sequences keyed by sequence name. The work is
+   * delegated to AlignmentUtils.getSequencesByName, passing this alignment.
+   * 
+   * @return a map from sequence name to the sequences that have that name
+   */
+  @Override
+  public Map<String, List<SequenceI>> getSequencesByName()
+  {
+    return AlignmentUtils.getSequencesByName(this);
+  }
+
+  /**
    * DOCUMENT ME!
    * 
    * @param i
@@ -1596,114 +1608,156 @@ public class Alignment implements AlignmentI
   }
 
   /**
-   * Answers true if the supplied alignment has the same number of sequences,
-   * and they are of equivalent length, ignoring gaps. Alignments should be of
-   * the same type (protein/nucleotide) or different types with 3:1 length
-   * scaling.
+   * Align this alignment 'the same as' the given one. Mapped sequences only are
+   * realigned. If both of the same type (nucleotide/protein) then align both
+   * identically. If this is nucleotide and the other is protein, make 3 gaps
+   * for each gap in the protein sequences. If this is protein and the other is
+   * nucleotide, insert a gap for each 3 gaps (or part thereof) between
+   * nucleotide bases. Does nothing if alignment of protein from cDNA is
+   * requested (not yet implemented).
    * 
    * @param al
    */
   @Override
-  public boolean isMappableTo(AlignmentI al)
+  public int alignAs(AlignmentI al)
   {
-    int thisCodonScale = this.isNucleotide() ? 1 : 3;
-    int thatCodonScale = al.isNucleotide() ? 1 : 3;
-    if (this == al || this.getHeight() != al.getHeight())
+    int count = 0;
+    boolean thisIsNucleotide = this.isNucleotide();
+    boolean thatIsProtein = !al.isNucleotide();
+    if (!thatIsProtein && !thisIsNucleotide)
     {
-      return false;
+      System.err
+              .println("Alignment of protein from cDNA not yet implemented");
+      return 0;
+      // todo: build it - a variant of Dna.CdnaTranslate()
     }
+    char thisGapChar = this.getGapCharacter();
+    char thatGapChar = al.getGapCharacter();
+    String gap = thisIsNucleotide && thatIsProtein ? String
+            .valueOf(new char[]
+            { thisGapChar, thisGapChar, thisGapChar }) : String
+            .valueOf(thisGapChar);
+    int ratio = thisIsNucleotide && thatIsProtein ? 3 : 1;
 
-    // TODO: match sequence ids, allow different sequence ordering?
-    // TODO: allow for stop/start codons?
-    // TODO: exclude introns
-    int i = 0;
-    for (SequenceI seq : this.getSequences())
-    {
-      final int thisSequenceDnaLength = seq.getDatasetSequence()
-              .getLength() * thisCodonScale;
-      final int thatSequenceDnaLength = al.getSequenceAt(i)
-              .getDatasetSequence().getLength()
-              * thatCodonScale;
-      if (thisSequenceDnaLength != thatSequenceDnaLength)
+    /*
+     * Get mappings from 'that' alignment's sequences to this.
+     */
+    for (SequenceI alignTo : getSequences())
+    {
+      AlignedCodonFrame[] mappings = al.getCodonFrame(alignTo);
+      if (mappings != null)
       {
-        return false;
+        for (AlignedCodonFrame mapping : mappings)
+        {
+          count += alignSequenceAs(alignTo, mapping, thatGapChar, gap,
+                  ratio) ? 1 : 0;
+        }
       }
-      i++;
     }
-    return true;
+    return count;
   }
 
   /**
-   * Align this alignment the same as the given one. If both of the same type
-   * (nucleotide/protein) then align both identically. If this is nucleotide and
-   * the other is protein, make 3 gaps for each gap in the protein sequences. If
-   * this is protein and the other is nucleotide, insert a gap for each 3 gaps
-   * (or part thereof) between nucleotide bases. The two alignments should be
-   * compatible in height and lengths, but if not, then discrepancies will be
-   * ignored with unpredictable results.
+   * Align sequence 'seq' the same way as 'other'. Note this currently assumes
+   * that we are aligning cDNA to match protein.
    * 
-   * @param al
-   * @throws UnsupportedOperation
-   *           if alignment of protein from cDNA is requested (not yet
-   *           implemented)
+   * @param seq
+   *          the sequence to be realigned
+   * @param mapping
+   *          holds mapping from the sequence whose alignment is to be 'copied'
+   * @param thatGapChar
+   *          gap character used in the 'other' sequence
+   * @param gap
+   *          character string representing a gap in the realigned sequence
+   * @param ratio
+   *          the number of positions in the realigned sequence corresponding to
+   *          one in the 'other'
+   * @return true if the sequence was realigned, false if it could not be
    */
-  @Override
-  public void alignAs(AlignmentI al)
-  {
-    boolean thisIsNucleotide = this.isNucleotide();
-    boolean thatIsProtein = !al.isNucleotide();
-    if (!thatIsProtein && !thisIsNucleotide)
+  protected boolean alignSequenceAs(SequenceI seq,
+          AlignedCodonFrame mapping,
+          char thatGapChar,
+          String gap, int ratio)
+  {
+    char myGapChar = gap.charAt(0);
+    // TODO rework this to use the mapping to match 'this' to 'that' residue
+    // position, to handle introns and exons correctly.
+    // TODO generalise to work for Protein-Protein, dna-dna, dna-protein
+    SequenceI alignFrom = mapping.getAaForDnaSeq(seq, false);
+    if (alignFrom == null)
     {
-      throw new UnsupportedOperationException(
-              "Alignment of protein from cDNA not implemented");
+      return false;
     }
-    char thisGapChar = this.getGapCharacter();
-    char thatGapChar = al.getGapCharacter();
-    String gap = thisIsNucleotide && thatIsProtein ? String
-            .valueOf(new char[]
-            { thisGapChar, thisGapChar, thisGapChar }) : String
-            .valueOf(thisGapChar);
-    int ratio = thisIsNucleotide && thatIsProtein ? 3 : 1;
-    int i = 0;
-    for (SequenceI seq : this.getSequences())
+    final char[] thisSeq = seq.getSequence();
+    final char[] thisDs = seq.getDatasetSequence().getSequence();
+    final char[] thatAligned = alignFrom.getSequence();
+    StringBuilder thisAligned = new StringBuilder(2 * thisDs.length);
+
+    /*
+     * Find the DNA dataset position that corresponds to the first protein
+     * residue (e.g. ignoring start codon in cDNA).
+     */
+    int[] dnaStart = mapping.getDnaPosition(seq.getDatasetSequence(), 1);
+    int thisDsPosition = dnaStart == null ? 0 : dnaStart[0] - 1;
+    int thisSeqPos = 0;
+
+    /*
+     * Copy aligned cDNA up to (excluding) the first mapped base.
+     */
+    int basesWritten = 0;
+    while (basesWritten < thisDsPosition && thisSeqPos < thisSeq.length)
+    {
+      char c = thisSeq[thisSeqPos++];
+      thisAligned.append(c);
+      if (c != myGapChar)
+      {
+        basesWritten++;
+      }
+    }
+
+    /*
+     * Now traverse the aligned protein mirroring its gaps in cDNA.
+     */
+    for (char thatChar : thatAligned)
     {
-      SequenceI other = al.getSequenceAt(i++);
-      if (other == null)
+      if (thatChar == thatGapChar)
       {
-        continue;
+        /*
+         * Add (equivalent of) a gap
+         */
+        thisAligned.append(gap);
       }
-      char[] thisDs = seq.getDatasetSequence().getSequence();
-      char[] thatDs = other.getSequence();
-      StringBuilder thisAligned = new StringBuilder(2 * thisDs.length);
-      int thisDsPosition = 0;
-      for (char thatChar : thatDs)
+      else
       {
-        if (thatChar == thatGapChar)
-        {
-          /*
-           * Add (equivalent of) a gap
-           */
-          thisAligned.append(gap);
-        }
-        else
+        /*
+         * Add (equivalent of) a residue
+         */
+        for (int j = 0; j < ratio && thisDsPosition < thisDs.length; j++)
         {
+          thisAligned.append(thisDs[thisDsPosition++]);
+
           /*
-           * Add (equivalent of) a residue
+           * Also advance over any gaps and the next residue in the old aligned
+           * sequence. The index is bounds-checked before the array is read, so
+           * that running off the end of the old alignment cannot throw.
            */
-          for (int j = 0; j < ratio && thisDsPosition < thisDs.length; j++)
+          while (thisSeqPos < thisSeq.length
+                  && thisSeq[thisSeqPos] == myGapChar)
           {
-            thisAligned.append(thisDs[thisDsPosition++]);
+            thisSeqPos++;
           }
+          thisSeqPos++;
         }
       }
-      /*
-       * Include any 'extra' residues (there shouldn't be).
-       */
-      while (thisDsPosition < thisDs.length)
-      {
-        thisAligned.append(thisDs[thisDsPosition++]);
-      }
-      seq.setSequence(new String(thisAligned));
     }
+
+    /*
+     * Finally copy any 'extra' aligned cDNA (e.g. stop codon, introns).
+     */
+    while (thisSeqPos < thisSeq.length)
+    {
+      thisAligned.append(thisSeq[thisSeqPos++]);
+    }
+    seq.setSequence(new String(thisAligned));
+    return true;
   }
 }
index 130a073..bd9ba9e 100755 (executable)
@@ -37,6 +37,7 @@ public interface AlignmentI extends AnnotatedCollectionI
   public int getHeight();
 
   /**
+   * 
    * Calculates the maximum width of the alignment, including gaps.
    * 
    * @return Greatest sequence length within alignment.
@@ -87,6 +88,13 @@ public interface AlignmentI extends AnnotatedCollectionI
   public SequenceI getSequenceAt(int i);
 
   /**
+   * Returns a map of lists of sequences keyed by sequence name.
+   * 
+   * @return
+   */
+  public Map<String, List<SequenceI>> getSequencesByName();
+
+  /**
    * Add a new sequence to this alignment.
    * 
    * @param seq
@@ -484,24 +492,15 @@ public interface AlignmentI extends AnnotatedCollectionI
   public void validateAnnotation(AlignmentAnnotation alignmentAnnotation);
 
   /**
-   * Answers true if the two alignments residues could be put into
-   * correspondence, i.e. the supplied alignment has the same number of
-   * sequences, and they are of equivalent length, ignoring gaps. Alignments
-   * should be of the same type (protein/nucleotide) or different types with 3:1
-   * length scaling.
-   * 
-   * @param al
-   */
-  public boolean isMappableTo(AlignmentI al);
-
-  /**
    * Align this alignment the same as the given one. If both of the same type
    * (nucleotide/protein) then align both identically. If this is nucleotide and
    * the other is protein, make 3 gaps for each gap in the protein sequences. If
    * this is protein and the other is nucleotide, insert a gap for each 3 gaps
-   * (or part thereof) between nucleotide bases.
+   * (or part thereof) between nucleotide bases. Returns the number of mapped
+   * sequences that were realigned.
    * 
    * @param al
+   * @return
    */
-  public void alignAs(AlignmentI al);
+  public int alignAs(AlignmentI al);
 }
index b839a38..7a36754 100644 (file)
@@ -85,7 +85,6 @@ import jalview.schemes.TurnColourScheme;
 import jalview.schemes.UserColourScheme;
 import jalview.schemes.ZappoColourScheme;
 import jalview.structure.StructureSelectionManager;
-import jalview.util.MapList;
 import jalview.util.MessageManager;
 import jalview.ws.jws1.Discoverer;
 import jalview.ws.jws2.Jws2Discoverer;
@@ -121,7 +120,6 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Enumeration;
 import java.util.Hashtable;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Vector;
 
@@ -707,7 +705,6 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
   {
     showTranslation.setVisible(nucleotide);
     cdna.setVisible(!nucleotide);
-    configureCdnaMenu();
     conservationMenuItem.setEnabled(!nucleotide);
     modifyConservation.setEnabled(!nucleotide);
     showGroupConservation.setEnabled(!nucleotide);
@@ -716,241 +713,99 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener,
   }
 
   /**
-   * Add any suitable options to the 'cDNA' sub-menu. Options may be to
-   * associate a cDNA alignment, or to align an associated alignment. To be
-   * suitable for association, an AlignFrame has to be nucleotide, and have the
-   * right number of sequences of corresponding length to this one.
+   * Builds codon mappings from this (protein) alignment to any compatible
+   * nucleotide alignments. Mappings are built between sequences with the same
+   * name and compatible lengths. Also makes the cDNA alignment a
+   * CommandListener for the protein alignment so that edits are mirrored.
    */
-  protected void configureCdnaMenu()
+  @Override
+  protected void linkCdna_actionPerformed()
   {
-    cdna.removeAll();
+    int linkedCount = 0;
+    int alreadyLinkedCount = 0;
+    final AlignmentI thisAlignment = this.alignPanel.getAlignment();
 
-    /*
-     * Identify candidates for 'associate cDNA', add to menu.
-     */
-    List<AlignFrame> candidates = getCdnaCandidates();
-    for (final AlignFrame candidate : candidates)
+    for (AlignFrame af : Desktop.getAlignframes())
     {
-      final String text = MessageManager.getString("label.associate") + " "
-              + candidate.getTitle();
-      JMenuItem option = new JMenuItem(text);
-      option.addActionListener(new ActionListener()
+      if (af.alignPanel != null)
       {
-        @Override
-        public void actionPerformed(ActionEvent e)
+        final AlignmentI thatAlignment = af.alignPanel.getAlignment();
+        if (thatAlignment.isNucleotide())
         {
-          associateCdna(candidate);
-        }
-      });
-      cdna.add(option);
-    }
-
-    /*
-     * Identify candidates for 'align cDNA', add to menu.
-     */
-    final AlignFrame[] alignframes = Desktop.getAlignframes();
-    if (alignframes != null)
-    {
-      for (final AlignFrame af : alignframes)
-      {
-        if (af != this)
-        {
-          if (this.viewport.getStructureSelectionManager()
-                  .hasCommandListener(af.viewport))
+          // TODO exclude an AlignFrame which is already mapped to this one
+          // temporary version: exclude if already a CommandListener (should
+          // cover most cases but not all)
+          final boolean alreadyMapped = this.viewport
+                  .getStructureSelectionManager().hasCommandListener(
+                          af.viewport);
+          if (alreadyMapped)
+          {
+            alreadyLinkedCount++;
+          }
+          else
           {
-            final String text = MessageManager.getString("label.align")
-                    + " " + af.getTitle();
-            JMenuItem option = new JMenuItem(text);
-            option.addActionListener(new ActionListener()
+            boolean mapped = AlignmentUtils.mapProteinToCdna(thisAlignment,
+                    thatAlignment);
+            if (mapped)
             {
-              @Override
-              public void actionPerformed(ActionEvent e)
-              {
-                af.alignPanel.getAlignment().alignAs(
-                        AlignFrame.this.alignPanel.getAlignment());
-                af.viewport.alignmentChanged(af.alignPanel);
-              }
-            });
-            cdna.add(option);
+              final StructureSelectionManager ssm = StructureSelectionManager
+                      .getStructureSelectionManager(Desktop.instance);
+              ssm.addMappings(thisAlignment.getCodonFrames());
+              ssm.addCommandListener(af.getViewport());
+              linkedCount++;
+            }
           }
         }
       }
     }
-
-    cdna.setEnabled(cdna.getMenuComponentCount() > 0);
+    String msg = "";
+    if (linkedCount == 0 && alreadyLinkedCount == 0)
+    {
+      msg = MessageManager.getString("label.no_cdna");
+    }
+    else if (linkedCount > 0)
+    {
+      msg = MessageManager.formatMessage("label.linked_cdna", linkedCount);
+    }
+    else
+    {
+      msg = MessageManager.formatMessage("label.cdna_all_linked",
+              alreadyLinkedCount);
+    }
+    setStatus(msg);
   }
 
   /**
-   * Returns a list of AlignFrame which are valid candidates for being the cDNA
-   * to map to this (protein) alignment. Valid means a nucleotide alignment with
-   * matching number of sequences and sequence lengths (excluding gaps).
-   * 
-   * @return
+   * Align any linked cDNA to match the alignment of this (protein) alignment.
+   * Any mapped sequence regions will be realigned, unmapped sequences are not
+   * affected.
    */
-  protected List<AlignFrame> getCdnaCandidates()
+  @Override
+  protected void alignCdna_actionPerformed()
   {
-    List<AlignFrame> result = new ArrayList<AlignFrame>();
-    if (this.alignPanel != null)
+    int seqCount = 0;
+    int alignCount = 0;
+    final AlignmentI thisAlignment = this.alignPanel.getAlignment();
+    for (AlignFrame af : Desktop.getAlignframes())
     {
-      AlignmentI thisAlignment = this.alignPanel.getAlignment();
-      if (thisAlignment == null || thisAlignment.isNucleotide())
+      if (af.alignPanel != null)
       {
-        return result;
-      }
-      final AlignFrame[] alignframes = Desktop.getAlignframes();
-      if (alignframes != null)
-      {
-        for (AlignFrame af : alignframes)
+        final AlignmentI thatAlignment = af.alignPanel.getAlignment();
+        if (thatAlignment.isNucleotide())
         {
-          if (af.alignPanel != null)
+          int seqsAligned = thatAlignment.alignAs(thisAlignment);
+          seqCount += seqsAligned;
+          if (seqsAligned > 0)
           {
-            final AlignmentI thatAlignment = af.alignPanel.getAlignment();
-            if (thatAlignment.isNucleotide()
-                    && thisAlignment.isMappableTo(thatAlignment))
-            {
-              // TODO exclude an AlignFrame which is already mapped to this one
-              // simple version: exclude if already a CommandListener (should
-              // cover most cases but not all)
-              if (!this.viewport.getStructureSelectionManager()
-                      .hasCommandListener(af.viewport))
-              {
-                result.add(af);
-              }
-            }
+            af.viewport.alignmentChanged(af.alignPanel);
+            alignCount++;
           }
         }
       }
     }
-    return result;
-  }
-
-  /**
-   * Build the codon mappings between the given (nucleotide) alignment and this
-   * (protein) alignment. Also make the cDNA alignment a CommandListener for the
-   * protein alignment so that edits are mirrored. The alignments must have the
-   * same number, and equivalent lengths, of (unaligned) sequence.
-   * 
-   * @param cdna
-   * @throws IllegalStateException
-   *           if sequence counts or lengths are incompatible
-   */
-  protected void associateCdna(AlignFrame cdna)
-  {
-    /*
-     * Warn that cDNA may be realigned to match protein
-     */
-    // int confirm = JOptionPane.showConfirmDialog(
-    // this,
-    // JvSwingUtils.wrapTooltip(true,
-    // MessageManager.getString("warn.cdna_realign")),
-    // MessageManager.getString("label.cdna_realign"),
-    // JOptionPane.OK_CANCEL_OPTION);
-    // if (confirm == JOptionPane.CANCEL_OPTION
-    // || confirm == JOptionPane.CLOSED_OPTION)
-    // {
-    // return;
-    // }
-
-    final AlignmentI aaAlignment = this.alignPanel.getAlignment();
-    Iterator<SequenceI> thisSeqs = aaAlignment
-            .getSequences().iterator();
-    Iterator<SequenceI> cdnaSeqs = cdna.alignPanel.getAlignment()
-            .getSequences().iterator();
-    AlignedCodonFrame acf = new AlignedCodonFrame(aaAlignment.getWidth());
-    while (thisSeqs.hasNext())
-    {
-      if (!cdnaSeqs.hasNext())
-      {
-        throw new IllegalStateException("Too few sequences to map");
-      }
-      final SequenceI aaSeq = thisSeqs.next();
-      String aaSeqString = aaSeq.getDatasetSequence()
-              .getSequenceAsString();
-      final SequenceI cdnaSeq = cdnaSeqs.next();
-      String cdnaSeqString = cdnaSeq.getDatasetSequence()
-              .getSequenceAsString();
-      final int aaLength = aaSeqString.length();
-      final int cdnaLength = cdnaSeqString.length();
-      if (cdnaLength != 3 * aaLength)
-      {
-        throw new IllegalStateException(
-                "Protein/cDNA lengths don't match: " + aaLength + "/"
-                        + cdnaLength);
-      }
-
-      /*
-       * Warn if mapping includes non-standard translations
-       */
-      if (!doTranslationWarningCheck(aaSeq.getName(), aaSeqString,
-              cdnaSeqString))
-      {
-        return;
-      }
-
-      MapList map = new MapList(new int[]
-      { 1, cdnaLength }, new int[]
-      { 1, aaLength }, 3, 1);
-      acf.addMap(cdnaSeq, aaSeq, map);
-
-      aaAlignment.addCodonFrame(acf);
-
-      final StructureSelectionManager ssm = StructureSelectionManager
-              .getStructureSelectionManager(Desktop.instance);
-      ssm.addMappings(aaAlignment.getCodonFrames());
-      ssm.addCommandListener(cdna.getViewport());
-
-      /*
-       * Rebuild 'associate cDna' menu so it now excludes the one just
-       * associated.
-       */
-      configureCdnaMenu();
-    }
-  }
-
-  /**
-   * Show a warning if any non-standard cDNA to protein would result from
-   * mapping the sequences.
-   * 
-   * @param aaSeqName
-   * @param aaSeqString
-   * @param aaSeqString
-   * @return true if no warning, or it is accepted, false if user chooses not to
-   *         proceed.
-   */
-  protected boolean doTranslationWarningCheck(String aaSeqName,
-          String aaSeqString, String cdnaSeqString)
-  {
-    final int aaLength = aaSeqString.length();
-    boolean warning = false;
-    String msg = aaSeqName;
-    for (int i = 0; i < aaLength; i++)
-    {
-      String codon = cdnaSeqString.substring(i * 3, i * 3 + 3);
-      String aa = ResidueProperties.codonTranslate(codon);
-      if (!(aa.charAt(0) == aaSeqString.charAt(i)))
-      {
-        warning = true;
-        msg += ":" + (i + 1) + ":" + aaSeqString.charAt(i) + "/" + codon
-                + ":" + aa;
-        break;
-      }
-    }
-    if (warning)
-    {
-      final String txt = JvSwingUtils.wrapTooltip(true, MessageManager
-              .formatMessage("warn.nonstandard_translation", msg));
-      int confirm = JOptionPane.showConfirmDialog(this, txt,
-              MessageManager.getString("label.nonstandard_translation"),
-              JOptionPane.OK_CANCEL_OPTION);
-      if (confirm == JOptionPane.CANCEL_OPTION
-              || confirm == JOptionPane.CLOSED_OPTION)
-      {
-        return false;
-      }
-    }
-    return true;
+    setStatus(MessageManager.formatMessage("label.cdna_aligned", seqCount,
+            alignCount));
   }
-
   /**
    * set up menus for the current viewport. This may be called after any
    * operation that affects the data in the current view (selection changed,
index edb4ac5..8dba34d 100755 (executable)
@@ -1811,7 +1811,38 @@ public class GAlignFrame extends JInternalFrame
         showTranslation_actionPerformed(e);
       }
     });
-    cdna.setText(MessageManager.getString("label.cDNA"));
+
+    /*
+     * cDNA menu options
+     */
+    cdna.setText(MessageManager.getString("label.cdna"));
+    JMenuItem linkCdna = new JMenuItem(
+            MessageManager.getString("label.link_cdna"));
+    linkCdna.setToolTipText(JvSwingUtils.wrapTooltip(true,
+            MessageManager.getString("label.link_cdna_tip")));
+    linkCdna.addActionListener(new ActionListener()
+    {
+      @Override
+      public void actionPerformed(ActionEvent e)
+      {
+        linkCdna_actionPerformed();
+      }
+    });
+    cdna.add(linkCdna);
+    JMenuItem alignCdna = new JMenuItem(
+            MessageManager.getString("label.align_cdna"));
+    alignCdna.setToolTipText(JvSwingUtils.wrapTooltip(true,
+            MessageManager.getString("label.align_cdna_tip")));
+    alignCdna.addActionListener(new ActionListener()
+    {
+      @Override
+      public void actionPerformed(ActionEvent e)
+      {
+        alignCdna_actionPerformed();
+      }
+    });
+    cdna.add(alignCdna);
+
     extractScores.setText(MessageManager.getString("label.extract_scores")
             + "...");
     extractScores.addActionListener(new ActionListener()
@@ -2404,6 +2435,18 @@ public class GAlignFrame extends JInternalFrame
     // selectMenu.add(listenToViewSelections);
   }
 
+  protected void alignCdna_actionPerformed()
+  {
+    // TODO Auto-generated method stub
+
+  }
+
+  protected void linkCdna_actionPerformed()
+  {
+    // TODO Auto-generated method stub
+
+  }
+
   /**
    * Action on clicking sort annotations by type.
    * 
index 8acf1f2..8801f50 100755 (executable)
@@ -675,6 +675,8 @@ public class ResidueProperties
 
   public static Vector STOP = new Vector();
 
+  public static String START = "ATG";
+
   static
   {
     codonHash.put("K", Lys);
index 4c62500..5fbc956 100644 (file)
@@ -39,6 +39,7 @@ public class MapList
    */
   public boolean equals(MapList obj)
   {
+    // TODO should have @Override and arg0 of type Object
     if (obj == this)
     {
       return true;
index 18b4252..f76362f 100644 (file)
  */
 package jalview.analysis;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
-
-import org.junit.Test;
-
+import jalview.datamodel.AlignedCodonFrame;
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Mapping;
 import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceI;
 import jalview.io.AppletFormatAdapter;
+import jalview.io.FormatAdapter;
+import jalview.util.MapList;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+import org.junit.Test;
 
 public class AlignmentUtilsTests 
 {
+  // @formatter:off
+  private static final String TEST_DATA = 
+          "# STOCKHOLM 1.0\n" +
+          "#=GS D.melanogaster.1 AC AY119185.1/838-902\n" +
+          "#=GS D.melanogaster.2 AC AC092237.1/57223-57161\n" +
+          "#=GS D.melanogaster.3 AC AY060611.1/560-627\n" +
+          "D.melanogaster.1          G.AGCC.CU...AUGAUCGA\n" +
+          "#=GR D.melanogaster.1 SS  ................((((\n" +
+          "D.melanogaster.2          C.AUUCAACU.UAUGAGGAU\n" +
+          "#=GR D.melanogaster.2 SS  ................((((\n" +
+          "D.melanogaster.3          G.UGGCGCU..UAUGACGCA\n" +
+          "#=GR D.melanogaster.3 SS  (.(((...(....(((((((\n" +
+          "//";
+
+  private static final String AA_SEQS_1 = 
+          ">Seq1Name\n" +
+          "K-QY--L\n" +
+          ">Seq2Name\n" +
+          "-R-FP-W-\n";
+
+  private static final String CDNA_SEQS_1 = 
+          ">Seq1Name\n" +
+          "AC-GG--CUC-CAA-CT\n" +
+          ">Seq2Name\n" +
+          "-CG-TTA--ACG---AAGT\n";
+
+  private static final String CDNA_SEQS_2 = 
+          ">Seq1Name\n" +
+          "GCTCGUCGTACT\n" +
+          ">Seq2Name\n" +
+          "GGGTCAGGCAGT\n";
+  // @formatter:on
+
   public static Sequence ts=new Sequence("short","ASDASDASDASDASDASDASDASDASDASDASDASDASD");
+
   @Test
   public void testExpandFlanks()
   {
@@ -55,6 +99,222 @@ public class AlignmentUtilsTests
           assertTrue("Flanking sequence not the same as original dataset sequence.\n"+ung+"\n"+sq.getDatasetSequence().getSequenceAsString(),ung.equalsIgnoreCase(sq.getDatasetSequence().getSequenceAsString()));
       }
       }
+    }
     }    
+
+  /**
+   * Tests the method that returns a map of lists of sequences by sequence name.
+   * 
+   * @throws IOException
+   */
+  @Test
+  public void testGetSequencesByName() throws IOException
+  {
+    final String data = ">Seq1Name\nKQYL\n" + ">Seq2Name\nRFPW\n"
+            + ">Seq1Name\nABCD\n";
+    AlignmentI al = loadAlignment(data, "FASTA");
+    Map<String, List<SequenceI>> map = AlignmentUtils
+            .getSequencesByName(al);
+    assertEquals(2, map.keySet().size());
+    assertEquals(2, map.get("Seq1Name").size());
+    assertEquals("KQYL", map.get("Seq1Name").get(0).getSequenceAsString());
+    assertEquals("ABCD", map.get("Seq1Name").get(1).getSequenceAsString());
+    assertEquals(1, map.get("Seq2Name").size());
+    assertEquals("RFPW", map.get("Seq2Name").get(0).getSequenceAsString());
+  }
+  /**
+   * Helper method to load an alignment and ensure dataset sequences are set up.
+   * 
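+   * @param data the alignment text, read as pasted input
+   * @param format the name of the file format of the data, e.g. "FASTA"
+   * @return the loaded alignment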
+   * @throws IOException
+   */
+  protected AlignmentI loadAlignment(final String data, String format) throws IOException
+  {
+    Alignment a = new FormatAdapter().readFile(data,
+            AppletFormatAdapter.PASTE, format);
+    a.setDataset(null);
+    return a;
+  }
+  /**
+   * Test mapping of protein to cDNA.
+   * 
+   * @throws IOException
+   */
+  @Test
+  public void testMapProteinToCdna() throws IOException
+  {
+    // protein: Human, Mouse and Worm, 3 residues each
+    AlignmentI protein = loadAlignment(
+            ">Human\nKQY\n>Mouse\nAFP\n>Worm\nRST\n",
+            "FASTA");
+    // cDNA: Mouse, Human, Mouse (9 bases each), Mouse (12 bases), Fly (9 bases)
+    // @formatter:off
+    String dnaData = 
+            ">Mouse\nGAAATCCAG\n" + 
+            ">Human\nTTCGATTAC\n" + 
+            ">Mouse\nGTCGTTTGC\n" + 
+            ">Mouse\nGTCGTTTGCgac\n" + // not mapped - wrong length 
+            ">Fly\nGTCGTTTGC\n"; // not mapped - no name match
+    // @formatter:on
+    AlignmentI cdna1 = loadAlignment(
+            dnaData,
+            "FASTA");
+    boolean mapped = AlignmentUtils.mapProteinToCdna(protein, cdna1);
+    assertTrue(mapped);
+
+    /*
+     * Check two mappings (one for Mouse, one for Human)
+     */
+    assertEquals(2, protein.getCodonFrames().length);
+    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).length);
+    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).length);
+
+    /*
+     * Inspect mapping for Human protein
+     */
+    AlignedCodonFrame humanMapping = protein.getCodonFrame(protein
+            .getSequenceAt(0))[0];
+    assertEquals(1, humanMapping.getdnaSeqs().length);
+    assertEquals(cdna1.getSequenceAt(1).getDatasetSequence(),
+            humanMapping.getdnaSeqs()[0]);
+    Mapping[] protMappings = humanMapping.getProtMappings();
+    assertEquals(1, protMappings.length);
+    MapList mapList = protMappings[0].getMap();
+    assertEquals(3, mapList.getFromRatio());
+    assertEquals(1, mapList.getToRatio());
+    assertTrue(Arrays.equals(new int[]
+    { 1, 9 }, mapList.getFromRanges()));
+    assertTrue(Arrays.equals(new int[]
+    { 1, 3 }, mapList.getToRanges()));
+
+    /*
+     * Inspect mappings for Mouse protein
+     */
+    AlignedCodonFrame mouseMapping1 = protein.getCodonFrame(protein
+            .getSequenceAt(1))[0];
+    assertEquals(2, mouseMapping1.getdnaSeqs().length);
+    assertEquals(cdna1.getSequenceAt(0).getDatasetSequence(),
+            mouseMapping1.getdnaSeqs()[0]);
+    assertEquals(cdna1.getSequenceAt(2).getDatasetSequence(),
+            mouseMapping1.getdnaSeqs()[1]);
+    protMappings = mouseMapping1.getProtMappings();
+    assertEquals(2, protMappings.length);
+    for (int i = 0; i < 2; i++)
+    {
+      mapList = protMappings[i].getMap();
+      assertEquals(3, mapList.getFromRatio());
+      assertEquals(1, mapList.getToRatio());
+      assertTrue(Arrays.equals(new int[]
+      { 1, 9 }, mapList.getFromRanges()));
+      assertTrue(Arrays.equals(new int[]
+      { 1, 3 }, mapList.getToRanges()));
+    }
+  }
+
+  /**
+   * Test mapping of protein to cDNA which may include start and/or stop codons.
+   * 
+   * @throws IOException
+   */
+  @Test
+  public void testMapProteinToCdna_stopStartCodons() throws IOException
+  {
+    // protein: Human, Mouse and Worm, 3 residues each
+    AlignmentI protein = loadAlignment(
+            ">Human\nKQY\n>Mouse\nAFP\n>Worm\nRST\n", "FASTA");
+    // @formatter:off
+    String dnaData = 
+            ">Mouse\natgGAAATCCAG\n" + // Mouse with start codon
+            ">Human\nTTCGATtactaa\n" + // Human with stop codon TAA
+            ">Mouse\nGTCGTTTGctaG\n" + // Mouse with stop codon TAG 
+            ">Human\nGTCGTTTgctGa\n" + // Human with stop codon TGA
+            ">Mouse\nATGGTCGTTTGCtag\n"; // Mouse with start and stop codons 
+    // @formatter:on
+    AlignmentI cdna1 = loadAlignment(
+            dnaData,
+            "FASTA");
+    boolean mapped = AlignmentUtils.mapProteinToCdna(protein, cdna1);
+    assertTrue(mapped);
+
+    /*
+     * Check two mappings (one for Mouse, one for Human)
+     */
+    assertEquals(2, protein.getCodonFrames().length);
+    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).length);
+    assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).length);
+
+    /*
+     * Inspect mapping for Human protein - should map to 2nd and 4th cDNA seqs
+     */
+    AlignedCodonFrame humanMapping = protein.getCodonFrame(protein
+            .getSequenceAt(0))[0];
+    assertEquals(2, humanMapping.getdnaSeqs().length);
+    assertEquals(cdna1.getSequenceAt(1).getDatasetSequence(),
+            humanMapping.getdnaSeqs()[0]);
+    assertEquals(cdna1.getSequenceAt(3).getDatasetSequence(),
+            humanMapping.getdnaSeqs()[1]);
+    Mapping[] protMappings = humanMapping.getProtMappings();
+    // two mappings, both to cDNA with stop codon
+    assertEquals(2, protMappings.length);
+    MapList mapList = protMappings[0].getMap();
+    assertEquals(3, mapList.getFromRatio());
+    assertEquals(1, mapList.getToRatio());
+    assertTrue(Arrays.equals(new int[]
+    { 1, 9 }, mapList.getFromRanges()));
+    assertTrue(Arrays.equals(new int[]
+    { 1, 3 }, mapList.getToRanges()));
+    mapList = protMappings[1].getMap();
+    assertEquals(3, mapList.getFromRatio());
+    assertEquals(1, mapList.getToRatio());
+    assertTrue(Arrays.equals(new int[]
+    { 1, 9 }, mapList.getFromRanges()));
+    assertTrue(Arrays.equals(new int[]
+    { 1, 3 }, mapList.getToRanges()));
+
+    /*
+     * Inspect mapping for Mouse protein - should map to 1st/3rd/5th cDNA seqs
+     */
+    AlignedCodonFrame mouseMapping = protein.getCodonFrame(protein
+            .getSequenceAt(1))[0];
+    assertEquals(3, mouseMapping.getdnaSeqs().length);
+    assertEquals(cdna1.getSequenceAt(0).getDatasetSequence(),
+            mouseMapping.getdnaSeqs()[0]);
+    assertEquals(cdna1.getSequenceAt(2).getDatasetSequence(),
+            mouseMapping.getdnaSeqs()[1]);
+    assertEquals(cdna1.getSequenceAt(4).getDatasetSequence(),
+            mouseMapping.getdnaSeqs()[2]);
+
+    // three mappings
+    protMappings = mouseMapping.getProtMappings();
+    assertEquals(3, protMappings.length);
+
+    // first mapping to cDNA with start codon
+    mapList = protMappings[0].getMap();
+    assertEquals(3, mapList.getFromRatio());
+    assertEquals(1, mapList.getToRatio());
+    assertTrue(Arrays.equals(new int[]
+    { 4, 12 }, mapList.getFromRanges()));
+    assertTrue(Arrays.equals(new int[]
+    { 1, 3 }, mapList.getToRanges()));
+
+    // second mapping to cDNA with stop codon
+    mapList = protMappings[1].getMap();
+    assertEquals(3, mapList.getFromRatio());
+    assertEquals(1, mapList.getToRatio());
+    assertTrue(Arrays.equals(new int[]
+    { 1, 9 }, mapList.getFromRanges()));
+    assertTrue(Arrays.equals(new int[]
+    { 1, 3 }, mapList.getToRanges()));
+
+    // third mapping to cDNA with start and stop codon
+    mapList = protMappings[2].getMap();
+    assertEquals(3, mapList.getFromRatio());
+    assertEquals(1, mapList.getToRatio());
+    assertTrue(Arrays.equals(new int[]
+    { 4, 12 }, mapList.getFromRanges()));
+    assertTrue(Arrays.equals(new int[]
+    { 1, 3 }, mapList.getToRanges()));
   }
 }
index 1713bc6..2b1fc72 100644 (file)
@@ -3,9 +3,9 @@ package jalview.datamodel;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
 import jalview.io.AppletFormatAdapter;
 import jalview.io.FormatAdapter;
+import jalview.util.MapList;
 
 import java.io.IOException;
 import java.util.Iterator;
@@ -35,26 +35,42 @@ public class AlignmentTest
           "#=GR D.melanogaster.3 SS  (.(((...(....(((((((\n" +
           "//";
 
-  private static final String TEST_DATA2 = 
-          ">TEST21 test21\n" +
-          "AC-GG--CUC-CAA-CT\n" +
-          ">TEST22 test22\n" +
-          "-CG-TTA--ACG---AAGT\n";
-
-  private static final String TEST_DATA3 = 
-          ">TEST31 test31\n" +
+  private static final String AA_SEQS_1 = 
+          ">Seq1Name\n" +
           "K-QY--L\n" +
-          ">TEST32 test32\n" +
+          ">Seq2Name\n" +
           "-R-FP-W-\n";
 
-  private static final String TEST_DATA4 = 
-          ">TEST41 test41\n" +
+  private static final String CDNA_SEQS_1 = 
+          ">Seq1Name\n" +
+          "AC-GG--CUC-CAA-CT\n" +
+          ">Seq2Name\n" +
+          "-CG-TTA--ACG---AAGT\n";
+
+  private static final String CDNA_SEQS_2 = 
+          ">Seq1Name\n" +
           "GCTCGUCGTACT\n" +
-          ">TEST42 test42\n" +
+          ">Seq2Name\n" +
           "GGGTCAGGCAGT\n";
   // @formatter:on
 
-  private Alignment al;
+  private AlignmentI al;
+
+  /**
+   * Helper method to load an alignment and ensure dataset sequences are set up.
+   * 
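+   * @param data the alignment text, read as pasted input
+   * @param format the name of the file format of the data, e.g. "FASTA" or "STH"
+   * @return the loaded alignment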
+   * @throws IOException
+   */
+  protected AlignmentI loadAlignment(final String data, String format) throws IOException
+  {
+    Alignment a = new FormatAdapter().readFile(data,
+            AppletFormatAdapter.PASTE, format);
+    a.setDataset(null);
+    return a;
+  }
 
   /*
    * Read in Stockholm format test data including secondary structure
@@ -63,8 +79,7 @@ public class AlignmentTest
   @Before
   public void setUp() throws IOException
   {
-    al = new FormatAdapter().readFile(TEST_DATA,
-            AppletFormatAdapter.PASTE, "STH");
+    al = loadAlignment(TEST_DATA, "STH");
     int i = 0;
     for (AlignmentAnnotation ann : al.getAlignmentAnnotation())
     {
@@ -90,42 +105,11 @@ public class AlignmentTest
   }
 
   /**
-   * Tests for method that checks for alignment 'mappability'.
+   * Tests for realigning as per a supplied alignment: Dna as Dna.
    * 
-   * @throws IOException
-   */
-  @Test
-  public void testIsMappableTo() throws IOException
-  {
-    al = new FormatAdapter().readFile(TEST_DATA2,
-            AppletFormatAdapter.PASTE, "FASTA");
-    al.setDataset(null);
-
-    // not mappable to self
-    assertFalse(al.isMappableTo(al));
-
-    // dna mappable to protein and vice versa
-    AlignmentI alp = new FormatAdapter().readFile(TEST_DATA3,
-            AppletFormatAdapter.PASTE, "FASTA");
-    alp.setDataset(null);
-    assertTrue(al.isMappableTo(alp));
-    assertTrue(alp.isMappableTo(al));
-    assertFalse(alp.isMappableTo(alp));
-
-    // not mappable if any sequence length mismatch
-    alp.getSequenceAt(1).setSequence("-R--FP-");
-    alp.getSequenceAt(1).setDatasetSequence(new Sequence("", "RFP"));
-    assertFalse(alp.isMappableTo(al));
-    assertFalse(al.isMappableTo(alp));
-
-    // not mappable if number of sequences differs
-    alp.deleteSequence(1);
-    assertFalse(alp.isMappableTo(al));
-    assertFalse(al.isMappableTo(alp));
-  }
-
-  /**
-   * Tests for realigning as per a supplied alignment.
+   * Note: AlignedCodonFrame's state variables are named for protein-to-cDNA
+   * mapping, but can be exploited for a general 'sequence-to-sequence' mapping
+   * as here.
    * 
    * @throws IOException
    */
@@ -133,13 +117,21 @@ public class AlignmentTest
   public void testAlignAs_dnaAsDna() throws IOException
   {
     // aligned cDNA:
-    Alignment al1 = new FormatAdapter().readFile(TEST_DATA2,
-            AppletFormatAdapter.PASTE, "FASTA");
-    al1.setDataset(null);
+    AlignmentI al1 = loadAlignment(CDNA_SEQS_1, "FASTA");
     // unaligned cDNA:
-    Alignment al2 = new FormatAdapter().readFile(TEST_DATA4,
-            AppletFormatAdapter.PASTE, "FASTA");
-    al2.setDataset(null);
+    AlignmentI al2 = loadAlignment(CDNA_SEQS_2, "FASTA");
+
+    /*
+     * Make mappings between sequences. The 'aligned cDNA' is playing the role
+     * of what would normally be protein here.
+     */
+    AlignedCodonFrame acf = new AlignedCodonFrame(al1.getWidth());
+    MapList ml = new MapList(new int[]
+    { 1, 12 }, new int[]
+    { 1, 12 }, 1, 1);
+    acf.addMap(al2.getSequenceAt(0), al1.getSequenceAt(0), ml);
+    acf.addMap(al2.getSequenceAt(1), al1.getSequenceAt(1), ml);
+    al1.addCodonFrame(acf);
 
     al2.alignAs(al1);
     assertEquals("GC-TC--GUC-GTA-CT", al2.getSequenceAt(0)
@@ -149,30 +141,21 @@ public class AlignmentTest
   }
 
   /**
-   * Aligning protein from cDNA yet to be implemented.
+   * Aligning protein as cDNA is not yet implemented; alignAs should do nothing.
    * 
    * @throws IOException
    */
   @Test
   public void testAlignAs_proteinAsCdna() throws IOException
   {
-    // aligned cDNA:
-    Alignment al1 = new FormatAdapter().readFile(TEST_DATA2,
-            AppletFormatAdapter.PASTE, "FASTA");
-    al1.setDataset(null);
-    // unaligned cDNA:
-    Alignment al2 = new FormatAdapter().readFile(TEST_DATA3,
-            AppletFormatAdapter.PASTE, "FASTA");
-    al2.setDataset(null);
+    AlignmentI al1 = loadAlignment(CDNA_SEQS_1, "FASTA");
+    AlignmentI al2 = loadAlignment(AA_SEQS_1, "FASTA");
+    String before0 = al2.getSequenceAt(0).getSequenceAsString();
+    String before1 = al2.getSequenceAt(1).getSequenceAsString();
 
-    try
-    {
-      al2.alignAs(al1);
-      fail("No exception thrown");
-    } catch (UnsupportedOperationException e)
-    {
-      // expected;
-    }
+    al2.alignAs(al1);
+    assertEquals(before0, al2.getSequenceAt(0).getSequenceAsString());
+    assertEquals(before1, al2.getSequenceAt(1).getSequenceAsString());
   }
 
   /**
@@ -183,14 +166,18 @@ public class AlignmentTest
   @Test
   public void testAlignAs_cdnaAsProtein() throws IOException
   {
-    // aligned cDNA:
-    Alignment al1 = new FormatAdapter().readFile(TEST_DATA2,
-            AppletFormatAdapter.PASTE, "FASTA");
-    al1.setDataset(null);
-    // unaligned cDNA:
-    Alignment al2 = new FormatAdapter().readFile(TEST_DATA3,
-            AppletFormatAdapter.PASTE, "FASTA");
-    al2.setDataset(null);
+    /*
+     * Load alignments and add mappings for cDNA to protein
+     */
+    AlignmentI al1 = loadAlignment(CDNA_SEQS_1, "FASTA");
+    AlignmentI al2 = loadAlignment(AA_SEQS_1, "FASTA");
+    AlignedCodonFrame acf = new AlignedCodonFrame(al2.getWidth());
+    MapList ml = new MapList(new int[]
+    { 1, 12 }, new int[]
+    { 1, 4 }, 3, 1);
+    acf.addMap(al1.getSequenceAt(0), al2.getSequenceAt(0), ml);
+    acf.addMap(al1.getSequenceAt(1), al2.getSequenceAt(1), ml);
+    al2.addCodonFrame(acf);
 
     al1.alignAs(al2);
     assertEquals("ACG---GCUCCA------ACT", al1.getSequenceAt(0)