JAL-845 linked protein/dna 'slave' further PoC functionality
[jalview.git] / src / jalview / analysis / AlignmentUtils.java
index 2dbe015..f3e126d 100644 (file)
@@ -417,7 +417,9 @@ public class AlignmentUtils
 
   /**
    * Align sequence 'alignTo' the same way as 'alignFrom', using the mapping to
-   * match residues and codons.
+   * match residues and codons. Flags control whether existing gaps in unmapped
+   * (intron) and mapped (exon) regions are preserved or not. Gaps linking intro
+   * and exon are only retained if both flags are set.
    * 
    * @param alignTo
    * @param alignFrom
@@ -448,12 +450,13 @@ public class AlignmentUtils
     /*
      * Traverse the aligned protein sequence.
      */
-    int sourceGapLength = 0;
+    int sourceGapMappedLength = 0;
+    boolean inExon = false;
     for (char sourceChar : thatAligned)
     {
       if (sourceChar == sourceGap)
       {
-        sourceGapLength++;
+        sourceGapMappedLength += ratio;
         continue;
       }
 
@@ -476,7 +479,7 @@ public class AlignmentUtils
 
       int mappedCodonStart = mappedPos[0]; // position (1...) of codon start
       int mappedCodonEnd = mappedPos[mappedPos.length - 1]; // codon end pos
-      int trailingCopiedGapLength = 0;
+      StringBuilder trailingCopiedGap = new StringBuilder();
 
       /*
        * Copy dna sequence up to and including this codon. Optionally, include
@@ -486,7 +489,7 @@ public class AlignmentUtils
        * Note this only works for 'linear' splicing, not reverse or interleaved.
        * But then 'align dna as protein' doesn't make much sense otherwise.
        */
-      boolean inCodon = false;
+      int intronLength = 0;
       while (basesWritten < mappedCodonEnd && thisSeqPos < thisSeq.length)
       {
         final char c = thisSeq[thisSeqPos++];
@@ -494,40 +497,49 @@ public class AlignmentUtils
         {
           basesWritten++;
 
-          /*
-           * Is this the start of the mapped codon? If so, add in any extra gap
-           * due to the protein alignment.
-           */
-          if (basesWritten == mappedCodonStart)
+          if (basesWritten < mappedCodonStart)
           {
-            inCodon = true;
-            int gapsToAdd = Math.max(0, ratio * sourceGapLength
-                    - trailingCopiedGapLength);
+            /*
+             * Found an unmapped (intron) base. First add in any preceding gaps
+             * (if wanted).
+             */
+            if (preserveUnmappedGaps && trailingCopiedGap.length() > 0)
+            {
+              thisAligned.append(trailingCopiedGap.toString());
+              intronLength += trailingCopiedGap.length();
+              trailingCopiedGap = new StringBuilder();
+            }
+            intronLength++;
+            inExon = false;
+          }
+          else
+          {
+            final boolean startOfCodon = basesWritten == mappedCodonStart;
+            int gapsToAdd = calculateGapsToInsert(preserveMappedGaps,
+                    preserveUnmappedGaps, sourceGapMappedLength, inExon,
+                    trailingCopiedGap.length(), intronLength, startOfCodon);
             for (int i = 0; i < gapsToAdd; i++)
             {
               thisAligned.append(myGapChar);
             }
-            sourceGapLength = 0;
+            sourceGapMappedLength = 0;
+            inExon = true;
           }
           thisAligned.append(c);
-          trailingCopiedGapLength = 0;
+          trailingCopiedGap = new StringBuilder();
         }
-        else if ((!inCodon && preserveUnmappedGaps)
-                || (inCodon && preserveMappedGaps))
+        else
         {
-          thisAligned.append(c);
-          trailingCopiedGapLength++;
+          if (inExon && preserveMappedGaps)
+          {
+            trailingCopiedGap.append(myGapChar);
+          }
+          else if (!inExon && preserveUnmappedGaps)
+          {
+            trailingCopiedGap.append(myGapChar);
+          }
         }
       }
-
-      /*
-       * Expand (if necessary) the trailing gap to the size of the aligned gap.
-       */
-      int gapsToAdd = (ratio * sourceGapLength - trailingCopiedGapLength);
-      for (int i = 0; i < gapsToAdd; i++)
-      {
-        thisAligned.append(myGapChar);
-      }
     }
 
     /*
@@ -548,4 +560,69 @@ public class AlignmentUtils
      */
     alignTo.setSequence(new String(thisAligned));
   }
+
+  /**
+   * Helper method to work out how many gaps to insert when realigning.
+   * 
+   * @param preserveMappedGaps
+   * @param preserveUnmappedGaps
+   * @param sourceGapMappedLength
+   * @param inExon
+   * @param trailingCopiedGap
+   * @param intronLength
+   * @param startOfCodon
+   * @return
+   */
+  protected static int calculateGapsToInsert(boolean preserveMappedGaps,
+          boolean preserveUnmappedGaps, int sourceGapMappedLength,
+          boolean inExon, int trailingGapLength,
+          int intronLength, final boolean startOfCodon)
+  {
+    int gapsToAdd = 0;
+    if (startOfCodon)
+    {
+      /*
+       * Reached start of codon. Ignore trailing gaps in intron unless we are
+       * preserving gaps in both exon and intron. Ignore them anyway if the
+       * protein alignment introduces a gap at least as large as the intronic
+       * region.
+       */
+      if (inExon && !preserveMappedGaps)
+      {
+        trailingGapLength = 0;
+      }
+      if (!inExon && !(preserveMappedGaps && preserveUnmappedGaps))
+      {
+        trailingGapLength = 0;
+      }
+      if (inExon)
+      {
+        gapsToAdd = Math.max(sourceGapMappedLength, trailingGapLength);
+      }
+      else
+      {
+        if (intronLength + trailingGapLength <= sourceGapMappedLength)
+        {
+          gapsToAdd = sourceGapMappedLength - intronLength;
+        }
+        else
+        {
+          gapsToAdd = Math.min(intronLength + trailingGapLength
+                  - sourceGapMappedLength, trailingGapLength);
+        }
+      }
+    }
+    else
+    {
+      /*
+       * second or third base of codon; check for any gaps in dna
+       */
+      if (!preserveMappedGaps)
+      {
+        trailingGapLength = 0;
+      }
+      gapsToAdd = Math.max(sourceGapMappedLength, trailingGapLength);
+    }
+    return gapsToAdd;
+  }
 }