JAL-1619 first draft of 'linked protein and cDNA'

[jalview.git] / src / jalview / analysis / Dna.java
diff --git a/src/jalview/analysis/Dna.java b/src/jalview/analysis/Dna.java

index cbe871d..0c020dd 100644 (file)
--- a/src/jalview/analysis/Dna.java
+++ b/src/jalview/analysis/Dna.java
@@ -1,26 +1,25 @@
  /*
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8)
- * Copyright (C) 2012 J Procter, AM Waterhouse, LM Lui, J Engelhardt, G Barton, M Clamp, S Searle
+ * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2)
+ * Copyright (C) 2014 The Jalview Authors
   * 
   * This file is part of Jalview.
   * 
   * Jalview is free software: you can redistribute it and/or
   * modify it under the terms of the GNU General Public License 
- * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
   *  
   * Jalview is distributed in the hope that it will be useful, but 
   * WITHOUT ANY WARRANTY; without even the implied warranty 
   * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
   * PURPOSE.  See the GNU General Public License for more details.
   * 
- * You should have received a copy of the GNU General Public License along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
   */
  package jalview.analysis;
  
-import java.util.ArrayList;
-import java.util.Hashtable;
-import java.util.Vector;
-
  import jalview.datamodel.AlignedCodonFrame;
  import jalview.datamodel.Alignment;
  import jalview.datamodel.AlignmentAnnotation;
@@ -28,14 +27,25 @@ import jalview.datamodel.AlignmentI;
  import jalview.datamodel.Annotation;
  import jalview.datamodel.DBRefEntry;
  import jalview.datamodel.FeatureProperties;
+import jalview.datamodel.GraphLine;
  import jalview.datamodel.Mapping;
  import jalview.datamodel.Sequence;
  import jalview.datamodel.SequenceFeature;
  import jalview.datamodel.SequenceI;
  import jalview.schemes.ResidueProperties;
+import jalview.util.Comparison;
  import jalview.util.MapList;
  import jalview.util.ShiftList;
  
+import java.util.ArrayList;
+import java.util.Hashtable;
+import java.util.List;
+import java.util.Vector;
+
+import java.util.ArrayList;
+import java.util.Hashtable;
+import java.util.Vector;
+
  public class Dna
  {
    /**
@@ -49,9 +59,13 @@ public class Dna
    {
      if (cdp2 == null
              || (cdp1[0] == cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2]))
+    {
        return 0;
+    }
      if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2])
+     {
        return -1; // one base in cdp1 precedes the corresponding base in the
+    }
      // other codon
      return 1; // one base in cdp1 appears after the corresponding base in the
      // other codon.
@@ -125,7 +139,8 @@ public class Dna
      {
        SequenceI newseq = translateCodingRegion(selection[s], seqstring[s],
                viscontigs, codons, gapCharacter,
-              (product != null) ? product[s] : null); // possibly anonymous
+              (product != null) ? product[s] : null, false); // possibly
+                                                             // anonymous
        // product
        if (newseq != null)
        {
@@ -142,13 +157,15 @@ public class Dna
        }
      }
      if (codons.aaWidth == 0)
+    {
        return null;
+    }
      SequenceI[] newseqs = new SequenceI[pepseqs.size()];
      pepseqs.copyInto(newseqs);
      AlignmentI al = new Alignment(newseqs);
      al.padGaps(); // ensure we look aligned.
      al.setDataset(dataset);
-    translateAlignedAnnotations(annotations, al, codons);
+    // translateAlignedAnnotations(annotations, al, codons);
      al.addCodonFrame(codons);
      return al;
    }
@@ -260,7 +277,9 @@ public class Dna
              // generate seqstring for this sequence based on mapping
  
              if (sqstr.length() > alwidth)
+            {
                alwidth = sqstr.length();
+            }
              cdnasqs.addElement(sqstr.toString());
              cdnasqi.addElement(dna);
              cdnaprod.addElement(intersect);
@@ -280,7 +299,7 @@ public class Dna
    }
  
    /**
-   * translate na alignment annotations onto translated amino acid alignment al
+   * Translate na alignment annotations onto translated amino acid alignment al
     * using codon mapping codons
     * 
     * @param annotations
@@ -291,69 +310,71 @@ public class Dna
            AlignmentAnnotation[] annotations, AlignmentI al,
            AlignedCodonFrame codons)
    {
-    // //////////////////////////////
-    // Copy annotations across
-    //
      // Can only do this for columns with consecutive codons, or where
      // annotation is sequence associated.
  
-    int pos, a, aSize;
      if (annotations != null)
      {
-      for (int i = 0; i < annotations.length; i++)
+      for (AlignmentAnnotation annotation : annotations)
        {
-        // Skip any autogenerated annotation
-        if (annotations[i].autoCalculated)
+        /*
+         * Skip hidden or autogenerated annotation. Also (for now), RNA
+         * secondary structure annotation. If we want to show this against
+         * protein we need a smarter way to 'translate' without generating
+         * invalid (unbalanced) structure annotation.
+         */
+        if (annotation.autoCalculated || !annotation.visible
+                || annotation.isRNA())
          {
            continue;
          }
  
-        aSize = codons.getaaWidth(); // aa alignment width.
-        jalview.datamodel.Annotation[] anots = (annotations[i].annotations == null) ? null
-                : new jalview.datamodel.Annotation[aSize];
+        int aSize = codons.getaaWidth(); // aa alignment width.
+        Annotation[] anots = (annotation.annotations == null) ? null
+                : new Annotation[aSize];
          if (anots != null)
          {
-          for (a = 0; a < aSize; a++)
+          for (int a = 0; a < aSize; a++)
            {
              // process through codon map.
-            if (codons.codons[a] != null
+            if (a < codons.codons.length && codons.codons[a] != null
                      && codons.codons[a][0] == (codons.codons[a][2] - 2))
              {
                anots[a] = getCodonAnnotation(codons.codons[a],
-                      annotations[i].annotations);
+                      annotation.annotations);
              }
            }
          }
  
-        jalview.datamodel.AlignmentAnnotation aa = new jalview.datamodel.AlignmentAnnotation(
-                annotations[i].label, annotations[i].description, anots);
-        aa.graph = annotations[i].graph;
-        aa.graphGroup = annotations[i].graphGroup;
-        aa.graphHeight = annotations[i].graphHeight;
-        if (annotations[i].getThreshold() != null)
+        AlignmentAnnotation aa = new AlignmentAnnotation(annotation.label,
+                annotation.description, anots);
+        aa.graph = annotation.graph;
+        aa.graphGroup = annotation.graphGroup;
+        aa.graphHeight = annotation.graphHeight;
+        if (annotation.getThreshold() != null)
          {
-          aa.setThreshold(new jalview.datamodel.GraphLine(annotations[i]
+          aa.setThreshold(new GraphLine(annotation
                    .getThreshold()));
          }
-        if (annotations[i].hasScore)
+        if (annotation.hasScore)
          {
-          aa.setScore(annotations[i].getScore());
+          aa.setScore(annotation.getScore());
          }
-        if (annotations[i].sequenceRef != null)
+
+        final SequenceI seqRef = annotation.sequenceRef;
+        if (seqRef != null)
          {
-          SequenceI aaSeq = codons
-                  .getAaForDnaSeq(annotations[i].sequenceRef);
+          SequenceI aaSeq = codons.getAaForDnaSeq(seqRef);
            if (aaSeq != null)
            {
              // aa.compactAnnotationArray(); // throw away alignment annotation
              // positioning
              aa.setSequenceRef(aaSeq);
-            aa.createSequenceMapping(aaSeq, aaSeq.getStart(), true); // rebuild
-            // mapping
+            // rebuild mapping
+            aa.createSequenceMapping(aaSeq, aaSeq.getStart(), true);
              aa.adjustForAlignment();
              aaSeq.addAlignmentAnnotation(aa);
            }
-
          }
          al.addAnnotation(aa);
        }
@@ -403,7 +424,7 @@ public class Dna
      }
      if (contrib > 1)
      {
-      annot.value /= (float) contrib;
+      annot.value /= contrib;
      }
      return annot;
    }
@@ -420,14 +441,41 @@ public class Dna
     * @param codons
     *          Definition of global ORF alignment reference frame
     * @param gapCharacter
-   * @param newSeq
     * @return sequence ready to be added to alignment.
+   * @deprecated Use
+   *             {@link #translateCodingRegion(SequenceI,String,int[],AlignedCodonFrame,char,DBRefEntry,boolean)}
+   *             instead
     */
+  @Deprecated
    public static SequenceI translateCodingRegion(SequenceI selection,
            String seqstring, int[] viscontigs, AlignedCodonFrame codons,
            char gapCharacter, DBRefEntry product)
    {
-    java.util.List skip = new ArrayList();
+    return translateCodingRegion(selection, seqstring, viscontigs, codons,
+            gapCharacter, product, false);
+  }
+
+  /**
+   * Translate a nucleic acid (na) sequence
+   * 
+   * @param selection
+   *          sequence displayed under viscontigs visible columns
+   * @param seqstring
+   *          ORF read in some global alignment reference frame
+   * @param viscontigs
+   *          mapping from global reference frame to visible seqstring ORF read
+   * @param codons
+   *          Definition of global ORF alignment reference frame
+   * @param gapCharacter
+   * @param starForStop
+   *          when true stop codons will translate as '*', otherwise as 'X'
+   * @return sequence ready to be added to alignment.
+   */
+  public static SequenceI translateCodingRegion(SequenceI selection,
+          String seqstring, int[] viscontigs, AlignedCodonFrame codons,
+          char gapCharacter, DBRefEntry product, final boolean starForStop)
+  {
+    List<int[]> skip = new ArrayList<int[]>();
      int skipint[] = null;
      ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring
      // intervals
@@ -448,7 +496,8 @@ public class Dna
        scontigs[vc + 1] = viscontigs[vc + 1];
      }
  
-    StringBuffer protein = new StringBuffer();
+    // allocate a roughly sized buffer for the protein sequence
+    StringBuilder protein = new StringBuilder(seqstring.length() / 2);
      String seq = seqstring.replace('U', 'T');
      char codon[] = new char[3];
      int cdp[] = new int[3], rf = 0, lastnpos = 0, nend;
@@ -456,14 +505,16 @@ public class Dna
      int resSize = 0;
      for (npos = 0, nend = seq.length(); npos < nend; npos++)
      {
-      if (!jalview.util.Comparison.isGap(seq.charAt(npos)))
+      if (!Comparison.isGap(seq.charAt(npos)))
        {
          cdp[rf] = npos; // store position
          codon[rf++] = seq.charAt(npos); // store base
        }
-      // filled an RF yet ?
        if (rf == 3)
        {
+        /*
+         * Filled up a reading frame...
+         */
          String aa = ResidueProperties.codonTranslate(new String(codon));
          rf = 0;
          if (aa == null)
@@ -483,23 +534,84 @@ public class Dna
              // edit scontigs
              skipint[0] = vismapping.shift(skipint[0]);
              skipint[1] = vismapping.shift(skipint[1]);
-            for (vc = 0; vc < scontigs.length; vc += 2)
+            for (vc = 0; vc < scontigs.length;)
              {
                if (scontigs[vc + 1] < skipint[0])
                {
+                // before skipint starts
+                vc += 2;
                  continue;
                }
+              if (scontigs[vc] > skipint[1])
+              {
+                // past the end of skipint: finished editing, so stop scanning
+                break;
+              }
+              // Edit the contig list to include the skipped region which did
+              // not translate
+              int[] t;
+              // from: s1 e1 s2 e2 s3 e3
+              // to:   s1 e1 s2 k0 k1 e2 s3 e3
+              // the list grows by two entries when skipint splits a contig,
+              // is unchanged when only one boundary (s2==k0 or e2==k1) moves,
+              // and shrinks by two entries when skipint matches a whole contig
                if (scontigs[vc] <= skipint[0])
                {
                  if (skipint[0] == scontigs[vc])
                  {
-
+                  // skipint at start of contig
+                  // shift the start of this contig
+                  if (scontigs[vc + 1] > skipint[1])
+                  {
+                    scontigs[vc] = skipint[1];
+                    vc += 2;
+                  }
+                  else
+                  {
+                    if (scontigs[vc + 1] == skipint[1])
+                    {
+                      // remove the contig. NOTE(review): copy lengths below look wrong (expected vc and t.length - vc) - verify
+                      t = new int[scontigs.length - 2];
+                      if (vc > 0)
+                      {
+                        System.arraycopy(scontigs, 0, t, 0, vc - 1);
+                      }
+                      if (vc + 2 < t.length)
+                      {
+                        System.arraycopy(scontigs, vc + 2, t, vc, t.length
+                                - vc + 2);
+                      }
+                      scontigs = t;
+                    }
+                    else
+                    {
+                      // truncate contig to before the skipint region
+                      scontigs[vc + 1] = skipint[0] - 1;
+                      vc += 2;
+                    }
+                  }
                  }
                  else
                  {
-                  int[] t = new int[scontigs.length + 2];
-                  System.arraycopy(scontigs, 0, t, 0, vc - 1);
-                  // scontigs[vc]; //
+                  // scontig starts before start of skipint
+                  if (scontigs[vc + 1] < skipint[1])
+                  {
+                    // skipint truncates end of scontig
+                    scontigs[vc + 1] = skipint[0] - 1;
+                    vc += 2;
+                  }
+                  else
+                  {
+                    // divide region to new contigs
+                    t = new int[scontigs.length + 2];
+                    System.arraycopy(scontigs, 0, t, 0, vc + 1);
+                    t[vc + 1] = skipint[0];
+                    t[vc + 2] = skipint[1];
+                    System.arraycopy(scontigs, vc + 1, t, vc + 3,
+                            scontigs.length - (vc + 1));
+                    scontigs = t;
+                    vc += 4;
+                  }
                  }
                }
              }
@@ -508,7 +620,7 @@ public class Dna
            }
            if (aa.equals("STOP"))
            {
-            aa = "X";
+            aa = starForStop ? "*" : "X";
            }
            resSize++;
          }
@@ -565,10 +677,15 @@ public class Dna
                protein.toString());
        if (rf != 0)
        {
-        if (jalview.bin.Cache.log!=null) {
-          jalview.bin.Cache.log.debug("trimming contigs for incomplete terminal codon.");
-        } else {
-          System.err.println("trimming contigs for incomplete terminal codon.");
+        if (jalview.bin.Cache.log != null)
+        {
+          jalview.bin.Cache.log
+                  .debug("trimming contigs for incomplete terminal codon.");
+        }
+        else
+        {
+          System.err
+                  .println("trimming contigs for incomplete terminal codon.");
          }
          // map and trim contigs to ORF region
          vc = scontigs.length - 1;
@@ -598,7 +715,9 @@ public class Dna
            scontigs = t;
          }
          if (vc <= 0)
+        {
            scontigs = null;
+        }
        }
        if (scontigs != null)
        {
@@ -609,7 +728,9 @@ public class Dna
            scontigs[vc] = selection.findPosition(scontigs[vc]); // not from 1!
            scontigs[vc + 1] = selection.findPosition(scontigs[vc + 1]); // exclusive
            if (scontigs[vc + 1] == selection.getEnd())
+          {
              break;
+          }
          }
          // trim trailing empty intervals.
          if ((vc + 2) < scontigs.length)
@@ -682,7 +803,8 @@ public class Dna
    private static void transferCodedFeatures(SequenceI dna, SequenceI pep,
            MapList map, Hashtable featureTypes, Hashtable featureGroups)
    {
-    SequenceFeature[] sf = (dna.getDatasetSequence()!=null ? dna.getDatasetSequence() : dna).getSequenceFeatures();
+    SequenceFeature[] sf = (dna.getDatasetSequence() != null ? dna
+            .getDatasetSequence() : dna).getSequenceFeatures();
      Boolean fgstate;
      jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils
              .selectRefs(dna.getDBRef(),