JAL-1619 first draft of 'linked protein and cDNA'
[jalview.git] / src / jalview / analysis / Dna.java
index 2e56e67..0c020dd 100644 (file)
  */
 package jalview.analysis;
 
-import java.util.ArrayList;
-import java.util.Hashtable;
-import java.util.Vector;
-
 import jalview.datamodel.AlignedCodonFrame;
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentAnnotation;
@@ -31,14 +27,25 @@ import jalview.datamodel.AlignmentI;
 import jalview.datamodel.Annotation;
 import jalview.datamodel.DBRefEntry;
 import jalview.datamodel.FeatureProperties;
+import jalview.datamodel.GraphLine;
 import jalview.datamodel.Mapping;
 import jalview.datamodel.Sequence;
 import jalview.datamodel.SequenceFeature;
 import jalview.datamodel.SequenceI;
 import jalview.schemes.ResidueProperties;
+import jalview.util.Comparison;
 import jalview.util.MapList;
 import jalview.util.ShiftList;
 
+import java.util.ArrayList;
+import java.util.Hashtable;
+import java.util.List;
+import java.util.Vector;
+
+import java.util.ArrayList;
+import java.util.Hashtable;
+import java.util.Vector;
+
 public class Dna
 {
   /**
@@ -52,9 +59,13 @@ public class Dna
   {
     if (cdp2 == null
             || (cdp1[0] == cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2]))
+    {
       return 0;
+    }
     if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2])
+     {
       return -1; // one base in cdp1 precedes the corresponding base in the
+    }
     // other codon
     return 1; // one base in cdp1 appears after the corresponding base in the
     // other codon.
@@ -146,13 +157,15 @@ public class Dna
       }
     }
     if (codons.aaWidth == 0)
+    {
       return null;
+    }
     SequenceI[] newseqs = new SequenceI[pepseqs.size()];
     pepseqs.copyInto(newseqs);
     AlignmentI al = new Alignment(newseqs);
     al.padGaps(); // ensure we look aligned.
     al.setDataset(dataset);
-    translateAlignedAnnotations(annotations, al, codons);
+    // translateAlignedAnnotations(annotations, al, codons);
     al.addCodonFrame(codons);
     return al;
   }
@@ -264,7 +277,9 @@ public class Dna
             // generate seqstring for this sequence based on mapping
 
             if (sqstr.length() > alwidth)
+            {
               alwidth = sqstr.length();
+            }
             cdnasqs.addElement(sqstr.toString());
             cdnasqi.addElement(dna);
             cdnaprod.addElement(intersect);
@@ -284,7 +299,7 @@ public class Dna
   }
 
   /**
-   * translate na alignment annotations onto translated amino acid alignment al
+   * Translate nucleotide alignment annotations onto translated amino acid alignment al
    * using codon mapping codons
    * 
    * @param annotations
@@ -295,69 +310,71 @@ public class Dna
           AlignmentAnnotation[] annotations, AlignmentI al,
           AlignedCodonFrame codons)
   {
-    // //////////////////////////////
-    // Copy annotations across
-    //
     // Can only do this for columns with consecutive codons, or where
     // annotation is sequence associated.
 
-    int pos, a, aSize;
     if (annotations != null)
     {
-      for (int i = 0; i < annotations.length; i++)
+      for (AlignmentAnnotation annotation : annotations)
       {
-        // Skip any autogenerated annotation
-        if (annotations[i].autoCalculated)
+        /*
+         * Skip hidden or autogenerated annotation and also (for now) RNA
+         * secondary structure annotation. To show the latter against
+         * protein we need a smarter way to 'translate' it without generating
+         * invalid (unbalanced) structure annotation.
+         */
+        if (annotation.autoCalculated || !annotation.visible
+                || annotation.isRNA())
         {
           continue;
         }
 
-        aSize = codons.getaaWidth(); // aa alignment width.
-        jalview.datamodel.Annotation[] anots = (annotations[i].annotations == null) ? null
-                : new jalview.datamodel.Annotation[aSize];
+        int aSize = codons.getaaWidth(); // aa alignment width.
+        Annotation[] anots = (annotation.annotations == null) ? null
+                : new Annotation[aSize];
         if (anots != null)
         {
-          for (a = 0; a < aSize; a++)
+          for (int a = 0; a < aSize; a++)
           {
             // process through codon map.
-            if (codons.codons[a] != null
+            if (a < codons.codons.length && codons.codons[a] != null
                     && codons.codons[a][0] == (codons.codons[a][2] - 2))
             {
               anots[a] = getCodonAnnotation(codons.codons[a],
-                      annotations[i].annotations);
+                      annotation.annotations);
             }
           }
         }
 
-        jalview.datamodel.AlignmentAnnotation aa = new jalview.datamodel.AlignmentAnnotation(
-                annotations[i].label, annotations[i].description, anots);
-        aa.graph = annotations[i].graph;
-        aa.graphGroup = annotations[i].graphGroup;
-        aa.graphHeight = annotations[i].graphHeight;
-        if (annotations[i].getThreshold() != null)
+        AlignmentAnnotation aa = new AlignmentAnnotation(annotation.label,
+                annotation.description, anots);
+        aa.graph = annotation.graph;
+        aa.graphGroup = annotation.graphGroup;
+        aa.graphHeight = annotation.graphHeight;
+        if (annotation.getThreshold() != null)
         {
-          aa.setThreshold(new jalview.datamodel.GraphLine(annotations[i]
+          aa.setThreshold(new GraphLine(annotation
                   .getThreshold()));
         }
-        if (annotations[i].hasScore)
+        if (annotation.hasScore)
         {
-          aa.setScore(annotations[i].getScore());
+          aa.setScore(annotation.getScore());
         }
-        if (annotations[i].sequenceRef != null)
+
+        final SequenceI seqRef = annotation.sequenceRef;
+        if (seqRef != null)
         {
-          SequenceI aaSeq = codons
-                  .getAaForDnaSeq(annotations[i].sequenceRef);
+          SequenceI aaSeq = codons.getAaForDnaSeq(seqRef);
           if (aaSeq != null)
           {
             // aa.compactAnnotationArray(); // throw away alignment annotation
             // positioning
             aa.setSequenceRef(aaSeq);
-            aa.createSequenceMapping(aaSeq, aaSeq.getStart(), true); // rebuild
-            // mapping
+            // rebuild mapping
+            aa.createSequenceMapping(aaSeq, aaSeq.getStart(), true);
             aa.adjustForAlignment();
             aaSeq.addAlignmentAnnotation(aa);
           }
-
         }
         al.addAnnotation(aa);
       }
@@ -407,7 +424,7 @@ public class Dna
     }
     if (contrib > 1)
     {
-      annot.value /= (float) contrib;
+      annot.value /= contrib;
     }
     return annot;
   }
@@ -429,6 +446,7 @@ public class Dna
    *             {@link #translateCodingRegion(SequenceI,String,int[],AlignedCodonFrame,char,DBRefEntry,boolean)}
    *             instead
    */
+  @Deprecated
   public static SequenceI translateCodingRegion(SequenceI selection,
           String seqstring, int[] viscontigs, AlignedCodonFrame codons,
           char gapCharacter, DBRefEntry product)
@@ -457,7 +475,7 @@ public class Dna
           String seqstring, int[] viscontigs, AlignedCodonFrame codons,
           char gapCharacter, DBRefEntry product, final boolean starForStop)
   {
-    java.util.List skip = new ArrayList();
+    List<int[]> skip = new ArrayList<int[]>();
     int skipint[] = null;
     ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring
     // intervals
@@ -478,7 +496,8 @@ public class Dna
       scontigs[vc + 1] = viscontigs[vc + 1];
     }
 
-    StringBuffer protein = new StringBuffer();
+    // allocate a roughly sized buffer for the protein sequence
+    StringBuilder protein = new StringBuilder(seqstring.length() / 2);
     String seq = seqstring.replace('U', 'T');
     char codon[] = new char[3];
     int cdp[] = new int[3], rf = 0, lastnpos = 0, nend;
@@ -486,14 +505,16 @@ public class Dna
     int resSize = 0;
     for (npos = 0, nend = seq.length(); npos < nend; npos++)
     {
-      if (!jalview.util.Comparison.isGap(seq.charAt(npos)))
+      if (!Comparison.isGap(seq.charAt(npos)))
       {
         cdp[rf] = npos; // store position
         codon[rf++] = seq.charAt(npos); // store base
       }
-      // filled an RF yet ?
       if (rf == 3)
       {
+        /*
+         * Filled up a reading frame...
+         */
         String aa = ResidueProperties.codonTranslate(new String(codon));
         rf = 0;
         if (aa == null)
@@ -694,7 +715,9 @@ public class Dna
           scontigs = t;
         }
         if (vc <= 0)
+        {
           scontigs = null;
+        }
       }
       if (scontigs != null)
       {
@@ -705,7 +728,9 @@ public class Dna
           scontigs[vc] = selection.findPosition(scontigs[vc]); // not from 1!
           scontigs[vc + 1] = selection.findPosition(scontigs[vc + 1]); // exclusive
           if (scontigs[vc + 1] == selection.getEnd())
+          {
             break;
+          }
         }
         // trim trailing empty intervals.
         if ((vc + 2) < scontigs.length)