Merge branch 'develop' into features/JAL-845splitPaneMergeDevelop

[jalview.git] / src / jalview / analysis / Dna.java
diff --git a/src/jalview/analysis/Dna.java b/src/jalview/analysis/Dna.java

index ebaee8b..a785073 100644 (file)
--- a/src/jalview/analysis/Dna.java
+++ b/src/jalview/analysis/Dna.java
@@ -1,6 +1,6 @@
  /*
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2)
- * Copyright (C) 2014 The Jalview Authors
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
   * 
   * This file is part of Jalview.
   * 
@@ -22,6 +22,7 @@ package jalview.analysis;
  
  import jalview.api.AlignViewportI;
  import jalview.bin.Cache;
+import jalview.datamodel.AlignedCodon;
  import jalview.datamodel.AlignedCodonFrame;
  import jalview.datamodel.Alignment;
  import jalview.datamodel.AlignmentAnnotation;
@@ -43,13 +44,20 @@ import jalview.util.ShiftList;
  
  import java.util.ArrayList;
  import java.util.Arrays;
-import java.util.Hashtable;
+import java.util.Comparator;
  import java.util.List;
+import java.util.Map;
  
  public class Dna
  {
    private static final String STOP_X = "X";
  
+  private static final Comparator<AlignedCodon> comparator = new CodonComparator();
+
+  /*
+   * 'final' variables describe the inputs to the translation, which should not
+   * be modified.
+   */
    final private List<SequenceI> selection;
  
    final private String[] seqstring;
@@ -64,8 +72,21 @@ public class Dna
  
    final private Alignment dataset;
  
+  /*
+   * Working variables for the translation.
+   * 
+   * The width of the translation-in-progress protein alignment.
+   */
    private int aaWidth = 0;
  
+  /*
+   * This array will be built up so that position i holds the codon positions
+   * e.g. [7, 9, 10] that match column i (base 0) in the aligned translation.
+   * Note this implies a contract that if two codons do not align exactly, their
+   * translated products must occupy different column positions.
+   */
+  private AlignedCodon[] alignedCodons;
+
    /**
     * Constructor given a viewport and the visible contigs.
     * 
@@ -96,19 +117,31 @@ public class Dna
     * <li>compareCodonPos([3,4,5], [2,5,6]) also returns -1</li>
     * </ul>
     * 
-   * @param cdp1
-   * @param cdp2
+   * @param ac1
+   * @param ac2
     * @return
     */
-  public static int compareCodonPos(int[] cdp1, int[] cdp2)
+  public static final int compareCodonPos(AlignedCodon ac1, AlignedCodon ac2)
    {
-    if (cdp1 == null
-            || cdp2 == null
-            || (cdp1[0] == cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2]))
+    return comparator.compare(ac1, ac2);
+    // return jalview_2_8_2compare(ac1, ac2);
+  }
+
+  /**
+   * Codon comparison up to Jalview 2.8.2. This rule is sequence order dependent
+   * - see http://issues.jalview.org/browse/JAL-1635
+   * 
+   * @param ac1
+   * @param ac2
+   * @return
+   */
+  private static int jalview_2_8_2compare(AlignedCodon ac1, AlignedCodon ac2)
+  {
+    if (ac1 == null || ac2 == null || (ac1.equals(ac2)))
      {
        return 0;
      }
-    if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2])
+    if (ac1.pos1 < ac2.pos1 || ac1.pos2 < ac2.pos2 || ac1.pos3 < ac2.pos3)
      {
        // one base in cdp1 precedes the corresponding base in the other codon
        return -1;
@@ -125,13 +158,7 @@ public class Dna
    {
      AlignedCodonFrame acf = new AlignedCodonFrame();
  
-    /*
-     * This array will be built up so that position i holds the codon positions
-     * e.g. [7, 9, 10] that match column i (base 0) in the aligned translation.
-     * Note this implies a contract that if two codons do not align exactly,
-     * their translated products must occupy different column positions.
-     */
-    int[][] alignedCodons = new int[dnaWidth][];
+    alignedCodons = new AlignedCodon[dnaWidth];
  
      int s;
      int sSize = selection.size();
@@ -139,7 +166,7 @@ public class Dna
      for (s = 0; s < sSize; s++)
      {
        SequenceI newseq = translateCodingRegion(selection.get(s),
-              seqstring[s], acf, alignedCodons, pepseqs);
+              seqstring[s], acf, pepseqs);
  
        if (newseq != null)
        {
@@ -162,7 +189,7 @@ public class Dna
      al.padGaps();
      // link the protein translation to the DNA dataset
      al.setDataset(dataset);
-    translateAlignedAnnotations(annotations, al, acf, alignedCodons);
+    translateAlignedAnnotations(al, acf);
      al.addCodonFrame(acf);
      return al;
    }
@@ -222,16 +249,14 @@ public class Dna
    }
  
    /**
-   * Translate na alignment annotations onto translated amino acid alignment al
-   * using codon mapping codons
+   * Translate nucleotide alignment annotations onto translated amino acid
+   * alignment using codon mapping codons
     * 
-   * @param annotations
     * @param al
-   * @param acf
+   *          the translated protein alignment
     */
-  protected static void translateAlignedAnnotations(
-          AlignmentAnnotation[] annotations, AlignmentI al,
-          AlignedCodonFrame acf, int[][] codons)
+  protected void translateAlignedAnnotations(AlignmentI al,
+          AlignedCodonFrame acf)
    {
      // Can only do this for columns with consecutive codons, or where
      // annotation is sequence associated.
@@ -252,7 +277,7 @@ public class Dna
            continue;
          }
  
-        int aSize = acf.getaaWidth(); // aa alignment width.
+        int aSize = aaWidth;
          Annotation[] anots = (annotation.annotations == null) ? null
                  : new Annotation[aSize];
          if (anots != null)
@@ -260,10 +285,10 @@ public class Dna
            for (int a = 0; a < aSize; a++)
            {
              // process through codon map.
-            if (a < codons.length && codons[a] != null
-                    && codons[a][0] == (codons[a][2] - 2))
+            if (a < alignedCodons.length && alignedCodons[a] != null
+                    && alignedCodons[a].pos1 == (alignedCodons[a].pos3 - 2))
              {
-              anots[a] = getCodonAnnotation(codons[a],
+              anots[a] = getCodonAnnotation(alignedCodons[a],
                        annotation.annotations);
              }
            }
@@ -304,26 +329,27 @@ public class Dna
      }
    }
  
-  private static Annotation getCodonAnnotation(int[] is,
+  private static Annotation getCodonAnnotation(AlignedCodon is,
            Annotation[] annotations)
    {
      // Have a look at all the codon positions for annotation and put the first
      // one found into the translated annotation pos.
      int contrib = 0;
      Annotation annot = null;
-    for (int p = 0; p < 3; p++)
+    for (int p = 1; p <= 3; p++)
      {
-      if (annotations[is[p]] != null)
+      int dnaCol = is.getBaseColumn(p);
+      if (annotations[dnaCol] != null)
        {
          if (annot == null)
          {
-          annot = new Annotation(annotations[is[p]]);
+          annot = new Annotation(annotations[dnaCol]);
            contrib = 1;
          }
          else
          {
            // merge with last
-          Annotation cpy = new Annotation(annotations[is[p]]);
+          Annotation cpy = new Annotation(annotations[dnaCol]);
            if (annot.colour == null)
            {
              annot.colour = cpy.colour;
@@ -359,20 +385,14 @@ public class Dna
     *          sequence displayed under viscontigs visible columns
     * @param seqstring
     *          ORF read in some global alignment reference frame
-   * @param viscontigs
-   *          mapping from global reference frame to visible seqstring ORF read
     * @param acf
     *          Definition of global ORF alignment reference frame
-   * @param alignedCodons
     * @param proteinSeqs
-   * @param gapCharacter
-   * @param starForStop
-   *          when true stop codons will translate as '*', otherwise as 'X'
     * @return sequence ready to be added to alignment.
     */
    protected SequenceI translateCodingRegion(SequenceI selection,
            String seqstring, AlignedCodonFrame acf,
-          int[][] alignedCodons, List<SequenceI> proteinSeqs)
+          List<SequenceI> proteinSeqs)
    {
      List<int[]> skip = new ArrayList<int[]>();
      int skipint[] = null;
@@ -400,7 +420,10 @@ public class Dna
      StringBuilder protein = new StringBuilder(seqstring.length() / 2);
      String seq = seqstring.replace('U', 'T').replace('u', 'T');
      char codon[] = new char[3];
-    int cdp[] = new int[3], rf = 0, lastnpos = 0, nend;
+    int cdp[] = new int[3];
+    int rf = 0;
+    int lastnpos = 0;
+    int nend;
      int aspos = 0;
      int resSize = 0;
      for (npos = 0, nend = seq.length(); npos < nend; npos++)
@@ -415,6 +438,7 @@ public class Dna
          /*
           * Filled up a reading frame...
           */
+        AlignedCodon alignedCodon = new AlignedCodon(cdp[0], cdp[1], cdp[2]);
          String aa = ResidueProperties.codonTranslate(new String(codon));
          rf = 0;
          final String gapString = String.valueOf(gapChar);
@@ -424,9 +448,9 @@ public class Dna
            if (skipint == null)
            {
              skipint = new int[]
-            { cdp[0], cdp[2] };
+            { alignedCodon.pos1, alignedCodon.pos3 /* cdp[0], cdp[2] */};
            }
-          skipint[1] = cdp[2];
+          skipint[1] = alignedCodon.pos3; // cdp[2];
          }
          else
          {
@@ -525,31 +549,15 @@ public class Dna
            }
            resSize++;
          }
-        // insert gaps prior to this codon - if necessary
          boolean findpos = true;
          while (findpos)
          {
-          // expand the codons array if necessary
-          alignedCodons = checkCodonFrameWidth(alignedCodons, aspos);
-
            /*
             * Compare this codon's base positions with those currently aligned to
             * this column in the translation.
             */
-          final int compareCodonPos = Dna.compareCodonPos(cdp,
+          final int compareCodonPos = compareCodonPos(alignedCodon,
                    alignedCodons[aspos]);
-          // debug
-          System.out.println(seq + "/" + aa + " codons: "
-                  + Arrays.deepToString(alignedCodons));
-          for (SequenceI s : proteinSeqs)
-          {
-            System.out.println(s.getSequenceAsString());
-          }
-          System.out
-                  .println(("Compare " + Arrays.toString(cdp) + " at pos "
-                          + aspos + " with "
-                          + Arrays.toString(alignedCodons[aspos]) + " got " + compareCodonPos));
-          // end debug
            switch (compareCodonPos)
            {
            case -1:
@@ -558,7 +566,7 @@ public class Dna
               * This codon should precede the mapped positions - need to insert a
               * gap in all prior sequences.
               */
-            alignedCodons = insertAAGap(aspos, alignedCodons, proteinSeqs);
+            insertAAGap(aspos, proteinSeqs);
              findpos = false;
              break;
  
@@ -585,20 +593,17 @@ public class Dna
          if (alignedCodons[aspos] == null)
          {
            // mark this column as aligning to this aligned reading frame
-          alignedCodons[aspos] = new int[]
-          { cdp[0], cdp[1], cdp[2] };
+          alignedCodons[aspos] = alignedCodon;
          }
-        else if (!Arrays.equals(alignedCodons[aspos], cdp))
+        else if (!alignedCodons[aspos].equals(alignedCodon))
          {
            throw new IllegalStateException("Tried to coalign "
-                  + Arrays.asList(alignedCodons[aspos], cdp));
+                  + alignedCodons[aspos].toString() + " with "
+                  + alignedCodon.toString());
          }
-        System.out.println(aspos + "/" + aaWidth);
          if (aspos >= aaWidth)
          {
            // update maximum alignment width
-          // (we can do this without calling checkCodonFrameWidth because it was
-          // already done above)
            aaWidth = aspos;
          }
          // ready for next translated reading frame alignment position (if any)
@@ -714,56 +719,47 @@ public class Dna
     * Insert a gap into the aligned proteins and the codon mapping array.
     * 
     * @param pos
-   * @param alignedCodons
     * @param proteinSeqs
     * @return
     */
-  protected int[][] insertAAGap(int pos, int[][] alignedCodons,
+  protected void insertAAGap(int pos,
            List<SequenceI> proteinSeqs)
    {
-    System.out.println("insertAAGap " + pos + "/" + proteinSeqs.size());
      aaWidth++;
      for (SequenceI seq : proteinSeqs)
      {
        seq.insertCharAt(pos, gapChar);
      }
  
-    int[][] resized = checkCodonFrameWidth(alignedCodons, pos);
+    checkCodonFrameWidth();
      if (pos < aaWidth)
      {
        aaWidth++;
-      System.arraycopy(resized, pos, resized, pos + 1, resized.length - pos
-              - 1);
-      resized[pos] = null; // clear so new codon position can be marked.
+
+      /*
+       * Shift from [pos] to the end one to the right, and null out [pos]
+       */
+      System.arraycopy(alignedCodons, pos, alignedCodons, pos + 1,
+              alignedCodons.length - pos - 1);
+      alignedCodons[pos] = null;
      }
-    return resized;
    }
  
    /**
-   * Check the codons array is big enough to accommodate the given position, if
-   * not resize it.
-   * 
-   * @param alignedCodons
-   * @param aspos
-   * @return the resized array (or the original if no resize needed)
+   * Check the codons array can accommodate a single insertion, if not resize
+   * it.
     */
-  protected static int[][] checkCodonFrameWidth(int[][] alignedCodons,
-          int aspos)
+  protected void checkCodonFrameWidth()
    {
-    // TODO why not codons.length < aspos ?
-    // should codons expand if length is 2 or 3 and aslen==2 ?
-    System.out.println("Checking " + alignedCodons.length + "/" + aspos);
-    if (alignedCodons.length <= aspos + 1)
+    if (alignedCodons[alignedCodons.length - 1] != null)
      {
-      // probably never have to do this ?
-      int[][] c = new int[alignedCodons.length + 10][];
-      for (int i = 0; i < alignedCodons.length; i++)
-      {
-        c[i] = alignedCodons[i];
-      }
-      return c;
+      /*
+       * arraycopy insertion would bump a filled slot off the end, so expand.
+       */
+      AlignedCodon[] c = new AlignedCodon[alignedCodons.length + 10];
+      System.arraycopy(alignedCodons, 0, c, 0, alignedCodons.length);
+      alignedCodons = c;
      }
-    return alignedCodons;
    }
  
    /**
@@ -776,15 +772,16 @@ public class Dna
     * @param pep
     * @param map
     * @param featureTypes
-   *          hash who's keys are the displayed feature type strings
+   *          hash whose keys are the displayed feature type strings
     * @param featureGroups
     *          hash where keys are feature groups and values are Boolean objects
     *          indicating if they are displayed.
     */
    private static void transferCodedFeatures(SequenceI dna, SequenceI pep,
-          MapList map, Hashtable featureTypes, Hashtable featureGroups)
+          MapList map, Map<String, Object> featureTypes,
+          Map<String, Boolean> featureGroups)
    {
-    SequenceFeature[] sf = (dna.getDatasetSequence() != null ? dna
+    SequenceFeature[] sfs = (dna.getDatasetSequence() != null ? dna
              .getDatasetSequence() : dna).getSequenceFeatures();
      Boolean fgstate;
      DBRefEntry[] dnarefs = DBRefUtils.selectRefs(dna.getDBRef(),
@@ -800,16 +797,16 @@ public class Dna
          }
        }
      }
-    if (sf != null)
+    if (sfs != null)
      {
-      for (int f = 0; f < sf.length; f++)
+      for (SequenceFeature sf : sfs)
        {
-        fgstate = (featureGroups == null) ? null : ((Boolean) featureGroups
-                .get(sf[f].featureGroup));
-        if ((featureTypes == null || featureTypes.containsKey(sf[f]
-                .getType())) && (fgstate == null || fgstate.booleanValue()))
+        fgstate = (featureGroups == null) ? null : featureGroups
+                .get(sf.featureGroup);
+        if ((featureTypes == null || featureTypes.containsKey(sf.getType()))
+                && (fgstate == null || fgstate.booleanValue()))
          {
-          if (FeatureProperties.isCodingFeature(null, sf[f].getType()))
+          if (FeatureProperties.isCodingFeature(null, sf.getType()))
            {
              // if (map.intersectsFrom(sf[f].begin, sf[f].end))
              {