Merge branch 'develop' into features/JAL-845splitPaneMergeDevelop

[jalview.git] / src / jalview / analysis / Dna.java
diff --git a/src/jalview/analysis/Dna.java b/src/jalview/analysis/Dna.java

index 0e7d736..a785073 100644 (file)
--- a/src/jalview/analysis/Dna.java
+++ b/src/jalview/analysis/Dna.java
@@ -1,6 +1,6 @@
  /*
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2)
- * Copyright (C) 2014 The Jalview Authors
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
   * 
   * This file is part of Jalview.
   * 
@@ -20,7 +20,9 @@
   */
  package jalview.analysis;
  
+import jalview.api.AlignViewportI;
  import jalview.bin.Cache;
+import jalview.datamodel.AlignedCodon;
  import jalview.datamodel.AlignedCodonFrame;
  import jalview.datamodel.Alignment;
  import jalview.datamodel.AlignmentAnnotation;
@@ -42,11 +44,66 @@ import jalview.util.ShiftList;
  
  import java.util.ArrayList;
  import java.util.Arrays;
-import java.util.Hashtable;
+import java.util.Comparator;
  import java.util.List;
+import java.util.Map;
  
  public class Dna
  {
+  private static final String STOP_X = "X";
+
+  private static final Comparator<AlignedCodon> comparator = new CodonComparator();
+
+  /*
+   * 'final' variables describe the inputs to the translation, which should not
+   * be modified.
+   */
+  final private List<SequenceI> selection;
+
+  final private String[] seqstring;
+
+  final private int[] contigs;
+
+  final private char gapChar;
+
+  final private AlignmentAnnotation[] annotations;
+
+  final private int dnaWidth;
+
+  final private Alignment dataset;
+
+  /*
+   * Working variables for the translation.
+   * 
+   * The width of the translation-in-progress protein alignment.
+   */
+  private int aaWidth = 0;
+
+  /*
+   * This array will be built up so that position i holds the codon positions
+   * e.g. [7, 9, 10] that match column i (base 0) in the aligned translation.
+   * Note this implies a contract that if two codons do not align exactly, their
+   * translated products must occupy different column positions.
+   */
+  private AlignedCodon[] alignedCodons;
+
+  /**
+   * Constructor given a viewport and the visible contigs.
+   * 
+   * @param viewport
+   * @param visibleContigs
+   */
+  public Dna(AlignViewportI viewport, int[] visibleContigs)
+  {
+    this.selection = Arrays.asList(viewport.getSequenceSelection());
+    this.seqstring = viewport.getViewAsString(true);
+    this.contigs = visibleContigs;
+    this.gapChar = viewport.getGapCharacter();
+    this.annotations = viewport.getAlignment().getAlignmentAnnotation();
+    this.dnaWidth = viewport.getAlignment().getWidth();
+    this.dataset = viewport.getAlignment().getDataset();
+  }
+
    /**
     * Test whether codon positions cdp1 should align before, with, or after cdp2.
     * Returns zero if all positions match (or either argument is null). Returns
@@ -60,19 +117,31 @@ public class Dna
     * <li>compareCodonPos([3,4,5], [2,5,6]) also returns -1</li>
     * </ul>
     * 
-   * @param cdp1
-   * @param cdp2
+   * @param ac1
+   * @param ac2
+   * @return
+   */
+  public static final int compareCodonPos(AlignedCodon ac1, AlignedCodon ac2)
+  {
+    return comparator.compare(ac1, ac2);
+    // return jalview_2_8_2compare(ac1, ac2);
+  }
+
+  /**
+   * Codon comparison up to Jalview 2.8.2. This rule is sequence order dependent
+   * - see http://issues.jalview.org/browse/JAL-1635
+   * 
+   * @param ac1
+   * @param ac2
     * @return
     */
-  public static int compareCodonPos(int[] cdp1, int[] cdp2)
+  private static int jalview_2_8_2compare(AlignedCodon ac1, AlignedCodon ac2)
    {
-    if (cdp1 == null
-            || cdp2 == null
-            || (cdp1[0] == cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2]))
+    if (ac1 == null || ac2 == null || (ac1.equals(ac2)))
      {
        return 0;
      }
-    if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2])
+    if (ac1.pos1 < ac2.pos1 || ac1.pos2 < ac2.pos2 || ac1.pos3 < ac2.pos3)
      {
        // one base in cdp1 precedes the corresponding base in the other codon
        return -1;
@@ -82,80 +151,22 @@ public class Dna
    }
  
    /**
-   * DNA->mapped protein sequence alignment translation given set of sequences
-   * 1. id distinct coding regions within selected region for each sequence 2.
-   * generate peptides based on inframe (or given) translation or (optionally
-   * and where specified) out of frame translations (annotated appropriately) 3.
-   * align peptides based on codon alignment
-   */
-  /**
-   * id potential products from dna 1. search for distinct products within
-   * selected region for each selected sequence 2. group by associated DB type.
-   * 3. return as form for input into above function
-   */
-  /**
-   * 
-   */
-  /**
-   * create a new alignment of protein sequences by an inframe translation of
-   * the provided NA sequences
-   * 
-   * @param selection
-   * @param seqstring
-   * @param viscontigs
-   * @param gapCharacter
-   * @param annotations
-   * @param aWidth
-   * @param dataset
-   *          destination dataset for translated sequences and mappings
-   * @return
-   */
-  public static AlignmentI cdnaTranslate(SequenceI[] selection,
-          String[] seqstring, int viscontigs[], char gapCharacter,
-          AlignmentAnnotation[] annotations, int aWidth, Alignment dataset)
-  {
-    return cdnaTranslate(Arrays.asList(selection), seqstring, null,
-            viscontigs, gapCharacter, annotations, aWidth, dataset);
-  }
-
-  /**
     * 
-   * @param cdnaseqs
-   * @param seqstring
-   * @param product
-   *          - array of DbRefEntry objects from which exon map in seqstring is
-   *          derived
-   * @param viscontigs
-   * @param gapCharacter
-   * @param annotations
-   * @param aWidth
-   * @param dataset
     * @return
     */
-  public static AlignmentI cdnaTranslate(List<SequenceI> cdnaseqs,
-          String[] seqstring, DBRefEntry[] product, int viscontigs[],
-          char gapCharacter, AlignmentAnnotation[] annotations, int aWidth,
-          Alignment dataset)
+  public AlignmentI translateCdna()
    {
-    AlignedCodonFrame acf = new AlignedCodonFrame(aWidth);
+    AlignedCodonFrame acf = new AlignedCodonFrame();
  
-    /*
-     * This array will be built up so that position i holds the codon positions
-     * e.g. [7, 9, 10] that match column i (base 0) in the aligned translation.
-     * Note this implies a contract that if two codons do not align exactly,
-     * their translated products must occupy different column positions.
-     */
-    int[][] alignedCodons = new int[aWidth][];
+    alignedCodons = new AlignedCodon[dnaWidth];
  
      int s;
-    int sSize = cdnaseqs.size();
+    int sSize = selection.size();
      List<SequenceI> pepseqs = new ArrayList<SequenceI>();
      for (s = 0; s < sSize; s++)
      {
-      SequenceI newseq = translateCodingRegion(cdnaseqs.get(s),
-              seqstring[s], viscontigs, acf, alignedCodons, pepseqs,
-              gapCharacter,
-              false);
+      SequenceI newseq = translateCodingRegion(selection.get(s),
+              seqstring[s], acf, pepseqs);
  
        if (newseq != null)
        {
@@ -178,7 +189,7 @@ public class Dna
      al.padGaps();
      // link the protein translation to the DNA dataset
      al.setDataset(dataset);
-    translateAlignedAnnotations(annotations, al, acf, alignedCodons);
+    translateAlignedAnnotations(al, acf);
      al.addCodonFrame(acf);
      return al;
    }
@@ -238,16 +249,14 @@ public class Dna
    }
  
    /**
-   * Translate na alignment annotations onto translated amino acid alignment al
-   * using codon mapping codons
+   * Translate nucleotide alignment annotations onto translated amino acid
+   * alignment using codon mapping codons
     * 
-   * @param annotations
     * @param al
-   * @param acf
+   *          the translated protein alignment
     */
-  protected static void translateAlignedAnnotations(
-          AlignmentAnnotation[] annotations, AlignmentI al,
-          AlignedCodonFrame acf, int[][] codons)
+  protected void translateAlignedAnnotations(AlignmentI al,
+          AlignedCodonFrame acf)
    {
      // Can only do this for columns with consecutive codons, or where
      // annotation is sequence associated.
@@ -268,7 +277,7 @@ public class Dna
            continue;
          }
  
-        int aSize = acf.getaaWidth(); // aa alignment width.
+        int aSize = aaWidth;
          Annotation[] anots = (annotation.annotations == null) ? null
                  : new Annotation[aSize];
          if (anots != null)
@@ -276,10 +285,10 @@ public class Dna
            for (int a = 0; a < aSize; a++)
            {
              // process through codon map.
-            if (a < codons.length && codons[a] != null
-                    && codons[a][0] == (codons[a][2] - 2))
+            if (a < alignedCodons.length && alignedCodons[a] != null
+                    && alignedCodons[a].pos1 == (alignedCodons[a].pos3 - 2))
              {
-              anots[a] = getCodonAnnotation(codons[a],
+              anots[a] = getCodonAnnotation(alignedCodons[a],
                        annotation.annotations);
              }
            }
@@ -320,26 +329,27 @@ public class Dna
      }
    }
  
-  private static Annotation getCodonAnnotation(int[] is,
+  private static Annotation getCodonAnnotation(AlignedCodon is,
            Annotation[] annotations)
    {
      // Have a look at all the codon positions for annotation and put the first
      // one found into the translated annotation pos.
      int contrib = 0;
      Annotation annot = null;
-    for (int p = 0; p < 3; p++)
+    for (int p = 1; p <= 3; p++)
      {
-      if (annotations[is[p]] != null)
+      int dnaCol = is.getBaseColumn(p);
+      if (annotations[dnaCol] != null)
        {
          if (annot == null)
          {
-          annot = new Annotation(annotations[is[p]]);
+          annot = new Annotation(annotations[dnaCol]);
            contrib = 1;
          }
          else
          {
            // merge with last
-          Annotation cpy = new Annotation(annotations[is[p]]);
+          Annotation cpy = new Annotation(annotations[dnaCol]);
            if (annot.colour == null)
            {
              annot.colour = cpy.colour;
@@ -375,49 +385,45 @@ public class Dna
     *          sequence displayed under viscontigs visible columns
     * @param seqstring
     *          ORF read in some global alignment reference frame
-   * @param viscontigs
-   *          mapping from global reference frame to visible seqstring ORF read
     * @param acf
     *          Definition of global ORF alignment reference frame
-   * @param alignedCodons
     * @param proteinSeqs
-   * @param gapCharacter
-   * @param starForStop
-   *          when true stop codons will translate as '*', otherwise as 'X'
     * @return sequence ready to be added to alignment.
     */
-  protected static SequenceI translateCodingRegion(SequenceI selection,
-          String seqstring, int[] viscontigs, AlignedCodonFrame acf,
-          int[][] alignedCodons, List<SequenceI> proteinSeqs,
-          char gapCharacter,
-          final boolean starForStop)
+  protected SequenceI translateCodingRegion(SequenceI selection,
+          String seqstring, AlignedCodonFrame acf,
+          List<SequenceI> proteinSeqs)
    {
      List<int[]> skip = new ArrayList<int[]>();
      int skipint[] = null;
      ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring
      // intervals
-    int vc, scontigs[] = new int[viscontigs.length];
+    int vc;
+    int[] scontigs = new int[contigs.length];
      int npos = 0;
-    for (vc = 0; vc < viscontigs.length; vc += 2)
+    for (vc = 0; vc < contigs.length; vc += 2)
      {
        if (vc == 0)
        {
-        vismapping.addShift(npos, viscontigs[vc]);
+        vismapping.addShift(npos, contigs[vc]);
        }
        else
        {
          // hidden region
-        vismapping.addShift(npos, viscontigs[vc] - viscontigs[vc - 1] + 1);
+        vismapping.addShift(npos, contigs[vc] - contigs[vc - 1] + 1);
        }
-      scontigs[vc] = viscontigs[vc];
-      scontigs[vc + 1] = viscontigs[vc + 1];
+      scontigs[vc] = contigs[vc];
+      scontigs[vc + 1] = contigs[vc + 1];
      }
  
      // allocate a roughly sized buffer for the protein sequence
      StringBuilder protein = new StringBuilder(seqstring.length() / 2);
      String seq = seqstring.replace('U', 'T').replace('u', 'T');
      char codon[] = new char[3];
-    int cdp[] = new int[3], rf = 0, lastnpos = 0, nend;
+    int cdp[] = new int[3];
+    int rf = 0;
+    int lastnpos = 0;
+    int nend;
      int aspos = 0;
      int resSize = 0;
      for (npos = 0, nend = seq.length(); npos < nend; npos++)
@@ -432,18 +438,19 @@ public class Dna
          /*
           * Filled up a reading frame...
           */
+        AlignedCodon alignedCodon = new AlignedCodon(cdp[0], cdp[1], cdp[2]);
          String aa = ResidueProperties.codonTranslate(new String(codon));
          rf = 0;
-        final String gapString = String.valueOf(gapCharacter);
+        final String gapString = String.valueOf(gapChar);
          if (aa == null)
          {
            aa = gapString;
            if (skipint == null)
            {
              skipint = new int[]
-            { cdp[0], cdp[2] };
+            { alignedCodon.pos1, alignedCodon.pos3 /* cdp[0], cdp[2] */};
            }
-          skipint[1] = cdp[2];
+          skipint[1] = alignedCodon.pos3; // cdp[2];
          }
          else
          {
@@ -538,31 +545,19 @@ public class Dna
            }
            if (aa.equals("STOP"))
            {
-            aa = starForStop ? "*" : "X";
+            aa = STOP_X;
            }
            resSize++;
          }
-        // insert gaps prior to this codon - if necessary
          boolean findpos = true;
          while (findpos)
          {
-          // expand the codons array if necessary
-          alignedCodons = checkCodonFrameWidth(alignedCodons, aspos);
-
            /*
             * Compare this codon's base positions with those currently aligned to
             * this column in the translation.
             */
-          final int compareCodonPos = Dna.compareCodonPos(cdp,
+          final int compareCodonPos = compareCodonPos(alignedCodon,
                    alignedCodons[aspos]);
-          // debug
-          System.out.println(seq + "/" + aa + " codons: "
-                  + Arrays.deepToString(alignedCodons));
-          System.out
-                  .println(("Compare " + Arrays.toString(cdp) + " at pos "
-                          + aspos + " with "
-                          + Arrays.toString(alignedCodons[aspos]) + " got " + compareCodonPos));
-          // end debug
            switch (compareCodonPos)
            {
            case -1:
@@ -571,7 +566,7 @@ public class Dna
               * This codon should precede the mapped positions - need to insert a
               * gap in all prior sequences.
               */
-            insertAAGap(aspos, gapCharacter, alignedCodons, proteinSeqs);
+            insertAAGap(aspos, proteinSeqs);
              findpos = false;
              break;
  
@@ -598,21 +593,18 @@ public class Dna
          if (alignedCodons[aspos] == null)
          {
            // mark this column as aligning to this aligned reading frame
-          alignedCodons[aspos] = new int[]
-          { cdp[0], cdp[1], cdp[2] };
+          alignedCodons[aspos] = alignedCodon;
          }
-        else if (!Arrays.equals(alignedCodons[aspos], cdp))
+        else if (!alignedCodons[aspos].equals(alignedCodon))
          {
            throw new IllegalStateException("Tried to coalign "
-                  + Arrays.asList(alignedCodons[aspos], cdp));
+                  + alignedCodons[aspos].toString() + " with "
+                  + alignedCodon.toString());
          }
-        if (aspos >= acf.aaWidth)
+        if (aspos >= aaWidth)
          {
            // update maximum alignment width
-          // (we can do this without calling checkCodonFrameWidth because it was
-          // already done above)
-          System.out.println("aspos " + aspos + " >= " + acf.aaWidth);
-          acf.setAaWidth(aspos);
+          aaWidth = aspos;
          }
          // ready for next translated reading frame alignment position (if any)
          aspos++;
@@ -727,53 +719,47 @@ public class Dna
     * Insert a gap into the aligned proteins and the codon mapping array.
     * 
     * @param pos
-   * @param gapCharacter
-   * @param alignedCodons
     * @param proteinSeqs
+   * @return
     */
-  protected static void insertAAGap(int pos, char gapCharacter,
-          int[][] alignedCodons, List<SequenceI> proteinSeqs)
+  protected void insertAAGap(int pos,
+          List<SequenceI> proteinSeqs)
    {
-    System.out.println("insertAAGap " + pos + "/" + proteinSeqs.size());
-    // aaWidth++;
+    aaWidth++;
      for (SequenceI seq : proteinSeqs)
      {
-      seq.insertCharAt(pos, gapCharacter);
+      seq.insertCharAt(pos, gapChar);
      }
  
-    // if (pos < aaWidth)
-    // {
-    // aaWidth++;
-    System.arraycopy(alignedCodons, pos, alignedCodons, pos + 1,
-            alignedCodons.length - pos - 1);
-    alignedCodons[pos] = null; // clear so new codon position can be marked.
-    // }
+    checkCodonFrameWidth();
+    if (pos < aaWidth)
+    {
+      aaWidth++;
+
+      /*
+       * Shift from [pos] to the end one to the right, and null out [pos]
+       */
+      System.arraycopy(alignedCodons, pos, alignedCodons, pos + 1,
+              alignedCodons.length - pos - 1);
+      alignedCodons[pos] = null;
+    }
    }
  
    /**
-   * Check the codons array is big enough to accommodate the given position, if
-   * not resize it.
-   * 
-   * @param alignedCodons
-   * @param aspos
-   * @return the resized array (or the original if no resize needed)
+   * Check the codons array can accommodate a single insertion, if not resize
+   * it.
     */
-  protected static int[][] checkCodonFrameWidth(int[][] alignedCodons,
-          int aspos)
+  protected void checkCodonFrameWidth()
    {
-    // TODO why not codons.length < aspos ?
-    // should codons expand if length is 2 or 3 and aslen==2 ?
-    if (alignedCodons.length <= aspos + 1)
+    if (alignedCodons[alignedCodons.length - 1] != null)
      {
-      // probably never have to do this ?
-      int[][] c = new int[alignedCodons.length + 10][];
-      for (int i = 0; i < alignedCodons.length; i++)
-      {
-        c[i] = alignedCodons[i];
-      }
-      return c;
+      /*
+       * arraycopy insertion would bump a filled slot off the end, so expand.
+       */
+      AlignedCodon[] c = new AlignedCodon[alignedCodons.length + 10];
+      System.arraycopy(alignedCodons, 0, c, 0, alignedCodons.length);
+      alignedCodons = c;
      }
-    return alignedCodons;
    }
  
    /**
@@ -786,15 +772,16 @@ public class Dna
     * @param pep
     * @param map
     * @param featureTypes
-   *          hash who's keys are the displayed feature type strings
+   *          hash whose keys are the displayed feature type strings
     * @param featureGroups
     *          hash where keys are feature groups and values are Boolean objects
     *          indicating if they are displayed.
     */
    private static void transferCodedFeatures(SequenceI dna, SequenceI pep,
-          MapList map, Hashtable featureTypes, Hashtable featureGroups)
+          MapList map, Map<String, Object> featureTypes,
+          Map<String, Boolean> featureGroups)
    {
-    SequenceFeature[] sf = (dna.getDatasetSequence() != null ? dna
+    SequenceFeature[] sfs = (dna.getDatasetSequence() != null ? dna
              .getDatasetSequence() : dna).getSequenceFeatures();
      Boolean fgstate;
      DBRefEntry[] dnarefs = DBRefUtils.selectRefs(dna.getDBRef(),
@@ -810,16 +797,16 @@ public class Dna
          }
        }
      }
-    if (sf != null)
+    if (sfs != null)
      {
-      for (int f = 0; f < sf.length; f++)
+      for (SequenceFeature sf : sfs)
        {
-        fgstate = (featureGroups == null) ? null : ((Boolean) featureGroups
-                .get(sf[f].featureGroup));
-        if ((featureTypes == null || featureTypes.containsKey(sf[f]
-                .getType())) && (fgstate == null || fgstate.booleanValue()))
+        fgstate = (featureGroups == null) ? null : featureGroups
+                .get(sf.featureGroup);
+        if ((featureTypes == null || featureTypes.containsKey(sf.getType()))
+                && (fgstate == null || fgstate.booleanValue()))
          {
-          if (FeatureProperties.isCodingFeature(null, sf[f].getType()))
+          if (FeatureProperties.isCodingFeature(null, sf.getType()))
            {
              // if (map.intersectsFrom(sf[f].begin, sf[f].end))
              {