X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FDna.java;h=2ef63d408652e7a47dd94ba1665c768086504d2b;hb=61f1a8b75ea5ce352d6214c34fbdcd58bafbbb73;hp=0e7d73611d63d57642c9dca23ef55d447cbb35e1;hpb=316032bb49f563f07b3cf23f9d97bfceeda21961;p=jalview.git

diff --git a/src/jalview/analysis/Dna.java b/src/jalview/analysis/Dna.java
index 0e7d736..2ef63d4 100644
--- a/src/jalview/analysis/Dna.java
+++ b/src/jalview/analysis/Dna.java
@@ -20,7 +20,9 @@
  */
 package jalview.analysis;
 
+import jalview.api.AlignViewportI;
 import jalview.bin.Cache;
+import jalview.datamodel.AlignedCodon;
 import jalview.datamodel.AlignedCodonFrame;
 import jalview.datamodel.Alignment;
 import jalview.datamodel.AlignmentAnnotation;
@@ -42,11 +44,66 @@ import jalview.util.ShiftList;
 
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Hashtable;
+import java.util.Comparator;
 import java.util.List;
+import java.util.Map;
 
 public class Dna
 {
+  private static final String STOP_X = "X";
+
+  private static final Comparator<AlignedCodon> comparator = new CodonComparator();
+
+  /*
+   * 'final' variables describe the inputs to the translation, which should not
+   * be modified.
+   */
+  final private List<SequenceI> selection;
+
+  final private String[] seqstring;
+
+  final private int[] contigs;
+
+  final private char gapChar;
+
+  final private AlignmentAnnotation[] annotations;
+
+  final private int dnaWidth;
+
+  final private Alignment dataset;
+
+  /*
+   * Working variables for the translation.
+   * 
+   * The width of the translation-in-progress protein alignment.
+   */
+  private int aaWidth = 0;
+
+  /*
+   * This array will be built up so that position i holds the codon positions
+   * e.g. [7, 9, 10] that match column i (base 0) in the aligned translation.
+   * Note this implies a contract that if two codons do not align exactly, their
+   * translated products must occupy different column positions.
+   */
+  private AlignedCodon[] alignedCodons;
+
+  /**
+   * Constructor given a viewport and the visible contigs.
+   * 
+   * @param viewport
+   * @param visibleContigs
+   */
+  public Dna(AlignViewportI viewport, int[] visibleContigs)
+  {
+    this.selection = Arrays.asList(viewport.getSequenceSelection());
+    this.seqstring = viewport.getViewAsString(true);
+    this.contigs = visibleContigs;
+    this.gapChar = viewport.getGapCharacter();
+    this.annotations = viewport.getAlignment().getAlignmentAnnotation();
+    this.dnaWidth = viewport.getAlignment().getWidth();
+    this.dataset = viewport.getAlignment().getDataset();
+  }
+
   /**
    * Test whether codon positions cdp1 should align before, with, or after cdp2.
    * Returns zero if all positions match (or either argument is null). Returns
@@ -60,19 +117,31 @@ public class Dna
    * <li>compareCodonPos([3,4,5], [2,5,6]) also returns -1</li>
    * </ul>
    * 
-   * @param cdp1
-   * @param cdp2
+   * @param ac1
+   * @param ac2
+   * @return
+   */
+  public static final int compareCodonPos(AlignedCodon ac1, AlignedCodon ac2)
+  {
+    return comparator.compare(ac1, ac2);
+    // return jalview_2_8_2compare(ac1, ac2);
+  }
+
+  /**
+   * Codon comparison up to Jalview 2.8.2. This rule is sequence order dependent
+   * - see http://issues.jalview.org/browse/JAL-1635
+   * 
+   * @param ac1
+   * @param ac2
    * @return
    */
-  public static int compareCodonPos(int[] cdp1, int[] cdp2)
+  private static int jalview_2_8_2compare(AlignedCodon ac1, AlignedCodon ac2)
   {
-    if (cdp1 == null
-            || cdp2 == null
-            || (cdp1[0] == cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2]))
+    if (ac1 == null || ac2 == null || (ac1.equals(ac2)))
     {
       return 0;
     }
-    if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2])
+    if (ac1.pos1 < ac2.pos1 || ac1.pos2 < ac2.pos2 || ac1.pos3 < ac2.pos3)
     {
       // one base in cdp1 precedes the corresponding base in the other codon
       return -1;
@@ -82,80 +151,22 @@ public class Dna
   }
 
   /**
-   * DNA->mapped protein sequence alignment translation given set of sequences
-   * 1. id distinct coding regions within selected region for each sequence 2.
-   * generate peptides based on inframe (or given) translation or (optionally
-   * and where specified) out of frame translations (annotated appropriately) 3.
-   * align peptides based on codon alignment
-   */
-  /**
-   * id potential products from dna 1. search for distinct products within
-   * selected region for each selected sequence 2. group by associated DB type.
-   * 3. return as form for input into above function
-   */
-  /**
-   * 
-   */
-  /**
-   * create a new alignment of protein sequences by an inframe translation of
-   * the provided NA sequences
-   * 
-   * @param selection
-   * @param seqstring
-   * @param viscontigs
-   * @param gapCharacter
-   * @param annotations
-   * @param aWidth
-   * @param dataset
-   *          destination dataset for translated sequences and mappings
-   * @return
-   */
-  public static AlignmentI cdnaTranslate(SequenceI[] selection,
-          String[] seqstring, int viscontigs[], char gapCharacter,
-          AlignmentAnnotation[] annotations, int aWidth, Alignment dataset)
-  {
-    return cdnaTranslate(Arrays.asList(selection), seqstring, null,
-            viscontigs, gapCharacter, annotations, aWidth, dataset);
-  }
-
-  /**
    * 
-   * @param cdnaseqs
-   * @param seqstring
-   * @param product
-   *          - array of DbRefEntry objects from which exon map in seqstring is
-   *          derived
-   * @param viscontigs
-   * @param gapCharacter
-   * @param annotations
-   * @param aWidth
-   * @param dataset
    * @return
    */
-  public static AlignmentI cdnaTranslate(List<SequenceI> cdnaseqs,
-          String[] seqstring, DBRefEntry[] product, int viscontigs[],
-          char gapCharacter, AlignmentAnnotation[] annotations, int aWidth,
-          Alignment dataset)
+  public AlignmentI translateCdna()
   {
-    AlignedCodonFrame acf = new AlignedCodonFrame(aWidth);
+    AlignedCodonFrame acf = new AlignedCodonFrame();
 
-    /*
-     * This array will be built up so that position i holds the codon positions
-     * e.g. [7, 9, 10] that match column i (base 0) in the aligned translation.
-     * Note this implies a contract that if two codons do not align exactly,
-     * their translated products must occupy different column positions.
-     */
-    int[][] alignedCodons = new int[aWidth][];
+    alignedCodons = new AlignedCodon[dnaWidth];
 
     int s;
-    int sSize = cdnaseqs.size();
+    int sSize = selection.size();
     List<SequenceI> pepseqs = new ArrayList<SequenceI>();
     for (s = 0; s < sSize; s++)
     {
-      SequenceI newseq = translateCodingRegion(cdnaseqs.get(s),
-              seqstring[s], viscontigs, acf, alignedCodons, pepseqs,
-              gapCharacter,
-              false);
+      SequenceI newseq = translateCodingRegion(selection.get(s),
+              seqstring[s], acf, pepseqs);
 
       if (newseq != null)
       {
@@ -178,7 +189,7 @@ public class Dna
     al.padGaps();
     // link the protein translation to the DNA dataset
     al.setDataset(dataset);
-    translateAlignedAnnotations(annotations, al, acf, alignedCodons);
+    translateAlignedAnnotations(al, acf);
     al.addCodonFrame(acf);
     return al;
   }
@@ -238,16 +249,14 @@ public class Dna
   }
 
   /**
-   * Translate na alignment annotations onto translated amino acid alignment al
-   * using codon mapping codons
+   * Translate nucleotide alignment annotations onto translated amino acid
+   * alignment using codon mapping codons
    * 
-   * @param annotations
    * @param al
-   * @param acf
+   *          the translated protein alignment
    */
-  protected static void translateAlignedAnnotations(
-          AlignmentAnnotation[] annotations, AlignmentI al,
-          AlignedCodonFrame acf, int[][] codons)
+  protected void translateAlignedAnnotations(AlignmentI al,
+          AlignedCodonFrame acf)
   {
     // Can only do this for columns with consecutive codons, or where
     // annotation is sequence associated.
@@ -268,7 +277,7 @@ public class Dna
           continue;
         }
 
-        int aSize = acf.getaaWidth(); // aa alignment width.
+        int aSize = aaWidth;
         Annotation[] anots = (annotation.annotations == null) ? null
                 : new Annotation[aSize];
         if (anots != null)
@@ -276,10 +285,10 @@ public class Dna
           for (int a = 0; a < aSize; a++)
           {
             // process through codon map.
-            if (a < codons.length && codons[a] != null
-                    && codons[a][0] == (codons[a][2] - 2))
+            if (a < alignedCodons.length && alignedCodons[a] != null
+                    && alignedCodons[a].pos1 == (alignedCodons[a].pos3 - 2))
             {
-              anots[a] = getCodonAnnotation(codons[a],
+              anots[a] = getCodonAnnotation(alignedCodons[a],
                       annotation.annotations);
             }
           }
@@ -320,26 +329,27 @@ public class Dna
     }
   }
 
-  private static Annotation getCodonAnnotation(int[] is,
+  private static Annotation getCodonAnnotation(AlignedCodon is,
           Annotation[] annotations)
   {
     // Have a look at all the codon positions for annotation and put the first
     // one found into the translated annotation pos.
     int contrib = 0;
     Annotation annot = null;
-    for (int p = 0; p < 3; p++)
+    for (int p = 1; p <= 3; p++)
     {
-      if (annotations[is[p]] != null)
+      int dnaCol = is.getBaseColumn(p);
+      if (annotations[dnaCol] != null)
       {
         if (annot == null)
         {
-          annot = new Annotation(annotations[is[p]]);
+          annot = new Annotation(annotations[dnaCol]);
           contrib = 1;
         }
         else
         {
           // merge with last
-          Annotation cpy = new Annotation(annotations[is[p]]);
+          Annotation cpy = new Annotation(annotations[dnaCol]);
           if (annot.colour == null)
           {
             annot.colour = cpy.colour;
@@ -375,49 +385,45 @@ public class Dna
    *          sequence displayed under viscontigs visible columns
    * @param seqstring
    *          ORF read in some global alignment reference frame
-   * @param viscontigs
-   *          mapping from global reference frame to visible seqstring ORF read
    * @param acf
    *          Definition of global ORF alignment reference frame
-   * @param alignedCodons
    * @param proteinSeqs
-   * @param gapCharacter
-   * @param starForStop
-   *          when true stop codons will translate as '*', otherwise as 'X'
    * @return sequence ready to be added to alignment.
    */
-  protected static SequenceI translateCodingRegion(SequenceI selection,
-          String seqstring, int[] viscontigs, AlignedCodonFrame acf,
-          int[][] alignedCodons, List<SequenceI> proteinSeqs,
-          char gapCharacter,
-          final boolean starForStop)
+  protected SequenceI translateCodingRegion(SequenceI selection,
+          String seqstring, AlignedCodonFrame acf,
+          List<SequenceI> proteinSeqs)
   {
     List<int[]> skip = new ArrayList<int[]>();
     int skipint[] = null;
     ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring
     // intervals
-    int vc, scontigs[] = new int[viscontigs.length];
+    int vc;
+    int[] scontigs = new int[contigs.length];
     int npos = 0;
-    for (vc = 0; vc < viscontigs.length; vc += 2)
+    for (vc = 0; vc < contigs.length; vc += 2)
     {
       if (vc == 0)
       {
-        vismapping.addShift(npos, viscontigs[vc]);
+        vismapping.addShift(npos, contigs[vc]);
       }
       else
       {
         // hidden region
-        vismapping.addShift(npos, viscontigs[vc] - viscontigs[vc - 1] + 1);
+        vismapping.addShift(npos, contigs[vc] - contigs[vc - 1] + 1);
       }
-      scontigs[vc] = viscontigs[vc];
-      scontigs[vc + 1] = viscontigs[vc + 1];
+      scontigs[vc] = contigs[vc];
+      scontigs[vc + 1] = contigs[vc + 1];
     }
 
     // allocate a roughly sized buffer for the protein sequence
     StringBuilder protein = new StringBuilder(seqstring.length() / 2);
     String seq = seqstring.replace('U', 'T').replace('u', 'T');
     char codon[] = new char[3];
-    int cdp[] = new int[3], rf = 0, lastnpos = 0, nend;
+    int cdp[] = new int[3];
+    int rf = 0;
+    int lastnpos = 0;
+    int nend;
     int aspos = 0;
     int resSize = 0;
     for (npos = 0, nend = seq.length(); npos < nend; npos++)
@@ -432,18 +438,19 @@ public class Dna
         /*
          * Filled up a reading frame...
          */
+        AlignedCodon alignedCodon = new AlignedCodon(cdp[0], cdp[1], cdp[2]);
         String aa = ResidueProperties.codonTranslate(new String(codon));
         rf = 0;
-        final String gapString = String.valueOf(gapCharacter);
+        final String gapString = String.valueOf(gapChar);
         if (aa == null)
         {
           aa = gapString;
           if (skipint == null)
           {
             skipint = new int[]
-            { cdp[0], cdp[2] };
+            { alignedCodon.pos1, alignedCodon.pos3 /* cdp[0], cdp[2] */};
           }
-          skipint[1] = cdp[2];
+          skipint[1] = alignedCodon.pos3; // cdp[2];
         }
         else
         {
@@ -538,31 +545,19 @@ public class Dna
           }
           if (aa.equals("STOP"))
           {
-            aa = starForStop ? "*" : "X";
+            aa = STOP_X;
           }
           resSize++;
         }
-        // insert gaps prior to this codon - if necessary
         boolean findpos = true;
         while (findpos)
         {
-          // expand the codons array if necessary
-          alignedCodons = checkCodonFrameWidth(alignedCodons, aspos);
-
           /*
            * Compare this codon's base positions with those currently aligned to
            * this column in the translation.
            */
-          final int compareCodonPos = Dna.compareCodonPos(cdp,
+          final int compareCodonPos = compareCodonPos(alignedCodon,
                   alignedCodons[aspos]);
-          // debug
-          System.out.println(seq + "/" + aa + " codons: "
-                  + Arrays.deepToString(alignedCodons));
-          System.out
-                  .println(("Compare " + Arrays.toString(cdp) + " at pos "
-                          + aspos + " with "
-                          + Arrays.toString(alignedCodons[aspos]) + " got " + compareCodonPos));
-          // end debug
           switch (compareCodonPos)
           {
           case -1:
@@ -571,7 +566,7 @@ public class Dna
              * This codon should precede the mapped positions - need to insert a
              * gap in all prior sequences.
              */
-            insertAAGap(aspos, gapCharacter, alignedCodons, proteinSeqs);
+            insertAAGap(aspos, proteinSeqs);
             findpos = false;
             break;
 
@@ -598,21 +593,18 @@ public class Dna
         if (alignedCodons[aspos] == null)
         {
           // mark this column as aligning to this aligned reading frame
-          alignedCodons[aspos] = new int[]
-          { cdp[0], cdp[1], cdp[2] };
+          alignedCodons[aspos] = alignedCodon;
         }
-        else if (!Arrays.equals(alignedCodons[aspos], cdp))
+        else if (!alignedCodons[aspos].equals(alignedCodon))
         {
           throw new IllegalStateException("Tried to coalign "
-                  + Arrays.asList(alignedCodons[aspos], cdp));
+                  + alignedCodons[aspos].toString() + " with "
+                  + alignedCodon.toString());
         }
-        if (aspos >= acf.aaWidth)
+        if (aspos >= aaWidth)
         {
           // update maximum alignment width
-          // (we can do this without calling checkCodonFrameWidth because it was
-          // already done above)
-          System.out.println("aspos " + aspos + " >= " + acf.aaWidth);
-          acf.setAaWidth(aspos);
+          aaWidth = aspos;
         }
         // ready for next translated reading frame alignment position (if any)
         aspos++;
@@ -727,53 +719,47 @@ public class Dna
    * Insert a gap into the aligned proteins and the codon mapping array.
    * 
    * @param pos
-   * @param gapCharacter
-   * @param alignedCodons
    * @param proteinSeqs
+   * @return
    */
-  protected static void insertAAGap(int pos, char gapCharacter,
-          int[][] alignedCodons, List<SequenceI> proteinSeqs)
+  protected void insertAAGap(int pos,
+          List<SequenceI> proteinSeqs)
   {
-    System.out.println("insertAAGap " + pos + "/" + proteinSeqs.size());
-    // aaWidth++;
+    aaWidth++;
     for (SequenceI seq : proteinSeqs)
     {
-      seq.insertCharAt(pos, gapCharacter);
+      seq.insertCharAt(pos, gapChar);
     }
 
-    // if (pos < aaWidth)
-    // {
-    // aaWidth++;
-    System.arraycopy(alignedCodons, pos, alignedCodons, pos + 1,
-            alignedCodons.length - pos - 1);
-    alignedCodons[pos] = null; // clear so new codon position can be marked.
-    // }
+    checkCodonFrameWidth();
+    if (pos < aaWidth)
+    {
+      aaWidth++;
+
+      /*
+       * Shift from [pos] to the end one to the right, and null out [pos]
+       */
+      System.arraycopy(alignedCodons, pos, alignedCodons, pos + 1,
+              alignedCodons.length - pos - 1);
+      alignedCodons[pos] = null;
+    }
   }
 
   /**
-   * Check the codons array is big enough to accommodate the given position, if
-   * not resize it.
-   * 
-   * @param alignedCodons
-   * @param aspos
-   * @return the resized array (or the original if no resize needed)
+   * Check the codons array can accommodate a single insertion, if not resize
+   * it.
    */
-  protected static int[][] checkCodonFrameWidth(int[][] alignedCodons,
-          int aspos)
+  protected void checkCodonFrameWidth()
   {
-    // TODO why not codons.length < aspos ?
-    // should codons expand if length is 2 or 3 and aslen==2 ?
-    if (alignedCodons.length <= aspos + 1)
+    if (alignedCodons[alignedCodons.length - 1] != null)
     {
-      // probably never have to do this ?
-      int[][] c = new int[alignedCodons.length + 10][];
-      for (int i = 0; i < alignedCodons.length; i++)
-      {
-        c[i] = alignedCodons[i];
-      }
-      return c;
+      /*
+       * arraycopy insertion would bump a filled slot off the end, so expand.
+       */
+      AlignedCodon[] c = new AlignedCodon[alignedCodons.length + 10];
+      System.arraycopy(alignedCodons, 0, c, 0, alignedCodons.length);
+      alignedCodons = c;
     }
-    return alignedCodons;
   }
 
   /**
@@ -786,15 +772,16 @@ public class Dna
    * @param pep
    * @param map
    * @param featureTypes
-   *          hash who's keys are the displayed feature type strings
+   *          hash whose keys are the displayed feature type strings
    * @param featureGroups
    *          hash where keys are feature groups and values are Boolean objects
    *          indicating if they are displayed.
    */
   private static void transferCodedFeatures(SequenceI dna, SequenceI pep,
-          MapList map, Hashtable featureTypes, Hashtable featureGroups)
+          MapList map, Map<String, Object> featureTypes,
+          Map<String, Boolean> featureGroups)
   {
-    SequenceFeature[] sf = (dna.getDatasetSequence() != null ? dna
+    SequenceFeature[] sfs = (dna.getDatasetSequence() != null ? dna
             .getDatasetSequence() : dna).getSequenceFeatures();
     Boolean fgstate;
     DBRefEntry[] dnarefs = DBRefUtils.selectRefs(dna.getDBRef(),
@@ -810,16 +797,16 @@ public class Dna
         }
       }
     }
-    if (sf != null)
+    if (sfs != null)
     {
-      for (int f = 0; f < sf.length; f++)
+      for (SequenceFeature sf : sfs)
       {
-        fgstate = (featureGroups == null) ? null : ((Boolean) featureGroups
-                .get(sf[f].featureGroup));
-        if ((featureTypes == null || featureTypes.containsKey(sf[f]
-                .getType())) && (fgstate == null || fgstate.booleanValue()))
+        fgstate = (featureGroups == null) ? null : featureGroups
+                .get(sf.featureGroup);
+        if ((featureTypes == null || featureTypes.containsKey(sf.getType()))
+                && (fgstate == null || fgstate.booleanValue()))
         {
-          if (FeatureProperties.isCodingFeature(null, sf[f].getType()))
+          if (FeatureProperties.isCodingFeature(null, sf.getType()))
           {
             // if (map.intersectsFrom(sf[f].begin, sf[f].end))
             {