X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FDna.java;h=c6bcfdad4ddf13c2743e7687a739cf8139ae58f0;hb=cd5b2de469fb4c09242955cb4b74279e2da348d6;hp=960a6db69f058a0bc28395e9113c2137773d66c8;hpb=6173092ff5cb03f039cac674bfc8bc4f969976a5;p=jalview.git

diff --git a/src/jalview/analysis/Dna.java b/src/jalview/analysis/Dna.java
index 960a6db..c6bcfda 100644
--- a/src/jalview/analysis/Dna.java
+++ b/src/jalview/analysis/Dna.java
@@ -1,5 +1,6 @@
 package jalview.analysis;
 
+import java.util.Enumeration;
 import java.util.Hashtable;
 import java.util.Vector;
 
@@ -9,6 +10,7 @@ import jalview.datamodel.AlignmentAnnotation;
 import jalview.datamodel.AlignmentI;
 import jalview.datamodel.Annotation;
 import jalview.datamodel.ColumnSelection;
+import jalview.datamodel.DBRefEntry;
 import jalview.datamodel.FeatureProperties;
 import jalview.datamodel.Mapping;
 import jalview.datamodel.Sequence;
@@ -34,9 +36,9 @@ public class Dna
       return 0;
     if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2])
       return -1; // one base in cdp1 precedes the corresponding base in the
-                  // other codon
+    // other codon
     return 1; // one base in cdp1 appears after the corresponding base in the
-              // other codon.
+    // other codon.
   }
 
   /**
@@ -64,27 +66,54 @@ public class Dna
    * @param gapCharacter
    * @param annotations
    * @param aWidth
+   * @param dataset destination dataset for translated sequences and mappings
    * @return
    */
   public static AlignmentI CdnaTranslate(SequenceI[] selection,
           String[] seqstring, int viscontigs[], char gapCharacter,
-          AlignmentAnnotation[] annotations, int aWidth)
+          AlignmentAnnotation[] annotations, int aWidth, Alignment dataset)
+  { // convenience overload: no per-sequence product refs are supplied
+    return CdnaTranslate(selection, seqstring, null, viscontigs, // null = product array
+            gapCharacter, annotations, aWidth, dataset);
+  }
+
+  /**
+   * 
+   * @param selection
+   * @param seqstring
+   * @param product - array of DBRefEntry objects from which exon map in seqstring is derived (may be null)
+   * @param viscontigs
+   * @param gapCharacter
+   * @param annotations
+   * @param aWidth
+   * @param dataset dataset that receives the dataset sequence of each translated product
+   * @return
+   */
+  public static AlignmentI CdnaTranslate(SequenceI[] selection,
+          String[] seqstring, DBRefEntry[] product, int viscontigs[],
+          char gapCharacter, AlignmentAnnotation[] annotations, int aWidth, Alignment dataset)
   {
     AlignedCodonFrame codons = new AlignedCodonFrame(aWidth); // stores hash of
-                                                              // subsequent
-                                                              // positions for
-                                                              // each codon
-                                                              // start position
-                                                              // in alignment
+    // subsequent
+    // positions for
+    // each codon
+    // start position
+    // in alignment
     int s, sSize = selection.length;
     Vector pepseqs = new Vector();
     for (s = 0; s < sSize; s++)
     {
       SequenceI newseq = translateCodingRegion(selection[s], seqstring[s],
-              viscontigs, codons, gapCharacter);
+              viscontigs, codons, gapCharacter, (product!=null) ? product[s] : null); // possibly anonymous product
       if (newseq != null)
       {
         pepseqs.addElement(newseq);
+        SequenceI ds = newseq;
+        while (ds.getDatasetSequence()!=null)
+        {
+          ds = ds.getDatasetSequence();
+        }
+        dataset.addSequence(ds);
       }
     }
     if (codons.aaWidth == 0)
@@ -93,11 +122,125 @@ public class Dna
     pepseqs.copyInto(newseqs);
     AlignmentI al = new Alignment(newseqs);
     al.padGaps(); // ensure we look aligned.
-    al.setDataset(null);
+    al.setDataset(dataset);
     translateAlignedAnnotations(annotations, al, codons);
     al.addCodonFrame(codons);
     return al;
   }
+  /**
+   * Test whether translation could yield a product in the visible region: looks
+   * for a 3:1 mapped database reference whose mapping covers part of viscontigs.
+   * @param selection sequences whose database references are examined
+   * @param viscontigs visible regions, read as consecutive pairs of positions
+   * @return true as soon as one visible range is located by such a mapping
+   */
+  public static boolean canTranslate(SequenceI[] selection, int viscontigs[])
+  {
+    for (int gd=0; gd<selection.length; gd++)
+    {
+      SequenceI dna = selection[gd];
+      jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils
+      .selectRefs(dna.getDBRef(),
+              jalview.datamodel.DBRefSource.DNACODINGDBS);
+      if (dnarefs != null) // presumably null when no coding-DB ref exists - verify selectRefs
+      {
+        // intersect with pep: the selectRefs result is discarded below and
+        // rebuilt from every ref carrying a 3:1 (getFromRatio:getToRatio) map
+        Vector mappedrefs = new Vector();
+        DBRefEntry[] refs=dna.getDBRef();
+        for (int d=0; d<refs.length; d++)
+        {
+          if (refs[d].getMap()!=null && refs[d].getMap().getMap()!=null && refs[d].getMap().getMap().getFromRatio()==3
+                  && refs[d].getMap().getMap().getToRatio()==1)
+          {
+            mappedrefs.addElement(refs[d]); // add translated protein maps
+          }
+        }
+        dnarefs = new DBRefEntry[mappedrefs.size()];
+        mappedrefs.copyInto(dnarefs);
+        for (int d = 0; d < dnarefs.length; d++)
+        {
+          Mapping mp = dnarefs[d].getMap();
+          if (mp != null) // expected to hold: only refs with a non-null getMap() were kept above
+          {
+            for (int vc=0; vc<viscontigs.length; vc+=2)
+            {
+              int[] mpr=mp.locateMappedRange(viscontigs[vc], viscontigs[vc+1]);
+              if (mpr!=null) {
+                return true; // first located range is enough
+              }
+            }
+          }
+        }
+      }
+    }
+    return false;
+  }
+  /**
+   * generate a set of translated protein products from annotated sequenceI
+   * @param selection sequences whose 3:1 mapped database references are used
+   * @param viscontigs visible regions, passed to Mapping.intersectVisContigs
+   * @param gapCharacter passed through to the full CdnaTranslate overload
+   * @param dataset destination dataset for translated sequences
+   * @return result of the full CdnaTranslate overload for the first entry of
+   *         selection (the return sits inside the loop); null when selection
+   *         is empty
+   */
+  public static AlignmentI CdnaTranslate(SequenceI[] selection,
+          int viscontigs[], char gapCharacter, Alignment dataset)
+  {
+    int alwidth=0;
+    Vector cdnasqs=new Vector();
+    Vector cdnasqi=new Vector();
+    Vector cdnaprod=new Vector();
+    for (int gd=0; gd<selection.length; gd++)
+    {
+      SequenceI dna = selection[gd];
+      jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils
+      .selectRefs(dna.getDBRef(),
+              jalview.datamodel.DBRefSource.DNACODINGDBS);
+      if (dnarefs != null)
+      {
+        // intersect with pep: keep only refs carrying a 3:1 map (see test below)
+        Vector mappedrefs = new Vector();
+        DBRefEntry[] refs=dna.getDBRef();
+        for (int d=0; d<refs.length; d++)
+        {
+          if (refs[d].getMap()!=null && refs[d].getMap().getMap()!=null && refs[d].getMap().getMap().getFromRatio()==3
+                  && refs[d].getMap().getMap().getToRatio()==1)
+          {
+            mappedrefs.addElement(refs[d]); // add translated protein maps
+          }
+        }
+        dnarefs = new DBRefEntry[mappedrefs.size()];
+        mappedrefs.copyInto(dnarefs);
+        for (int d = 0; d < dnarefs.length; d++)
+        {
+          Mapping mp = dnarefs[d].getMap();
+          StringBuffer sqstr=new StringBuffer();
+          if (mp != null)
+          {
+            Mapping intersect = mp.intersectVisContigs(viscontigs);
+            // generate seqstring for this sequence based on mapping
+            // NOTE(review): nothing is appended to sqstr, so it is stored empty
+            if (sqstr.length()>alwidth)
+              alwidth = sqstr.length();
+            cdnasqs.addElement(sqstr.toString());
+            cdnasqi.addElement(dna);
+            cdnaprod.addElement(intersect);
+          }
+        }
+      }
+      SequenceI[] cdna = new SequenceI[cdnasqs.size()];
+      DBRefEntry[] prods = new DBRefEntry[cdnaprod.size()];
+      String[] xons = new String[cdnasqs.size()];
+      cdnasqs.copyInto(xons);
+      cdnaprod.copyInto(prods); // NOTE(review): cdnaprod holds Mapping objects, prods is DBRefEntry[] - verify
+      cdnasqi.copyInto(cdna);
+      return CdnaTranslate(cdna, xons, prods, viscontigs, gapCharacter, null, alwidth, dataset);
+    } // NOTE(review): unconditional return above ends this loop on its first pass
+    return null;
+  }
 
   /**
    * translate na alignment annotations onto translated amino acid alignment al
@@ -139,18 +282,20 @@ public class Dna
             if (codons.codons[a] != null
                     && codons.codons[a][0] == (codons.codons[a][2] - 2))
             {
-              pos = codons.codons[a][0];
-              if (annotations[i].annotations[pos] == null
-                      || annotations[i].annotations[pos] == null)
-                continue;
-              // We just take the annotation in the first base in the codon
-              anots[a] = new Annotation(annotations[i].annotations[pos]);
+              anots[a] = getCodonAnnotation(codons.codons[a], annotations[i].annotations);
             }
           }
         }
 
         jalview.datamodel.AlignmentAnnotation aa = new jalview.datamodel.AlignmentAnnotation(
                 annotations[i].label, annotations[i].description, anots);
+        aa.graph = annotations[i].graph;
+        aa.graphGroup = annotations[i].graphGroup;
+        aa.graphHeight = annotations[i].graphHeight;
+        if (annotations[i].getThreshold()!=null)
+        {
+          aa.setThreshold(new jalview.datamodel.GraphLine(annotations[i].getThreshold()));        
+        }
         if (annotations[i].hasScore)
         {
           aa.setScore(annotations[i].getScore());
@@ -165,7 +310,7 @@ public class Dna
             // positioning
             aa.setSequenceRef(aaSeq);
             aa.createSequenceMapping(aaSeq, aaSeq.getStart(), true); // rebuild
-                                                                      // mapping
+            // mapping
             aa.adjustForAlignment();
             aaSeq.addAlignmentAnnotation(aa);
           }
@@ -176,31 +321,51 @@ public class Dna
     }
   }
 
+  private static Annotation getCodonAnnotation(int[] is, Annotation[] annotations)
+  { // is: positions of one codon, used as indices into annotations (is[0..2] are read)
+    // Scan the three codon positions in order and hand back a new Annotation
+    // built from the first non-null entry found; null if all three are empty.
+    for (int p=0; p<3; p++)
+    {
+      if (annotations[is[p]]!=null) // no bounds check: is[p] must be < annotations.length
+      {
+        return new Annotation(annotations[is[p]]);
+      }
+    }
+    return null;
+  }
+
   /**
    * Translate a na sequence
    * 
-   * @param selection
-   * @param seqstring
-   * @param viscontigs
-   * @param codons
+   * @param selection sequence displayed under viscontigs visible columns
+   * @param seqstring ORF read in some global alignment reference frame
+   * @param viscontigs mapping from global reference frame to visible seqstring ORF read
+   * @param codons Definition of global ORF alignment reference frame
    * @param gapCharacter
    * @param newSeq
    * @return sequence ready to be added to alignment.
    */
   public static SequenceI translateCodingRegion(SequenceI selection,
           String seqstring, int[] viscontigs, AlignedCodonFrame codons,
-          char gapCharacter)
+          char gapCharacter, DBRefEntry product)
   {
+    Vector skip=new Vector();
+    int skipint[]=null;
     ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring
-                                            // intervals
+    // intervals
     int vc, scontigs[] = new int[viscontigs.length];
     int npos = 0;
     for (vc = 0; vc < viscontigs.length; vc += 2)
     {
+      if (vc==0) {
       vismapping.addShift(npos, viscontigs[vc]);
-      scontigs[vc] = npos;
-      npos += viscontigs[vc + 1];
-      scontigs[vc + 1] = npos;
+      } else {
+        // hidden region
+        vismapping.addShift(npos, viscontigs[vc]-viscontigs[vc-1]+1);
+      }
+      scontigs[vc] = viscontigs[vc];
+      scontigs[vc + 1] = viscontigs[vc+1];
     }
 
     StringBuffer protein = new StringBuffer();
@@ -222,9 +387,40 @@ public class Dna
         String aa = ResidueProperties.codonTranslate(new String(codon));
         rf = 0;
         if (aa == null)
-          aa = String.valueOf(gapCharacter);
-        else
         {
+          aa = String.valueOf(gapCharacter);
+          if (skipint==null)
+          {
+            skipint = new int[] { cdp[0],cdp[2] };
+          }
+          skipint[1] = cdp[2];
+        } else {
+          if (skipint!=null)
+          {
+            // edit scontigs
+            skipint[0] = vismapping.shift(skipint[0]);
+            skipint[1] = vismapping.shift(skipint[1]);
+            for (vc=0; vc<scontigs.length; vc+=2)
+            {
+              if (scontigs[vc+1]<skipint[0])
+              {
+                continue;
+              }
+              if (scontigs[vc]<=skipint[0])
+              {
+               if (skipint[0]==scontigs[vc])
+               {
+                 
+               } else {
+                 int[] t = new int[scontigs.length+2];
+                 System.arraycopy(scontigs, 0, t, 0, vc-1);
+                 // scontigs[vc]; //
+               }
+              }
+            }
+            skip.addElement(skipint);
+            skipint=null;
+          }
           if (aa.equals("STOP"))
           {
             aa = "X";
@@ -250,8 +446,8 @@ public class Dna
             // with a gap
             aa = "" + gapCharacter + aa;
             aspos++;
-            if (aspos >= codons.aaWidth)
-              codons.aaWidth = aspos + 1;
+            //if (aspos >= codons.aaWidth)
+            //  codons.aaWidth = aspos + 1;
             break; // check the next position for alignment
           case 0:
             // codon aligns at aspos position.
@@ -269,7 +465,9 @@ public class Dna
         }
         aspos++;
         if (aspos >= codons.aaWidth)
+        {
           codons.aaWidth = aspos + 1;
+        }
       }
     }
     if (resSize > 0)
@@ -283,8 +481,8 @@ public class Dna
         // map and trim contigs to ORF region
         vc = scontigs.length - 1;
         lastnpos = vismapping.shift(lastnpos); // place npos in context of
-                                                // whole dna alignment (rather
-                                                // than visible contigs)
+        // whole dna alignment (rather
+        // than visible contigs)
         // incomplete ORF could be broken over one or two visible contig
         // intervals.
         while (vc >= 0 && scontigs[vc] > lastnpos)
@@ -313,14 +511,12 @@ public class Dna
       if (scontigs != null)
       {
         npos = 0;
-        // Find sequence position for scontigs positions on the nucleotide
-        // sequence string we were passed.
-        for (vc = 0; vc < viscontigs.length; vc += 2)
+        // map scontigs to actual sequence positions on selection
+        for (vc = 0; vc < scontigs.length; vc += 2)
         {
           scontigs[vc] = selection.findPosition(scontigs[vc]); // not from 1!
-          npos += viscontigs[vc];
           scontigs[vc + 1] = selection
-                  .findPosition(npos + scontigs[vc + 1]); // exclusive
+                  .findPosition(scontigs[vc + 1]); // exclusive
           if (scontigs[vc + 1] == selection.getEnd())
             break;
         }
@@ -331,17 +527,72 @@ public class Dna
           System.arraycopy(scontigs, 0, t, 0, vc + 2);
           scontigs = t;
         }
-
+        /*
+         * delete intervals in scontigs which are not translated.
+         * 1. map skip into sequence position intervals
+         * 2. truncate existing ranges and add new ranges to exclude untranslated regions.
+        if (skip.size()>0)
+        {
+          Vector narange = new Vector();
+          for (vc=0; vc<scontigs.length; vc++)
+          {
+            narange.addElement(new int[] {scontigs[vc]});
+          }
+          int sint=0,iv[];
+          vc = 0;
+          while (sint<skip.size())
+          {
+            skipint = (int[]) skip.elementAt(sint);
+            do {
+              iv = (int[]) narange.elementAt(vc);
+              if (iv[0]>=skipint[0] && iv[0]<=skipint[1])
+              {
+                if (iv[0]==skipint[0])
+                {
+                  // delete beginning of range
+                } else {
+                  // truncate range and create new one if necessary
+                  iv = (int[]) narange.elementAt(vc+1);
+                  if (iv[0]<=skipint[1])
+                  {
+                    // truncate range
+                    iv[0] = skipint[1];
+                  } else {
+                    
+                  }
+                }
+              } else
+                if (iv[0]<skipint[0])
+                {
+                  iv = (int[]) narange.elementAt(vc+1);
+                }
+            } while (iv[0])
+          }
+        }*/
         MapList map = new MapList(scontigs, new int[]
-        { 1, resSize }, 3, 1); // TODO: store mapping on newSeq for linked
-                                // DNA/Protein viewing.
+                                                    { 1, resSize }, 3, 1);
+        
+        // update newseq as if it was generated as mapping from product
+        
+        if (product != null)
+        {
+          newseq.setName(product.getSource() + "|"
+                  + product.getAccessionId());
+          if (product.getMap() != null)
+          {
+            //Mapping mp = product.getMap();
+            //newseq.setStart(mp.getPosition(scontigs[0]));
+            //newseq.setEnd(mp
+            //        .getPosition(scontigs[scontigs.length - 1]));
+          }
+        }
         transferCodedFeatures(selection, newseq, map, null, null);
         SequenceI rseq = newseq.deriveSequence(); // construct a dataset
-                                                  // sequence for our new
-                                                  // peptide, regardless.
+        // sequence for our new
+        // peptide, regardless.
         // store a mapping (this actually stores a mapping between the dataset
         // sequences for the two sequences
-        codons.addMap(selection, newseq, map);
+        codons.addMap(selection, rseq, map);
         return rseq;
       }
     }