bugfixes, partially working code for translation using existing translation associate...
authorjprocter <Jim Procter>
Fri, 13 Jul 2007 14:59:57 +0000 (14:59 +0000)
committerjprocter <Jim Procter>
Fri, 13 Jul 2007 14:59:57 +0000 (14:59 +0000)
src/jalview/analysis/Dna.java

index 960a6db..7bac46e 100644 (file)
@@ -1,5 +1,6 @@
 package jalview.analysis;\r
 \r
+import java.util.Enumeration;\r
 import java.util.Hashtable;\r
 import java.util.Vector;\r
 \r
@@ -9,6 +10,7 @@ import jalview.datamodel.AlignmentAnnotation;
 import jalview.datamodel.AlignmentI;\r
 import jalview.datamodel.Annotation;\r
 import jalview.datamodel.ColumnSelection;\r
+import jalview.datamodel.DBRefEntry;\r
 import jalview.datamodel.FeatureProperties;\r
 import jalview.datamodel.Mapping;\r
 import jalview.datamodel.Sequence;\r
@@ -34,9 +36,9 @@ public class Dna
       return 0;\r
     if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2])\r
       return -1; // one base in cdp1 precedes the corresponding base in the\r
-                  // other codon\r
+    // other codon\r
     return 1; // one base in cdp1 appears after the corresponding base in the\r
-              // other codon.\r
+    // other codon.\r
   }\r
 \r
   /**\r
@@ -64,27 +66,54 @@ public class Dna
    * @param gapCharacter\r
    * @param annotations\r
    * @param aWidth\r
+   * @param dataset destination dataset for translated sequences and mappings\r
    * @return\r
    */\r
   public static AlignmentI CdnaTranslate(SequenceI[] selection,\r
           String[] seqstring, int viscontigs[], char gapCharacter,\r
-          AlignmentAnnotation[] annotations, int aWidth)\r
+          AlignmentAnnotation[] annotations, int aWidth, Alignment dataset)\r
+  {\r
+    return CdnaTranslate(selection, seqstring, null, viscontigs,\r
+            gapCharacter, annotations, aWidth, dataset);\r
+  }\r
+\r
+  /**\r
+   * \r
+   * @param selection\r
+   * @param seqstring\r
+   * @param product - array of DbRefEntry objects from which exon map in seqstring is derived\r
+   * @param viscontigs\r
+   * @param gapCharacter\r
+   * @param annotations\r
+   * @param aWidth\r
+   * @param dataset\r
+   * @return\r
+   */\r
+  public static AlignmentI CdnaTranslate(SequenceI[] selection,\r
+          String[] seqstring, DBRefEntry[] product, int viscontigs[],\r
+          char gapCharacter, AlignmentAnnotation[] annotations, int aWidth, Alignment dataset)\r
   {\r
     AlignedCodonFrame codons = new AlignedCodonFrame(aWidth); // stores hash of\r
-                                                              // subsequent\r
-                                                              // positions for\r
-                                                              // each codon\r
-                                                              // start position\r
-                                                              // in alignment\r
+    // subsequent\r
+    // positions for\r
+    // each codon\r
+    // start position\r
+    // in alignment\r
     int s, sSize = selection.length;\r
     Vector pepseqs = new Vector();\r
     for (s = 0; s < sSize; s++)\r
     {\r
       SequenceI newseq = translateCodingRegion(selection[s], seqstring[s],\r
-              viscontigs, codons, gapCharacter);\r
+              viscontigs, codons, gapCharacter, (product!=null) ? product[s] : null); // possibly anonymous product\r
       if (newseq != null)\r
       {\r
         pepseqs.addElement(newseq);\r
+        SequenceI ds = newseq;\r
+        while (ds.getDatasetSequence()!=null)\r
+        {\r
+          ds = ds.getDatasetSequence();\r
+        }\r
+        dataset.addSequence(ds);\r
       }\r
     }\r
     if (codons.aaWidth == 0)\r
@@ -93,11 +122,125 @@ public class Dna
     pepseqs.copyInto(newseqs);\r
     AlignmentI al = new Alignment(newseqs);\r
     al.padGaps(); // ensure we look aligned.\r
-    al.setDataset(null);\r
+    al.setDataset(dataset);\r
     translateAlignedAnnotations(annotations, al, codons);\r
     al.addCodonFrame(codons);\r
     return al;\r
   }\r
+  /**\r
+   * Test whether translating the selection would yield at least one protein\r
+   * product inside the visible region. A sequence qualifies when one of its\r
+   * database references carries a 3:1 (codon to residue) map for which\r
+   * locateMappedRange reports a range in some visible contig.\r
+   * \r
+   * @param selection nucleotide sequences to examine\r
+   * @param viscontigs visible column intervals as consecutive [start, end] pairs\r
+   * @return true as soon as one mapped product overlaps a visible contig\r
+   */\r
+  public static boolean canTranslate(SequenceI[] selection, int viscontigs[])\r
+  {\r
+    for (int si = 0; si < selection.length; si++)\r
+    {\r
+      SequenceI nucleotide = selection[si];\r
+      DBRefEntry[] codingRefs = jalview.util.DBRefUtils.selectRefs(\r
+              nucleotide.getDBRef(), jalview.datamodel.DBRefSource.DNACODINGDBS);\r
+      if (codingRefs == null)\r
+      {\r
+        continue; // no coding database references on this sequence\r
+      }\r
+      // keep only the references that map codons onto residues (3:1)\r
+      Vector proteinRefs = new Vector();\r
+      DBRefEntry[] allRefs = nucleotide.getDBRef();\r
+      for (int r = 0; r < allRefs.length; r++)\r
+      {\r
+        Mapping refMap = allRefs[r].getMap();\r
+        if (refMap != null && refMap.getMap() != null\r
+                && refMap.getMap().getFromRatio() == 3\r
+                && refMap.getMap().getToRatio() == 1)\r
+        {\r
+          proteinRefs.addElement(allRefs[r]);\r
+        }\r
+      }\r
+      // one visible contig covered by a product map is enough to answer yes\r
+      for (int r = 0; r < proteinRefs.size(); r++)\r
+      {\r
+        Mapping productMap = ((DBRefEntry) proteinRefs.elementAt(r)).getMap();\r
+        for (int vc = 0; productMap != null && vc < viscontigs.length; vc += 2)\r
+        {\r
+          if (productMap.locateMappedRange(viscontigs[vc], viscontigs[vc + 1]) != null)\r
+          {\r
+            return true;\r
+          }\r
+        }\r
+      }\r
+    }\r
+    return false;\r
+  }\r
+  /**\r
+   * Generate translated protein products for the selected nucleotide sequences\r
+   * using the codon-to-residue (3:1) mapped database references they carry.\r
+   * \r
+   * @param selection nucleotide sequences to translate\r
+   * @param viscontigs visible column intervals as consecutive [start, end] pairs\r
+   * @param gapCharacter gap symbol handed on to the translation\r
+   * @param dataset destination dataset for translated sequences\r
+   * @return the translated alignment, or null when no mapped product was found\r
+   */\r
+  public static AlignmentI CdnaTranslate(SequenceI[] selection,\r
+          int viscontigs[], char gapCharacter, Alignment dataset)\r
+  {\r
+    int alwidth = 0;\r
+    Vector cdnasqs = new Vector();\r
+    Vector cdnasqi = new Vector();\r
+    Vector cdnaprod = new Vector();\r
+    for (int gd = 0; gd < selection.length; gd++)\r
+    {\r
+      SequenceI dna = selection[gd];\r
+      DBRefEntry[] dnarefs = jalview.util.DBRefUtils.selectRefs(\r
+              dna.getDBRef(), jalview.datamodel.DBRefSource.DNACODINGDBS);\r
+      if (dnarefs == null)\r
+      {\r
+        continue;\r
+      }\r
+      // intersect with pep: keep only references with a 3:1 map\r
+      DBRefEntry[] refs = dna.getDBRef();\r
+      for (int d = 0; d < refs.length; d++)\r
+      {\r
+        Mapping mp = refs[d].getMap();\r
+        if (mp == null || mp.getMap() == null\r
+                || mp.getMap().getFromRatio() != 3\r
+                || mp.getMap().getToRatio() != 1)\r
+        {\r
+          continue;\r
+        }\r
+        // TODO: restrict the product's map to this intersection and generate\r
+        // the sequence string from it; until then sqstr (and alwidth) stay empty.\r
+        Mapping intersect = mp.intersectVisContigs(viscontigs);\r
+        StringBuffer sqstr = new StringBuffer();\r
+        if (sqstr.length() > alwidth)\r
+        {\r
+          alwidth = sqstr.length();\r
+        }\r
+        cdnasqs.addElement(sqstr.toString());\r
+        cdnasqi.addElement(dna);\r
+        // store the DBRefEntry itself: cdnaprod is copied into a DBRefEntry[]\r
+        // below, and copyInto rejects elements of any other type\r
+        cdnaprod.addElement(refs[d]);\r
+      }\r
+    }\r
+    if (cdnasqs.isEmpty())\r
+    {\r
+      return null;\r
+    }\r
+    // translate once, after every selected sequence has been examined\r
+    SequenceI[] cdna = new SequenceI[cdnasqs.size()];\r
+    DBRefEntry[] prods = new DBRefEntry[cdnaprod.size()];\r
+    String[] xons = new String[cdnasqs.size()];\r
+    cdnasqs.copyInto(xons);\r
+    cdnaprod.copyInto(prods);\r
+    cdnasqi.copyInto(cdna);\r
+    return CdnaTranslate(cdna, xons, prods, viscontigs, gapCharacter, null, alwidth, dataset);\r
+  }\r
 \r
   /**\r
    * translate na alignment annotations onto translated amino acid alignment al\r
@@ -139,18 +282,20 @@ public class Dna
             if (codons.codons[a] != null\r
                     && codons.codons[a][0] == (codons.codons[a][2] - 2))\r
             {\r
-              pos = codons.codons[a][0];\r
-              if (annotations[i].annotations[pos] == null\r
-                      || annotations[i].annotations[pos] == null)\r
-                continue;\r
-              // We just take the annotation in the first base in the codon\r
-              anots[a] = new Annotation(annotations[i].annotations[pos]);\r
+              anots[a] = getCodonAnnotation(codons.codons[a], annotations[i].annotations);\r
             }\r
           }\r
         }\r
 \r
         jalview.datamodel.AlignmentAnnotation aa = new jalview.datamodel.AlignmentAnnotation(\r
                 annotations[i].label, annotations[i].description, anots);\r
+        aa.graph = annotations[i].graph;\r
+        aa.graphGroup = annotations[i].graphGroup;\r
+        aa.graphHeight = annotations[i].graphHeight;\r
+        if (annotations[i].getThreshold()!=null)\r
+        {\r
+          aa.setThreshold(new jalview.datamodel.GraphLine(annotations[i].getThreshold()));        \r
+        }\r
         if (annotations[i].hasScore)\r
         {\r
           aa.setScore(annotations[i].getScore());\r
@@ -165,7 +310,7 @@ public class Dna
             // positioning\r
             aa.setSequenceRef(aaSeq);\r
             aa.createSequenceMapping(aaSeq, aaSeq.getStart(), true); // rebuild\r
-                                                                      // mapping\r
+            // mapping\r
             aa.adjustForAlignment();\r
             aaSeq.addAlignmentAnnotation(aa);\r
           }\r
@@ -176,23 +321,37 @@ public class Dna
     }\r
   }\r
 \r
+  /** Copy of the first annotation at any of the three codon columns in is, or null if none. */\r
+  private static Annotation getCodonAnnotation(int[] is, Annotation[] annotations)\r
+  {\r
+    // Columns outside the annotation row are skipped rather than indexed out of bounds.\r
+    for (int p = 0; p < 3; p++)\r
+    {\r
+      if (is[p] >= 0 && is[p] < annotations.length && annotations[is[p]] != null)\r
+      {\r
+        return new Annotation(annotations[is[p]]);\r
+      }\r
+    }\r
+    return null;\r
+  }\r
+\r
   /**\r
    * Translate a na sequence\r
    * \r
-   * @param selection\r
-   * @param seqstring\r
-   * @param viscontigs\r
-   * @param codons\r
+   * @param selection sequence displayed under viscontigs visible columns\r
+   * @param seqstring ORF read in some global alignment reference frame\r
+   * @param viscontigs mapping from global reference frame to visible seqstring ORF read\r
+   * @param codons Definition of global ORF alignment reference frame\r
    * @param gapCharacter\r
    * @param newSeq\r
    * @return sequence ready to be added to alignment.\r
    */\r
   public static SequenceI translateCodingRegion(SequenceI selection,\r
           String seqstring, int[] viscontigs, AlignedCodonFrame codons,\r
-          char gapCharacter)\r
+          char gapCharacter, DBRefEntry product)\r
   {\r
     ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring\r
-                                            // intervals\r
+    // intervals\r
     int vc, scontigs[] = new int[viscontigs.length];\r
     int npos = 0;\r
     for (vc = 0; vc < viscontigs.length; vc += 2)\r
@@ -250,8 +409,8 @@ public class Dna
             // with a gap\r
             aa = "" + gapCharacter + aa;\r
             aspos++;\r
-            if (aspos >= codons.aaWidth)\r
-              codons.aaWidth = aspos + 1;\r
+            //if (aspos >= codons.aaWidth)\r
+            //  codons.aaWidth = aspos + 1;\r
             break; // check the next position for alignment\r
           case 0:\r
             // codon aligns at aspos position.\r
@@ -283,8 +442,8 @@ public class Dna
         // map and trim contigs to ORF region\r
         vc = scontigs.length - 1;\r
         lastnpos = vismapping.shift(lastnpos); // place npos in context of\r
-                                                // whole dna alignment (rather\r
-                                                // than visible contigs)\r
+        // whole dna alignment (rather\r
+        // than visible contigs)\r
         // incomplete ORF could be broken over one or two visible contig\r
         // intervals.\r
         while (vc >= 0 && scontigs[vc] > lastnpos)\r
@@ -331,17 +490,29 @@ public class Dna
           System.arraycopy(scontigs, 0, t, 0, vc + 2);\r
           scontigs = t;\r
         }\r
-\r
         MapList map = new MapList(scontigs, new int[]\r
-        { 1, resSize }, 3, 1); // TODO: store mapping on newSeq for linked\r
-                                // DNA/Protein viewing.\r
+                                                    { 1, resSize }, 3, 1);\r
+        // update newseq as if it was generated as mapping from product\r
+        \r
+        if (product != null)\r
+        {\r
+          newseq.setName(product.getSource() + "|"\r
+                  + product.getAccessionId());\r
+          if (product.getMap() != null)\r
+          {\r
+            //Mapping mp = product.getMap();\r
+            //newseq.setStart(mp.getPosition(scontigs[0]));\r
+            //newseq.setEnd(mp\r
+            //        .getPosition(scontigs[scontigs.length - 1]));\r
+          }\r
+        }\r
         transferCodedFeatures(selection, newseq, map, null, null);\r
         SequenceI rseq = newseq.deriveSequence(); // construct a dataset\r
-                                                  // sequence for our new\r
-                                                  // peptide, regardless.\r
+        // sequence for our new\r
+        // peptide, regardless.\r
         // store a mapping (this actually stores a mapping between the dataset\r
         // sequences for the two sequences\r
-        codons.addMap(selection, newseq, map);\r
+        codons.addMap(selection, rseq, map);\r
         return rseq;\r
       }\r
     }\r