src/jalview/analysis/Dna.java

   1 package jalview.analysis;\r
   2 \r
   3 import java.util.Hashtable;\r
   4 import java.util.Vector;\r
   5 \r
   6 import jalview.datamodel.AlignedCodonFrame;\r
   7 import jalview.datamodel.Alignment;\r
   8 import jalview.datamodel.AlignmentAnnotation;\r
   9 import jalview.datamodel.AlignmentI;\r
  10 import jalview.datamodel.Annotation;\r
  11 import jalview.datamodel.ColumnSelection;\r
  12 import jalview.datamodel.FeatureProperties;\r
  13 import jalview.datamodel.Mapping;\r
  14 import jalview.datamodel.Sequence;\r
  15 import jalview.datamodel.SequenceFeature;\r
  16 import jalview.datamodel.SequenceI;\r
  17 import jalview.schemes.ResidueProperties;\r
  18 import jalview.util.MapList;\r
  19 import jalview.util.ShiftList;\r
  20 \r
  21 public class Dna\r
  22 {\r
  23   /**\r
  24    * \r
  25    * @param cdp1\r
  26    * @param cdp2\r
  27    * @return -1 if cdp1 aligns before cdp2, 0 if in the same column or cdp2 is\r
  28    *         null, +1 if after cdp2\r
  29    */\r
  30   private static int compare_codonpos(int[] cdp1, int[] cdp2)\r
  31   {\r
  32     if (cdp2 == null\r
  33             || (cdp1[0] == cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2]))\r
  34       return 0;\r
  35     if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2])\r
  36       return -1; // one base in cdp1 precedes the corresponding base in the\r
  37                   // other codon\r
  38     return 1; // one base in cdp1 appears after the corresponding base in the\r
  39               // other codon.\r
  40   }\r
  41 \r
  42   /**\r
  43    * DNA->mapped protein sequence alignment translation given set of sequences\r
  44    * 1. id distinct coding regions within selected region for each sequence 2.\r
  45    * generate peptides based on inframe (or given) translation or (optionally\r
  46    * and where specified) out of frame translations (annotated appropriately) 3.\r
  47    * align peptides based on codon alignment\r
  48    */\r
  49   /**\r
  50    * id potential products from dna 1. search for distinct products within\r
  51    * selected region for each selected sequence 2. group by associated DB type.\r
  52    * 3. return as form for input into above function\r
  53    */\r
  54   /**\r
  55    * \r
  56    */\r
  57   /**\r
  58    * create a new alignment of protein sequences by an inframe translation of\r
  59    * the provided NA sequences\r
  60    * \r
  61    * @param selection\r
  62    * @param seqstring\r
  63    * @param viscontigs\r
  64    * @param gapCharacter\r
  65    * @param annotations\r
  66    * @param aWidth\r
  67    * @return\r
  68    */\r
  69   public static AlignmentI CdnaTranslate(SequenceI[] selection,\r
  70           String[] seqstring, int viscontigs[], char gapCharacter,\r
  71           AlignmentAnnotation[] annotations, int aWidth)\r
  72   {\r
  73     AlignedCodonFrame codons = new AlignedCodonFrame(aWidth); // stores hash of\r
  74                                                               // subsequent\r
  75                                                               // positions for\r
  76                                                               // each codon\r
  77                                                               // start position\r
  78                                                               // in alignment\r
  79     int s, sSize = selection.length;\r
  80     Vector pepseqs = new Vector();\r
  81     for (s = 0; s < sSize; s++)\r
  82     {\r
  83       SequenceI newseq = translateCodingRegion(selection[s], seqstring[s],\r
  84               viscontigs, codons, gapCharacter);\r
  85       if (newseq != null)\r
  86       {\r
  87         pepseqs.addElement(newseq);\r
  88       }\r
  89     }\r
  90     if (codons.aaWidth == 0)\r
  91       return null;\r
  92     SequenceI[] newseqs = new SequenceI[pepseqs.size()];\r
  93     pepseqs.copyInto(newseqs);\r
  94     AlignmentI al = new Alignment(newseqs);\r
  95     al.padGaps(); // ensure we look aligned.\r
  96     al.setDataset(null);\r
  97     translateAlignedAnnotations(annotations, al, codons);\r
  98     al.addCodonFrame(codons);\r
  99     return al;\r
 100   }\r
 101 \r
 102   /**\r
 103    * translate na alignment annotations onto translated amino acid alignment al\r
 104    * using codon mapping codons\r
 105    * \r
 106    * @param annotations\r
 107    * @param al\r
 108    * @param codons\r
 109    */\r
 110   public static void translateAlignedAnnotations(\r
 111           AlignmentAnnotation[] annotations, AlignmentI al,\r
 112           AlignedCodonFrame codons)\r
 113   {\r
 114     // //////////////////////////////\r
 115     // Copy annotations across\r
 116     //\r
 117     // Can only do this for columns with consecutive codons, or where\r
 118     // annotation is sequence associated.\r
 119 \r
 120     int pos, a, aSize;\r
 121     if (annotations != null)\r
 122     {\r
 123       for (int i = 0; i < annotations.length; i++)\r
 124       {\r
 125         // Skip any autogenerated annotation\r
 126         if (annotations[i].autoCalculated)\r
 127         {\r
 128           continue;\r
 129         }\r
 130 \r
 131         aSize = codons.getaaWidth(); // aa alignment width.\r
 132         jalview.datamodel.Annotation[] anots = (annotations[i].annotations == null) ? null\r
 133                 : new jalview.datamodel.Annotation[aSize];\r
 134         if (anots != null)\r
 135         {\r
 136           for (a = 0; a < aSize; a++)\r
 137           {\r
 138             // process through codon map.\r
 139             if (codons.codons[a] != null\r
 140                     && codons.codons[a][0] == (codons.codons[a][2] - 2))\r
 141             {\r
 142               pos = codons.codons[a][0];\r
 143               if (annotations[i].annotations[pos] == null\r
 144                       || annotations[i].annotations[pos] == null)\r
 145                 continue;\r
 146               // We just take the annotation in the first base in the codon\r
 147               anots[a] = new Annotation(annotations[i].annotations[pos]);\r
 148             }\r
 149           }\r
 150         }\r
 151 \r
 152         jalview.datamodel.AlignmentAnnotation aa = new jalview.datamodel.AlignmentAnnotation(\r
 153                 annotations[i].label, annotations[i].description, anots);\r
 154         if (annotations[i].hasScore)\r
 155         {\r
 156           aa.setScore(annotations[i].getScore());\r
 157         }\r
 158         if (annotations[i].sequenceRef != null)\r
 159         {\r
 160           SequenceI aaSeq = codons\r
 161                   .getAaForDnaSeq(annotations[i].sequenceRef);\r
 162           if (aaSeq != null)\r
 163           {\r
 164             // aa.compactAnnotationArray(); // throw away alignment annotation\r
 165             // positioning\r
 166             aa.setSequenceRef(aaSeq);\r
 167             aa.createSequenceMapping(aaSeq, aaSeq.getStart(), true); // rebuild\r
 168                                                                       // mapping\r
 169             aa.adjustForAlignment();\r
 170             aaSeq.addAlignmentAnnotation(aa);\r
 171           }\r
 172 \r
 173         }\r
 174         al.addAnnotation(aa);\r
 175       }\r
 176     }\r
 177   }\r
 178 \r
 179   /**\r
 180    * Translate a na sequence\r
 181    * \r
 182    * @param selection\r
 183    * @param seqstring\r
 184    * @param viscontigs\r
 185    * @param codons\r
 186    * @param gapCharacter\r
 187    * @param newSeq\r
 188    * @return sequence ready to be added to alignment.\r
 189    */\r
 190   public static SequenceI translateCodingRegion(SequenceI selection,\r
 191           String seqstring, int[] viscontigs, AlignedCodonFrame codons,\r
 192           char gapCharacter)\r
 193   {\r
 194     ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring\r
 195                                             // intervals\r
 196     int vc, scontigs[] = new int[viscontigs.length];\r
 197     int npos = 0;\r
 198     for (vc = 0; vc < viscontigs.length; vc += 2)\r
 199     {\r
 200       vismapping.addShift(npos, viscontigs[vc]);\r
 201       scontigs[vc] = npos;\r
 202       npos += viscontigs[vc + 1];\r
 203       scontigs[vc + 1] = npos;\r
 204     }\r
 205 \r
 206     StringBuffer protein = new StringBuffer();\r
 207     String seq = seqstring.replace('U', 'T');\r
 208     char codon[] = new char[3];\r
 209     int cdp[] = new int[3], rf = 0, lastnpos = 0, nend;\r
 210     int aspos = 0;\r
 211     int resSize = 0;\r
 212     for (npos = 0, nend = seq.length(); npos < nend; npos++)\r
 213     {\r
 214       if (!jalview.util.Comparison.isGap(seq.charAt(npos)))\r
 215       {\r
 216         cdp[rf] = npos; // store position\r
 217         codon[rf++] = seq.charAt(npos); // store base\r
 218       }\r
 219       // filled an RF yet ?\r
 220       if (rf == 3)\r
 221       {\r
 222         String aa = ResidueProperties.codonTranslate(new String(codon));\r
 223         rf = 0;\r
 224         if (aa == null)\r
 225           aa = String.valueOf(gapCharacter);\r
 226         else\r
 227         {\r
 228           if (aa.equals("STOP"))\r
 229           {\r
 230             aa = "X";\r
 231           }\r
 232           resSize++;\r
 233         }\r
 234         // insert/delete gaps prior to this codon - if necessary\r
 235         boolean findpos = true;\r
 236         while (findpos)\r
 237         {\r
 238           // first ensure that the codons array is long enough.\r
 239           codons.checkCodonFrameWidth(aspos);\r
 240           // now check to see if we place the aa at the current aspos in the\r
 241           // protein alignment\r
 242           switch (Dna.compare_codonpos(cdp, codons.codons[aspos]))\r
 243           {\r
 244           case -1:\r
 245             codons.insertAAGap(aspos, gapCharacter);\r
 246             findpos = false;\r
 247             break;\r
 248           case +1:\r
 249             // this aa appears after the aligned codons at aspos, so prefix it\r
 250             // with a gap\r
 251             aa = "" + gapCharacter + aa;\r
 252             aspos++;\r
 253             if (aspos >= codons.aaWidth)\r
 254               codons.aaWidth = aspos + 1;\r
 255             break; // check the next position for alignment\r
 256           case 0:\r
 257             // codon aligns at aspos position.\r
 258             findpos = false;\r
 259           }\r
 260         }\r
 261         // codon aligns with all other sequence residues found at aspos\r
 262         protein.append(aa);\r
 263         lastnpos = npos;\r
 264         if (codons.codons[aspos] == null)\r
 265         {\r
 266           // mark this column as aligning to this aligned reading frame\r
 267           codons.codons[aspos] = new int[]\r
 268           { cdp[0], cdp[1], cdp[2] };\r
 269         }\r
 270         aspos++;\r
 271         if (aspos >= codons.aaWidth)\r
 272           codons.aaWidth = aspos + 1;\r
 273       }\r
 274     }\r
 275     if (resSize > 0)\r
 276     {\r
 277       SequenceI newseq = new Sequence(selection.getName(), protein\r
 278               .toString());\r
 279       if (rf != 0)\r
 280       {\r
 281         jalview.bin.Cache.log\r
 282                 .debug("trimming contigs for incomplete terminal codon.");\r
 283         // map and trim contigs to ORF region\r
 284         vc = scontigs.length - 1;\r
 285         lastnpos = vismapping.shift(lastnpos); // place npos in context of\r
 286                                                 // whole dna alignment (rather\r
 287                                                 // than visible contigs)\r
 288         // incomplete ORF could be broken over one or two visible contig\r
 289         // intervals.\r
 290         while (vc >= 0 && scontigs[vc] > lastnpos)\r
 291         {\r
 292           if (vc > 0 && scontigs[vc - 1] > lastnpos)\r
 293           {\r
 294             vc -= 2;\r
 295           }\r
 296           else\r
 297           {\r
 298             // correct last interval in list.\r
 299             scontigs[vc] = lastnpos;\r
 300           }\r
 301         }\r
 302 \r
 303         if (vc > 0 && (vc + 1) < scontigs.length)\r
 304         {\r
 305           // truncate map list to just vc elements\r
 306           int t[] = new int[vc + 1];\r
 307           System.arraycopy(scontigs, 0, t, 0, vc + 1);\r
 308           scontigs = t;\r
 309         }\r
 310         if (vc <= 0)\r
 311           scontigs = null;\r
 312       }\r
 313       if (scontigs != null)\r
 314       {\r
 315         npos = 0;\r
 316         // Find sequence position for scontigs positions on the nucleotide\r
 317         // sequence string we were passed.\r
 318         for (vc = 0; vc < viscontigs.length; vc += 2)\r
 319         {\r
 320           scontigs[vc] = selection.findPosition(scontigs[vc]); // not from 1!\r
 321           npos += viscontigs[vc];\r
 322           scontigs[vc + 1] = selection\r
 323                   .findPosition(npos + scontigs[vc + 1]); // exclusive\r
 324           if (scontigs[vc + 1] == selection.getEnd())\r
 325             break;\r
 326         }\r
 327         // trim trailing empty intervals.\r
 328         if ((vc + 2) < scontigs.length)\r
 329         {\r
 330           int t[] = new int[vc + 2];\r
 331           System.arraycopy(scontigs, 0, t, 0, vc + 2);\r
 332           scontigs = t;\r
 333         }\r
 334 \r
 335         MapList map = new MapList(scontigs, new int[]\r
 336         { 1, resSize }, 3, 1); // TODO: store mapping on newSeq for linked\r
 337                                 // DNA/Protein viewing.\r
 338         transferCodedFeatures(selection, newseq, map, null, null);\r
 339         SequenceI rseq = newseq.deriveSequence(); // construct a dataset\r
 340                                                   // sequence for our new\r
 341                                                   // peptide, regardless.\r
 342         // store a mapping (this actually stores a mapping between the dataset\r
 343         // sequences for the two sequences\r
 344         codons.addMap(selection, newseq, map);\r
 345         return rseq;\r
 346       }\r
 347     }\r
 348     // register the mapping somehow\r
 349     // \r
 350     return null;\r
 351   }\r
 352 \r
 353   /**\r
 354    * Given a peptide newly translated from a dna sequence, copy over and set any\r
 355    * features on the peptide from the DNA. If featureTypes is null, all features\r
 356    * on the dna sequence are searched (rather than just the displayed ones), and\r
 357    * similarly for featureGroups.\r
 358    * \r
 359    * @param dna\r
 360    * @param pep\r
 361    * @param map\r
 362    * @param featureTypes\r
 363    *          hash who's keys are the displayed feature type strings\r
 364    * @param featureGroups\r
 365    *          hash where keys are feature groups and values are Boolean objects\r
 366    *          indicating if they are displayed.\r
 367    */\r
 368   private static void transferCodedFeatures(SequenceI dna, SequenceI pep,\r
 369           MapList map, Hashtable featureTypes, Hashtable featureGroups)\r
 370   {\r
 371     SequenceFeature[] sf = dna.getDatasetSequence().getSequenceFeatures();\r
 372     Boolean fgstate;\r
 373     jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils\r
 374             .selectRefs(dna.getDBRef(),\r
 375                     jalview.datamodel.DBRefSource.DNACODINGDBS);\r
 376     if (dnarefs != null)\r
 377     {\r
 378       // intersect with pep\r
 379       for (int d = 0; d < dnarefs.length; d++)\r
 380       {\r
 381         Mapping mp = dnarefs[d].getMap();\r
 382         if (mp != null)\r
 383         {\r
 384         }\r
 385       }\r
 386     }\r
 387     if (sf != null)\r
 388     {\r
 389       for (int f = 0; f < sf.length; f++)\r
 390       {\r
 391         fgstate = (featureGroups == null) ? null : ((Boolean) featureGroups\r
 392                 .get(sf[f].featureGroup));\r
 393         if ((featureTypes == null || featureTypes.containsKey(sf[f]\r
 394                 .getType()))\r
 395                 && (fgstate == null || fgstate.booleanValue()))\r
 396         {\r
 397           if (FeatureProperties.isCodingFeature(null, sf[f].getType()))\r
 398           {\r
 399             // if (map.intersectsFrom(sf[f].begin, sf[f].end))\r
 400             {\r
 401 \r
 402             }\r
 403           }\r
 404         }\r
 405       }\r
 406     }\r
 407   }\r
 408 }\r