src/jalview/analysis/Dna.java

   1 package jalview.analysis;\r
   2 \r
   3 import java.util.Enumeration;\r
   4 import java.util.Hashtable;\r
   5 import java.util.Vector;\r
   6 \r
   7 import jalview.datamodel.AlignedCodonFrame;\r
   8 import jalview.datamodel.Alignment;\r
   9 import jalview.datamodel.AlignmentAnnotation;\r
  10 import jalview.datamodel.AlignmentI;\r
  11 import jalview.datamodel.Annotation;\r
  12 import jalview.datamodel.ColumnSelection;\r
  13 import jalview.datamodel.DBRefEntry;\r
  14 import jalview.datamodel.FeatureProperties;\r
  15 import jalview.datamodel.Mapping;\r
  16 import jalview.datamodel.Sequence;\r
  17 import jalview.datamodel.SequenceFeature;\r
  18 import jalview.datamodel.SequenceI;\r
  19 import jalview.schemes.ResidueProperties;\r
  20 import jalview.util.MapList;\r
  21 import jalview.util.ShiftList;\r
  22 \r
  23 public class Dna\r
  24 {\r
  25   /**\r
  26    * \r
  27    * @param cdp1\r
  28    * @param cdp2\r
  29    * @return -1 if cdp1 aligns before cdp2, 0 if in the same column or cdp2 is\r
  30    *         null, +1 if after cdp2\r
  31    */\r
  32   private static int compare_codonpos(int[] cdp1, int[] cdp2)\r
  33   {\r
  34     if (cdp2 == null\r
  35             || (cdp1[0] == cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2]))\r
  36       return 0;\r
  37     if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2])\r
  38       return -1; // one base in cdp1 precedes the corresponding base in the\r
  39     // other codon\r
  40     return 1; // one base in cdp1 appears after the corresponding base in the\r
  41     // other codon.\r
  42   }\r
  43 \r
  44   /**\r
  45    * DNA->mapped protein sequence alignment translation given set of sequences\r
  46    * 1. id distinct coding regions within selected region for each sequence 2.\r
  47    * generate peptides based on inframe (or given) translation or (optionally\r
  48    * and where specified) out of frame translations (annotated appropriately) 3.\r
  49    * align peptides based on codon alignment\r
  50    */\r
  51   /**\r
  52    * id potential products from dna 1. search for distinct products within\r
  53    * selected region for each selected sequence 2. group by associated DB type.\r
  54    * 3. return as form for input into above function\r
  55    */\r
  56   /**\r
  57    * \r
  58    */\r
  59   /**\r
  60    * create a new alignment of protein sequences by an inframe translation of\r
  61    * the provided NA sequences\r
  62    * \r
  63    * @param selection\r
  64    * @param seqstring\r
  65    * @param viscontigs\r
  66    * @param gapCharacter\r
  67    * @param annotations\r
  68    * @param aWidth\r
  69    * @param dataset destination dataset for translated sequences and mappings\r
  70    * @return\r
  71    */\r
  72   public static AlignmentI CdnaTranslate(SequenceI[] selection,\r
  73           String[] seqstring, int viscontigs[], char gapCharacter,\r
  74           AlignmentAnnotation[] annotations, int aWidth, Alignment dataset)\r
  75   {\r
  76     return CdnaTranslate(selection, seqstring, null, viscontigs,\r
  77             gapCharacter, annotations, aWidth, dataset);\r
  78   }\r
  79 \r
  80   /**\r
  81    * \r
  82    * @param selection\r
  83    * @param seqstring\r
  84    * @param product - array of DbRefEntry objects from which exon map in seqstring is derived\r
  85    * @param viscontigs\r
  86    * @param gapCharacter\r
  87    * @param annotations\r
  88    * @param aWidth\r
  89    * @param dataset\r
  90    * @return\r
  91    */\r
  92   public static AlignmentI CdnaTranslate(SequenceI[] selection,\r
  93           String[] seqstring, DBRefEntry[] product, int viscontigs[],\r
  94           char gapCharacter, AlignmentAnnotation[] annotations, int aWidth, Alignment dataset)\r
  95   {\r
  96     AlignedCodonFrame codons = new AlignedCodonFrame(aWidth); // stores hash of\r
  97     // subsequent\r
  98     // positions for\r
  99     // each codon\r
 100     // start position\r
 101     // in alignment\r
 102     int s, sSize = selection.length;\r
 103     Vector pepseqs = new Vector();\r
 104     for (s = 0; s < sSize; s++)\r
 105     {\r
 106       SequenceI newseq = translateCodingRegion(selection[s], seqstring[s],\r
 107               viscontigs, codons, gapCharacter, (product!=null) ? product[s] : null); // possibly anonymous product\r
 108       if (newseq != null)\r
 109       {\r
 110         pepseqs.addElement(newseq);\r
 111         SequenceI ds = newseq;\r
 112         while (ds.getDatasetSequence()!=null)\r
 113         {\r
 114           ds = ds.getDatasetSequence();\r
 115         }\r
 116         dataset.addSequence(ds);\r
 117       }\r
 118     }\r
 119     if (codons.aaWidth == 0)\r
 120       return null;\r
 121     SequenceI[] newseqs = new SequenceI[pepseqs.size()];\r
 122     pepseqs.copyInto(newseqs);\r
 123     AlignmentI al = new Alignment(newseqs);\r
 124     al.padGaps(); // ensure we look aligned.\r
 125     al.setDataset(dataset);\r
 126     translateAlignedAnnotations(annotations, al, codons);\r
 127     al.addCodonFrame(codons);\r
 128     return al;\r
 129   }\r
 130   /**\r
 131    * fake the collection of DbRefs with associated exon mappings to identify\r
 132    * if a translation would generate distinct product in the currently selected region.\r
 133    * @param selection\r
 134    * @param viscontigs\r
 135    * @return\r
 136    */\r
 137   public static boolean canTranslate(SequenceI[] selection, int viscontigs[])\r
 138   {\r
 139     for (int gd=0; gd<selection.length; gd++)\r
 140     {\r
 141       SequenceI dna = selection[gd];\r
 142       jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils\r
 143       .selectRefs(dna.getDBRef(),\r
 144               jalview.datamodel.DBRefSource.DNACODINGDBS);\r
 145       if (dnarefs != null)\r
 146       {\r
 147         // intersect with pep\r
 148         // intersect with pep\r
 149         Vector mappedrefs = new Vector();\r
 150         DBRefEntry[] refs=dna.getDBRef();\r
 151         for (int d=0; d<refs.length; d++)\r
 152         {\r
 153           if (refs[d].getMap()!=null && refs[d].getMap().getMap()!=null && refs[d].getMap().getMap().getFromRatio()==3\r
 154                   && refs[d].getMap().getMap().getToRatio()==1)\r
 155           {\r
 156             mappedrefs.addElement(refs[d]); // add translated protein maps\r
 157           }\r
 158         }\r
 159         dnarefs = new DBRefEntry[mappedrefs.size()];\r
 160         mappedrefs.copyInto(dnarefs);\r
 161         for (int d = 0; d < dnarefs.length; d++)\r
 162         {\r
 163           Mapping mp = dnarefs[d].getMap();\r
 164           if (mp != null)\r
 165           {\r
 166             for (int vc=0; vc<viscontigs.length; vc+=2)\r
 167             {\r
 168               int[] mpr=mp.locateMappedRange(viscontigs[vc], viscontigs[vc+1]);\r
 169               if (mpr!=null) {\r
 170                 return true;\r
 171               }\r
 172             }\r
 173           }\r
 174         }\r
 175       }\r
 176     }\r
 177     return false;\r
 178   }\r
 179   /**\r
 180    * generate a set of translated protein products from annotated sequenceI\r
 181    * @param selection\r
 182    * @param viscontigs\r
 183    * @param gapCharacter\r
 184    * @param dataset destination dataset for translated sequences\r
 185    * @param annotations\r
 186    * @param aWidth\r
 187    * @return\r
 188    */\r
 189   public static AlignmentI CdnaTranslate(SequenceI[] selection,\r
 190           int viscontigs[], char gapCharacter, Alignment dataset)\r
 191   {\r
 192     int alwidth=0;\r
 193     Vector cdnasqs=new Vector();\r
 194     Vector cdnasqi=new Vector();\r
 195     Vector cdnaprod=new Vector();\r
 196     for (int gd=0; gd<selection.length; gd++)\r
 197     {\r
 198       SequenceI dna = selection[gd];\r
 199       jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils\r
 200       .selectRefs(dna.getDBRef(),\r
 201               jalview.datamodel.DBRefSource.DNACODINGDBS);\r
 202       if (dnarefs != null)\r
 203       {\r
 204         // intersect with pep\r
 205         Vector mappedrefs = new Vector();\r
 206         DBRefEntry[] refs=dna.getDBRef();\r
 207         for (int d=0; d<refs.length; d++)\r
 208         {\r
 209           if (refs[d].getMap()!=null && refs[d].getMap().getMap()!=null && refs[d].getMap().getMap().getFromRatio()==3\r
 210                   && refs[d].getMap().getMap().getToRatio()==1)\r
 211           {\r
 212             mappedrefs.addElement(refs[d]); // add translated protein maps\r
 213           }\r
 214         }\r
 215         dnarefs = new DBRefEntry[mappedrefs.size()];\r
 216         mappedrefs.copyInto(dnarefs);\r
 217         for (int d = 0; d < dnarefs.length; d++)\r
 218         {\r
 219           Mapping mp = dnarefs[d].getMap();\r
 220           StringBuffer sqstr=new StringBuffer();\r
 221           if (mp != null)\r
 222           {\r
 223             Mapping intersect = mp.intersectVisContigs(viscontigs);\r
 224             // generate seqstring for this sequence based on mapping\r
 225             \r
 226             if (sqstr.length()>alwidth)\r
 227               alwidth = sqstr.length();\r
 228             cdnasqs.addElement(sqstr.toString());\r
 229             cdnasqi.addElement(dna);\r
 230             cdnaprod.addElement(intersect);\r
 231           }\r
 232         }\r
 233       }\r
 234       SequenceI[] cdna = new SequenceI[cdnasqs.size()];\r
 235       DBRefEntry[] prods = new DBRefEntry[cdnaprod.size()];\r
 236       String[] xons = new String[cdnasqs.size()];\r
 237       cdnasqs.copyInto(xons);\r
 238       cdnaprod.copyInto(prods);\r
 239       cdnasqi.copyInto(cdna);\r
 240       return CdnaTranslate(cdna, xons, prods, viscontigs, gapCharacter, null, alwidth, dataset);\r
 241     }\r
 242     return null;\r
 243   }\r
 244 \r
 245   /**\r
 246    * translate na alignment annotations onto translated amino acid alignment al\r
 247    * using codon mapping codons\r
 248    * \r
 249    * @param annotations\r
 250    * @param al\r
 251    * @param codons\r
 252    */\r
 253   public static void translateAlignedAnnotations(\r
 254           AlignmentAnnotation[] annotations, AlignmentI al,\r
 255           AlignedCodonFrame codons)\r
 256   {\r
 257     // //////////////////////////////\r
 258     // Copy annotations across\r
 259     //\r
 260     // Can only do this for columns with consecutive codons, or where\r
 261     // annotation is sequence associated.\r
 262 \r
 263     int pos, a, aSize;\r
 264     if (annotations != null)\r
 265     {\r
 266       for (int i = 0; i < annotations.length; i++)\r
 267       {\r
 268         // Skip any autogenerated annotation\r
 269         if (annotations[i].autoCalculated)\r
 270         {\r
 271           continue;\r
 272         }\r
 273 \r
 274         aSize = codons.getaaWidth(); // aa alignment width.\r
 275         jalview.datamodel.Annotation[] anots = (annotations[i].annotations == null) ? null\r
 276                 : new jalview.datamodel.Annotation[aSize];\r
 277         if (anots != null)\r
 278         {\r
 279           for (a = 0; a < aSize; a++)\r
 280           {\r
 281             // process through codon map.\r
 282             if (codons.codons[a] != null\r
 283                     && codons.codons[a][0] == (codons.codons[a][2] - 2))\r
 284             {\r
 285               anots[a] = getCodonAnnotation(codons.codons[a], annotations[i].annotations);\r
 286             }\r
 287           }\r
 288         }\r
 289 \r
 290         jalview.datamodel.AlignmentAnnotation aa = new jalview.datamodel.AlignmentAnnotation(\r
 291                 annotations[i].label, annotations[i].description, anots);\r
 292         aa.graph = annotations[i].graph;\r
 293         aa.graphGroup = annotations[i].graphGroup;\r
 294         aa.graphHeight = annotations[i].graphHeight;\r
 295         if (annotations[i].getThreshold()!=null)\r
 296         {\r
 297           aa.setThreshold(new jalview.datamodel.GraphLine(annotations[i].getThreshold()));        \r
 298         }\r
 299         if (annotations[i].hasScore)\r
 300         {\r
 301           aa.setScore(annotations[i].getScore());\r
 302         }\r
 303         if (annotations[i].sequenceRef != null)\r
 304         {\r
 305           SequenceI aaSeq = codons\r
 306                   .getAaForDnaSeq(annotations[i].sequenceRef);\r
 307           if (aaSeq != null)\r
 308           {\r
 309             // aa.compactAnnotationArray(); // throw away alignment annotation\r
 310             // positioning\r
 311             aa.setSequenceRef(aaSeq);\r
 312             aa.createSequenceMapping(aaSeq, aaSeq.getStart(), true); // rebuild\r
 313             // mapping\r
 314             aa.adjustForAlignment();\r
 315             aaSeq.addAlignmentAnnotation(aa);\r
 316           }\r
 317 \r
 318         }\r
 319         al.addAnnotation(aa);\r
 320       }\r
 321     }\r
 322   }\r
 323 \r
 324   private static Annotation getCodonAnnotation(int[] is, Annotation[] annotations)\r
 325   { \r
 326     // Have a look at all the codon positions for annotation and put the first\r
 327     // one found into the translated annotation pos.\r
 328     for (int p=0; p<3; p++)\r
 329     {\r
 330       if (annotations[is[p]]!=null)\r
 331       {\r
 332         return new Annotation(annotations[is[p]]);\r
 333       }\r
 334     }\r
 335     return null;\r
 336   }\r
 337 \r
 338   /**\r
 339    * Translate a na sequence\r
 340    * \r
 341    * @param selection sequence displayed under viscontigs visible columns\r
 342    * @param seqstring ORF read in some global alignment reference frame\r
 343    * @param viscontigs mapping from global reference frame to visible seqstring ORF read\r
 344    * @param codons Definition of global ORF alignment reference frame\r
 345    * @param gapCharacter\r
 346    * @param newSeq\r
 347    * @return sequence ready to be added to alignment.\r
 348    */\r
 349   public static SequenceI translateCodingRegion(SequenceI selection,\r
 350           String seqstring, int[] viscontigs, AlignedCodonFrame codons,\r
 351           char gapCharacter, DBRefEntry product)\r
 352   {\r
 353     ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring\r
 354     // intervals\r
 355     int vc, scontigs[] = new int[viscontigs.length];\r
 356     int npos = 0;\r
 357     for (vc = 0; vc < viscontigs.length; vc += 2)\r
 358     {\r
 359       vismapping.addShift(npos, viscontigs[vc]);\r
 360       scontigs[vc] = npos;\r
 361       npos += viscontigs[vc + 1];\r
 362       scontigs[vc + 1] = npos;\r
 363     }\r
 364 \r
 365     StringBuffer protein = new StringBuffer();\r
 366     String seq = seqstring.replace('U', 'T');\r
 367     char codon[] = new char[3];\r
 368     int cdp[] = new int[3], rf = 0, lastnpos = 0, nend;\r
 369     int aspos = 0;\r
 370     int resSize = 0;\r
 371     for (npos = 0, nend = seq.length(); npos < nend; npos++)\r
 372     {\r
 373       if (!jalview.util.Comparison.isGap(seq.charAt(npos)))\r
 374       {\r
 375         cdp[rf] = npos; // store position\r
 376         codon[rf++] = seq.charAt(npos); // store base\r
 377       }\r
 378       // filled an RF yet ?\r
 379       if (rf == 3)\r
 380       {\r
 381         String aa = ResidueProperties.codonTranslate(new String(codon));\r
 382         rf = 0;\r
 383         if (aa == null)\r
 384           aa = String.valueOf(gapCharacter);\r
 385         else\r
 386         {\r
 387           if (aa.equals("STOP"))\r
 388           {\r
 389             aa = "X";\r
 390           }\r
 391           resSize++;\r
 392         }\r
 393         // insert/delete gaps prior to this codon - if necessary\r
 394         boolean findpos = true;\r
 395         while (findpos)\r
 396         {\r
 397           // first ensure that the codons array is long enough.\r
 398           codons.checkCodonFrameWidth(aspos);\r
 399           // now check to see if we place the aa at the current aspos in the\r
 400           // protein alignment\r
 401           switch (Dna.compare_codonpos(cdp, codons.codons[aspos]))\r
 402           {\r
 403           case -1:\r
 404             codons.insertAAGap(aspos, gapCharacter);\r
 405             findpos = false;\r
 406             break;\r
 407           case +1:\r
 408             // this aa appears after the aligned codons at aspos, so prefix it\r
 409             // with a gap\r
 410             aa = "" + gapCharacter + aa;\r
 411             aspos++;\r
 412             //if (aspos >= codons.aaWidth)\r
 413             //  codons.aaWidth = aspos + 1;\r
 414             break; // check the next position for alignment\r
 415           case 0:\r
 416             // codon aligns at aspos position.\r
 417             findpos = false;\r
 418           }\r
 419         }\r
 420         // codon aligns with all other sequence residues found at aspos\r
 421         protein.append(aa);\r
 422         lastnpos = npos;\r
 423         if (codons.codons[aspos] == null)\r
 424         {\r
 425           // mark this column as aligning to this aligned reading frame\r
 426           codons.codons[aspos] = new int[]\r
 427           { cdp[0], cdp[1], cdp[2] };\r
 428         }\r
 429         aspos++;\r
 430         if (aspos >= codons.aaWidth)\r
 431           codons.aaWidth = aspos + 1;\r
 432       }\r
 433     }\r
 434     if (resSize > 0)\r
 435     {\r
 436       SequenceI newseq = new Sequence(selection.getName(), protein\r
 437               .toString());\r
 438       if (rf != 0)\r
 439       {\r
 440         jalview.bin.Cache.log\r
 441                 .debug("trimming contigs for incomplete terminal codon.");\r
 442         // map and trim contigs to ORF region\r
 443         vc = scontigs.length - 1;\r
 444         lastnpos = vismapping.shift(lastnpos); // place npos in context of\r
 445         // whole dna alignment (rather\r
 446         // than visible contigs)\r
 447         // incomplete ORF could be broken over one or two visible contig\r
 448         // intervals.\r
 449         while (vc >= 0 && scontigs[vc] > lastnpos)\r
 450         {\r
 451           if (vc > 0 && scontigs[vc - 1] > lastnpos)\r
 452           {\r
 453             vc -= 2;\r
 454           }\r
 455           else\r
 456           {\r
 457             // correct last interval in list.\r
 458             scontigs[vc] = lastnpos;\r
 459           }\r
 460         }\r
 461 \r
 462         if (vc > 0 && (vc + 1) < scontigs.length)\r
 463         {\r
 464           // truncate map list to just vc elements\r
 465           int t[] = new int[vc + 1];\r
 466           System.arraycopy(scontigs, 0, t, 0, vc + 1);\r
 467           scontigs = t;\r
 468         }\r
 469         if (vc <= 0)\r
 470           scontigs = null;\r
 471       }\r
 472       if (scontigs != null)\r
 473       {\r
 474         npos = 0;\r
 475         // Find sequence position for scontigs positions on the nucleotide\r
 476         // sequence string we were passed.\r
 477         for (vc = 0; vc < viscontigs.length; vc += 2)\r
 478         {\r
 479           scontigs[vc] = selection.findPosition(scontigs[vc]); // not from 1!\r
 480           npos += viscontigs[vc];\r
 481           scontigs[vc + 1] = selection\r
 482                   .findPosition(npos + scontigs[vc + 1]); // exclusive\r
 483           if (scontigs[vc + 1] == selection.getEnd())\r
 484             break;\r
 485         }\r
 486         // trim trailing empty intervals.\r
 487         if ((vc + 2) < scontigs.length)\r
 488         {\r
 489           int t[] = new int[vc + 2];\r
 490           System.arraycopy(scontigs, 0, t, 0, vc + 2);\r
 491           scontigs = t;\r
 492         }\r
 493         MapList map = new MapList(scontigs, new int[]\r
 494                                                     { 1, resSize }, 3, 1);\r
 495         // update newseq as if it was generated as mapping from product\r
 496         \r
 497         if (product != null)\r
 498         {\r
 499           newseq.setName(product.getSource() + "|"\r
 500                   + product.getAccessionId());\r
 501           if (product.getMap() != null)\r
 502           {\r
 503             //Mapping mp = product.getMap();\r
 504             //newseq.setStart(mp.getPosition(scontigs[0]));\r
 505             //newseq.setEnd(mp\r
 506             //        .getPosition(scontigs[scontigs.length - 1]));\r
 507           }\r
 508         }\r
 509         transferCodedFeatures(selection, newseq, map, null, null);\r
 510         SequenceI rseq = newseq.deriveSequence(); // construct a dataset\r
 511         // sequence for our new\r
 512         // peptide, regardless.\r
 513         // store a mapping (this actually stores a mapping between the dataset\r
 514         // sequences for the two sequences\r
 515         codons.addMap(selection, rseq, map);\r
 516         return rseq;\r
 517       }\r
 518     }\r
 519     // register the mapping somehow\r
 520     // \r
 521     return null;\r
 522   }\r
 523 \r
 524   /**\r
 525    * Given a peptide newly translated from a dna sequence, copy over and set any\r
 526    * features on the peptide from the DNA. If featureTypes is null, all features\r
 527    * on the dna sequence are searched (rather than just the displayed ones), and\r
 528    * similarly for featureGroups.\r
 529    * \r
 530    * @param dna\r
 531    * @param pep\r
 532    * @param map\r
 533    * @param featureTypes\r
 534    *          hash who's keys are the displayed feature type strings\r
 535    * @param featureGroups\r
 536    *          hash where keys are feature groups and values are Boolean objects\r
 537    *          indicating if they are displayed.\r
 538    */\r
 539   private static void transferCodedFeatures(SequenceI dna, SequenceI pep,\r
 540           MapList map, Hashtable featureTypes, Hashtable featureGroups)\r
 541   {\r
 542     SequenceFeature[] sf = dna.getDatasetSequence().getSequenceFeatures();\r
 543     Boolean fgstate;\r
 544     jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils\r
 545             .selectRefs(dna.getDBRef(),\r
 546                     jalview.datamodel.DBRefSource.DNACODINGDBS);\r
 547     if (dnarefs != null)\r
 548     {\r
 549       // intersect with pep\r
 550       for (int d = 0; d < dnarefs.length; d++)\r
 551       {\r
 552         Mapping mp = dnarefs[d].getMap();\r
 553         if (mp != null)\r
 554         {\r
 555         }\r
 556       }\r
 557     }\r
 558     if (sf != null)\r
 559     {\r
 560       for (int f = 0; f < sf.length; f++)\r
 561       {\r
 562         fgstate = (featureGroups == null) ? null : ((Boolean) featureGroups\r
 563                 .get(sf[f].featureGroup));\r
 564         if ((featureTypes == null || featureTypes.containsKey(sf[f]\r
 565                 .getType()))\r
 566                 && (fgstate == null || fgstate.booleanValue()))\r
 567         {\r
 568           if (FeatureProperties.isCodingFeature(null, sf[f].getType()))\r
 569           {\r
 570             // if (map.intersectsFrom(sf[f].begin, sf[f].end))\r
 571             {\r
 572 \r
 573             }\r
 574           }\r
 575         }\r
 576       }\r
 577     }\r
 578   }\r
 579 }\r