import java.util.Hashtable;\r
import java.util.Vector;\r
\r
+import jalview.datamodel.AlignedCodonFrame;\r
import jalview.datamodel.Alignment;\r
import jalview.datamodel.AlignmentAnnotation;\r
import jalview.datamodel.AlignmentI;\r
import jalview.datamodel.Annotation;\r
import jalview.datamodel.ColumnSelection;\r
+import jalview.datamodel.FeatureProperties;\r
+import jalview.datamodel.Mapping;\r
import jalview.datamodel.Sequence;\r
import jalview.datamodel.SequenceFeature;\r
import jalview.datamodel.SequenceI;\r
import jalview.schemes.ResidueProperties;\r
import jalview.util.MapList;\r
+import jalview.util.ShiftList;\r
\r
-public class Dna {\r
+public class Dna\r
+{\r
/**\r
* Compare two codons by the positions of their three bases.\r
* \r
* NOTE(review): -1 is returned as soon as any one base of cdp1 precedes the\r
* corresponding base of cdp2, so this is not a strict ordering.\r
* \r
* @param cdp1 positions of the three bases of the first codon; elements 0-2\r
* are read whenever cdp2 is not null\r
* @param cdp2 positions of the three bases of the second codon, or null\r
- * @return -1 if cdp1 aligns before cdp2, 0 if in the same column or cdp2 is null, +1 if after cdp2\r
+ * @return -1 if cdp1 aligns before cdp2, 0 if in the same column or cdp2 is\r
+ * null, +1 if after cdp2\r
*/\r
- private static int compare_codonpos(int[] cdp1, int[] cdp2) {\r
- if (cdp2==null || (cdp1[0]==cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2]))\r
+ private static int compare_codonpos(int[] cdp1, int[] cdp2)\r
+ {\r
+ if (cdp2 == null\r
+ || (cdp1[0] == cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2]))\r
return 0;\r
- if (cdp1[0]<cdp2[0] || cdp1[1]<cdp2[1] || cdp1[2]<cdp2[2])\r
- return -1; // one base in cdp1 precedes the corresponding base in the other codon\r
- return 1; // one base in cdp1 appears after the corresponding base in the other codon.\r
+ if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2])\r
+ return -1; // one base in cdp1 precedes the corresponding base in the\r
+ // other codon\r
+ return 1; // one base in cdp1 appears after the corresponding base in the\r
+ // other codon.\r
}\r
+\r
+ /**\r
+ * DNA->mapped protein sequence alignment translation, given a set of\r
+ * sequences:\r
+ * 1. identify distinct coding regions within the selected region for each\r
+ * sequence\r
+ * 2. generate peptides based on in-frame (or given) translation or\r
+ * (optionally and where specified) out-of-frame translations (annotated\r
+ * appropriately)\r
+ * 3. align peptides based on codon alignment\r
+ */\r
+ /**\r
+ * Identify potential products from dna:\r
+ * 1. search for distinct products within the selected region for each\r
+ * selected sequence\r
+ * 2. group by associated DB type\r
+ * 3. return in a form suitable as input to the function described in the\r
+ * preceding comment\r
+ */\r
+ /**\r
+ * \r
+ */\r
/**\r
- * create a new alignment of protein sequences\r
- * by an inframe translation of the provided NA sequences\r
+ * create a new alignment of protein sequences by an inframe translation of\r
+ * the provided NA sequences\r
+ * \r
* @param selection nucleotide sequences to translate\r
* @param seqstring seqstring[s] is the nucleotide string translated for\r
* selection[s]\r
* @param viscontigs passed unchanged to translateCodingRegion for every\r
* sequence\r
* @param gapCharacter gap character passed to translateCodingRegion\r
* @param annotations nucleotide alignment annotations handed to\r
* translateAlignedAnnotations once the peptides have been built\r
* @param aWidth width used to construct the AlignedCodonFrame\r
* @return alignment of the peptides for which translateCodingRegion returned\r
* non-null, or null when codons.aaWidth is 0 after translation\r
*/\r
- public static AlignmentI CdnaTranslate(SequenceI[] selection, String[] seqstring, int viscontigs[], char gapCharacter, \r
- AlignmentAnnotation[] annotations, int aWidth) {\r
+ public static AlignmentI CdnaTranslate(SequenceI[] selection,\r
+ String[] seqstring, int viscontigs[], char gapCharacter,\r
+ AlignmentAnnotation[] annotations, int aWidth)\r
+ {\r
+ AlignedCodonFrame codons = new AlignedCodonFrame(aWidth); // stores hash of\r
+ // subsequent\r
+ // positions for\r
+ // each codon\r
+ // start position\r
+ // in alignment\r
int s, sSize = selection.length;\r
- SequenceI [] newSeq = new SequenceI[sSize];\r
- int res, resSize;\r
- StringBuffer protein;\r
- String seq;\r
-\r
- int[][] codons = new int[aWidth][]; // stores hash of subsequent positions for each codon start position in alignment\r
-\r
- for (res=0;res<aWidth;res++)\r
- codons[res]=null;\r
- int aslen=0; // final width of aligned translated aa sequences\r
- for(s=0; s<sSize; s++)\r
+ Vector pepseqs = new Vector();\r
+ for (s = 0; s < sSize; s++)\r
{\r
- int vc,scontigs[]=new int[viscontigs.length];\r
-\r
- for (vc=0;vc<scontigs.length; vc+=2)\r
+ SequenceI newseq = translateCodingRegion(selection[s], seqstring[s],\r
+ viscontigs, codons, gapCharacter);\r
+ if (newseq != null)\r
{\r
- scontigs[vc]=selection[s].findPosition(viscontigs[vc]); // not from 1!\r
- scontigs[vc+1]=selection[s].findPosition(viscontigs[vc+1]-1); // exclusive\r
- if (scontigs[vc+1]==selection[s].getEnd())\r
- break;\r
+ pepseqs.addElement(newseq);\r
}\r
- if ((vc+2)<scontigs.length) {\r
- int t[] = new int[vc+2];\r
- System.arraycopy(scontigs, 0, t, 0, vc+2);\r
- scontigs = t;\r
- }\r
- protein = new StringBuffer();\r
- seq = seqstring[s].replace('U', 'T');\r
- char codon[]=new char[3];\r
- int cdp[]=new int[3],rf=0,gf=0,nend,npos;\r
- int aspos=0;\r
- resSize=0;\r
- for (npos=0,nend=seq.length(); npos<nend; npos++) {\r
- if (!jalview.util.Comparison.isGap(seq.charAt(npos))) { \r
- cdp[rf] = npos; // store position\r
- codon[rf++]=seq.charAt(npos); // store base\r
- }\r
- // filled an RF yet ?\r
- if (rf==3) {\r
- String aa = ResidueProperties.codonTranslate(new String(codon));\r
- rf=0;\r
- if(aa==null)\r
- aa=String.valueOf(gapCharacter);\r
- else {\r
- if(aa.equals("STOP"))\r
- {\r
- aa="X";\r
- }\r
- resSize++;\r
- }\r
- // insert/delete gaps prior to this codon - if necessary\r
- boolean findpos=true;\r
- while (findpos) \r
- {\r
- // first ensure that the codons array is long enough.\r
- if (codons.length<=aslen+1) {\r
- // probably never have to do this ?\r
- int[][] c = new int[codons.length+10][];\r
- for (int i=0; i<codons.length; i++) {\r
- c[i] = codons[i];\r
- codons[i]=null;\r
- }\r
- codons = c;\r
- }\r
- // now check to see if we place the aa at the current aspos in the protein alignment\r
- switch (Dna.compare_codonpos(cdp, codons[aspos])) \r
- {\r
- case -1:\r
- // this aa appears before the aligned codons at aspos - so shift them.\r
- aslen++;\r
- for (int sq=0;sq<s; sq++) {\r
- newSeq[sq].insertCharAt(aspos, gapCharacter);\r
- }\r
- System.arraycopy(codons, aspos, codons, aspos+1, aslen-aspos);\r
- codons[aspos]=null; // clear so new codon position can be marked.\r
- findpos=false;\r
- break;\r
- case +1:\r
- // this aa appears after the aligned codons at aspos, so prefix it with a gap\r
- aa = ""+gapCharacter+aa;\r
- aspos++;\r
- if (aspos>=aslen)\r
- aslen=aspos+1;\r
- break; // check the next position for alignment\r
- case 0:\r
- // codon aligns at aspos position.\r
- findpos = false;\r
- }\r
- }\r
- // codon aligns with all other sequence residues found at aspos\r
- protein.append(aa);\r
- if (codons[aspos]==null) \r
- {\r
- // mark this column as aligning to this aligned reading frame \r
- codons[aspos] = new int[] { cdp[0], cdp[1], cdp[2] };\r
- }\r
- aspos++;\r
- if (aspos>=aslen)\r
- aslen=aspos+1;\r
- }\r
- }\r
- if (resSize>0) \r
- {\r
- newSeq[s] = new Sequence(selection[s].getName(),\r
- protein.toString());\r
- if (rf!=0) \r
- {\r
- jalview.bin.Cache.log.debug("trimming contigs for incomplete terminal codon.");\r
- // trim contigs\r
- vc=scontigs.length-1;\r
- nend-=rf;\r
- // incomplete ORF could be broken over one or two visible contig intervals.\r
- while (vc>0 && scontigs[vc]>nend)\r
- {\r
- if (scontigs[vc-1]>nend) \r
- {\r
- vc-=2;\r
- } else {\r
- // correct last interval in list.\r
- scontigs[vc]=nend;\r
- }\r
- }\r
- if ((vc+2)<scontigs.length) {\r
- // truncate map list\r
- int t[] = new int[vc+1];\r
- System.arraycopy(scontigs,0,t,0,vc+1);\r
- scontigs=t;\r
- }\r
- }\r
- MapList map = new MapList(scontigs, new int[] { 1, resSize },3,1); // TODO: store mapping on newSeq for linked DNA/Protein viewing.\r
- }\r
- // register the mapping somehow\r
- // \r
}\r
- if (aslen==0)\r
+ if (codons.aaWidth == 0)\r
return null;\r
- AlignmentI al = new Alignment(newSeq);\r
- al.padGaps(); // ensure we look aligned.\r
+ SequenceI[] newseqs = new SequenceI[pepseqs.size()];\r
+ pepseqs.copyInto(newseqs);\r
+ AlignmentI al = new Alignment(newseqs);\r
+ al.padGaps(); // ensure we look aligned.\r
al.setDataset(null);\r
+ translateAlignedAnnotations(annotations, al, codons);\r
+ al.addCodonFrame(codons);\r
+ return al;\r
+ }\r
\r
-\r
- ////////////////////////////////\r
+ /**\r
+ * translate na alignment annotations onto translated amino acid alignment al\r
+ * using codon mapping codons. Auto-calculated annotations are skipped. For\r
+ * each aa column whose codon's first position is exactly two before its third\r
+ * position, the annotation at the codon's first position is copied; entries\r
+ * for other columns are left null.\r
+ * \r
+ * NOTE(review): the null test before the copy checks\r
+ * annotations[i].annotations[pos] twice; the second operand is redundant.\r
+ * \r
+ * @param annotations\r
+ * nucleotide alignment annotations to translate; may be null, in\r
+ * which case nothing is done\r
+ * @param al\r
+ * amino acid alignment that receives the translated annotation rows\r
+ * @param codons\r
+ * codon frame supplying the aa width, the per-column codon positions\r
+ * and the aa sequence for each annotated dna sequence\r
+ */\r
+ public static void translateAlignedAnnotations(\r
+ AlignmentAnnotation[] annotations, AlignmentI al,\r
+ AlignedCodonFrame codons)\r
+ {\r
+ // //////////////////////////////\r
// Copy annotations across\r
//\r
// Can only do this for columns with consecutive codons, or where\r
// annotation is sequence associated.\r
- \r
- int pos,a,aSize;\r
- if(annotations!=null)\r
+\r
+ int pos, a, aSize;\r
+ if (annotations != null)\r
{\r
for (int i = 0; i < annotations.length; i++)\r
{\r
// Skip any autogenerated annotation\r
- if (annotations[i].autoCalculated) {\r
+ if (annotations[i].autoCalculated)\r
+ {\r
continue;\r
}\r
- \r
- aSize = aslen; // aa alignment width.\r
- jalview.datamodel.Annotation[] anots = \r
- (annotations[i].annotations==null) \r
- ? null :\r
- new jalview.datamodel.Annotation[aSize];\r
- if (anots!=null)\r
+\r
+ aSize = codons.getaaWidth(); // aa alignment width.\r
+ jalview.datamodel.Annotation[] anots = (annotations[i].annotations == null) ? null\r
+ : new jalview.datamodel.Annotation[aSize];\r
+ if (anots != null)\r
{\r
for (a = 0; a < aSize; a++)\r
{\r
// process through codon map.\r
- if (codons[a]!=null && codons[a][0]==(codons[a][2]-2))\r
+ if (codons.codons[a] != null\r
+ && codons.codons[a][0] == (codons.codons[a][2] - 2))\r
{\r
- pos = codons[a][0];\r
+ pos = codons.codons[a][0];\r
if (annotations[i].annotations[pos] == null\r
|| annotations[i].annotations[pos] == null)\r
continue;\r
- \r
+ // We just take the annotation in the first base in the codon\r
anots[a] = new Annotation(annotations[i].annotations[pos]);\r
}\r
}\r
}\r
\r
- jalview.datamodel.AlignmentAnnotation aa\r
- = new jalview.datamodel.AlignmentAnnotation(annotations[i].label,\r
- annotations[i].description, anots);\r
+ jalview.datamodel.AlignmentAnnotation aa = new jalview.datamodel.AlignmentAnnotation(\r
+ annotations[i].label, annotations[i].description, anots);\r
if (annotations[i].hasScore)\r
{\r
aa.setScore(annotations[i].getScore());\r
}\r
+ if (annotations[i].sequenceRef != null)\r
+ {\r
+ SequenceI aaSeq = codons\r
+ .getAaForDnaSeq(annotations[i].sequenceRef);\r
+ if (aaSeq != null)\r
+ {\r
+ // aa.compactAnnotationArray(); // throw away alignment annotation\r
+ // positioning\r
+ aa.setSequenceRef(aaSeq);\r
+ aa.createSequenceMapping(aaSeq, aaSeq.getStart(), true); // rebuild\r
+ // mapping\r
+ aa.adjustForAlignment();\r
+ aaSeq.addAlignmentAnnotation(aa);\r
+ }\r
+\r
+ }\r
al.addAnnotation(aa);\r
}\r
}\r
- return al;\r
+ }\r
+\r
+ /**\r
+ * Translate a na sequence. Non-gap characters of seqstring are read three at\r
+ * a time (after replacing 'U' with 'T') and translated with\r
+ * ResidueProperties.codonTranslate; "STOP" is written as "X" and a codon with\r
+ * no translation is written as gapCharacter. Each residue is placed at the\r
+ * column of codons that matches its base positions, inserting gaps as needed.\r
+ * \r
+ * NOTE(review): after an incomplete terminal codon scontigs may be truncated,\r
+ * yet the later position-mapping loop still runs to viscontigs.length while\r
+ * indexing scontigs - confirm this cannot run past the end of the array.\r
+ * NOTE(review): features and the codon mapping are attached to newseq, but\r
+ * the value returned is newseq.deriveSequence() - confirm this is intended.\r
+ * \r
+ * @param selection\r
+ * dna sequence being translated; supplies the peptide name and the\r
+ * sequence positions used for the dna/protein map\r
+ * @param seqstring\r
+ * nucleotide string to translate\r
+ * @param viscontigs\r
+ * int pairs read as viscontigs[vc], viscontigs[vc + 1]; presumably\r
+ * the visible regions of the alignment - TODO confirm\r
+ * @param codons\r
+ * aligned codon frame, updated with this sequence's codon positions\r
+ * and dna/protein mapping\r
+ * @param gapCharacter\r
+ * character used for alignment gaps and untranslatable codons\r
+ * @return sequence ready to be added to alignment, or null if no residue was\r
+ * translated or no contigs remain after trimming.\r
+ */\r
+ public static SequenceI translateCodingRegion(SequenceI selection,\r
+ String seqstring, int[] viscontigs, AlignedCodonFrame codons,\r
+ char gapCharacter)\r
+ {\r
+ ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring\r
+ // intervals\r
+ int vc, scontigs[] = new int[viscontigs.length];\r
+ int npos = 0;\r
+ for (vc = 0; vc < viscontigs.length; vc += 2)\r
+ {\r
+ vismapping.addShift(npos, viscontigs[vc]);\r
+ scontigs[vc] = npos;\r
+ npos += viscontigs[vc + 1];\r
+ scontigs[vc + 1] = npos;\r
+ }\r
+\r
+ StringBuffer protein = new StringBuffer();\r
+ String seq = seqstring.replace('U', 'T');\r
+ char codon[] = new char[3];\r
+ int cdp[] = new int[3], rf = 0, lastnpos = 0, nend;\r
+ int aspos = 0;\r
+ int resSize = 0;\r
+ for (npos = 0, nend = seq.length(); npos < nend; npos++)\r
+ {\r
+ if (!jalview.util.Comparison.isGap(seq.charAt(npos)))\r
+ {\r
+ cdp[rf] = npos; // store position\r
+ codon[rf++] = seq.charAt(npos); // store base\r
+ }\r
+ // filled an RF yet ?\r
+ if (rf == 3)\r
+ {\r
+ String aa = ResidueProperties.codonTranslate(new String(codon));\r
+ rf = 0;\r
+ if (aa == null)\r
+ aa = String.valueOf(gapCharacter);\r
+ else\r
+ {\r
+ if (aa.equals("STOP"))\r
+ {\r
+ aa = "X";\r
+ }\r
+ resSize++;\r
+ }\r
+ // insert/delete gaps prior to this codon - if necessary\r
+ boolean findpos = true;\r
+ while (findpos)\r
+ {\r
+ // first ensure that the codons array is long enough.\r
+ codons.checkCodonFrameWidth(aspos);\r
+ // now check to see if we place the aa at the current aspos in the\r
+ // protein alignment\r
+ switch (Dna.compare_codonpos(cdp, codons.codons[aspos]))\r
+ {\r
+ case -1:\r
+ codons.insertAAGap(aspos, gapCharacter);\r
+ findpos = false;\r
+ break;\r
+ case +1:\r
+ // this aa appears after the aligned codons at aspos, so prefix it\r
+ // with a gap\r
+ aa = "" + gapCharacter + aa;\r
+ aspos++;\r
+ if (aspos >= codons.aaWidth)\r
+ codons.aaWidth = aspos + 1;\r
+ break; // check the next position for alignment\r
+ case 0:\r
+ // codon aligns at aspos position.\r
+ findpos = false;\r
+ }\r
+ }\r
+ // codon aligns with all other sequence residues found at aspos\r
+ protein.append(aa);\r
+ lastnpos = npos;\r
+ if (codons.codons[aspos] == null)\r
+ {\r
+ // mark this column as aligning to this aligned reading frame\r
+ codons.codons[aspos] = new int[]\r
+ { cdp[0], cdp[1], cdp[2] };\r
+ }\r
+ aspos++;\r
+ if (aspos >= codons.aaWidth)\r
+ codons.aaWidth = aspos + 1;\r
+ }\r
+ }\r
+ if (resSize > 0)\r
+ {\r
+ SequenceI newseq = new Sequence(selection.getName(), protein\r
+ .toString());\r
+ if (rf != 0)\r
+ {\r
+ jalview.bin.Cache.log\r
+ .debug("trimming contigs for incomplete terminal codon.");\r
+ // map and trim contigs to ORF region\r
+ vc = scontigs.length - 1;\r
+ lastnpos = vismapping.shift(lastnpos); // place npos in context of\r
+ // whole dna alignment (rather\r
+ // than visible contigs)\r
+ // incomplete ORF could be broken over one or two visible contig\r
+ // intervals.\r
+ while (vc >= 0 && scontigs[vc] > lastnpos)\r
+ {\r
+ if (vc > 0 && scontigs[vc - 1] > lastnpos)\r
+ {\r
+ vc -= 2;\r
+ }\r
+ else\r
+ {\r
+ // correct last interval in list.\r
+ scontigs[vc] = lastnpos;\r
+ }\r
+ }\r
+\r
+ if (vc > 0 && (vc + 1) < scontigs.length)\r
+ {\r
+ // truncate map list to just vc elements\r
+ int t[] = new int[vc + 1];\r
+ System.arraycopy(scontigs, 0, t, 0, vc + 1);\r
+ scontigs = t;\r
+ }\r
+ if (vc <= 0)\r
+ scontigs = null;\r
+ }\r
+ if (scontigs != null)\r
+ {\r
+ npos = 0;\r
+ // Find sequence position for scontigs positions on the nucleotide\r
+ // sequence string we were passed.\r
+ for (vc = 0; vc < viscontigs.length; vc += 2)\r
+ {\r
+ scontigs[vc] = selection.findPosition(scontigs[vc]); // not from 1!\r
+ npos += viscontigs[vc];\r
+ scontigs[vc + 1] = selection\r
+ .findPosition(npos + scontigs[vc + 1]); // exclusive\r
+ if (scontigs[vc + 1] == selection.getEnd())\r
+ break;\r
+ }\r
+ // trim trailing empty intervals.\r
+ if ((vc + 2) < scontigs.length)\r
+ {\r
+ int t[] = new int[vc + 2];\r
+ System.arraycopy(scontigs, 0, t, 0, vc + 2);\r
+ scontigs = t;\r
+ }\r
+\r
+ MapList map = new MapList(scontigs, new int[]\r
+ { 1, resSize }, 3, 1); // TODO: store mapping on newSeq for linked\r
+ // DNA/Protein viewing.\r
+ transferCodedFeatures(selection, newseq, map, null, null);\r
+ SequenceI rseq = newseq.deriveSequence(); // construct a dataset\r
+ // sequence for our new\r
+ // peptide, regardless.\r
+ // store a mapping (this actually stores a mapping between the dataset\r
+ // sequences for the two sequences\r
+ codons.addMap(selection, newseq, map);\r
+ return rseq;\r
+ }\r
+ }\r
+ // register the mapping somehow\r
+ // \r
+ return null;\r
+ }\r
+\r
+ /**\r
+ * Given a peptide newly translated from a dna sequence, copy over and set any\r
+ * features on the peptide from the DNA. If featureTypes is null, all features\r
+ * on the dna sequence are searched (rather than just the displayed ones), and\r
+ * similarly for featureGroups.\r
+ * \r
+ * NOTE(review): in the body shown here the innermost blocks are empty, so the\r
+ * method only selects candidate DB references and coding features and does\r
+ * not yet write anything to pep - confirm whether the transfer is still to\r
+ * be implemented.\r
+ * \r
+ * @param dna\r
+ * dna sequence whose dataset sequence features and DNA coding\r
+ * database references are examined; getDatasetSequence() is\r
+ * dereferenced without a null check\r
+ * @param pep\r
+ * peptide translated from dna (not referenced by the body shown)\r
+ * @param map\r
+ * dna to peptide mapping (referenced only in a commented-out test)\r
+ * @param featureTypes\r
+ * hash whose keys are the displayed feature type strings\r
+ * @param featureGroups\r
+ * hash where keys are feature groups and values are Boolean objects\r
+ * indicating if they are displayed.\r
+ */\r
+ private static void transferCodedFeatures(SequenceI dna, SequenceI pep,\r
+ MapList map, Hashtable featureTypes, Hashtable featureGroups)\r
+ {\r
+ SequenceFeature[] sf = dna.getDatasetSequence().getSequenceFeatures();\r
+ Boolean fgstate;\r
+ jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils\r
+ .selectRefs(dna.getDBRef(),\r
+ jalview.datamodel.DBRefSource.DNACODINGDBS);\r
+ if (dnarefs != null)\r
+ {\r
+ // intersect with pep\r
+ for (int d = 0; d < dnarefs.length; d++)\r
+ {\r
+ Mapping mp = dnarefs[d].getMap();\r
+ if (mp != null)\r
+ {\r
+ }\r
+ }\r
+ }\r
+ if (sf != null)\r
+ {\r
+ for (int f = 0; f < sf.length; f++)\r
+ {\r
+ fgstate = (featureGroups == null) ? null : ((Boolean) featureGroups\r
+ .get(sf[f].featureGroup));\r
+ if ((featureTypes == null || featureTypes.containsKey(sf[f]\r
+ .getType()))\r
+ && (fgstate == null || fgstate.booleanValue()))\r
+ {\r
+ if (FeatureProperties.isCodingFeature(null, sf[f].getType()))\r
+ {\r
+ // if (map.intersectsFrom(sf[f].begin, sf[f].end))\r
+ {\r
+\r
+ }\r
+ }\r
+ }\r
+ }\r
+ }\r
+ }\r
}\r