1 package jalview.analysis;
\r
3 import java.util.Hashtable;
\r
4 import java.util.Vector;
\r
6 import jalview.datamodel.Alignment;
\r
7 import jalview.datamodel.AlignmentAnnotation;
\r
8 import jalview.datamodel.AlignmentI;
\r
9 import jalview.datamodel.Annotation;
\r
10 import jalview.datamodel.ColumnSelection;
\r
11 import jalview.datamodel.Sequence;
\r
12 import jalview.datamodel.SequenceFeature;
\r
13 import jalview.datamodel.SequenceI;
\r
14 import jalview.schemes.ResidueProperties;
\r
15 import jalview.util.MapList;
\r
22 * @return -1 if cdp1 aligns before cdp2, 0 if in the same column or cdp2 is null, +1 if after cdp2
\r
24 private static int compare_codonpos(int[] cdp1, int[] cdp2) {
\r
25 if (cdp2==null || (cdp1[0]==cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2]))
\r
27 if (cdp1[0]<cdp2[0] || cdp1[1]<cdp2[1] || cdp1[2]<cdp2[2])
\r
28 return -1; // one base in cdp1 precedes the corresponding base in the other codon
\r
29 return 1; // one base in cdp1 appears after the corresponding base in the other codon.
\r
32 * Create a new alignment of protein sequences
\r
33 * by an in-frame translation of the provided nucleic acid (NA) sequences
\r
37 * @param gapCharacter
\r
38 * @param annotations
\r
42 public static AlignmentI CdnaTranslate(SequenceI[] selection, String[] seqstring, int viscontigs[], char gapCharacter,
\r
43 AlignmentAnnotation[] annotations, int aWidth) {
\r
44 int s, sSize = selection.length;
\r
45 SequenceI [] newSeq = new SequenceI[sSize];
\r
47 StringBuffer protein;
\r
50 int[][] codons = new int[aWidth][]; // stores hash of subsequent positions for each codon start position in alignment
\r
52 for (res=0;res<aWidth;res++)
\r
54 int aslen=0; // final width of aligned translated aa sequences
\r
55 for(s=0; s<sSize; s++)
\r
57 int vc,scontigs[]=new int[viscontigs.length];
\r
59 for (vc=0;vc<scontigs.length; vc+=2)
\r
61 scontigs[vc]=selection[s].findPosition(viscontigs[vc]); // not from 1!
\r
62 scontigs[vc+1]=selection[s].findPosition(viscontigs[vc+1]-1); // exclusive
\r
63 if (scontigs[vc+1]==selection[s].getEnd())
\r
66 if ((vc+2)<scontigs.length) {
\r
67 int t[] = new int[vc+2];
\r
68 System.arraycopy(scontigs, 0, t, 0, vc+2);
\r
71 protein = new StringBuffer();
\r
72 seq = seqstring[s].replace('U', 'T');
\r
73 char codon[]=new char[3];
\r
74 int cdp[]=new int[3],rf=0,gf=0,nend,npos;
\r
77 for (npos=0,nend=seq.length(); npos<nend; npos++) {
\r
78 if (!jalview.util.Comparison.isGap(seq.charAt(npos))) {
\r
79 cdp[rf] = npos; // store position
\r
80 codon[rf++]=seq.charAt(npos); // store base
\r
82 // filled an RF yet ?
\r
84 String aa = ResidueProperties.codonTranslate(new String(codon));
\r
87 aa=String.valueOf(gapCharacter);
\r
89 if(aa.equals("STOP"))
\r
95 // insert/delete gaps prior to this codon - if necessary
\r
96 boolean findpos=true;
\r
99 // first ensure that the codons array is long enough.
\r
100 if (codons.length<=aslen+1) {
\r
101 // probably never have to do this ?
\r
102 int[][] c = new int[codons.length+10][];
\r
103 for (int i=0; i<codons.length; i++) {
\r
109 // now check to see if we place the aa at the current aspos in the protein alignment
\r
110 switch (Dna.compare_codonpos(cdp, codons[aspos]))
\r
113 // this aa appears before the aligned codons at aspos - so shift them.
\r
115 for (int sq=0;sq<s; sq++) {
\r
116 newSeq[sq].insertCharAt(aspos, gapCharacter);
\r
118 System.arraycopy(codons, aspos, codons, aspos+1, aslen-aspos);
\r
119 codons[aspos]=null; // clear so new codon position can be marked.
\r
123 // this aa appears after the aligned codons at aspos, so prefix it with a gap
\r
124 aa = ""+gapCharacter+aa;
\r
128 break; // check the next position for alignment
\r
130 // codon aligns at aspos position.
\r
134 // codon aligns with all other sequence residues found at aspos
\r
135 protein.append(aa);
\r
136 if (codons[aspos]==null)
\r
138 // mark this column as aligning to this aligned reading frame
\r
139 codons[aspos] = new int[] { cdp[0], cdp[1], cdp[2] };
\r
148 newSeq[s] = new Sequence(selection[s].getName(),
\r
149 protein.toString());
\r
152 jalview.bin.Cache.log.debug("trimming contigs for incomplete terminal codon.");
\r
154 vc=scontigs.length-1;
\r
156 // incomplete ORF could be broken over one or two visible contig intervals.
\r
157 while (vc>0 && scontigs[vc]>nend)
\r
159 if (scontigs[vc-1]>nend)
\r
163 // correct last interval in list.
\r
167 if ((vc+2)<scontigs.length) {
\r
168 // truncate map list
\r
169 int t[] = new int[vc+1];
\r
170 System.arraycopy(scontigs,0,t,0,vc+1);
\r
174 MapList map = new MapList(scontigs, new int[] { 1, resSize },3,1); // TODO: store mapping on newSeq for linked DNA/Protein viewing.
\r
176 // register the mapping somehow
\r
181 AlignmentI al = new Alignment(newSeq);
\r
182 al.padGaps(); // ensure we look aligned.
\r
183 al.setDataset(null);
\r
186 ////////////////////////////////
\r
187 // Copy annotations across
\r
189 // Can only do this for columns with consecutive codons, or where
\r
190 // annotation is sequence associated.
\r
193 if(annotations!=null)
\r
195 for (int i = 0; i < annotations.length; i++)
\r
197 // Skip any autogenerated annotation
\r
198 if (annotations[i].autoCalculated) {
\r
202 aSize = aslen; // aa alignment width.
\r
203 jalview.datamodel.Annotation[] anots =
\r
204 (annotations[i].annotations==null)
\r
206 new jalview.datamodel.Annotation[aSize];
\r
209 for (a = 0; a < aSize; a++)
\r
211 // process through codon map.
\r
212 if (codons[a]!=null && codons[a][0]==(codons[a][2]-2))
\r
214 pos = codons[a][0];
\r
215 if (annotations[i].annotations[pos] == null
\r
216 || annotations[i].annotations[pos] == null)
\r
219 anots[a] = new Annotation(annotations[i].annotations[pos]);
\r
224 jalview.datamodel.AlignmentAnnotation aa
\r
225 = new jalview.datamodel.AlignmentAnnotation(annotations[i].label,
\r
226 annotations[i].description, anots);
\r
227 if (annotations[i].hasScore)
\r
229 aa.setScore(annotations[i].getScore());
\r
231 al.addAnnotation(aa);
\r