2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
\r
3 * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle
\r
5 * This file is part of Jalview.
\r
7 * Jalview is free software: you can redistribute it and/or
\r
8 * modify it under the terms of the GNU General Public License
\r
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
\r
11 * Jalview is distributed in the hope that it will be useful, but
\r
12 * WITHOUT ANY WARRANTY; without even the implied warranty
\r
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
\r
14 * PURPOSE. See the GNU General Public License for more details.
\r
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
\r
18 package jalview.analysis;
\r
20 import java.util.Enumeration;
\r
21 import java.util.Hashtable;
\r
22 import java.util.Vector;
\r
24 import jalview.datamodel.AlignedCodonFrame;
\r
25 import jalview.datamodel.Alignment;
\r
26 import jalview.datamodel.AlignmentAnnotation;
\r
27 import jalview.datamodel.AlignmentI;
\r
28 import jalview.datamodel.Annotation;
\r
29 import jalview.datamodel.ColumnSelection;
\r
30 import jalview.datamodel.DBRefEntry;
\r
31 import jalview.datamodel.FeatureProperties;
\r
32 import jalview.datamodel.Mapping;
\r
33 import jalview.datamodel.Sequence;
\r
34 import jalview.datamodel.SequenceFeature;
\r
35 import jalview.datamodel.SequenceI;
\r
36 import jalview.schemes.ResidueProperties;
\r
37 import jalview.util.MapList;
\r
38 import jalview.util.ShiftList;
\r
46 * @return -1 if cdp1 aligns before cdp2, 0 if in the same column or cdp2 is
\r
47 * null, +1 if after cdp2
\r
49 private static int compare_codonpos(int[] cdp1, int[] cdp2)
\r
52 || (cdp1[0] == cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2]))
\r
54 if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2])
\r
55 return -1; // one base in cdp1 precedes the corresponding base in the
\r
57 return 1; // one base in cdp1 appears after the corresponding base in the
\r
62 * DNA->mapped protein sequence alignment translation given set of sequences
\r
63 * 1. id distinct coding regions within selected region for each sequence 2.
\r
64 * generate peptides based on inframe (or given) translation or (optionally
\r
65 * and where specified) out of frame translations (annotated appropriately) 3.
\r
66 * align peptides based on codon alignment
\r
69 * id potential products from dna 1. search for distinct products within
\r
70 * selected region for each selected sequence 2. group by associated DB type.
\r
71 * 3. return as form for input into above function
\r
77 * create a new alignment of protein sequences by an inframe translation of
\r
78 * the provided NA sequences
\r
83 * @param gapCharacter
\r
84 * @param annotations
\r
87 * destination dataset for translated sequences and mappings
\r
90 public static AlignmentI CdnaTranslate(SequenceI[] selection,
\r
91 String[] seqstring, int viscontigs[], char gapCharacter,
\r
92 AlignmentAnnotation[] annotations, int aWidth, Alignment dataset)
\r
94 return CdnaTranslate(selection, seqstring, null, viscontigs,
\r
95 gapCharacter, annotations, aWidth, dataset);
\r
103 * - array of DbRefEntry objects from which exon map in seqstring is
\r
105 * @param viscontigs
\r
106 * @param gapCharacter
\r
107 * @param annotations
\r
112 public static AlignmentI CdnaTranslate(SequenceI[] selection,
\r
113 String[] seqstring, DBRefEntry[] product, int viscontigs[],
\r
114 char gapCharacter, AlignmentAnnotation[] annotations, int aWidth,
\r
117 AlignedCodonFrame codons = new AlignedCodonFrame(aWidth); // stores hash of
\r
123 int s, sSize = selection.length;
\r
124 Vector pepseqs = new Vector();
\r
125 for (s = 0; s < sSize; s++)
\r
127 SequenceI newseq = translateCodingRegion(selection[s], seqstring[s],
\r
128 viscontigs, codons, gapCharacter,
\r
129 (product != null) ? product[s] : null); // possibly anonymous
\r
131 if (newseq != null)
\r
133 pepseqs.addElement(newseq);
\r
134 SequenceI ds = newseq;
\r
135 while (ds.getDatasetSequence() != null)
\r
137 ds = ds.getDatasetSequence();
\r
139 dataset.addSequence(ds);
\r
142 if (codons.aaWidth == 0)
\r
144 SequenceI[] newseqs = new SequenceI[pepseqs.size()];
\r
145 pepseqs.copyInto(newseqs);
\r
146 AlignmentI al = new Alignment(newseqs);
\r
147 al.padGaps(); // ensure we look aligned.
\r
148 al.setDataset(dataset);
\r
149 translateAlignedAnnotations(annotations, al, codons);
\r
150 al.addCodonFrame(codons);
\r
155 * fake the collection of DbRefs with associated exon mappings to identify if
\r
156 * a translation would generate distinct product in the currently selected
\r
160 * @param viscontigs
\r
163 public static boolean canTranslate(SequenceI[] selection,
\r
166 for (int gd = 0; gd < selection.length; gd++)
\r
168 SequenceI dna = selection[gd];
\r
169 jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils
\r
170 .selectRefs(dna.getDBRef(),
\r
171 jalview.datamodel.DBRefSource.DNACODINGDBS);
\r
172 if (dnarefs != null)
\r
174 // intersect with pep
\r
175 // intersect with pep
\r
176 Vector mappedrefs = new Vector();
\r
177 DBRefEntry[] refs = dna.getDBRef();
\r
178 for (int d = 0; d < refs.length; d++)
\r
180 if (refs[d].getMap() != null && refs[d].getMap().getMap() != null
\r
181 && refs[d].getMap().getMap().getFromRatio() == 3
\r
182 && refs[d].getMap().getMap().getToRatio() == 1)
\r
184 mappedrefs.addElement(refs[d]); // add translated protein maps
\r
187 dnarefs = new DBRefEntry[mappedrefs.size()];
\r
188 mappedrefs.copyInto(dnarefs);
\r
189 for (int d = 0; d < dnarefs.length; d++)
\r
191 Mapping mp = dnarefs[d].getMap();
\r
194 for (int vc = 0; vc < viscontigs.length; vc += 2)
\r
196 int[] mpr = mp.locateMappedRange(viscontigs[vc],
\r
197 viscontigs[vc + 1]);
\r
211 * generate a set of translated protein products from annotated sequenceI
\r
214 * @param viscontigs
\r
215 * @param gapCharacter
\r
217 * destination dataset for translated sequences
\r
218 * @param annotations
\r
222 public static AlignmentI CdnaTranslate(SequenceI[] selection,
\r
223 int viscontigs[], char gapCharacter, Alignment dataset)
\r
226 Vector cdnasqs = new Vector();
\r
227 Vector cdnasqi = new Vector();
\r
228 Vector cdnaprod = new Vector();
\r
229 for (int gd = 0; gd < selection.length; gd++)
\r
231 SequenceI dna = selection[gd];
\r
232 jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils
\r
233 .selectRefs(dna.getDBRef(),
\r
234 jalview.datamodel.DBRefSource.DNACODINGDBS);
\r
235 if (dnarefs != null)
\r
237 // intersect with pep
\r
238 Vector mappedrefs = new Vector();
\r
239 DBRefEntry[] refs = dna.getDBRef();
\r
240 for (int d = 0; d < refs.length; d++)
\r
242 if (refs[d].getMap() != null && refs[d].getMap().getMap() != null
\r
243 && refs[d].getMap().getMap().getFromRatio() == 3
\r
244 && refs[d].getMap().getMap().getToRatio() == 1)
\r
246 mappedrefs.addElement(refs[d]); // add translated protein maps
\r
249 dnarefs = new DBRefEntry[mappedrefs.size()];
\r
250 mappedrefs.copyInto(dnarefs);
\r
251 for (int d = 0; d < dnarefs.length; d++)
\r
253 Mapping mp = dnarefs[d].getMap();
\r
254 StringBuffer sqstr = new StringBuffer();
\r
257 Mapping intersect = mp.intersectVisContigs(viscontigs);
\r
258 // generate seqstring for this sequence based on mapping
\r
260 if (sqstr.length() > alwidth)
\r
261 alwidth = sqstr.length();
\r
262 cdnasqs.addElement(sqstr.toString());
\r
263 cdnasqi.addElement(dna);
\r
264 cdnaprod.addElement(intersect);
\r
268 SequenceI[] cdna = new SequenceI[cdnasqs.size()];
\r
269 DBRefEntry[] prods = new DBRefEntry[cdnaprod.size()];
\r
270 String[] xons = new String[cdnasqs.size()];
\r
271 cdnasqs.copyInto(xons);
\r
272 cdnaprod.copyInto(prods);
\r
273 cdnasqi.copyInto(cdna);
\r
274 return CdnaTranslate(cdna, xons, prods, viscontigs, gapCharacter,
\r
275 null, alwidth, dataset);
\r
281 * translate na alignment annotations onto translated amino acid alignment al
\r
282 * using codon mapping codons
\r
284 * @param annotations
\r
288 public static void translateAlignedAnnotations(
\r
289 AlignmentAnnotation[] annotations, AlignmentI al,
\r
290 AlignedCodonFrame codons)
\r
292 // //////////////////////////////
\r
293 // Copy annotations across
\r
295 // Can only do this for columns with consecutive codons, or where
\r
296 // annotation is sequence associated.
\r
299 if (annotations != null)
\r
301 for (int i = 0; i < annotations.length; i++)
\r
303 // Skip any autogenerated annotation
\r
304 if (annotations[i].autoCalculated)
\r
309 aSize = codons.getaaWidth(); // aa alignment width.
\r
310 jalview.datamodel.Annotation[] anots = (annotations[i].annotations == null) ? null
\r
311 : new jalview.datamodel.Annotation[aSize];
\r
314 for (a = 0; a < aSize; a++)
\r
316 // process through codon map.
\r
317 if (codons.codons[a] != null
\r
318 && codons.codons[a][0] == (codons.codons[a][2] - 2))
\r
320 anots[a] = getCodonAnnotation(codons.codons[a],
\r
321 annotations[i].annotations);
\r
326 jalview.datamodel.AlignmentAnnotation aa = new jalview.datamodel.AlignmentAnnotation(
\r
327 annotations[i].label, annotations[i].description, anots);
\r
328 aa.graph = annotations[i].graph;
\r
329 aa.graphGroup = annotations[i].graphGroup;
\r
330 aa.graphHeight = annotations[i].graphHeight;
\r
331 if (annotations[i].getThreshold() != null)
\r
333 aa.setThreshold(new jalview.datamodel.GraphLine(annotations[i]
\r
336 if (annotations[i].hasScore)
\r
338 aa.setScore(annotations[i].getScore());
\r
340 if (annotations[i].sequenceRef != null)
\r
342 SequenceI aaSeq = codons
\r
343 .getAaForDnaSeq(annotations[i].sequenceRef);
\r
346 // aa.compactAnnotationArray(); // throw away alignment annotation
\r
348 aa.setSequenceRef(aaSeq);
\r
349 aa.createSequenceMapping(aaSeq, aaSeq.getStart(), true); // rebuild
\r
351 aa.adjustForAlignment();
\r
352 aaSeq.addAlignmentAnnotation(aa);
\r
356 al.addAnnotation(aa);
\r
361 private static Annotation getCodonAnnotation(int[] is,
\r
362 Annotation[] annotations)
\r
364 // Have a look at all the codon positions for annotation and put the first
\r
365 // one found into the translated annotation pos.
\r
367 Annotation annot = null;
\r
368 for (int p = 0; p < 3; p++)
\r
370 if (annotations[is[p]] != null)
\r
374 annot = new Annotation(annotations[is[p]]);
\r
380 Annotation cpy = new Annotation(annotations[is[p]]);
\r
381 if (annot.colour == null)
\r
383 annot.colour = cpy.colour;
\r
385 if (annot.description == null || annot.description.length() == 0)
\r
387 annot.description = cpy.description;
\r
389 if (annot.displayCharacter == null)
\r
391 annot.displayCharacter = cpy.displayCharacter;
\r
393 if (annot.secondaryStructure == 0)
\r
395 annot.secondaryStructure = cpy.secondaryStructure;
\r
397 annot.value += cpy.value;
\r
404 annot.value /= (float) contrib;
\r
410 * Translate a na sequence
\r
413 * sequence displayed under viscontigs visible columns
\r
415 * ORF read in some global alignment reference frame
\r
416 * @param viscontigs
\r
417 * mapping from global reference frame to visible seqstring ORF read
\r
419 * Definition of global ORF alignment reference frame
\r
420 * @param gapCharacter
\r
422 * @return sequence ready to be added to alignment.
\r
424 public static SequenceI translateCodingRegion(SequenceI selection,
\r
425 String seqstring, int[] viscontigs, AlignedCodonFrame codons,
\r
426 char gapCharacter, DBRefEntry product)
\r
428 Vector skip = new Vector();
\r
429 int skipint[] = null;
\r
430 ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring
\r
432 int vc, scontigs[] = new int[viscontigs.length];
\r
434 for (vc = 0; vc < viscontigs.length; vc += 2)
\r
438 vismapping.addShift(npos, viscontigs[vc]);
\r
443 vismapping.addShift(npos, viscontigs[vc] - viscontigs[vc - 1] + 1);
\r
445 scontigs[vc] = viscontigs[vc];
\r
446 scontigs[vc + 1] = viscontigs[vc + 1];
\r
449 StringBuffer protein = new StringBuffer();
\r
450 String seq = seqstring.replace('U', 'T');
\r
451 char codon[] = new char[3];
\r
452 int cdp[] = new int[3], rf = 0, lastnpos = 0, nend;
\r
455 for (npos = 0, nend = seq.length(); npos < nend; npos++)
\r
457 if (!jalview.util.Comparison.isGap(seq.charAt(npos)))
\r
459 cdp[rf] = npos; // store position
\r
460 codon[rf++] = seq.charAt(npos); // store base
\r
462 // filled an RF yet ?
\r
465 String aa = ResidueProperties.codonTranslate(new String(codon));
\r
469 aa = String.valueOf(gapCharacter);
\r
470 if (skipint == null)
\r
472 skipint = new int[]
\r
473 { cdp[0], cdp[2] };
\r
475 skipint[1] = cdp[2];
\r
479 if (skipint != null)
\r
482 skipint[0] = vismapping.shift(skipint[0]);
\r
483 skipint[1] = vismapping.shift(skipint[1]);
\r
484 for (vc = 0; vc < scontigs.length; vc += 2)
\r
486 if (scontigs[vc + 1] < skipint[0])
\r
490 if (scontigs[vc] <= skipint[0])
\r
492 if (skipint[0] == scontigs[vc])
\r
498 int[] t = new int[scontigs.length + 2];
\r
499 System.arraycopy(scontigs, 0, t, 0, vc - 1);
\r
500 // scontigs[vc]; //
\r
504 skip.addElement(skipint);
\r
507 if (aa.equals("STOP"))
\r
513 // insert/delete gaps prior to this codon - if necessary
\r
514 boolean findpos = true;
\r
517 // first ensure that the codons array is long enough.
\r
518 codons.checkCodonFrameWidth(aspos);
\r
519 // now check to see if we place the aa at the current aspos in the
\r
520 // protein alignment
\r
521 switch (Dna.compare_codonpos(cdp, codons.codons[aspos]))
\r
524 codons.insertAAGap(aspos, gapCharacter);
\r
528 // this aa appears after the aligned codons at aspos, so prefix it
\r
530 aa = "" + gapCharacter + aa;
\r
532 // if (aspos >= codons.aaWidth)
\r
533 // codons.aaWidth = aspos + 1;
\r
534 break; // check the next position for alignment
\r
536 // codon aligns at aspos position.
\r
540 // codon aligns with all other sequence residues found at aspos
\r
541 protein.append(aa);
\r
543 if (codons.codons[aspos] == null)
\r
545 // mark this column as aligning to this aligned reading frame
\r
546 codons.codons[aspos] = new int[]
\r
547 { cdp[0], cdp[1], cdp[2] };
\r
549 if (aspos >= codons.aaWidth)
\r
551 // update maximum alignment width
\r
552 // (we can do this without calling checkCodonFrameWidth because it was
\r
553 // already done above)
\r
554 codons.setAaWidth(aspos);
\r
556 // ready for next translated reading frame alignment position (if any)
\r
562 SequenceI newseq = new Sequence(selection.getName(),
\r
563 protein.toString());
\r
566 jalview.bin.Cache.log
\r
567 .debug("trimming contigs for incomplete terminal codon.");
\r
568 // map and trim contigs to ORF region
\r
569 vc = scontigs.length - 1;
\r
570 lastnpos = vismapping.shift(lastnpos); // place npos in context of
\r
571 // whole dna alignment (rather
\r
572 // than visible contigs)
\r
573 // incomplete ORF could be broken over one or two visible contig
\r
575 while (vc >= 0 && scontigs[vc] > lastnpos)
\r
577 if (vc > 0 && scontigs[vc - 1] > lastnpos)
\r
583 // correct last interval in list.
\r
584 scontigs[vc] = lastnpos;
\r
588 if (vc > 0 && (vc + 1) < scontigs.length)
\r
590 // truncate map list to just vc elements
\r
591 int t[] = new int[vc + 1];
\r
592 System.arraycopy(scontigs, 0, t, 0, vc + 1);
\r
598 if (scontigs != null)
\r
601 // map scontigs to actual sequence positions on selection
\r
602 for (vc = 0; vc < scontigs.length; vc += 2)
\r
604 scontigs[vc] = selection.findPosition(scontigs[vc]); // not from 1!
\r
605 scontigs[vc + 1] = selection.findPosition(scontigs[vc + 1]); // exclusive
\r
606 if (scontigs[vc + 1] == selection.getEnd())
\r
609 // trim trailing empty intervals.
\r
610 if ((vc + 2) < scontigs.length)
\r
612 int t[] = new int[vc + 2];
\r
613 System.arraycopy(scontigs, 0, t, 0, vc + 2);
\r
617 * delete intervals in scontigs which are not translated. 1. map skip
\r
618 * into sequence position intervals 2. truncate existing ranges and add
\r
619 * new ranges to exclude untranslated regions. if (skip.size()>0) {
\r
620 * Vector narange = new Vector(); for (vc=0; vc<scontigs.length; vc++) {
\r
621 * narange.addElement(new int[] {scontigs[vc]}); } int sint=0,iv[]; vc =
\r
622 * 0; while (sint<skip.size()) { skipint = (int[]) skip.elementAt(sint);
\r
623 * do { iv = (int[]) narange.elementAt(vc); if (iv[0]>=skipint[0] &&
\r
624 * iv[0]<=skipint[1]) { if (iv[0]==skipint[0]) { // delete beginning of
\r
625 * range } else { // truncate range and create new one if necessary iv =
\r
626 * (int[]) narange.elementAt(vc+1); if (iv[0]<=skipint[1]) { // truncate
\r
627 * range iv[0] = skipint[1]; } else { } } } else if (iv[0]<skipint[0]) {
\r
628 * iv = (int[]) narange.elementAt(vc+1); } } while (iv[0]) } }
\r
630 MapList map = new MapList(scontigs, new int[]
\r
631 { 1, resSize }, 3, 1);
\r
633 // update newseq as if it was generated as mapping from product
\r
635 if (product != null)
\r
637 newseq.setName(product.getSource() + "|"
\r
638 + product.getAccessionId());
\r
639 if (product.getMap() != null)
\r
641 // Mapping mp = product.getMap();
\r
642 // newseq.setStart(mp.getPosition(scontigs[0]));
\r
643 // newseq.setEnd(mp
\r
644 // .getPosition(scontigs[scontigs.length - 1]));
\r
647 transferCodedFeatures(selection, newseq, map, null, null);
\r
648 SequenceI rseq = newseq.deriveSequence(); // construct a dataset
\r
649 // sequence for our new
\r
650 // peptide, regardless.
\r
651 // store a mapping (this actually stores a mapping between the dataset
\r
652 // sequences for the two sequences
\r
653 codons.addMap(selection, rseq, map);
\r
657 // register the mapping somehow
\r
663 * Given a peptide newly translated from a dna sequence, copy over and set any
\r
664 * features on the peptide from the DNA. If featureTypes is null, all features
\r
665 * on the dna sequence are searched (rather than just the displayed ones), and
\r
666 * similarly for featureGroups.
\r
671 * @param featureTypes
\r
672 * hash who's keys are the displayed feature type strings
\r
673 * @param featureGroups
\r
674 * hash where keys are feature groups and values are Boolean objects
\r
675 * indicating if they are displayed.
\r
677 private static void transferCodedFeatures(SequenceI dna, SequenceI pep,
\r
678 MapList map, Hashtable featureTypes, Hashtable featureGroups)
\r
680 SequenceFeature[] sf = dna.getDatasetSequence().getSequenceFeatures();
\r
682 jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils
\r
683 .selectRefs(dna.getDBRef(),
\r
684 jalview.datamodel.DBRefSource.DNACODINGDBS);
\r
685 if (dnarefs != null)
\r
687 // intersect with pep
\r
688 for (int d = 0; d < dnarefs.length; d++)
\r
690 Mapping mp = dnarefs[d].getMap();
\r
698 for (int f = 0; f < sf.length; f++)
\r
700 fgstate = (featureGroups == null) ? null : ((Boolean) featureGroups
\r
701 .get(sf[f].featureGroup));
\r
702 if ((featureTypes == null || featureTypes.containsKey(sf[f]
\r
703 .getType())) && (fgstate == null || fgstate.booleanValue()))
\r
705 if (FeatureProperties.isCodingFeature(null, sf[f].getType()))
\r
707 // if (map.intersectsFrom(sf[f].begin, sf[f].end))
\r