2 * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.1)
3 * Copyright (C) 2014 The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
11 * Jalview is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
14 * PURPOSE. See the GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
17 * The Jalview Authors are detailed in the 'AUTHORS' file.
19 package jalview.analysis;
21 import java.util.ArrayList;
22 import java.util.Hashtable;
23 import java.util.Vector;
25 import jalview.datamodel.AlignedCodonFrame;
26 import jalview.datamodel.Alignment;
27 import jalview.datamodel.AlignmentAnnotation;
28 import jalview.datamodel.AlignmentI;
29 import jalview.datamodel.Annotation;
30 import jalview.datamodel.DBRefEntry;
31 import jalview.datamodel.FeatureProperties;
32 import jalview.datamodel.Mapping;
33 import jalview.datamodel.Sequence;
34 import jalview.datamodel.SequenceFeature;
35 import jalview.datamodel.SequenceI;
36 import jalview.schemes.ResidueProperties;
37 import jalview.util.MapList;
38 import jalview.util.ShiftList;
46 * @return -1 if cdp1 aligns before cdp2, 0 if in the same column or cdp2 is
47 * null, +1 if after cdp2
49 private static int compare_codonpos(int[] cdp1, int[] cdp2)
52 || (cdp1[0] == cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2]))
54 if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2])
55 return -1; // one base in cdp1 precedes the corresponding base in the
57 return 1; // one base in cdp1 appears after the corresponding base in the
62 * DNA->mapped protein sequence alignment translation given set of sequences
63 * 1. id distinct coding regions within selected region for each sequence 2.
64 * generate peptides based on inframe (or given) translation or (optionally
65 * and where specified) out of frame translations (annotated appropriately) 3.
66 * align peptides based on codon alignment
69 * id potential products from dna 1. search for distinct products within
70 * selected region for each selected sequence 2. group by associated DB type.
71 * 3. return as form for input into above function
77 * create a new alignment of protein sequences by an inframe translation of
78 * the provided NA sequences
87 * destination dataset for translated sequences and mappings
90 public static AlignmentI CdnaTranslate(SequenceI[] selection,
91 String[] seqstring, int viscontigs[], char gapCharacter,
92 AlignmentAnnotation[] annotations, int aWidth, Alignment dataset)
94 return CdnaTranslate(selection, seqstring, null, viscontigs,
95 gapCharacter, annotations, aWidth, dataset);
103 * - array of DbRefEntry objects from which exon map in seqstring is
106 * @param gapCharacter
112 public static AlignmentI CdnaTranslate(SequenceI[] selection,
113 String[] seqstring, DBRefEntry[] product, int viscontigs[],
114 char gapCharacter, AlignmentAnnotation[] annotations, int aWidth,
117 AlignedCodonFrame codons = new AlignedCodonFrame(aWidth); // stores hash of
123 int s, sSize = selection.length;
124 Vector pepseqs = new Vector();
125 for (s = 0; s < sSize; s++)
127 SequenceI newseq = translateCodingRegion(selection[s], seqstring[s],
128 viscontigs, codons, gapCharacter,
129 (product != null) ? product[s] : null, false); // possibly anonymous
133 pepseqs.addElement(newseq);
134 SequenceI ds = newseq;
137 while (ds.getDatasetSequence() != null)
139 ds = ds.getDatasetSequence();
141 dataset.addSequence(ds);
145 if (codons.aaWidth == 0)
147 SequenceI[] newseqs = new SequenceI[pepseqs.size()];
148 pepseqs.copyInto(newseqs);
149 AlignmentI al = new Alignment(newseqs);
150 al.padGaps(); // ensure we look aligned.
151 al.setDataset(dataset);
152 translateAlignedAnnotations(annotations, al, codons);
153 al.addCodonFrame(codons);
158 * fake the collection of DbRefs with associated exon mappings to identify if
159 * a translation would generate distinct product in the currently selected
166 public static boolean canTranslate(SequenceI[] selection,
169 for (int gd = 0; gd < selection.length; gd++)
171 SequenceI dna = selection[gd];
172 jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils
173 .selectRefs(dna.getDBRef(),
174 jalview.datamodel.DBRefSource.DNACODINGDBS);
177 // intersect with pep
178 // intersect with pep
179 Vector mappedrefs = new Vector();
180 DBRefEntry[] refs = dna.getDBRef();
181 for (int d = 0; d < refs.length; d++)
183 if (refs[d].getMap() != null && refs[d].getMap().getMap() != null
184 && refs[d].getMap().getMap().getFromRatio() == 3
185 && refs[d].getMap().getMap().getToRatio() == 1)
187 mappedrefs.addElement(refs[d]); // add translated protein maps
190 dnarefs = new DBRefEntry[mappedrefs.size()];
191 mappedrefs.copyInto(dnarefs);
192 for (int d = 0; d < dnarefs.length; d++)
194 Mapping mp = dnarefs[d].getMap();
197 for (int vc = 0; vc < viscontigs.length; vc += 2)
199 int[] mpr = mp.locateMappedRange(viscontigs[vc],
214 * generate a set of translated protein products from annotated sequenceI
218 * @param gapCharacter
220 * destination dataset for translated sequences
225 public static AlignmentI CdnaTranslate(SequenceI[] selection,
226 int viscontigs[], char gapCharacter, Alignment dataset)
229 Vector cdnasqs = new Vector();
230 Vector cdnasqi = new Vector();
231 Vector cdnaprod = new Vector();
232 for (int gd = 0; gd < selection.length; gd++)
234 SequenceI dna = selection[gd];
235 jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils
236 .selectRefs(dna.getDBRef(),
237 jalview.datamodel.DBRefSource.DNACODINGDBS);
240 // intersect with pep
241 Vector mappedrefs = new Vector();
242 DBRefEntry[] refs = dna.getDBRef();
243 for (int d = 0; d < refs.length; d++)
245 if (refs[d].getMap() != null && refs[d].getMap().getMap() != null
246 && refs[d].getMap().getMap().getFromRatio() == 3
247 && refs[d].getMap().getMap().getToRatio() == 1)
249 mappedrefs.addElement(refs[d]); // add translated protein maps
252 dnarefs = new DBRefEntry[mappedrefs.size()];
253 mappedrefs.copyInto(dnarefs);
254 for (int d = 0; d < dnarefs.length; d++)
256 Mapping mp = dnarefs[d].getMap();
257 StringBuffer sqstr = new StringBuffer();
260 Mapping intersect = mp.intersectVisContigs(viscontigs);
261 // generate seqstring for this sequence based on mapping
263 if (sqstr.length() > alwidth)
264 alwidth = sqstr.length();
265 cdnasqs.addElement(sqstr.toString());
266 cdnasqi.addElement(dna);
267 cdnaprod.addElement(intersect);
271 SequenceI[] cdna = new SequenceI[cdnasqs.size()];
272 DBRefEntry[] prods = new DBRefEntry[cdnaprod.size()];
273 String[] xons = new String[cdnasqs.size()];
274 cdnasqs.copyInto(xons);
275 cdnaprod.copyInto(prods);
276 cdnasqi.copyInto(cdna);
277 return CdnaTranslate(cdna, xons, prods, viscontigs, gapCharacter,
278 null, alwidth, dataset);
284 * translate na alignment annotations onto translated amino acid alignment al
285 * using codon mapping codons
291 public static void translateAlignedAnnotations(
292 AlignmentAnnotation[] annotations, AlignmentI al,
293 AlignedCodonFrame codons)
295 // //////////////////////////////
296 // Copy annotations across
298 // Can only do this for columns with consecutive codons, or where
299 // annotation is sequence associated.
302 if (annotations != null)
304 for (int i = 0; i < annotations.length; i++)
306 // Skip any autogenerated annotation
307 if (annotations[i].autoCalculated)
312 aSize = codons.getaaWidth(); // aa alignment width.
313 jalview.datamodel.Annotation[] anots = (annotations[i].annotations == null) ? null
314 : new jalview.datamodel.Annotation[aSize];
317 for (a = 0; a < aSize; a++)
319 // process through codon map.
320 if (codons.codons[a] != null
321 && codons.codons[a][0] == (codons.codons[a][2] - 2))
323 anots[a] = getCodonAnnotation(codons.codons[a],
324 annotations[i].annotations);
329 jalview.datamodel.AlignmentAnnotation aa = new jalview.datamodel.AlignmentAnnotation(
330 annotations[i].label, annotations[i].description, anots);
331 aa.graph = annotations[i].graph;
332 aa.graphGroup = annotations[i].graphGroup;
333 aa.graphHeight = annotations[i].graphHeight;
334 if (annotations[i].getThreshold() != null)
336 aa.setThreshold(new jalview.datamodel.GraphLine(annotations[i]
339 if (annotations[i].hasScore)
341 aa.setScore(annotations[i].getScore());
343 if (annotations[i].sequenceRef != null)
345 SequenceI aaSeq = codons
346 .getAaForDnaSeq(annotations[i].sequenceRef);
349 // aa.compactAnnotationArray(); // throw away alignment annotation
351 aa.setSequenceRef(aaSeq);
352 aa.createSequenceMapping(aaSeq, aaSeq.getStart(), true); // rebuild
354 aa.adjustForAlignment();
355 aaSeq.addAlignmentAnnotation(aa);
359 al.addAnnotation(aa);
364 private static Annotation getCodonAnnotation(int[] is,
365 Annotation[] annotations)
367 // Have a look at all the codon positions for annotation and put the first
368 // one found into the translated annotation pos.
370 Annotation annot = null;
371 for (int p = 0; p < 3; p++)
373 if (annotations[is[p]] != null)
377 annot = new Annotation(annotations[is[p]]);
383 Annotation cpy = new Annotation(annotations[is[p]]);
384 if (annot.colour == null)
386 annot.colour = cpy.colour;
388 if (annot.description == null || annot.description.length() == 0)
390 annot.description = cpy.description;
392 if (annot.displayCharacter == null)
394 annot.displayCharacter = cpy.displayCharacter;
396 if (annot.secondaryStructure == 0)
398 annot.secondaryStructure = cpy.secondaryStructure;
400 annot.value += cpy.value;
407 annot.value /= (float) contrib;
413 * Translate a na sequence
416 * sequence displayed under viscontigs visible columns
418 * ORF read in some global alignment reference frame
420 * mapping from global reference frame to visible seqstring ORF read
422 * Definition of global ORF alignment reference frame
423 * @param gapCharacter
424 * @return sequence ready to be added to alignment.
425 * @deprecated Use {@link #translateCodingRegion(SequenceI,String,int[],AlignedCodonFrame,char,DBRefEntry,boolean)} instead
427 public static SequenceI translateCodingRegion(SequenceI selection,
428 String seqstring, int[] viscontigs, AlignedCodonFrame codons,
429 char gapCharacter, DBRefEntry product)
431 return translateCodingRegion(selection, seqstring, viscontigs, codons,
432 gapCharacter, product, false);
436 * Translate a na sequence
439 * sequence displayed under viscontigs visible columns
441 * ORF read in some global alignment reference frame
443 * mapping from global reference frame to visible seqstring ORF read
445 * Definition of global ORF alignment reference frame
446 * @param gapCharacter
447 * @param starForStop when true stop codons will translate as '*', otherwise as 'X'
448 * @return sequence ready to be added to alignment.
450 public static SequenceI translateCodingRegion(SequenceI selection,
451 String seqstring, int[] viscontigs, AlignedCodonFrame codons,
452 char gapCharacter, DBRefEntry product, final boolean starForStop)
454 java.util.List skip = new ArrayList();
455 int skipint[] = null;
456 ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring
458 int vc, scontigs[] = new int[viscontigs.length];
460 for (vc = 0; vc < viscontigs.length; vc += 2)
464 vismapping.addShift(npos, viscontigs[vc]);
469 vismapping.addShift(npos, viscontigs[vc] - viscontigs[vc - 1] + 1);
471 scontigs[vc] = viscontigs[vc];
472 scontigs[vc + 1] = viscontigs[vc + 1];
475 StringBuffer protein = new StringBuffer();
476 String seq = seqstring.replace('U', 'T');
477 char codon[] = new char[3];
478 int cdp[] = new int[3], rf = 0, lastnpos = 0, nend;
481 for (npos = 0, nend = seq.length(); npos < nend; npos++)
483 if (!jalview.util.Comparison.isGap(seq.charAt(npos)))
485 cdp[rf] = npos; // store position
486 codon[rf++] = seq.charAt(npos); // store base
488 // filled an RF yet ?
491 String aa = ResidueProperties.codonTranslate(new String(codon));
495 aa = String.valueOf(gapCharacter);
508 skipint[0] = vismapping.shift(skipint[0]);
509 skipint[1] = vismapping.shift(skipint[1]);
510 for (vc = 0; vc < scontigs.length; )
512 if (scontigs[vc + 1] < skipint[0])
514 // before skipint starts
518 if (scontigs[vc]>skipint[1])
520 // finished editing so
523 // Edit the contig list to include the skipped region which did not translate
525 // from : s1 e1 s2 e2 s3 e3
526 // to s: s1 e1 s2 k0 k1 e2 s3 e3
527 // list increases by one unless one boundary (s2==k0 or e2==k1) matches, and decreases by one if skipint intersects whole visible contig
528 if (scontigs[vc] <= skipint[0])
530 if (skipint[0] == scontigs[vc])
532 // skipint at start of contig
533 // shift the start of this contig
534 if (scontigs[vc + 1] > skipint[1])
536 scontigs[vc] = skipint[1];
541 if (scontigs[vc+1]==skipint[1])
544 t = new int[scontigs.length - 2];
547 System.arraycopy(scontigs, 0, t, 0, vc - 1);
549 if (vc + 2 < t.length)
551 System.arraycopy(scontigs, vc + 2, t, vc, t.length
556 // truncate contig to before the skipint region
557 scontigs[vc+1] = skipint[0]-1;
564 // scontig starts before start of skipint
565 if (scontigs[vc+1]<skipint[1]) {
566 // skipint truncates end of scontig
567 scontigs[vc+1] = skipint[0]-1;
570 // divide region to new contigs
571 t = new int[scontigs.length + 2];
572 System.arraycopy(scontigs, 0, t, 0, vc +1);
573 t[vc+1] = skipint[0];
574 t[vc+2] = skipint[1];
575 System.arraycopy(scontigs, vc + 1, t, vc+3, scontigs.length-(vc+1));
585 if (aa.equals("STOP"))
587 aa = starForStop ? "*" : "X";
591 // insert/delete gaps prior to this codon - if necessary
592 boolean findpos = true;
595 // first ensure that the codons array is long enough.
596 codons.checkCodonFrameWidth(aspos);
597 // now check to see if we place the aa at the current aspos in the
599 switch (Dna.compare_codonpos(cdp, codons.codons[aspos]))
602 codons.insertAAGap(aspos, gapCharacter);
606 // this aa appears after the aligned codons at aspos, so prefix it
608 aa = "" + gapCharacter + aa;
610 // if (aspos >= codons.aaWidth)
611 // codons.aaWidth = aspos + 1;
612 break; // check the next position for alignment
614 // codon aligns at aspos position.
618 // codon aligns with all other sequence residues found at aspos
621 if (codons.codons[aspos] == null)
623 // mark this column as aligning to this aligned reading frame
624 codons.codons[aspos] = new int[]
625 { cdp[0], cdp[1], cdp[2] };
627 if (aspos >= codons.aaWidth)
629 // update maximum alignment width
630 // (we can do this without calling checkCodonFrameWidth because it was
631 // already done above)
632 codons.setAaWidth(aspos);
634 // ready for next translated reading frame alignment position (if any)
640 SequenceI newseq = new Sequence(selection.getName(),
644 if (jalview.bin.Cache.log!=null) {
645 jalview.bin.Cache.log.debug("trimming contigs for incomplete terminal codon.");
647 System.err.println("trimming contigs for incomplete terminal codon.");
649 // map and trim contigs to ORF region
650 vc = scontigs.length - 1;
651 lastnpos = vismapping.shift(lastnpos); // place npos in context of
652 // whole dna alignment (rather
653 // than visible contigs)
654 // incomplete ORF could be broken over one or two visible contig
656 while (vc >= 0 && scontigs[vc] > lastnpos)
658 if (vc > 0 && scontigs[vc - 1] > lastnpos)
664 // correct last interval in list.
665 scontigs[vc] = lastnpos;
669 if (vc > 0 && (vc + 1) < scontigs.length)
671 // truncate map list to just vc elements
672 int t[] = new int[vc + 1];
673 System.arraycopy(scontigs, 0, t, 0, vc + 1);
679 if (scontigs != null)
682 // map scontigs to actual sequence positions on selection
683 for (vc = 0; vc < scontigs.length; vc += 2)
685 scontigs[vc] = selection.findPosition(scontigs[vc]); // not from 1!
686 scontigs[vc + 1] = selection.findPosition(scontigs[vc + 1]); // exclusive
687 if (scontigs[vc + 1] == selection.getEnd())
690 // trim trailing empty intervals.
691 if ((vc + 2) < scontigs.length)
693 int t[] = new int[vc + 2];
694 System.arraycopy(scontigs, 0, t, 0, vc + 2);
698 * delete intervals in scontigs which are not translated. 1. map skip
699 * into sequence position intervals 2. truncate existing ranges and add
700 * new ranges to exclude untranslated regions. if (skip.size()>0) {
701 * Vector narange = new Vector(); for (vc=0; vc<scontigs.length; vc++) {
702 * narange.addElement(new int[] {scontigs[vc]}); } int sint=0,iv[]; vc =
703 * 0; while (sint<skip.size()) { skipint = (int[]) skip.elementAt(sint);
704 * do { iv = (int[]) narange.elementAt(vc); if (iv[0]>=skipint[0] &&
705 * iv[0]<=skipint[1]) { if (iv[0]==skipint[0]) { // delete beginning of
706 * range } else { // truncate range and create new one if necessary iv =
707 * (int[]) narange.elementAt(vc+1); if (iv[0]<=skipint[1]) { // truncate
708 * range iv[0] = skipint[1]; } else { } } } else if (iv[0]<skipint[0]) {
709 * iv = (int[]) narange.elementAt(vc+1); } } while (iv[0]) } }
711 MapList map = new MapList(scontigs, new int[]
712 { 1, resSize }, 3, 1);
714 // update newseq as if it was generated as mapping from product
718 newseq.setName(product.getSource() + "|"
719 + product.getAccessionId());
720 if (product.getMap() != null)
722 // Mapping mp = product.getMap();
723 // newseq.setStart(mp.getPosition(scontigs[0]));
725 // .getPosition(scontigs[scontigs.length - 1]));
728 transferCodedFeatures(selection, newseq, map, null, null);
729 SequenceI rseq = newseq.deriveSequence(); // construct a dataset
730 // sequence for our new
731 // peptide, regardless.
732 // store a mapping (this actually stores a mapping between the dataset
733 // sequences for the two sequences
734 codons.addMap(selection, rseq, map);
738 // register the mapping somehow
744 * Given a peptide newly translated from a dna sequence, copy over and set any
745 * features on the peptide from the DNA. If featureTypes is null, all features
746 * on the dna sequence are searched (rather than just the displayed ones), and
747 * similarly for featureGroups.
752 * @param featureTypes
753 * hash who's keys are the displayed feature type strings
754 * @param featureGroups
755 * hash where keys are feature groups and values are Boolean objects
756 * indicating if they are displayed.
758 private static void transferCodedFeatures(SequenceI dna, SequenceI pep,
759 MapList map, Hashtable featureTypes, Hashtable featureGroups)
761 SequenceFeature[] sf = (dna.getDatasetSequence()!=null ? dna.getDatasetSequence() : dna).getSequenceFeatures();
763 jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils
764 .selectRefs(dna.getDBRef(),
765 jalview.datamodel.DBRefSource.DNACODINGDBS);
768 // intersect with pep
769 for (int d = 0; d < dnarefs.length; d++)
771 Mapping mp = dnarefs[d].getMap();
779 for (int f = 0; f < sf.length; f++)
781 fgstate = (featureGroups == null) ? null : ((Boolean) featureGroups
782 .get(sf[f].featureGroup));
783 if ((featureTypes == null || featureTypes.containsKey(sf[f]
784 .getType())) && (fgstate == null || fgstate.booleanValue()))
786 if (FeatureProperties.isCodingFeature(null, sf[f].getType()))
788 // if (map.intersectsFrom(sf[f].begin, sf[f].end))