X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FDna.java;h=16cae4945920e174d6bf1f8c4966c915a0c923e9;hb=47168f025aefdaa044802bd5f8f510ffe43a4808;hp=5096ae82bf269fd24d797afd08b1970f20c858bc;hpb=6951479b7dc56bd022ce2c370b7f88b5571004fd;p=jalview.git diff --git a/src/jalview/analysis/Dna.java b/src/jalview/analysis/Dna.java index 5096ae8..16cae49 100644 --- a/src/jalview/analysis/Dna.java +++ b/src/jalview/analysis/Dna.java @@ -1,235 +1,799 @@ -package jalview.analysis; - -import java.util.Hashtable; -import java.util.Vector; - -import jalview.datamodel.Alignment; -import jalview.datamodel.AlignmentAnnotation; -import jalview.datamodel.AlignmentI; -import jalview.datamodel.Annotation; -import jalview.datamodel.ColumnSelection; -import jalview.datamodel.Sequence; -import jalview.datamodel.SequenceFeature; -import jalview.datamodel.SequenceI; -import jalview.schemes.ResidueProperties; -import jalview.util.MapList; - -public class Dna { - /** - * - * @param cdp1 - * @param cdp2 - * @return -1 if cdp1 aligns before cdp2, 0 if in the same column or cdp2 is null, +1 if after cdp2 - */ - private static int compare_codonpos(int[] cdp1, int[] cdp2) { - if (cdp2==null || (cdp1[0]==cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2])) - return 0; - if (cdp1[0]=aslen) - aslen=aspos+1; - break; // check the next position for alignment - case 0: - // codon aligns at aspos position. - findpos = false; - } - } - // codon aligns with all other sequence residues found at aspos - protein.append(aa); - if (codons[aspos]==null) - { - // mark this column as aligning to this aligned reading frame - codons[aspos] = new int[] { cdp[0], cdp[1], cdp[2] }; - } - aspos++; - if (aspos>=aslen) - aslen=aspos+1; - } - } - if (resSize>0) - { - newSeq[s] = new Sequence(selection[s].getName(), - protein.toString()); - if (rf!=0) - { - jalview.bin.Cache.log.debug("trimming contigs for incomplete terminal codon."); - // trim contigs - vc=scontigs.length-1; - nend-=rf; - // incomplete ORF could be broken over one or two visible contig intervals. - while (vc>0 && scontigs[vc]>nend) - { - if (scontigs[vc-1]>nend) - { - vc-=2; - } else { - // correct last interval in list. - scontigs[vc]=nend; - } - } - if ((vc+2). + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ +package jalview.analysis; + +import java.util.ArrayList; +import java.util.Hashtable; +import java.util.Vector; + +import jalview.datamodel.AlignedCodonFrame; +import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentAnnotation; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.Annotation; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.FeatureProperties; +import jalview.datamodel.Mapping; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.schemes.ResidueProperties; +import jalview.util.MapList; +import jalview.util.ShiftList; + +public class Dna +{ + /** + * + * @param cdp1 + * @param cdp2 + * @return -1 if cdp1 aligns before cdp2, 0 if in the same column or cdp2 is + * null, +1 if after cdp2 + */ + private static int compare_codonpos(int[] cdp1, int[] cdp2) + { + if (cdp2 == null + || (cdp1[0] == cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2])) + return 0; + if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2]) + return -1; // one base in cdp1 precedes the corresponding base in the + // other codon + return 1; // one base in cdp1 appears after the corresponding base in the + // other codon. + } + + /** + * DNA->mapped protein sequence alignment translation given set of sequences + * 1. id distinct coding regions within selected region for each sequence 2. + * generate peptides based on inframe (or given) translation or (optionally + * and where specified) out of frame translations (annotated appropriately) 3. + * align peptides based on codon alignment + */ + /** + * id potential products from dna 1. search for distinct products within + * selected region for each selected sequence 2. group by associated DB type. + * 3. return as form for input into above function + */ + /** + * + */ + /** + * create a new alignment of protein sequences by an inframe translation of + * the provided NA sequences + * + * @param selection + * @param seqstring + * @param viscontigs + * @param gapCharacter + * @param annotations + * @param aWidth + * @param dataset + * destination dataset for translated sequences and mappings + * @return + */ + public static AlignmentI CdnaTranslate(SequenceI[] selection, + String[] seqstring, int viscontigs[], char gapCharacter, + AlignmentAnnotation[] annotations, int aWidth, Alignment dataset) + { + return CdnaTranslate(selection, seqstring, null, viscontigs, + gapCharacter, annotations, aWidth, dataset); + } + + /** + * + * @param selection + * @param seqstring + * @param product + * - array of DbRefEntry objects from which exon map in seqstring is + * derived + * @param viscontigs + * @param gapCharacter + * @param annotations + * @param aWidth + * @param dataset + * @return + */ + public static AlignmentI CdnaTranslate(SequenceI[] selection, + String[] seqstring, DBRefEntry[] product, int viscontigs[], + char gapCharacter, AlignmentAnnotation[] annotations, int aWidth, + Alignment dataset) + { + AlignedCodonFrame codons = new AlignedCodonFrame(aWidth); // stores hash of + // subsequent + // positions for + // each codon + // start position + // in alignment + int s, sSize = selection.length; + Vector pepseqs = new Vector(); + for (s = 0; s < sSize; s++) + { + SequenceI newseq = translateCodingRegion(selection[s], seqstring[s], + viscontigs, codons, gapCharacter, + (product != null) ? product[s] : null, false); // possibly anonymous + // product + if (newseq != null) + { + pepseqs.addElement(newseq); + SequenceI ds = newseq; + if (dataset != null) + { + while (ds.getDatasetSequence() != null) + { + ds = ds.getDatasetSequence(); + } + dataset.addSequence(ds); + } + } + } + if (codons.aaWidth == 0) + return null; + SequenceI[] newseqs = new SequenceI[pepseqs.size()]; + pepseqs.copyInto(newseqs); + AlignmentI al = new Alignment(newseqs); + al.padGaps(); // ensure we look aligned. + al.setDataset(dataset); + translateAlignedAnnotations(annotations, al, codons); + al.addCodonFrame(codons); + return al; + } + + /** + * fake the collection of DbRefs with associated exon mappings to identify if + * a translation would generate distinct product in the currently selected + * region. + * + * @param selection + * @param viscontigs + * @return + */ + public static boolean canTranslate(SequenceI[] selection, + int viscontigs[]) + { + for (int gd = 0; gd < selection.length; gd++) + { + SequenceI dna = selection[gd]; + jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils + .selectRefs(dna.getDBRef(), + jalview.datamodel.DBRefSource.DNACODINGDBS); + if (dnarefs != null) + { + // intersect with pep + // intersect with pep + Vector mappedrefs = new Vector(); + DBRefEntry[] refs = dna.getDBRef(); + for (int d = 0; d < refs.length; d++) + { + if (refs[d].getMap() != null && refs[d].getMap().getMap() != null + && refs[d].getMap().getMap().getFromRatio() == 3 + && refs[d].getMap().getMap().getToRatio() == 1) + { + mappedrefs.addElement(refs[d]); // add translated protein maps + } + } + dnarefs = new DBRefEntry[mappedrefs.size()]; + mappedrefs.copyInto(dnarefs); + for (int d = 0; d < dnarefs.length; d++) + { + Mapping mp = dnarefs[d].getMap(); + if (mp != null) + { + for (int vc = 0; vc < viscontigs.length; vc += 2) + { + int[] mpr = mp.locateMappedRange(viscontigs[vc], + viscontigs[vc + 1]); + if (mpr != null) + { + return true; + } + } + } + } + } + } + return false; + } + + /** + * generate a set of translated protein products from annotated sequenceI + * + * @param selection + * @param viscontigs + * @param gapCharacter + * @param dataset + * destination dataset for translated sequences + * @param annotations + * @param aWidth + * @return + */ + public static AlignmentI CdnaTranslate(SequenceI[] selection, + int viscontigs[], char gapCharacter, Alignment dataset) + { + int alwidth = 0; + Vector cdnasqs = new Vector(); + Vector cdnasqi = new Vector(); + Vector cdnaprod = new Vector(); + for (int gd = 0; gd < selection.length; gd++) + { + SequenceI dna = selection[gd]; + jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils + .selectRefs(dna.getDBRef(), + jalview.datamodel.DBRefSource.DNACODINGDBS); + if (dnarefs != null) + { + // intersect with pep + Vector mappedrefs = new Vector(); + DBRefEntry[] refs = dna.getDBRef(); + for (int d = 0; d < refs.length; d++) + { + if (refs[d].getMap() != null && refs[d].getMap().getMap() != null + && refs[d].getMap().getMap().getFromRatio() == 3 + && refs[d].getMap().getMap().getToRatio() == 1) + { + mappedrefs.addElement(refs[d]); // add translated protein maps + } + } + dnarefs = new DBRefEntry[mappedrefs.size()]; + mappedrefs.copyInto(dnarefs); + for (int d = 0; d < dnarefs.length; d++) + { + Mapping mp = dnarefs[d].getMap(); + StringBuffer sqstr = new StringBuffer(); + if (mp != null) + { + Mapping intersect = mp.intersectVisContigs(viscontigs); + // generate seqstring for this sequence based on mapping + + if (sqstr.length() > alwidth) + alwidth = sqstr.length(); + cdnasqs.addElement(sqstr.toString()); + cdnasqi.addElement(dna); + cdnaprod.addElement(intersect); + } + } + } + SequenceI[] cdna = new SequenceI[cdnasqs.size()]; + DBRefEntry[] prods = new DBRefEntry[cdnaprod.size()]; + String[] xons = new String[cdnasqs.size()]; + cdnasqs.copyInto(xons); + cdnaprod.copyInto(prods); + cdnasqi.copyInto(cdna); + return CdnaTranslate(cdna, xons, prods, viscontigs, gapCharacter, + null, alwidth, dataset); + } + return null; + } + + /** + * translate na alignment annotations onto translated amino acid alignment al + * using codon mapping codons + * + * @param annotations + * @param al + * @param codons + */ + public static void translateAlignedAnnotations( + AlignmentAnnotation[] annotations, AlignmentI al, + AlignedCodonFrame codons) + { + // ////////////////////////////// + // Copy annotations across + // + // Can only do this for columns with consecutive codons, or where + // annotation is sequence associated. + + int pos, a, aSize; + if (annotations != null) + { + for (int i = 0; i < annotations.length; i++) + { + // Skip any autogenerated annotation + if (annotations[i].autoCalculated) + { + continue; + } + + aSize = codons.getaaWidth(); // aa alignment width. + jalview.datamodel.Annotation[] anots = (annotations[i].annotations == null) ? null + : new jalview.datamodel.Annotation[aSize]; + if (anots != null) + { + for (a = 0; a < aSize; a++) + { + // process through codon map. + if (codons.codons[a] != null + && codons.codons[a][0] == (codons.codons[a][2] - 2)) + { + anots[a] = getCodonAnnotation(codons.codons[a], + annotations[i].annotations); + } + } + } + + jalview.datamodel.AlignmentAnnotation aa = new jalview.datamodel.AlignmentAnnotation( + annotations[i].label, annotations[i].description, anots); + aa.graph = annotations[i].graph; + aa.graphGroup = annotations[i].graphGroup; + aa.graphHeight = annotations[i].graphHeight; + if (annotations[i].getThreshold() != null) + { + aa.setThreshold(new jalview.datamodel.GraphLine(annotations[i] + .getThreshold())); + } + if (annotations[i].hasScore) + { + aa.setScore(annotations[i].getScore()); + } + if (annotations[i].sequenceRef != null) + { + SequenceI aaSeq = codons + .getAaForDnaSeq(annotations[i].sequenceRef); + if (aaSeq != null) + { + // aa.compactAnnotationArray(); // throw away alignment annotation + // positioning + aa.setSequenceRef(aaSeq); + aa.createSequenceMapping(aaSeq, aaSeq.getStart(), true); // rebuild + // mapping + aa.adjustForAlignment(); + aaSeq.addAlignmentAnnotation(aa); + } + + } + al.addAnnotation(aa); + } + } + } + + private static Annotation getCodonAnnotation(int[] is, + Annotation[] annotations) + { + // Have a look at all the codon positions for annotation and put the first + // one found into the translated annotation pos. + int contrib = 0; + Annotation annot = null; + for (int p = 0; p < 3; p++) + { + if (annotations[is[p]] != null) + { + if (annot == null) + { + annot = new Annotation(annotations[is[p]]); + contrib = 1; + } + else + { + // merge with last + Annotation cpy = new Annotation(annotations[is[p]]); + if (annot.colour == null) + { + annot.colour = cpy.colour; + } + if (annot.description == null || annot.description.length() == 0) + { + annot.description = cpy.description; + } + if (annot.displayCharacter == null) + { + annot.displayCharacter = cpy.displayCharacter; + } + if (annot.secondaryStructure == 0) + { + annot.secondaryStructure = cpy.secondaryStructure; + } + annot.value += cpy.value; + contrib++; + } + } + } + if (contrib > 1) + { + annot.value /= (float) contrib; + } + return annot; + } + + /** + * Translate a na sequence + * + * @param selection + * sequence displayed under viscontigs visible columns + * @param seqstring + * ORF read in some global alignment reference frame + * @param viscontigs + * mapping from global reference frame to visible seqstring ORF read + * @param codons + * Definition of global ORF alignment reference frame + * @param gapCharacter + * @return sequence ready to be added to alignment. + * @deprecated Use {@link #translateCodingRegion(SequenceI,String,int[],AlignedCodonFrame,char,DBRefEntry,boolean)} instead + */ + public static SequenceI translateCodingRegion(SequenceI selection, + String seqstring, int[] viscontigs, AlignedCodonFrame codons, + char gapCharacter, DBRefEntry product) + { + return translateCodingRegion(selection, seqstring, viscontigs, codons, + gapCharacter, product, false); + } + + /** + * Translate a na sequence + * + * @param selection + * sequence displayed under viscontigs visible columns + * @param seqstring + * ORF read in some global alignment reference frame + * @param viscontigs + * mapping from global reference frame to visible seqstring ORF read + * @param codons + * Definition of global ORF alignment reference frame + * @param gapCharacter + * @param starForStop when true stop codons will translate as '*', otherwise as 'X' + * @return sequence ready to be added to alignment. + */ + public static SequenceI translateCodingRegion(SequenceI selection, + String seqstring, int[] viscontigs, AlignedCodonFrame codons, + char gapCharacter, DBRefEntry product, final boolean starForStop) + { + java.util.List skip = new ArrayList(); + int skipint[] = null; + ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring + // intervals + int vc, scontigs[] = new int[viscontigs.length]; + int npos = 0; + for (vc = 0; vc < viscontigs.length; vc += 2) + { + if (vc == 0) + { + vismapping.addShift(npos, viscontigs[vc]); + } + else + { + // hidden region + vismapping.addShift(npos, viscontigs[vc] - viscontigs[vc - 1] + 1); + } + scontigs[vc] = viscontigs[vc]; + scontigs[vc + 1] = viscontigs[vc + 1]; + } + + StringBuffer protein = new StringBuffer(); + String seq = seqstring.replace('U', 'T'); + char codon[] = new char[3]; + int cdp[] = new int[3], rf = 0, lastnpos = 0, nend; + int aspos = 0; + int resSize = 0; + for (npos = 0, nend = seq.length(); npos < nend; npos++) + { + if (!jalview.util.Comparison.isGap(seq.charAt(npos))) + { + cdp[rf] = npos; // store position + codon[rf++] = seq.charAt(npos); // store base + } + // filled an RF yet ? + if (rf == 3) + { + String aa = ResidueProperties.codonTranslate(new String(codon)); + rf = 0; + if (aa == null) + { + aa = String.valueOf(gapCharacter); + if (skipint == null) + { + skipint = new int[] + { cdp[0], cdp[2] }; + } + skipint[1] = cdp[2]; + } + else + { + if (skipint != null) + { + // edit scontigs + skipint[0] = vismapping.shift(skipint[0]); + skipint[1] = vismapping.shift(skipint[1]); + for (vc = 0; vc < scontigs.length; ) + { + if (scontigs[vc + 1] < skipint[0]) + { + // before skipint starts + vc += 2; + continue; + } + if (scontigs[vc]>skipint[1]) + { + // finished editing so + break; + } + // Edit the contig list to include the skipped region which did not translate + int[] t; + // from : s1 e1 s2 e2 s3 e3 + // to s: s1 e1 s2 k0 k1 e2 s3 e3 + // list increases by one unless one boundary (s2==k0 or e2==k1) matches, and decreases by one if skipint intersects whole visible contig + if (scontigs[vc] <= skipint[0]) + { + if (skipint[0] == scontigs[vc]) + { + // skipint at start of contig + // shift the start of this contig + if (scontigs[vc + 1] > skipint[1]) + { + scontigs[vc] = skipint[1]; + vc+=2; + } + else + { + if (scontigs[vc+1]==skipint[1]) + { + // remove the contig + t = new int[scontigs.length - 2]; + if (vc > 0) + { + System.arraycopy(scontigs, 0, t, 0, vc - 1); + } + if (vc + 2 < t.length) + { + System.arraycopy(scontigs, vc + 2, t, vc, t.length + - vc + 2); + } + scontigs=t; + } else { + // truncate contig to before the skipint region + scontigs[vc+1] = skipint[0]-1; + vc+=2; + } + } + } + else + { + // scontig starts before start of skipint + if (scontigs[vc+1]= codons.aaWidth) + // codons.aaWidth = aspos + 1; + break; // check the next position for alignment + case 0: + // codon aligns at aspos position. + findpos = false; + } + } + // codon aligns with all other sequence residues found at aspos + protein.append(aa); + lastnpos = npos; + if (codons.codons[aspos] == null) + { + // mark this column as aligning to this aligned reading frame + codons.codons[aspos] = new int[] + { cdp[0], cdp[1], cdp[2] }; + } + if (aspos >= codons.aaWidth) + { + // update maximum alignment width + // (we can do this without calling checkCodonFrameWidth because it was + // already done above) + codons.setAaWidth(aspos); + } + // ready for next translated reading frame alignment position (if any) + aspos++; + } + } + if (resSize > 0) + { + SequenceI newseq = new Sequence(selection.getName(), + protein.toString()); + if (rf != 0) + { + if (jalview.bin.Cache.log!=null) { + jalview.bin.Cache.log.debug("trimming contigs for incomplete terminal codon."); + } else { + System.err.println("trimming contigs for incomplete terminal codon."); + } + // map and trim contigs to ORF region + vc = scontigs.length - 1; + lastnpos = vismapping.shift(lastnpos); // place npos in context of + // whole dna alignment (rather + // than visible contigs) + // incomplete ORF could be broken over one or two visible contig + // intervals. + while (vc >= 0 && scontigs[vc] > lastnpos) + { + if (vc > 0 && scontigs[vc - 1] > lastnpos) + { + vc -= 2; + } + else + { + // correct last interval in list. + scontigs[vc] = lastnpos; + } + } + + if (vc > 0 && (vc + 1) < scontigs.length) + { + // truncate map list to just vc elements + int t[] = new int[vc + 1]; + System.arraycopy(scontigs, 0, t, 0, vc + 1); + scontigs = t; + } + if (vc <= 0) + scontigs = null; + } + if (scontigs != null) + { + npos = 0; + // map scontigs to actual sequence positions on selection + for (vc = 0; vc < scontigs.length; vc += 2) + { + scontigs[vc] = selection.findPosition(scontigs[vc]); // not from 1! + scontigs[vc + 1] = selection.findPosition(scontigs[vc + 1]); // exclusive + if (scontigs[vc + 1] == selection.getEnd()) + break; + } + // trim trailing empty intervals. + if ((vc + 2) < scontigs.length) + { + int t[] = new int[vc + 2]; + System.arraycopy(scontigs, 0, t, 0, vc + 2); + scontigs = t; + } + /* + * delete intervals in scontigs which are not translated. 1. map skip + * into sequence position intervals 2. truncate existing ranges and add + * new ranges to exclude untranslated regions. if (skip.size()>0) { + * Vector narange = new Vector(); for (vc=0; vc=skipint[0] && + * iv[0]<=skipint[1]) { if (iv[0]==skipint[0]) { // delete beginning of + * range } else { // truncate range and create new one if necessary iv = + * (int[]) narange.elementAt(vc+1); if (iv[0]<=skipint[1]) { // truncate + * range iv[0] = skipint[1]; } else { } } } else if (iv[0]