X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FDna.java;h=49c37df7f2d810881c0bc55e7a3a6b21c2f4f8ea;hb=a45774ee31d9f35d4eff46d54d7deab719afb092;hp=960a6db69f058a0bc28395e9113c2137773d66c8;hpb=6173092ff5cb03f039cac674bfc8bc4f969976a5;p=jalview.git diff --git a/src/jalview/analysis/Dna.java b/src/jalview/analysis/Dna.java index 960a6db..49c37df 100644 --- a/src/jalview/analysis/Dna.java +++ b/src/jalview/analysis/Dna.java @@ -1,5 +1,23 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7) + * Copyright (C) 2011 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with Jalview. If not, see . + */ package jalview.analysis; +import java.util.Enumeration; import java.util.Hashtable; import java.util.Vector; @@ -9,6 +27,7 @@ import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.Annotation; import jalview.datamodel.ColumnSelection; +import jalview.datamodel.DBRefEntry; import jalview.datamodel.FeatureProperties; import jalview.datamodel.Mapping; import jalview.datamodel.Sequence; @@ -34,9 +53,9 @@ public class Dna return 0; if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2]) return -1; // one base in cdp1 precedes the corresponding base in the - // other codon + // other codon return 1; // one base in cdp1 appears after the corresponding base in the - // other codon. + // other codon. } /** @@ -64,27 +83,60 @@ public class Dna * @param gapCharacter * @param annotations * @param aWidth + * @param dataset + * destination dataset for translated sequences and mappings * @return */ public static AlignmentI CdnaTranslate(SequenceI[] selection, String[] seqstring, int viscontigs[], char gapCharacter, - AlignmentAnnotation[] annotations, int aWidth) + AlignmentAnnotation[] annotations, int aWidth, Alignment dataset) + { + return CdnaTranslate(selection, seqstring, null, viscontigs, + gapCharacter, annotations, aWidth, dataset); + } + + /** + * + * @param selection + * @param seqstring + * @param product + * - array of DbRefEntry objects from which exon map in seqstring is + * derived + * @param viscontigs + * @param gapCharacter + * @param annotations + * @param aWidth + * @param dataset + * @return + */ + public static AlignmentI CdnaTranslate(SequenceI[] selection, + String[] seqstring, DBRefEntry[] product, int viscontigs[], + char gapCharacter, AlignmentAnnotation[] annotations, int aWidth, + Alignment dataset) { AlignedCodonFrame codons = new AlignedCodonFrame(aWidth); // stores hash of - // subsequent - // positions for - // each codon - // start position - // in alignment + // subsequent + // positions for + // each codon + // start position + // in alignment int s, sSize = selection.length; Vector pepseqs = new Vector(); for (s = 0; s < sSize; s++) { SequenceI newseq = translateCodingRegion(selection[s], seqstring[s], - viscontigs, codons, gapCharacter); + viscontigs, codons, gapCharacter, + (product != null) ? product[s] : null); // possibly anonymous + // product if (newseq != null) { pepseqs.addElement(newseq); + SequenceI ds = newseq; + while (ds.getDatasetSequence() != null) + { + ds = ds.getDatasetSequence(); + } + dataset.addSequence(ds); } } if (codons.aaWidth == 0) @@ -93,13 +145,139 @@ public class Dna pepseqs.copyInto(newseqs); AlignmentI al = new Alignment(newseqs); al.padGaps(); // ensure we look aligned. - al.setDataset(null); + al.setDataset(dataset); translateAlignedAnnotations(annotations, al, codons); al.addCodonFrame(codons); return al; } /** + * fake the collection of DbRefs with associated exon mappings to identify if + * a translation would generate distinct product in the currently selected + * region. + * + * @param selection + * @param viscontigs + * @return + */ + public static boolean canTranslate(SequenceI[] selection, + int viscontigs[]) + { + for (int gd = 0; gd < selection.length; gd++) + { + SequenceI dna = selection[gd]; + jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils + .selectRefs(dna.getDBRef(), + jalview.datamodel.DBRefSource.DNACODINGDBS); + if (dnarefs != null) + { + // intersect with pep + // intersect with pep + Vector mappedrefs = new Vector(); + DBRefEntry[] refs = dna.getDBRef(); + for (int d = 0; d < refs.length; d++) + { + if (refs[d].getMap() != null && refs[d].getMap().getMap() != null + && refs[d].getMap().getMap().getFromRatio() == 3 + && refs[d].getMap().getMap().getToRatio() == 1) + { + mappedrefs.addElement(refs[d]); // add translated protein maps + } + } + dnarefs = new DBRefEntry[mappedrefs.size()]; + mappedrefs.copyInto(dnarefs); + for (int d = 0; d < dnarefs.length; d++) + { + Mapping mp = dnarefs[d].getMap(); + if (mp != null) + { + for (int vc = 0; vc < viscontigs.length; vc += 2) + { + int[] mpr = mp.locateMappedRange(viscontigs[vc], + viscontigs[vc + 1]); + if (mpr != null) + { + return true; + } + } + } + } + } + } + return false; + } + + /** + * generate a set of translated protein products from annotated sequenceI + * + * @param selection + * @param viscontigs + * @param gapCharacter + * @param dataset + * destination dataset for translated sequences + * @param annotations + * @param aWidth + * @return + */ + public static AlignmentI CdnaTranslate(SequenceI[] selection, + int viscontigs[], char gapCharacter, Alignment dataset) + { + int alwidth = 0; + Vector cdnasqs = new Vector(); + Vector cdnasqi = new Vector(); + Vector cdnaprod = new Vector(); + for (int gd = 0; gd < selection.length; gd++) + { + SequenceI dna = selection[gd]; + jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils + .selectRefs(dna.getDBRef(), + jalview.datamodel.DBRefSource.DNACODINGDBS); + if (dnarefs != null) + { + // intersect with pep + Vector mappedrefs = new Vector(); + DBRefEntry[] refs = dna.getDBRef(); + for (int d = 0; d < refs.length; d++) + { + if (refs[d].getMap() != null && refs[d].getMap().getMap() != null + && refs[d].getMap().getMap().getFromRatio() == 3 + && refs[d].getMap().getMap().getToRatio() == 1) + { + mappedrefs.addElement(refs[d]); // add translated protein maps + } + } + dnarefs = new DBRefEntry[mappedrefs.size()]; + mappedrefs.copyInto(dnarefs); + for (int d = 0; d < dnarefs.length; d++) + { + Mapping mp = dnarefs[d].getMap(); + StringBuffer sqstr = new StringBuffer(); + if (mp != null) + { + Mapping intersect = mp.intersectVisContigs(viscontigs); + // generate seqstring for this sequence based on mapping + + if (sqstr.length() > alwidth) + alwidth = sqstr.length(); + cdnasqs.addElement(sqstr.toString()); + cdnasqi.addElement(dna); + cdnaprod.addElement(intersect); + } + } + } + SequenceI[] cdna = new SequenceI[cdnasqs.size()]; + DBRefEntry[] prods = new DBRefEntry[cdnaprod.size()]; + String[] xons = new String[cdnasqs.size()]; + cdnasqs.copyInto(xons); + cdnaprod.copyInto(prods); + cdnasqi.copyInto(cdna); + return CdnaTranslate(cdna, xons, prods, viscontigs, gapCharacter, + null, alwidth, dataset); + } + return null; + } + + /** * translate na alignment annotations onto translated amino acid alignment al * using codon mapping codons * @@ -139,18 +317,22 @@ public class Dna if (codons.codons[a] != null && codons.codons[a][0] == (codons.codons[a][2] - 2)) { - pos = codons.codons[a][0]; - if (annotations[i].annotations[pos] == null - || annotations[i].annotations[pos] == null) - continue; - // We just take the annotation in the first base in the codon - anots[a] = new Annotation(annotations[i].annotations[pos]); + anots[a] = getCodonAnnotation(codons.codons[a], + annotations[i].annotations); } } } jalview.datamodel.AlignmentAnnotation aa = new jalview.datamodel.AlignmentAnnotation( annotations[i].label, annotations[i].description, anots); + aa.graph = annotations[i].graph; + aa.graphGroup = annotations[i].graphGroup; + aa.graphHeight = annotations[i].graphHeight; + if (annotations[i].getThreshold() != null) + { + aa.setThreshold(new jalview.datamodel.GraphLine(annotations[i] + .getThreshold())); + } if (annotations[i].hasScore) { aa.setScore(annotations[i].getScore()); @@ -165,7 +347,7 @@ public class Dna // positioning aa.setSequenceRef(aaSeq); aa.createSequenceMapping(aaSeq, aaSeq.getStart(), true); // rebuild - // mapping + // mapping aa.adjustForAlignment(); aaSeq.addAlignmentAnnotation(aa); } @@ -176,31 +358,92 @@ public class Dna } } + private static Annotation getCodonAnnotation(int[] is, + Annotation[] annotations) + { + // Have a look at all the codon positions for annotation and put the first + // one found into the translated annotation pos. + int contrib = 0; + Annotation annot = null; + for (int p = 0; p < 3; p++) + { + if (annotations[is[p]] != null) + { + if (annot == null) + { + annot = new Annotation(annotations[is[p]]); + contrib = 1; + } + else + { + // merge with last + Annotation cpy = new Annotation(annotations[is[p]]); + if (annot.colour == null) + { + annot.colour = cpy.colour; + } + if (annot.description == null || annot.description.length() == 0) + { + annot.description = cpy.description; + } + if (annot.displayCharacter == null) + { + annot.displayCharacter = cpy.displayCharacter; + } + if (annot.secondaryStructure == 0) + { + annot.secondaryStructure = cpy.secondaryStructure; + } + annot.value += cpy.value; + contrib++; + } + } + } + if (contrib > 1) + { + annot.value /= (float) contrib; + } + return annot; + } + /** * Translate a na sequence * * @param selection + * sequence displayed under viscontigs visible columns * @param seqstring + * ORF read in some global alignment reference frame * @param viscontigs + * mapping from global reference frame to visible seqstring ORF read * @param codons + * Definition of global ORF alignment reference frame * @param gapCharacter * @param newSeq * @return sequence ready to be added to alignment. */ public static SequenceI translateCodingRegion(SequenceI selection, String seqstring, int[] viscontigs, AlignedCodonFrame codons, - char gapCharacter) + char gapCharacter, DBRefEntry product) { + Vector skip = new Vector(); + int skipint[] = null; ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring - // intervals + // intervals int vc, scontigs[] = new int[viscontigs.length]; int npos = 0; for (vc = 0; vc < viscontigs.length; vc += 2) { - vismapping.addShift(npos, viscontigs[vc]); - scontigs[vc] = npos; - npos += viscontigs[vc + 1]; - scontigs[vc + 1] = npos; + if (vc == 0) + { + vismapping.addShift(npos, viscontigs[vc]); + } + else + { + // hidden region + vismapping.addShift(npos, viscontigs[vc] - viscontigs[vc - 1] + 1); + } + scontigs[vc] = viscontigs[vc]; + scontigs[vc + 1] = viscontigs[vc + 1]; } StringBuffer protein = new StringBuffer(); @@ -222,9 +465,45 @@ public class Dna String aa = ResidueProperties.codonTranslate(new String(codon)); rf = 0; if (aa == null) + { aa = String.valueOf(gapCharacter); + if (skipint == null) + { + skipint = new int[] + { cdp[0], cdp[2] }; + } + skipint[1] = cdp[2]; + } else { + if (skipint != null) + { + // edit scontigs + skipint[0] = vismapping.shift(skipint[0]); + skipint[1] = vismapping.shift(skipint[1]); + for (vc = 0; vc < scontigs.length; vc += 2) + { + if (scontigs[vc + 1] < skipint[0]) + { + continue; + } + if (scontigs[vc] <= skipint[0]) + { + if (skipint[0] == scontigs[vc]) + { + + } + else + { + int[] t = new int[scontigs.length + 2]; + System.arraycopy(scontigs, 0, t, 0, vc - 1); + // scontigs[vc]; // + } + } + } + skip.addElement(skipint); + skipint = null; + } if (aa.equals("STOP")) { aa = "X"; @@ -250,8 +529,8 @@ public class Dna // with a gap aa = "" + gapCharacter + aa; aspos++; - if (aspos >= codons.aaWidth) - codons.aaWidth = aspos + 1; + // if (aspos >= codons.aaWidth) + // codons.aaWidth = aspos + 1; break; // check the next position for alignment case 0: // codon aligns at aspos position. @@ -267,15 +546,21 @@ public class Dna codons.codons[aspos] = new int[] { cdp[0], cdp[1], cdp[2] }; } - aspos++; if (aspos >= codons.aaWidth) - codons.aaWidth = aspos + 1; + { + // update maximum alignment width + // (we can do this without calling checkCodonFrameWidth because it was + // already done above) + codons.setAaWidth(aspos); + } + // ready for next translated reading frame alignment position (if any) + aspos++; } } if (resSize > 0) { - SequenceI newseq = new Sequence(selection.getName(), protein - .toString()); + SequenceI newseq = new Sequence(selection.getName(), + protein.toString()); if (rf != 0) { jalview.bin.Cache.log @@ -283,8 +568,8 @@ public class Dna // map and trim contigs to ORF region vc = scontigs.length - 1; lastnpos = vismapping.shift(lastnpos); // place npos in context of - // whole dna alignment (rather - // than visible contigs) + // whole dna alignment (rather + // than visible contigs) // incomplete ORF could be broken over one or two visible contig // intervals. while (vc >= 0 && scontigs[vc] > lastnpos) @@ -313,14 +598,11 @@ public class Dna if (scontigs != null) { npos = 0; - // Find sequence position for scontigs positions on the nucleotide - // sequence string we were passed. - for (vc = 0; vc < viscontigs.length; vc += 2) + // map scontigs to actual sequence positions on selection + for (vc = 0; vc < scontigs.length; vc += 2) { scontigs[vc] = selection.findPosition(scontigs[vc]); // not from 1! - npos += viscontigs[vc]; - scontigs[vc + 1] = selection - .findPosition(npos + scontigs[vc + 1]); // exclusive + scontigs[vc + 1] = selection.findPosition(scontigs[vc + 1]); // exclusive if (scontigs[vc + 1] == selection.getEnd()) break; } @@ -331,22 +613,49 @@ public class Dna System.arraycopy(scontigs, 0, t, 0, vc + 2); scontigs = t; } - + /* + * delete intervals in scontigs which are not translated. 1. map skip + * into sequence position intervals 2. truncate existing ranges and add + * new ranges to exclude untranslated regions. if (skip.size()>0) { + * Vector narange = new Vector(); for (vc=0; vc=skipint[0] && + * iv[0]<=skipint[1]) { if (iv[0]==skipint[0]) { // delete beginning of + * range } else { // truncate range and create new one if necessary iv = + * (int[]) narange.elementAt(vc+1); if (iv[0]<=skipint[1]) { // truncate + * range iv[0] = skipint[1]; } else { } } } else if (iv[0]