X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FDna.java;h=5ff6751450507150237ee1bc16a5d1fc49168302;hb=1660a4fe03b8dda74b008ae4de2d804f8044a463;hp=960a6db69f058a0bc28395e9113c2137773d66c8;hpb=6173092ff5cb03f039cac674bfc8bc4f969976a5;p=jalview.git diff --git a/src/jalview/analysis/Dna.java b/src/jalview/analysis/Dna.java index 960a6db..5ff6751 100644 --- a/src/jalview/analysis/Dna.java +++ b/src/jalview/analysis/Dna.java @@ -1,5 +1,24 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer (Development Version 2.4.1) + * Copyright (C) 2009 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ package jalview.analysis; +import java.util.Enumeration; import java.util.Hashtable; import java.util.Vector; @@ -9,6 +28,7 @@ import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.Annotation; import jalview.datamodel.ColumnSelection; +import jalview.datamodel.DBRefEntry; import jalview.datamodel.FeatureProperties; import jalview.datamodel.Mapping; import jalview.datamodel.Sequence; @@ -34,9 +54,9 @@ public class Dna return 0; if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2]) return -1; // one base in cdp1 precedes the corresponding base in the - // other codon + // other codon return 1; // one base in cdp1 appears after the corresponding base in the - // other codon. + // other codon. } /** @@ -64,27 +84,60 @@ public class Dna * @param gapCharacter * @param annotations * @param aWidth + * @param dataset + * destination dataset for translated sequences and mappings * @return */ public static AlignmentI CdnaTranslate(SequenceI[] selection, String[] seqstring, int viscontigs[], char gapCharacter, - AlignmentAnnotation[] annotations, int aWidth) + AlignmentAnnotation[] annotations, int aWidth, Alignment dataset) + { + return CdnaTranslate(selection, seqstring, null, viscontigs, + gapCharacter, annotations, aWidth, dataset); + } + + /** + * + * @param selection + * @param seqstring + * @param product - + * array of DbRefEntry objects from which exon map in seqstring + * is derived + * @param viscontigs + * @param gapCharacter + * @param annotations + * @param aWidth + * @param dataset + * @return + */ + public static AlignmentI CdnaTranslate(SequenceI[] selection, + String[] seqstring, DBRefEntry[] product, int viscontigs[], + char gapCharacter, AlignmentAnnotation[] annotations, int aWidth, + Alignment dataset) { AlignedCodonFrame codons = new AlignedCodonFrame(aWidth); // stores hash of - // subsequent - // positions for - // each codon - // start position - // in alignment + // subsequent + // positions for + // each codon + // start position + // in alignment int s, sSize = selection.length; Vector pepseqs = new Vector(); for (s = 0; s < sSize; s++) { SequenceI newseq = translateCodingRegion(selection[s], seqstring[s], - viscontigs, codons, gapCharacter); + viscontigs, codons, gapCharacter, + (product != null) ? product[s] : null); // possibly anonymous + // product if (newseq != null) { pepseqs.addElement(newseq); + SequenceI ds = newseq; + while (ds.getDatasetSequence() != null) + { + ds = ds.getDatasetSequence(); + } + dataset.addSequence(ds); } } if (codons.aaWidth == 0) @@ -93,13 +146,139 @@ public class Dna pepseqs.copyInto(newseqs); AlignmentI al = new Alignment(newseqs); al.padGaps(); // ensure we look aligned. - al.setDataset(null); + al.setDataset(dataset); translateAlignedAnnotations(annotations, al, codons); al.addCodonFrame(codons); return al; } /** + * fake the collection of DbRefs with associated exon mappings to identify if + * a translation would generate distinct product in the currently selected + * region. + * + * @param selection + * @param viscontigs + * @return + */ + public static boolean canTranslate(SequenceI[] selection, + int viscontigs[]) + { + for (int gd = 0; gd < selection.length; gd++) + { + SequenceI dna = selection[gd]; + jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils + .selectRefs(dna.getDBRef(), + jalview.datamodel.DBRefSource.DNACODINGDBS); + if (dnarefs != null) + { + // intersect with pep + // intersect with pep + Vector mappedrefs = new Vector(); + DBRefEntry[] refs = dna.getDBRef(); + for (int d = 0; d < refs.length; d++) + { + if (refs[d].getMap() != null && refs[d].getMap().getMap() != null + && refs[d].getMap().getMap().getFromRatio() == 3 + && refs[d].getMap().getMap().getToRatio() == 1) + { + mappedrefs.addElement(refs[d]); // add translated protein maps + } + } + dnarefs = new DBRefEntry[mappedrefs.size()]; + mappedrefs.copyInto(dnarefs); + for (int d = 0; d < dnarefs.length; d++) + { + Mapping mp = dnarefs[d].getMap(); + if (mp != null) + { + for (int vc = 0; vc < viscontigs.length; vc += 2) + { + int[] mpr = mp.locateMappedRange(viscontigs[vc], + viscontigs[vc + 1]); + if (mpr != null) + { + return true; + } + } + } + } + } + } + return false; + } + + /** + * generate a set of translated protein products from annotated sequenceI + * + * @param selection + * @param viscontigs + * @param gapCharacter + * @param dataset + * destination dataset for translated sequences + * @param annotations + * @param aWidth + * @return + */ + public static AlignmentI CdnaTranslate(SequenceI[] selection, + int viscontigs[], char gapCharacter, Alignment dataset) + { + int alwidth = 0; + Vector cdnasqs = new Vector(); + Vector cdnasqi = new Vector(); + Vector cdnaprod = new Vector(); + for (int gd = 0; gd < selection.length; gd++) + { + SequenceI dna = selection[gd]; + jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils + .selectRefs(dna.getDBRef(), + jalview.datamodel.DBRefSource.DNACODINGDBS); + if (dnarefs != null) + { + // intersect with pep + Vector mappedrefs = new Vector(); + DBRefEntry[] refs = dna.getDBRef(); + for (int d = 0; d < refs.length; d++) + { + if (refs[d].getMap() != null && refs[d].getMap().getMap() != null + && refs[d].getMap().getMap().getFromRatio() == 3 + && refs[d].getMap().getMap().getToRatio() == 1) + { + mappedrefs.addElement(refs[d]); // add translated protein maps + } + } + dnarefs = new DBRefEntry[mappedrefs.size()]; + mappedrefs.copyInto(dnarefs); + for (int d = 0; d < dnarefs.length; d++) + { + Mapping mp = dnarefs[d].getMap(); + StringBuffer sqstr = new StringBuffer(); + if (mp != null) + { + Mapping intersect = mp.intersectVisContigs(viscontigs); + // generate seqstring for this sequence based on mapping + + if (sqstr.length() > alwidth) + alwidth = sqstr.length(); + cdnasqs.addElement(sqstr.toString()); + cdnasqi.addElement(dna); + cdnaprod.addElement(intersect); + } + } + } + SequenceI[] cdna = new SequenceI[cdnasqs.size()]; + DBRefEntry[] prods = new DBRefEntry[cdnaprod.size()]; + String[] xons = new String[cdnasqs.size()]; + cdnasqs.copyInto(xons); + cdnaprod.copyInto(prods); + cdnasqi.copyInto(cdna); + return CdnaTranslate(cdna, xons, prods, viscontigs, gapCharacter, + null, alwidth, dataset); + } + return null; + } + + /** * translate na alignment annotations onto translated amino acid alignment al * using codon mapping codons * @@ -139,18 +318,22 @@ public class Dna if (codons.codons[a] != null && codons.codons[a][0] == (codons.codons[a][2] - 2)) { - pos = codons.codons[a][0]; - if (annotations[i].annotations[pos] == null - || annotations[i].annotations[pos] == null) - continue; - // We just take the annotation in the first base in the codon - anots[a] = new Annotation(annotations[i].annotations[pos]); + anots[a] = getCodonAnnotation(codons.codons[a], + annotations[i].annotations); } } } jalview.datamodel.AlignmentAnnotation aa = new jalview.datamodel.AlignmentAnnotation( annotations[i].label, annotations[i].description, anots); + aa.graph = annotations[i].graph; + aa.graphGroup = annotations[i].graphGroup; + aa.graphHeight = annotations[i].graphHeight; + if (annotations[i].getThreshold() != null) + { + aa.setThreshold(new jalview.datamodel.GraphLine(annotations[i] + .getThreshold())); + } if (annotations[i].hasScore) { aa.setScore(annotations[i].getScore()); @@ -165,7 +348,7 @@ public class Dna // positioning aa.setSequenceRef(aaSeq); aa.createSequenceMapping(aaSeq, aaSeq.getStart(), true); // rebuild - // mapping + // mapping aa.adjustForAlignment(); aaSeq.addAlignmentAnnotation(aa); } @@ -176,31 +359,90 @@ public class Dna } } + private static Annotation getCodonAnnotation(int[] is, + Annotation[] annotations) + { + // Have a look at all the codon positions for annotation and put the first + // one found into the translated annotation pos. + int contrib=0; + Annotation annot = null; + for (int p = 0; p < 3; p++) + { + if (annotations[is[p]] != null) + { + if (annot==null) { + annot = new Annotation(annotations[is[p]]); + contrib = 1; + } else { + // merge with last + Annotation cpy = new Annotation(annotations[is[p]]); + if (annot.colour==null) + { + annot.colour = cpy.colour; + } + if (annot.description==null || annot.description.length()==0) + { + annot.description = cpy.description; + } + if (annot.displayCharacter==null) + { + annot.displayCharacter = cpy.displayCharacter; + } + if (annot.secondaryStructure==0) + { + annot.secondaryStructure = cpy.secondaryStructure; + } + annot.value+=cpy.value; + contrib++; + } + } + } + if (contrib>1) + { + annot.value/=(float)contrib; + } + return annot; + } + /** * Translate a na sequence * * @param selection + * sequence displayed under viscontigs visible columns * @param seqstring + * ORF read in some global alignment reference frame * @param viscontigs + * mapping from global reference frame to visible seqstring ORF + * read * @param codons + * Definition of global ORF alignment reference frame * @param gapCharacter * @param newSeq * @return sequence ready to be added to alignment. */ public static SequenceI translateCodingRegion(SequenceI selection, String seqstring, int[] viscontigs, AlignedCodonFrame codons, - char gapCharacter) + char gapCharacter, DBRefEntry product) { + Vector skip = new Vector(); + int skipint[] = null; ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring - // intervals + // intervals int vc, scontigs[] = new int[viscontigs.length]; int npos = 0; for (vc = 0; vc < viscontigs.length; vc += 2) { - vismapping.addShift(npos, viscontigs[vc]); - scontigs[vc] = npos; - npos += viscontigs[vc + 1]; - scontigs[vc + 1] = npos; + if (vc == 0) + { + vismapping.addShift(npos, viscontigs[vc]); + } + else + { + // hidden region + vismapping.addShift(npos, viscontigs[vc] - viscontigs[vc - 1] + 1); + } + scontigs[vc] = viscontigs[vc]; + scontigs[vc + 1] = viscontigs[vc + 1]; } StringBuffer protein = new StringBuffer(); @@ -222,9 +464,45 @@ public class Dna String aa = ResidueProperties.codonTranslate(new String(codon)); rf = 0; if (aa == null) + { aa = String.valueOf(gapCharacter); + if (skipint == null) + { + skipint = new int[] + { cdp[0], cdp[2] }; + } + skipint[1] = cdp[2]; + } else { + if (skipint != null) + { + // edit scontigs + skipint[0] = vismapping.shift(skipint[0]); + skipint[1] = vismapping.shift(skipint[1]); + for (vc = 0; vc < scontigs.length; vc += 2) + { + if (scontigs[vc + 1] < skipint[0]) + { + continue; + } + if (scontigs[vc] <= skipint[0]) + { + if (skipint[0] == scontigs[vc]) + { + + } + else + { + int[] t = new int[scontigs.length + 2]; + System.arraycopy(scontigs, 0, t, 0, vc - 1); + // scontigs[vc]; // + } + } + } + skip.addElement(skipint); + skipint = null; + } if (aa.equals("STOP")) { aa = "X"; @@ -250,8 +528,8 @@ public class Dna // with a gap aa = "" + gapCharacter + aa; aspos++; - if (aspos >= codons.aaWidth) - codons.aaWidth = aspos + 1; + // if (aspos >= codons.aaWidth) + // codons.aaWidth = aspos + 1; break; // check the next position for alignment case 0: // codon aligns at aspos position. @@ -267,9 +545,14 @@ public class Dna codons.codons[aspos] = new int[] { cdp[0], cdp[1], cdp[2] }; } - aspos++; if (aspos >= codons.aaWidth) - codons.aaWidth = aspos + 1; + { + // update maximum alignment width + // (we can do this without calling checkCodonFrameWidth because it was already done above) + codons.setAaWidth(aspos); + } + // ready for next translated reading frame alignment position (if any) + aspos++; } } if (resSize > 0) @@ -283,8 +566,8 @@ public class Dna // map and trim contigs to ORF region vc = scontigs.length - 1; lastnpos = vismapping.shift(lastnpos); // place npos in context of - // whole dna alignment (rather - // than visible contigs) + // whole dna alignment (rather + // than visible contigs) // incomplete ORF could be broken over one or two visible contig // intervals. while (vc >= 0 && scontigs[vc] > lastnpos) @@ -313,14 +596,11 @@ public class Dna if (scontigs != null) { npos = 0; - // Find sequence position for scontigs positions on the nucleotide - // sequence string we were passed. - for (vc = 0; vc < viscontigs.length; vc += 2) + // map scontigs to actual sequence positions on selection + for (vc = 0; vc < scontigs.length; vc += 2) { scontigs[vc] = selection.findPosition(scontigs[vc]); // not from 1! - npos += viscontigs[vc]; - scontigs[vc + 1] = selection - .findPosition(npos + scontigs[vc + 1]); // exclusive + scontigs[vc + 1] = selection.findPosition(scontigs[vc + 1]); // exclusive if (scontigs[vc + 1] == selection.getEnd()) break; } @@ -331,17 +611,45 @@ public class Dna System.arraycopy(scontigs, 0, t, 0, vc + 2); scontigs = t; } - + /* + * delete intervals in scontigs which are not translated. 1. map skip + * into sequence position intervals 2. truncate existing ranges and add + * new ranges to exclude untranslated regions. if (skip.size()>0) { + * Vector narange = new Vector(); for (vc=0; vc=skipint[0] && iv[0]<=skipint[1]) { if (iv[0]==skipint[0]) { // + * delete beginning of range } else { // truncate range and create new + * one if necessary iv = (int[]) narange.elementAt(vc+1); if (iv[0]<=skipint[1]) { // + * truncate range iv[0] = skipint[1]; } else { + * } } } else if (iv[0]