X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FDna.java;h=c6bcfdad4ddf13c2743e7687a739cf8139ae58f0;hb=cd5b2de469fb4c09242955cb4b74279e2da348d6;hp=960a6db69f058a0bc28395e9113c2137773d66c8;hpb=6173092ff5cb03f039cac674bfc8bc4f969976a5;p=jalview.git diff --git a/src/jalview/analysis/Dna.java b/src/jalview/analysis/Dna.java index 960a6db..c6bcfda 100644 --- a/src/jalview/analysis/Dna.java +++ b/src/jalview/analysis/Dna.java @@ -1,5 +1,6 @@ package jalview.analysis; +import java.util.Enumeration; import java.util.Hashtable; import java.util.Vector; @@ -9,6 +10,7 @@ import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.Annotation; import jalview.datamodel.ColumnSelection; +import jalview.datamodel.DBRefEntry; import jalview.datamodel.FeatureProperties; import jalview.datamodel.Mapping; import jalview.datamodel.Sequence; @@ -34,9 +36,9 @@ public class Dna return 0; if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2]) return -1; // one base in cdp1 precedes the corresponding base in the - // other codon + // other codon return 1; // one base in cdp1 appears after the corresponding base in the - // other codon. + // other codon. } /** @@ -64,27 +66,54 @@ public class Dna * @param gapCharacter * @param annotations * @param aWidth + * @param dataset destination dataset for translated sequences and mappings * @return */ public static AlignmentI CdnaTranslate(SequenceI[] selection, String[] seqstring, int viscontigs[], char gapCharacter, - AlignmentAnnotation[] annotations, int aWidth) + AlignmentAnnotation[] annotations, int aWidth, Alignment dataset) + { + return CdnaTranslate(selection, seqstring, null, viscontigs, + gapCharacter, annotations, aWidth, dataset); + } + + /** + * + * @param selection + * @param seqstring + * @param product - array of DbRefEntry objects from which exon map in seqstring is derived + * @param viscontigs + * @param gapCharacter + * @param annotations + * @param aWidth + * @param dataset + * @return + */ + public static AlignmentI CdnaTranslate(SequenceI[] selection, + String[] seqstring, DBRefEntry[] product, int viscontigs[], + char gapCharacter, AlignmentAnnotation[] annotations, int aWidth, Alignment dataset) { AlignedCodonFrame codons = new AlignedCodonFrame(aWidth); // stores hash of - // subsequent - // positions for - // each codon - // start position - // in alignment + // subsequent + // positions for + // each codon + // start position + // in alignment int s, sSize = selection.length; Vector pepseqs = new Vector(); for (s = 0; s < sSize; s++) { SequenceI newseq = translateCodingRegion(selection[s], seqstring[s], - viscontigs, codons, gapCharacter); + viscontigs, codons, gapCharacter, (product!=null) ? product[s] : null); // possibly anonymous product if (newseq != null) { pepseqs.addElement(newseq); + SequenceI ds = newseq; + while (ds.getDatasetSequence()!=null) + { + ds = ds.getDatasetSequence(); + } + dataset.addSequence(ds); } } if (codons.aaWidth == 0) @@ -93,11 +122,125 @@ public class Dna pepseqs.copyInto(newseqs); AlignmentI al = new Alignment(newseqs); al.padGaps(); // ensure we look aligned. - al.setDataset(null); + al.setDataset(dataset); translateAlignedAnnotations(annotations, al, codons); al.addCodonFrame(codons); return al; } + /** + * fake the collection of DbRefs with associated exon mappings to identify + * if a translation would generate distinct product in the currently selected region. + * @param selection + * @param viscontigs + * @return + */ + public static boolean canTranslate(SequenceI[] selection, int viscontigs[]) + { + for (int gd=0; gdalwidth) + alwidth = sqstr.length(); + cdnasqs.addElement(sqstr.toString()); + cdnasqi.addElement(dna); + cdnaprod.addElement(intersect); + } + } + } + SequenceI[] cdna = new SequenceI[cdnasqs.size()]; + DBRefEntry[] prods = new DBRefEntry[cdnaprod.size()]; + String[] xons = new String[cdnasqs.size()]; + cdnasqs.copyInto(xons); + cdnaprod.copyInto(prods); + cdnasqi.copyInto(cdna); + return CdnaTranslate(cdna, xons, prods, viscontigs, gapCharacter, null, alwidth, dataset); + } + return null; + } /** * translate na alignment annotations onto translated amino acid alignment al @@ -139,18 +282,20 @@ public class Dna if (codons.codons[a] != null && codons.codons[a][0] == (codons.codons[a][2] - 2)) { - pos = codons.codons[a][0]; - if (annotations[i].annotations[pos] == null - || annotations[i].annotations[pos] == null) - continue; - // We just take the annotation in the first base in the codon - anots[a] = new Annotation(annotations[i].annotations[pos]); + anots[a] = getCodonAnnotation(codons.codons[a], annotations[i].annotations); } } } jalview.datamodel.AlignmentAnnotation aa = new jalview.datamodel.AlignmentAnnotation( annotations[i].label, annotations[i].description, anots); + aa.graph = annotations[i].graph; + aa.graphGroup = annotations[i].graphGroup; + aa.graphHeight = annotations[i].graphHeight; + if (annotations[i].getThreshold()!=null) + { + aa.setThreshold(new jalview.datamodel.GraphLine(annotations[i].getThreshold())); + } if (annotations[i].hasScore) { aa.setScore(annotations[i].getScore()); @@ -165,7 +310,7 @@ public class Dna // positioning aa.setSequenceRef(aaSeq); aa.createSequenceMapping(aaSeq, aaSeq.getStart(), true); // rebuild - // mapping + // mapping aa.adjustForAlignment(); aaSeq.addAlignmentAnnotation(aa); } @@ -176,31 +321,51 @@ public class Dna } } + private static Annotation getCodonAnnotation(int[] is, Annotation[] annotations) + { + // Have a look at all the codon positions for annotation and put the first + // one found into the translated annotation pos. + for (int p=0; p<3; p++) + { + if (annotations[is[p]]!=null) + { + return new Annotation(annotations[is[p]]); + } + } + return null; + } + /** * Translate a na sequence * - * @param selection - * @param seqstring - * @param viscontigs - * @param codons + * @param selection sequence displayed under viscontigs visible columns + * @param seqstring ORF read in some global alignment reference frame + * @param viscontigs mapping from global reference frame to visible seqstring ORF read + * @param codons Definition of global ORF alignment reference frame * @param gapCharacter * @param newSeq * @return sequence ready to be added to alignment. */ public static SequenceI translateCodingRegion(SequenceI selection, String seqstring, int[] viscontigs, AlignedCodonFrame codons, - char gapCharacter) + char gapCharacter, DBRefEntry product) { + Vector skip=new Vector(); + int skipint[]=null; ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring - // intervals + // intervals int vc, scontigs[] = new int[viscontigs.length]; int npos = 0; for (vc = 0; vc < viscontigs.length; vc += 2) { + if (vc==0) { vismapping.addShift(npos, viscontigs[vc]); - scontigs[vc] = npos; - npos += viscontigs[vc + 1]; - scontigs[vc + 1] = npos; + } else { + // hidden region + vismapping.addShift(npos, viscontigs[vc]-viscontigs[vc-1]+1); + } + scontigs[vc] = viscontigs[vc]; + scontigs[vc + 1] = viscontigs[vc+1]; } StringBuffer protein = new StringBuffer(); @@ -222,9 +387,40 @@ public class Dna String aa = ResidueProperties.codonTranslate(new String(codon)); rf = 0; if (aa == null) - aa = String.valueOf(gapCharacter); - else { + aa = String.valueOf(gapCharacter); + if (skipint==null) + { + skipint = new int[] { cdp[0],cdp[2] }; + } + skipint[1] = cdp[2]; + } else { + if (skipint!=null) + { + // edit scontigs + skipint[0] = vismapping.shift(skipint[0]); + skipint[1] = vismapping.shift(skipint[1]); + for (vc=0; vc= codons.aaWidth) - codons.aaWidth = aspos + 1; + //if (aspos >= codons.aaWidth) + // codons.aaWidth = aspos + 1; break; // check the next position for alignment case 0: // codon aligns at aspos position. @@ -269,7 +465,9 @@ public class Dna } aspos++; if (aspos >= codons.aaWidth) + { codons.aaWidth = aspos + 1; + } } } if (resSize > 0) @@ -283,8 +481,8 @@ public class Dna // map and trim contigs to ORF region vc = scontigs.length - 1; lastnpos = vismapping.shift(lastnpos); // place npos in context of - // whole dna alignment (rather - // than visible contigs) + // whole dna alignment (rather + // than visible contigs) // incomplete ORF could be broken over one or two visible contig // intervals. while (vc >= 0 && scontigs[vc] > lastnpos) @@ -313,14 +511,12 @@ public class Dna if (scontigs != null) { npos = 0; - // Find sequence position for scontigs positions on the nucleotide - // sequence string we were passed. - for (vc = 0; vc < viscontigs.length; vc += 2) + // map scontigs to actual sequence positions on selection + for (vc = 0; vc < scontigs.length; vc += 2) { scontigs[vc] = selection.findPosition(scontigs[vc]); // not from 1! - npos += viscontigs[vc]; scontigs[vc + 1] = selection - .findPosition(npos + scontigs[vc + 1]); // exclusive + .findPosition(scontigs[vc + 1]); // exclusive if (scontigs[vc + 1] == selection.getEnd()) break; } @@ -331,17 +527,72 @@ public class Dna System.arraycopy(scontigs, 0, t, 0, vc + 2); scontigs = t; } - + /* + * delete intervals in scontigs which are not translated. + * 1. map skip into sequence position intervals + * 2. truncate existing ranges and add new ranges to exclude untranslated regions. + if (skip.size()>0) + { + Vector narange = new Vector(); + for (vc=0; vc=skipint[0] && iv[0]<=skipint[1]) + { + if (iv[0]==skipint[0]) + { + // delete beginning of range + } else { + // truncate range and create new one if necessary + iv = (int[]) narange.elementAt(vc+1); + if (iv[0]<=skipint[1]) + { + // truncate range + iv[0] = skipint[1]; + } else { + + } + } + } else + if (iv[0]