From 330436f820ff05bff98de5528a01609d37955274 Mon Sep 17 00:00:00 2001 From: jprocter Date: Fri, 13 Jul 2007 14:59:57 +0000 Subject: [PATCH] bugfixes, partially working code for translation using existing translation associated with aligned sequences --- src/jalview/analysis/Dna.java | 237 +++++++++++++++++++++++++++++++++++------ 1 file changed, 204 insertions(+), 33 deletions(-) diff --git a/src/jalview/analysis/Dna.java b/src/jalview/analysis/Dna.java index 960a6db..7bac46e 100644 --- a/src/jalview/analysis/Dna.java +++ b/src/jalview/analysis/Dna.java @@ -1,5 +1,6 @@ package jalview.analysis; +import java.util.Enumeration; import java.util.Hashtable; import java.util.Vector; @@ -9,6 +10,7 @@ import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.Annotation; import jalview.datamodel.ColumnSelection; +import jalview.datamodel.DBRefEntry; import jalview.datamodel.FeatureProperties; import jalview.datamodel.Mapping; import jalview.datamodel.Sequence; @@ -34,9 +36,9 @@ public class Dna return 0; if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2]) return -1; // one base in cdp1 precedes the corresponding base in the - // other codon + // other codon return 1; // one base in cdp1 appears after the corresponding base in the - // other codon. + // other codon. } /** @@ -64,27 +66,54 @@ public class Dna * @param gapCharacter * @param annotations * @param aWidth + * @param dataset destination dataset for translated sequences and mappings * @return */ public static AlignmentI CdnaTranslate(SequenceI[] selection, String[] seqstring, int viscontigs[], char gapCharacter, - AlignmentAnnotation[] annotations, int aWidth) + AlignmentAnnotation[] annotations, int aWidth, Alignment dataset) + { + return CdnaTranslate(selection, seqstring, null, viscontigs, + gapCharacter, annotations, aWidth, dataset); + } + + /** + * + * @param selection + * @param seqstring + * @param product - array of DbRefEntry objects from which exon map in seqstring is derived + * @param viscontigs + * @param gapCharacter + * @param annotations + * @param aWidth + * @param dataset + * @return + */ + public static AlignmentI CdnaTranslate(SequenceI[] selection, + String[] seqstring, DBRefEntry[] product, int viscontigs[], + char gapCharacter, AlignmentAnnotation[] annotations, int aWidth, Alignment dataset) { AlignedCodonFrame codons = new AlignedCodonFrame(aWidth); // stores hash of - // subsequent - // positions for - // each codon - // start position - // in alignment + // subsequent + // positions for + // each codon + // start position + // in alignment int s, sSize = selection.length; Vector pepseqs = new Vector(); for (s = 0; s < sSize; s++) { SequenceI newseq = translateCodingRegion(selection[s], seqstring[s], - viscontigs, codons, gapCharacter); + viscontigs, codons, gapCharacter, (product!=null) ? product[s] : null); // possibly anonymous product if (newseq != null) { pepseqs.addElement(newseq); + SequenceI ds = newseq; + while (ds.getDatasetSequence()!=null) + { + ds = ds.getDatasetSequence(); + } + dataset.addSequence(ds); } } if (codons.aaWidth == 0) @@ -93,11 +122,125 @@ public class Dna pepseqs.copyInto(newseqs); AlignmentI al = new Alignment(newseqs); al.padGaps(); // ensure we look aligned. - al.setDataset(null); + al.setDataset(dataset); translateAlignedAnnotations(annotations, al, codons); al.addCodonFrame(codons); return al; } + /** + * fake the collection of DbRefs with associated exon mappings to identify + * if a translation would generate distinct product in the currently selected region. + * @param selection + * @param viscontigs + * @return + */ + public static boolean canTranslate(SequenceI[] selection, int viscontigs[]) + { + for (int gd=0; gdalwidth) + alwidth = sqstr.length(); + cdnasqs.addElement(sqstr.toString()); + cdnasqi.addElement(dna); + cdnaprod.addElement(intersect); + } + } + } + SequenceI[] cdna = new SequenceI[cdnasqs.size()]; + DBRefEntry[] prods = new DBRefEntry[cdnaprod.size()]; + String[] xons = new String[cdnasqs.size()]; + cdnasqs.copyInto(xons); + cdnaprod.copyInto(prods); + cdnasqi.copyInto(cdna); + return CdnaTranslate(cdna, xons, prods, viscontigs, gapCharacter, null, alwidth, dataset); + } + return null; + } /** * translate na alignment annotations onto translated amino acid alignment al @@ -139,18 +282,20 @@ public class Dna if (codons.codons[a] != null && codons.codons[a][0] == (codons.codons[a][2] - 2)) { - pos = codons.codons[a][0]; - if (annotations[i].annotations[pos] == null - || annotations[i].annotations[pos] == null) - continue; - // We just take the annotation in the first base in the codon - anots[a] = new Annotation(annotations[i].annotations[pos]); + anots[a] = getCodonAnnotation(codons.codons[a], annotations[i].annotations); } } } jalview.datamodel.AlignmentAnnotation aa = new jalview.datamodel.AlignmentAnnotation( annotations[i].label, annotations[i].description, anots); + aa.graph = annotations[i].graph; + aa.graphGroup = annotations[i].graphGroup; + aa.graphHeight = annotations[i].graphHeight; + if (annotations[i].getThreshold()!=null) + { + aa.setThreshold(new jalview.datamodel.GraphLine(annotations[i].getThreshold())); + } if (annotations[i].hasScore) { aa.setScore(annotations[i].getScore()); @@ -165,7 +310,7 @@ public class Dna // positioning aa.setSequenceRef(aaSeq); aa.createSequenceMapping(aaSeq, aaSeq.getStart(), true); // rebuild - // mapping + // mapping aa.adjustForAlignment(); aaSeq.addAlignmentAnnotation(aa); } @@ -176,23 +321,37 @@ public class Dna } } + private static Annotation getCodonAnnotation(int[] is, Annotation[] annotations) + { + // Have a look at all the codon positions for annotation and put the first + // one found into the translated annotation pos. + for (int p=0; p<3; p++) + { + if (annotations[is[p]]!=null) + { + return new Annotation(annotations[is[p]]); + } + } + return null; + } + /** * Translate a na sequence * - * @param selection - * @param seqstring - * @param viscontigs - * @param codons + * @param selection sequence displayed under viscontigs visible columns + * @param seqstring ORF read in some global alignment reference frame + * @param viscontigs mapping from global reference frame to visible seqstring ORF read + * @param codons Definition of global ORF alignment reference frame * @param gapCharacter * @param newSeq * @return sequence ready to be added to alignment. */ public static SequenceI translateCodingRegion(SequenceI selection, String seqstring, int[] viscontigs, AlignedCodonFrame codons, - char gapCharacter) + char gapCharacter, DBRefEntry product) { ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring - // intervals + // intervals int vc, scontigs[] = new int[viscontigs.length]; int npos = 0; for (vc = 0; vc < viscontigs.length; vc += 2) @@ -250,8 +409,8 @@ public class Dna // with a gap aa = "" + gapCharacter + aa; aspos++; - if (aspos >= codons.aaWidth) - codons.aaWidth = aspos + 1; + //if (aspos >= codons.aaWidth) + // codons.aaWidth = aspos + 1; break; // check the next position for alignment case 0: // codon aligns at aspos position. @@ -283,8 +442,8 @@ public class Dna // map and trim contigs to ORF region vc = scontigs.length - 1; lastnpos = vismapping.shift(lastnpos); // place npos in context of - // whole dna alignment (rather - // than visible contigs) + // whole dna alignment (rather + // than visible contigs) // incomplete ORF could be broken over one or two visible contig // intervals. while (vc >= 0 && scontigs[vc] > lastnpos) @@ -331,17 +490,29 @@ public class Dna System.arraycopy(scontigs, 0, t, 0, vc + 2); scontigs = t; } - MapList map = new MapList(scontigs, new int[] - { 1, resSize }, 3, 1); // TODO: store mapping on newSeq for linked - // DNA/Protein viewing. + { 1, resSize }, 3, 1); + // update newseq as if it was generated as mapping from product + + if (product != null) + { + newseq.setName(product.getSource() + "|" + + product.getAccessionId()); + if (product.getMap() != null) + { + //Mapping mp = product.getMap(); + //newseq.setStart(mp.getPosition(scontigs[0])); + //newseq.setEnd(mp + // .getPosition(scontigs[scontigs.length - 1])); + } + } transferCodedFeatures(selection, newseq, map, null, null); SequenceI rseq = newseq.deriveSequence(); // construct a dataset - // sequence for our new - // peptide, regardless. + // sequence for our new + // peptide, regardless. // store a mapping (this actually stores a mapping between the dataset // sequences for the two sequences - codons.addMap(selection, newseq, map); + codons.addMap(selection, rseq, map); return rseq; } } -- 1.7.10.2