X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FDna.java;h=7bac46e2405e80683bcbd41db5965597c41cb2b2;hb=33d29753b74b33be28fef39da85fc9fbb256f6c8;hp=5096ae82bf269fd24d797afd08b1970f20c858bc;hpb=6951479b7dc56bd022ce2c370b7f88b5571004fd;p=jalview.git diff --git a/src/jalview/analysis/Dna.java b/src/jalview/analysis/Dna.java index 5096ae8..7bac46e 100644 --- a/src/jalview/analysis/Dna.java +++ b/src/jalview/analysis/Dna.java @@ -1,235 +1,579 @@ package jalview.analysis; +import java.util.Enumeration; import java.util.Hashtable; import java.util.Vector; +import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.Annotation; import jalview.datamodel.ColumnSelection; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.FeatureProperties; +import jalview.datamodel.Mapping; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.schemes.ResidueProperties; import jalview.util.MapList; +import jalview.util.ShiftList; -public class Dna { +public class Dna +{ /** * * @param cdp1 * @param cdp2 - * @return -1 if cdp1 aligns before cdp2, 0 if in the same column or cdp2 is null, +1 if after cdp2 + * @return -1 if cdp1 aligns before cdp2, 0 if in the same column or cdp2 is + * null, +1 if after cdp2 */ - private static int compare_codonpos(int[] cdp1, int[] cdp2) { - if (cdp2==null || (cdp1[0]==cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2])) + private static int compare_codonpos(int[] cdp1, int[] cdp2) + { + if (cdp2 == null + || (cdp1[0] == cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2])) return 0; - if (cdp1[0]mapped protein sequence alignment translation given set of sequences + * 1. id distinct coding regions within selected region for each sequence 2. + * generate peptides based on inframe (or given) translation or (optionally + * and where specified) out of frame translations (annotated appropriately) 3. + * align peptides based on codon alignment + */ /** - * create a new alignment of protein sequences - * by an inframe translation of the provided NA sequences + * id potential products from dna 1. search for distinct products within + * selected region for each selected sequence 2. group by associated DB type. + * 3. return as form for input into above function + */ + /** + * + */ + /** + * create a new alignment of protein sequences by an inframe translation of + * the provided NA sequences + * * @param selection * @param seqstring * @param viscontigs * @param gapCharacter * @param annotations * @param aWidth + * @param dataset destination dataset for translated sequences and mappings * @return */ - public static AlignmentI CdnaTranslate(SequenceI[] selection, String[] seqstring, int viscontigs[], char gapCharacter, - AlignmentAnnotation[] annotations, int aWidth) { - int s, sSize = selection.length; - SequenceI [] newSeq = new SequenceI[sSize]; - int res, resSize; - StringBuffer protein; - String seq; - - int[][] codons = new int[aWidth][]; // stores hash of subsequent positions for each codon start position in alignment + public static AlignmentI CdnaTranslate(SequenceI[] selection, + String[] seqstring, int viscontigs[], char gapCharacter, + AlignmentAnnotation[] annotations, int aWidth, Alignment dataset) + { + return CdnaTranslate(selection, seqstring, null, viscontigs, + gapCharacter, annotations, aWidth, dataset); + } - for (res=0;res=aslen) - aslen=aspos+1; - break; // check the next position for alignment - case 0: - // codon aligns at aspos position. - findpos = false; } } - // codon aligns with all other sequence residues found at aspos - protein.append(aa); - if (codons[aspos]==null) - { - // mark this column as aligning to this aligned reading frame - codons[aspos] = new int[] { cdp[0], cdp[1], cdp[2] }; - } - aspos++; - if (aspos>=aslen) - aslen=aspos+1; } } - if (resSize>0) + } + return false; + } + /** + * generate a set of translated protein products from annotated sequenceI + * @param selection + * @param viscontigs + * @param gapCharacter + * @param dataset destination dataset for translated sequences + * @param annotations + * @param aWidth + * @return + */ + public static AlignmentI CdnaTranslate(SequenceI[] selection, + int viscontigs[], char gapCharacter, Alignment dataset) + { + int alwidth=0; + Vector cdnasqs=new Vector(); + Vector cdnasqi=new Vector(); + Vector cdnaprod=new Vector(); + for (int gd=0; gd0 && scontigs[vc]>nend) + // intersect with pep + Vector mappedrefs = new Vector(); + DBRefEntry[] refs=dna.getDBRef(); + for (int d=0; dnend) - { - vc-=2; - } else { - // correct last interval in list. - scontigs[vc]=nend; - } + mappedrefs.addElement(refs[d]); // add translated protein maps } - if ((vc+2)alwidth) + alwidth = sqstr.length(); + cdnasqs.addElement(sqstr.toString()); + cdnasqi.addElement(dna); + cdnaprod.addElement(intersect); } } - MapList map = new MapList(scontigs, new int[] { 1, resSize },3,1); // TODO: store mapping on newSeq for linked DNA/Protein viewing. } - // register the mapping somehow - // + SequenceI[] cdna = new SequenceI[cdnasqs.size()]; + DBRefEntry[] prods = new DBRefEntry[cdnaprod.size()]; + String[] xons = new String[cdnasqs.size()]; + cdnasqs.copyInto(xons); + cdnaprod.copyInto(prods); + cdnasqi.copyInto(cdna); + return CdnaTranslate(cdna, xons, prods, viscontigs, gapCharacter, null, alwidth, dataset); } - if (aslen==0) - return null; - AlignmentI al = new Alignment(newSeq); - al.padGaps(); // ensure we look aligned. - al.setDataset(null); - + return null; + } - //////////////////////////////// + /** + * translate na alignment annotations onto translated amino acid alignment al + * using codon mapping codons + * + * @param annotations + * @param al + * @param codons + */ + public static void translateAlignedAnnotations( + AlignmentAnnotation[] annotations, AlignmentI al, + AlignedCodonFrame codons) + { + // ////////////////////////////// // Copy annotations across // // Can only do this for columns with consecutive codons, or where // annotation is sequence associated. - - int pos,a,aSize; - if(annotations!=null) + + int pos, a, aSize; + if (annotations != null) { for (int i = 0; i < annotations.length; i++) { // Skip any autogenerated annotation - if (annotations[i].autoCalculated) { + if (annotations[i].autoCalculated) + { continue; } - - aSize = aslen; // aa alignment width. - jalview.datamodel.Annotation[] anots = - (annotations[i].annotations==null) - ? null : - new jalview.datamodel.Annotation[aSize]; - if (anots!=null) + + aSize = codons.getaaWidth(); // aa alignment width. + jalview.datamodel.Annotation[] anots = (annotations[i].annotations == null) ? null + : new jalview.datamodel.Annotation[aSize]; + if (anots != null) { for (a = 0; a < aSize; a++) { // process through codon map. - if (codons[a]!=null && codons[a][0]==(codons[a][2]-2)) + if (codons.codons[a] != null + && codons.codons[a][0] == (codons.codons[a][2] - 2)) { - pos = codons[a][0]; - if (annotations[i].annotations[pos] == null - || annotations[i].annotations[pos] == null) - continue; - - anots[a] = new Annotation(annotations[i].annotations[pos]); + anots[a] = getCodonAnnotation(codons.codons[a], annotations[i].annotations); } } } - jalview.datamodel.AlignmentAnnotation aa - = new jalview.datamodel.AlignmentAnnotation(annotations[i].label, - annotations[i].description, anots); + jalview.datamodel.AlignmentAnnotation aa = new jalview.datamodel.AlignmentAnnotation( + annotations[i].label, annotations[i].description, anots); + aa.graph = annotations[i].graph; + aa.graphGroup = annotations[i].graphGroup; + aa.graphHeight = annotations[i].graphHeight; + if (annotations[i].getThreshold()!=null) + { + aa.setThreshold(new jalview.datamodel.GraphLine(annotations[i].getThreshold())); + } if (annotations[i].hasScore) { aa.setScore(annotations[i].getScore()); } + if (annotations[i].sequenceRef != null) + { + SequenceI aaSeq = codons + .getAaForDnaSeq(annotations[i].sequenceRef); + if (aaSeq != null) + { + // aa.compactAnnotationArray(); // throw away alignment annotation + // positioning + aa.setSequenceRef(aaSeq); + aa.createSequenceMapping(aaSeq, aaSeq.getStart(), true); // rebuild + // mapping + aa.adjustForAlignment(); + aaSeq.addAlignmentAnnotation(aa); + } + + } al.addAnnotation(aa); } } - return al; + } + + private static Annotation getCodonAnnotation(int[] is, Annotation[] annotations) + { + // Have a look at all the codon positions for annotation and put the first + // one found into the translated annotation pos. + for (int p=0; p<3; p++) + { + if (annotations[is[p]]!=null) + { + return new Annotation(annotations[is[p]]); + } + } + return null; + } + + /** + * Translate a na sequence + * + * @param selection sequence displayed under viscontigs visible columns + * @param seqstring ORF read in some global alignment reference frame + * @param viscontigs mapping from global reference frame to visible seqstring ORF read + * @param codons Definition of global ORF alignment reference frame + * @param gapCharacter + * @param newSeq + * @return sequence ready to be added to alignment. + */ + public static SequenceI translateCodingRegion(SequenceI selection, + String seqstring, int[] viscontigs, AlignedCodonFrame codons, + char gapCharacter, DBRefEntry product) + { + ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring + // intervals + int vc, scontigs[] = new int[viscontigs.length]; + int npos = 0; + for (vc = 0; vc < viscontigs.length; vc += 2) + { + vismapping.addShift(npos, viscontigs[vc]); + scontigs[vc] = npos; + npos += viscontigs[vc + 1]; + scontigs[vc + 1] = npos; + } + + StringBuffer protein = new StringBuffer(); + String seq = seqstring.replace('U', 'T'); + char codon[] = new char[3]; + int cdp[] = new int[3], rf = 0, lastnpos = 0, nend; + int aspos = 0; + int resSize = 0; + for (npos = 0, nend = seq.length(); npos < nend; npos++) + { + if (!jalview.util.Comparison.isGap(seq.charAt(npos))) + { + cdp[rf] = npos; // store position + codon[rf++] = seq.charAt(npos); // store base + } + // filled an RF yet ? + if (rf == 3) + { + String aa = ResidueProperties.codonTranslate(new String(codon)); + rf = 0; + if (aa == null) + aa = String.valueOf(gapCharacter); + else + { + if (aa.equals("STOP")) + { + aa = "X"; + } + resSize++; + } + // insert/delete gaps prior to this codon - if necessary + boolean findpos = true; + while (findpos) + { + // first ensure that the codons array is long enough. + codons.checkCodonFrameWidth(aspos); + // now check to see if we place the aa at the current aspos in the + // protein alignment + switch (Dna.compare_codonpos(cdp, codons.codons[aspos])) + { + case -1: + codons.insertAAGap(aspos, gapCharacter); + findpos = false; + break; + case +1: + // this aa appears after the aligned codons at aspos, so prefix it + // with a gap + aa = "" + gapCharacter + aa; + aspos++; + //if (aspos >= codons.aaWidth) + // codons.aaWidth = aspos + 1; + break; // check the next position for alignment + case 0: + // codon aligns at aspos position. + findpos = false; + } + } + // codon aligns with all other sequence residues found at aspos + protein.append(aa); + lastnpos = npos; + if (codons.codons[aspos] == null) + { + // mark this column as aligning to this aligned reading frame + codons.codons[aspos] = new int[] + { cdp[0], cdp[1], cdp[2] }; + } + aspos++; + if (aspos >= codons.aaWidth) + codons.aaWidth = aspos + 1; + } + } + if (resSize > 0) + { + SequenceI newseq = new Sequence(selection.getName(), protein + .toString()); + if (rf != 0) + { + jalview.bin.Cache.log + .debug("trimming contigs for incomplete terminal codon."); + // map and trim contigs to ORF region + vc = scontigs.length - 1; + lastnpos = vismapping.shift(lastnpos); // place npos in context of + // whole dna alignment (rather + // than visible contigs) + // incomplete ORF could be broken over one or two visible contig + // intervals. + while (vc >= 0 && scontigs[vc] > lastnpos) + { + if (vc > 0 && scontigs[vc - 1] > lastnpos) + { + vc -= 2; + } + else + { + // correct last interval in list. + scontigs[vc] = lastnpos; + } + } + + if (vc > 0 && (vc + 1) < scontigs.length) + { + // truncate map list to just vc elements + int t[] = new int[vc + 1]; + System.arraycopy(scontigs, 0, t, 0, vc + 1); + scontigs = t; + } + if (vc <= 0) + scontigs = null; + } + if (scontigs != null) + { + npos = 0; + // Find sequence position for scontigs positions on the nucleotide + // sequence string we were passed. + for (vc = 0; vc < viscontigs.length; vc += 2) + { + scontigs[vc] = selection.findPosition(scontigs[vc]); // not from 1! + npos += viscontigs[vc]; + scontigs[vc + 1] = selection + .findPosition(npos + scontigs[vc + 1]); // exclusive + if (scontigs[vc + 1] == selection.getEnd()) + break; + } + // trim trailing empty intervals. + if ((vc + 2) < scontigs.length) + { + int t[] = new int[vc + 2]; + System.arraycopy(scontigs, 0, t, 0, vc + 2); + scontigs = t; + } + MapList map = new MapList(scontigs, new int[] + { 1, resSize }, 3, 1); + // update newseq as if it was generated as mapping from product + + if (product != null) + { + newseq.setName(product.getSource() + "|" + + product.getAccessionId()); + if (product.getMap() != null) + { + //Mapping mp = product.getMap(); + //newseq.setStart(mp.getPosition(scontigs[0])); + //newseq.setEnd(mp + // .getPosition(scontigs[scontigs.length - 1])); + } + } + transferCodedFeatures(selection, newseq, map, null, null); + SequenceI rseq = newseq.deriveSequence(); // construct a dataset + // sequence for our new + // peptide, regardless. + // store a mapping (this actually stores a mapping between the dataset + // sequences for the two sequences + codons.addMap(selection, rseq, map); + return rseq; + } + } + // register the mapping somehow + // + return null; + } + + /** + * Given a peptide newly translated from a dna sequence, copy over and set any + * features on the peptide from the DNA. If featureTypes is null, all features + * on the dna sequence are searched (rather than just the displayed ones), and + * similarly for featureGroups. + * + * @param dna + * @param pep + * @param map + * @param featureTypes + * hash who's keys are the displayed feature type strings + * @param featureGroups + * hash where keys are feature groups and values are Boolean objects + * indicating if they are displayed. + */ + private static void transferCodedFeatures(SequenceI dna, SequenceI pep, + MapList map, Hashtable featureTypes, Hashtable featureGroups) + { + SequenceFeature[] sf = dna.getDatasetSequence().getSequenceFeatures(); + Boolean fgstate; + jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils + .selectRefs(dna.getDBRef(), + jalview.datamodel.DBRefSource.DNACODINGDBS); + if (dnarefs != null) + { + // intersect with pep + for (int d = 0; d < dnarefs.length; d++) + { + Mapping mp = dnarefs[d].getMap(); + if (mp != null) + { + } + } + } + if (sf != null) + { + for (int f = 0; f < sf.length; f++) + { + fgstate = (featureGroups == null) ? null : ((Boolean) featureGroups + .get(sf[f].featureGroup)); + if ((featureTypes == null || featureTypes.containsKey(sf[f] + .getType())) + && (fgstate == null || fgstate.booleanValue())) + { + if (FeatureProperties.isCodingFeature(null, sf[f].getType())) + { + // if (map.intersectsFrom(sf[f].begin, sf[f].end)) + { + + } + } + } + } + } } }