X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=inline;f=src%2Fjalview%2Fanalysis%2FDna.java;h=49c37df7f2d810881c0bc55e7a3a6b21c2f4f8ea;hb=29cfe164a6c90fbaaf7ef7ed1f2ad4ef50940bfc;hp=48d03b3a35f079d0b971ebde77c3294644cecf79;hpb=f7a97ef025c498c19fdcc37d9fc0506d511d18dd;p=jalview.git
diff --git a/src/jalview/analysis/Dna.java b/src/jalview/analysis/Dna.java
index 48d03b3..49c37df 100644
--- a/src/jalview/analysis/Dna.java
+++ b/src/jalview/analysis/Dna.java
@@ -1,236 +1,716 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
+ * Copyright (C) 2011 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with Jalview. If not, see .
+ */
package jalview.analysis;
+import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Vector;
+import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.Annotation;
import jalview.datamodel.ColumnSelection;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.FeatureProperties;
+import jalview.datamodel.Mapping;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.schemes.ResidueProperties;
import jalview.util.MapList;
+import jalview.util.ShiftList;
-public class Dna {
+public class Dna
+{
/**
*
* @param cdp1
* @param cdp2
- * @return -1 if cdp1 aligns before cdp2, 0 if in the same column or cdp2 is null, +1 if after cdp2
+ * @return -1 if cdp1 aligns before cdp2, 0 if in the same column or cdp2 is
+ * null, +1 if after cdp2
*/
- private static int compare_codonpos(int[] cdp1, int[] cdp2) {
- if (cdp2==null || (cdp1[0]==cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2]))
+ private static int compare_codonpos(int[] cdp1, int[] cdp2)
+ {
+ if (cdp2 == null
+ || (cdp1[0] == cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2]))
return 0;
- if (cdp1[0]mapped protein sequence alignment translation given set of sequences
+ * 1. id distinct coding regions within selected region for each sequence 2.
+ * generate peptides based on inframe (or given) translation or (optionally
+ * and where specified) out of frame translations (annotated appropriately) 3.
+ * align peptides based on codon alignment
+ */
/**
- * create a new alignment of protein sequences
- * by an inframe translation of the provided NA sequences
+ * id potential products from dna 1. search for distinct products within
+ * selected region for each selected sequence 2. group by associated DB type.
+ * 3. return as form for input into above function
+ */
+ /**
+ *
+ */
+ /**
+ * create a new alignment of protein sequences by an inframe translation of
+ * the provided NA sequences
+ *
* @param selection
* @param seqstring
* @param viscontigs
* @param gapCharacter
* @param annotations
* @param aWidth
+ * @param dataset
+ * destination dataset for translated sequences and mappings
* @return
*/
- public static AlignmentI CdnaTranslate(SequenceI[] selection, String[] seqstring, int viscontigs[], char gapCharacter,
- AlignmentAnnotation[] annotations, int aWidth) {
- int s, sSize = selection.length;
- SequenceI [] newSeq = new SequenceI[sSize];
- int res, resSize;
- StringBuffer protein;
- String seq;
-
- int[][] codons = new int[aWidth][]; // stores hash of subsequent positions for each codon start position in alignment
+ public static AlignmentI CdnaTranslate(SequenceI[] selection,
+ String[] seqstring, int viscontigs[], char gapCharacter,
+ AlignmentAnnotation[] annotations, int aWidth, Alignment dataset)
+ {
+ return CdnaTranslate(selection, seqstring, null, viscontigs,
+ gapCharacter, annotations, aWidth, dataset);
+ }
- for (res=0;res=aslen)
- aslen=aspos+1;
- break; // check the next position for alignment
- case 0:
- // codon aligns at aspos position.
- findpos = false;
}
}
- // codon aligns with all other sequence residues found at aspos
- protein.append(aa);
- if (codons[aspos]==null)
- {
- // mark this column as aligning to this aligned reading frame
- codons[aspos] = new int[] { cdp[0], cdp[1], cdp[2] };
- }
- aspos++;
- if (aspos>=aslen)
- aslen=aspos+1;
}
}
- if (resSize>0)
+ }
+ return false;
+ }
+
+ /**
+ * generate a set of translated protein products from annotated sequenceI
+ *
+ * @param selection
+ * @param viscontigs
+ * @param gapCharacter
+ * @param dataset
+ * destination dataset for translated sequences
+ * @param annotations
+ * @param aWidth
+ * @return
+ */
+ public static AlignmentI CdnaTranslate(SequenceI[] selection,
+ int viscontigs[], char gapCharacter, Alignment dataset)
+ {
+ int alwidth = 0;
+ Vector cdnasqs = new Vector();
+ Vector cdnasqi = new Vector();
+ Vector cdnaprod = new Vector();
+ for (int gd = 0; gd < selection.length; gd++)
+ {
+ SequenceI dna = selection[gd];
+ jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils
+ .selectRefs(dna.getDBRef(),
+ jalview.datamodel.DBRefSource.DNACODINGDBS);
+ if (dnarefs != null)
{
- newSeq[s] = new Sequence(selection[s].getName(),
- protein.toString());
- if (rf!=0)
- {
- jalview.bin.Cache.log.debug("trimming contigs for incomplete terminal codon.");
- // trim contigs
- vc=scontigs.length-1;
- nend-=rf;
- // incomplete ORF could be broken over one or two visible contig intervals.
- while (vc>0 && scontigs[vc]>nend)
- {
- if (scontigs[vc-1]>nend)
- {
- vc-=2;
- } else {
- // correct last interval in list.
- scontigs[vc]=nend;
- }
+ // intersect with pep
+ Vector mappedrefs = new Vector();
+ DBRefEntry[] refs = dna.getDBRef();
+ for (int d = 0; d < refs.length; d++)
+ {
+ if (refs[d].getMap() != null && refs[d].getMap().getMap() != null
+ && refs[d].getMap().getMap().getFromRatio() == 3
+ && refs[d].getMap().getMap().getToRatio() == 1)
+ {
+ mappedrefs.addElement(refs[d]); // add translated protein maps
}
- if ((vc+2) alwidth)
+ alwidth = sqstr.length();
+ cdnasqs.addElement(sqstr.toString());
+ cdnasqi.addElement(dna);
+ cdnaprod.addElement(intersect);
}
}
- MapList map = new MapList(scontigs, new int[] { 1, resSize },3,1); // TODO: store mapping on newSeq for linked DNA/Protein viewing.
}
- // register the mapping somehow
- //
+ SequenceI[] cdna = new SequenceI[cdnasqs.size()];
+ DBRefEntry[] prods = new DBRefEntry[cdnaprod.size()];
+ String[] xons = new String[cdnasqs.size()];
+ cdnasqs.copyInto(xons);
+ cdnaprod.copyInto(prods);
+ cdnasqi.copyInto(cdna);
+ return CdnaTranslate(cdna, xons, prods, viscontigs, gapCharacter,
+ null, alwidth, dataset);
}
- if (aslen==0)
- return null;
- AlignmentI al = new Alignment(newSeq);
- al.padGaps(); // ensure we look aligned.
- al.setDataset(null);
-
+ return null;
+ }
- ////////////////////////////////
+ /**
+ * translate na alignment annotations onto translated amino acid alignment al
+ * using codon mapping codons
+ *
+ * @param annotations
+ * @param al
+ * @param codons
+ */
+ public static void translateAlignedAnnotations(
+ AlignmentAnnotation[] annotations, AlignmentI al,
+ AlignedCodonFrame codons)
+ {
+ // //////////////////////////////
// Copy annotations across
//
// Can only do this for columns with consecutive codons, or where
// annotation is sequence associated.
-
- int pos,a,aSize;
- if(annotations!=null)
+
+ int pos, a, aSize;
+ if (annotations != null)
{
for (int i = 0; i < annotations.length; i++)
{
// Skip any autogenerated annotation
- if (annotations[i].autoCalculated) {
+ if (annotations[i].autoCalculated)
+ {
continue;
}
-
- aSize = aslen; // aa alignment width.
- jalview.datamodel.Annotation[] anots =
- (annotations[i].annotations==null)
- ? null :
- new jalview.datamodel.Annotation[aSize];
- if (anots!=null)
+
+ aSize = codons.getaaWidth(); // aa alignment width.
+ jalview.datamodel.Annotation[] anots = (annotations[i].annotations == null) ? null
+ : new jalview.datamodel.Annotation[aSize];
+ if (anots != null)
{
for (a = 0; a < aSize; a++)
{
// process through codon map.
- if (codons[a]!=null && codons[a][0]==(codons[a][2]-2))
+ if (codons.codons[a] != null
+ && codons.codons[a][0] == (codons.codons[a][2] - 2))
{
- pos = codons[a][0];
- if (annotations[i].annotations[pos] == null
- || annotations[i].annotations[pos] == null)
- continue;
-
- anots[a] = new Annotation(annotations[i].annotations[pos]);
+ anots[a] = getCodonAnnotation(codons.codons[a],
+ annotations[i].annotations);
}
}
}
- jalview.datamodel.AlignmentAnnotation aa
- = new jalview.datamodel.AlignmentAnnotation(annotations[i].label,
- annotations[i].description, anots);
+ jalview.datamodel.AlignmentAnnotation aa = new jalview.datamodel.AlignmentAnnotation(
+ annotations[i].label, annotations[i].description, anots);
+ aa.graph = annotations[i].graph;
+ aa.graphGroup = annotations[i].graphGroup;
+ aa.graphHeight = annotations[i].graphHeight;
+ if (annotations[i].getThreshold() != null)
+ {
+ aa.setThreshold(new jalview.datamodel.GraphLine(annotations[i]
+ .getThreshold()));
+ }
if (annotations[i].hasScore)
{
aa.setScore(annotations[i].getScore());
}
+ if (annotations[i].sequenceRef != null)
+ {
+ SequenceI aaSeq = codons
+ .getAaForDnaSeq(annotations[i].sequenceRef);
+ if (aaSeq != null)
+ {
+ // aa.compactAnnotationArray(); // throw away alignment annotation
+ // positioning
+ aa.setSequenceRef(aaSeq);
+ aa.createSequenceMapping(aaSeq, aaSeq.getStart(), true); // rebuild
+ // mapping
+ aa.adjustForAlignment();
+ aaSeq.addAlignmentAnnotation(aa);
+ }
+
+ }
al.addAnnotation(aa);
}
}
- return al;
+ }
+
+ private static Annotation getCodonAnnotation(int[] is,
+ Annotation[] annotations)
+ {
+ // Have a look at all the codon positions for annotation and put the first
+ // one found into the translated annotation pos.
+ int contrib = 0;
+ Annotation annot = null;
+ for (int p = 0; p < 3; p++)
+ {
+ if (annotations[is[p]] != null)
+ {
+ if (annot == null)
+ {
+ annot = new Annotation(annotations[is[p]]);
+ contrib = 1;
+ }
+ else
+ {
+ // merge with last
+ Annotation cpy = new Annotation(annotations[is[p]]);
+ if (annot.colour == null)
+ {
+ annot.colour = cpy.colour;
+ }
+ if (annot.description == null || annot.description.length() == 0)
+ {
+ annot.description = cpy.description;
+ }
+ if (annot.displayCharacter == null)
+ {
+ annot.displayCharacter = cpy.displayCharacter;
+ }
+ if (annot.secondaryStructure == 0)
+ {
+ annot.secondaryStructure = cpy.secondaryStructure;
+ }
+ annot.value += cpy.value;
+ contrib++;
+ }
+ }
+ }
+ if (contrib > 1)
+ {
+ annot.value /= (float) contrib;
+ }
+ return annot;
+ }
+
+ /**
+ * Translate a na sequence
+ *
+ * @param selection
+ * sequence displayed under viscontigs visible columns
+ * @param seqstring
+ * ORF read in some global alignment reference frame
+ * @param viscontigs
+ * mapping from global reference frame to visible seqstring ORF read
+ * @param codons
+ * Definition of global ORF alignment reference frame
+ * @param gapCharacter
+ * @param newSeq
+ * @return sequence ready to be added to alignment.
+ */
+ public static SequenceI translateCodingRegion(SequenceI selection,
+ String seqstring, int[] viscontigs, AlignedCodonFrame codons,
+ char gapCharacter, DBRefEntry product)
+ {
+ Vector skip = new Vector();
+ int skipint[] = null;
+ ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring
+ // intervals
+ int vc, scontigs[] = new int[viscontigs.length];
+ int npos = 0;
+ for (vc = 0; vc < viscontigs.length; vc += 2)
+ {
+ if (vc == 0)
+ {
+ vismapping.addShift(npos, viscontigs[vc]);
+ }
+ else
+ {
+ // hidden region
+ vismapping.addShift(npos, viscontigs[vc] - viscontigs[vc - 1] + 1);
+ }
+ scontigs[vc] = viscontigs[vc];
+ scontigs[vc + 1] = viscontigs[vc + 1];
+ }
+
+ StringBuffer protein = new StringBuffer();
+ String seq = seqstring.replace('U', 'T');
+ char codon[] = new char[3];
+ int cdp[] = new int[3], rf = 0, lastnpos = 0, nend;
+ int aspos = 0;
+ int resSize = 0;
+ for (npos = 0, nend = seq.length(); npos < nend; npos++)
+ {
+ if (!jalview.util.Comparison.isGap(seq.charAt(npos)))
+ {
+ cdp[rf] = npos; // store position
+ codon[rf++] = seq.charAt(npos); // store base
+ }
+ // filled an RF yet ?
+ if (rf == 3)
+ {
+ String aa = ResidueProperties.codonTranslate(new String(codon));
+ rf = 0;
+ if (aa == null)
+ {
+ aa = String.valueOf(gapCharacter);
+ if (skipint == null)
+ {
+ skipint = new int[]
+ { cdp[0], cdp[2] };
+ }
+ skipint[1] = cdp[2];
+ }
+ else
+ {
+ if (skipint != null)
+ {
+ // edit scontigs
+ skipint[0] = vismapping.shift(skipint[0]);
+ skipint[1] = vismapping.shift(skipint[1]);
+ for (vc = 0; vc < scontigs.length; vc += 2)
+ {
+ if (scontigs[vc + 1] < skipint[0])
+ {
+ continue;
+ }
+ if (scontigs[vc] <= skipint[0])
+ {
+ if (skipint[0] == scontigs[vc])
+ {
+
+ }
+ else
+ {
+ int[] t = new int[scontigs.length + 2];
+ System.arraycopy(scontigs, 0, t, 0, vc - 1);
+ // scontigs[vc]; //
+ }
+ }
+ }
+ skip.addElement(skipint);
+ skipint = null;
+ }
+ if (aa.equals("STOP"))
+ {
+ aa = "X";
+ }
+ resSize++;
+ }
+ // insert/delete gaps prior to this codon - if necessary
+ boolean findpos = true;
+ while (findpos)
+ {
+ // first ensure that the codons array is long enough.
+ codons.checkCodonFrameWidth(aspos);
+ // now check to see if we place the aa at the current aspos in the
+ // protein alignment
+ switch (Dna.compare_codonpos(cdp, codons.codons[aspos]))
+ {
+ case -1:
+ codons.insertAAGap(aspos, gapCharacter);
+ findpos = false;
+ break;
+ case +1:
+ // this aa appears after the aligned codons at aspos, so prefix it
+ // with a gap
+ aa = "" + gapCharacter + aa;
+ aspos++;
+ // if (aspos >= codons.aaWidth)
+ // codons.aaWidth = aspos + 1;
+ break; // check the next position for alignment
+ case 0:
+ // codon aligns at aspos position.
+ findpos = false;
+ }
+ }
+ // codon aligns with all other sequence residues found at aspos
+ protein.append(aa);
+ lastnpos = npos;
+ if (codons.codons[aspos] == null)
+ {
+ // mark this column as aligning to this aligned reading frame
+ codons.codons[aspos] = new int[]
+ { cdp[0], cdp[1], cdp[2] };
+ }
+ if (aspos >= codons.aaWidth)
+ {
+ // update maximum alignment width
+ // (we can do this without calling checkCodonFrameWidth because it was
+ // already done above)
+ codons.setAaWidth(aspos);
+ }
+ // ready for next translated reading frame alignment position (if any)
+ aspos++;
+ }
+ }
+ if (resSize > 0)
+ {
+ SequenceI newseq = new Sequence(selection.getName(),
+ protein.toString());
+ if (rf != 0)
+ {
+ jalview.bin.Cache.log
+ .debug("trimming contigs for incomplete terminal codon.");
+ // map and trim contigs to ORF region
+ vc = scontigs.length - 1;
+ lastnpos = vismapping.shift(lastnpos); // place npos in context of
+ // whole dna alignment (rather
+ // than visible contigs)
+ // incomplete ORF could be broken over one or two visible contig
+ // intervals.
+ while (vc >= 0 && scontigs[vc] > lastnpos)
+ {
+ if (vc > 0 && scontigs[vc - 1] > lastnpos)
+ {
+ vc -= 2;
+ }
+ else
+ {
+ // correct last interval in list.
+ scontigs[vc] = lastnpos;
+ }
+ }
+
+ if (vc > 0 && (vc + 1) < scontigs.length)
+ {
+ // truncate map list to just vc elements
+ int t[] = new int[vc + 1];
+ System.arraycopy(scontigs, 0, t, 0, vc + 1);
+ scontigs = t;
+ }
+ if (vc <= 0)
+ scontigs = null;
+ }
+ if (scontigs != null)
+ {
+ npos = 0;
+ // map scontigs to actual sequence positions on selection
+ for (vc = 0; vc < scontigs.length; vc += 2)
+ {
+ scontigs[vc] = selection.findPosition(scontigs[vc]); // not from 1!
+ scontigs[vc + 1] = selection.findPosition(scontigs[vc + 1]); // exclusive
+ if (scontigs[vc + 1] == selection.getEnd())
+ break;
+ }
+ // trim trailing empty intervals.
+ if ((vc + 2) < scontigs.length)
+ {
+ int t[] = new int[vc + 2];
+ System.arraycopy(scontigs, 0, t, 0, vc + 2);
+ scontigs = t;
+ }
+ /*
+ * delete intervals in scontigs which are not translated. 1. map skip
+ * into sequence position intervals 2. truncate existing ranges and add
+ * new ranges to exclude untranslated regions. if (skip.size()>0) {
+ * Vector narange = new Vector(); for (vc=0; vc=skipint[0] &&
+ * iv[0]<=skipint[1]) { if (iv[0]==skipint[0]) { // delete beginning of
+ * range } else { // truncate range and create new one if necessary iv =
+ * (int[]) narange.elementAt(vc+1); if (iv[0]<=skipint[1]) { // truncate
+ * range iv[0] = skipint[1]; } else { } } } else if (iv[0]