X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FDna.java;h=49c37df7f2d810881c0bc55e7a3a6b21c2f4f8ea;hb=a45774ee31d9f35d4eff46d54d7deab719afb092;hp=960a6db69f058a0bc28395e9113c2137773d66c8;hpb=6173092ff5cb03f039cac674bfc8bc4f969976a5;p=jalview.git
diff --git a/src/jalview/analysis/Dna.java b/src/jalview/analysis/Dna.java
index 960a6db..49c37df 100644
--- a/src/jalview/analysis/Dna.java
+++ b/src/jalview/analysis/Dna.java
@@ -1,5 +1,23 @@
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
+ * Copyright (C) 2011 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ */
package jalview.analysis;
+import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Vector;
@@ -9,6 +27,7 @@ import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.Annotation;
import jalview.datamodel.ColumnSelection;
+import jalview.datamodel.DBRefEntry;
import jalview.datamodel.FeatureProperties;
import jalview.datamodel.Mapping;
import jalview.datamodel.Sequence;
@@ -34,9 +53,9 @@ public class Dna
return 0;
if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2])
return -1; // one base in cdp1 precedes the corresponding base in the
- // other codon
+ // other codon
return 1; // one base in cdp1 appears after the corresponding base in the
- // other codon.
+ // other codon.
}
/**
@@ -64,27 +83,60 @@ public class Dna
* @param gapCharacter
* @param annotations
* @param aWidth
+ * @param dataset
+ * destination dataset for translated sequences and mappings
* @return
*/
public static AlignmentI CdnaTranslate(SequenceI[] selection,
String[] seqstring, int viscontigs[], char gapCharacter,
- AlignmentAnnotation[] annotations, int aWidth)
+ AlignmentAnnotation[] annotations, int aWidth, Alignment dataset)
+ {
+ return CdnaTranslate(selection, seqstring, null, viscontigs,
+ gapCharacter, annotations, aWidth, dataset);
+ }
+
+ /**
+ *
+ * @param selection
+ * @param seqstring
+ * @param product
+ * - array of DBRefEntry objects from which exon map in seqstring is
+ * derived
+ * @param viscontigs
+ * @param gapCharacter
+ * @param annotations
+ * @param aWidth
+ * @param dataset
+ * @return
+ */
+ public static AlignmentI CdnaTranslate(SequenceI[] selection,
+ String[] seqstring, DBRefEntry[] product, int viscontigs[],
+ char gapCharacter, AlignmentAnnotation[] annotations, int aWidth,
+ Alignment dataset)
{
AlignedCodonFrame codons = new AlignedCodonFrame(aWidth); // stores hash of
- // subsequent
- // positions for
- // each codon
- // start position
- // in alignment
+ // subsequent
+ // positions for
+ // each codon
+ // start position
+ // in alignment
int s, sSize = selection.length;
Vector pepseqs = new Vector();
for (s = 0; s < sSize; s++)
{
SequenceI newseq = translateCodingRegion(selection[s], seqstring[s],
- viscontigs, codons, gapCharacter);
+ viscontigs, codons, gapCharacter,
+ (product != null) ? product[s] : null); // possibly anonymous
+ // product
if (newseq != null)
{
pepseqs.addElement(newseq);
+ SequenceI ds = newseq;
+ while (ds.getDatasetSequence() != null)
+ {
+ ds = ds.getDatasetSequence();
+ }
+ dataset.addSequence(ds);
}
}
if (codons.aaWidth == 0)
@@ -93,13 +145,139 @@ public class Dna
pepseqs.copyInto(newseqs);
AlignmentI al = new Alignment(newseqs);
al.padGaps(); // ensure we look aligned.
- al.setDataset(null);
+ al.setDataset(dataset);
translateAlignedAnnotations(annotations, al, codons);
al.addCodonFrame(codons);
return al;
}
/**
+ * fake the collection of DbRefs with associated exon mappings to identify if
+ * a translation would generate distinct product in the currently selected
+ * region.
+ *
+ * @param selection
+ * @param viscontigs
+ * @return
+ */
+ public static boolean canTranslate(SequenceI[] selection,
+ int viscontigs[])
+ {
+ for (int gd = 0; gd < selection.length; gd++)
+ {
+ SequenceI dna = selection[gd];
+ jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils
+ .selectRefs(dna.getDBRef(),
+ jalview.datamodel.DBRefSource.DNACODINGDBS);
+ if (dnarefs != null)
+ {
+ // intersect with pep: keep only the references whose map has a
+ // 3:1 from/to ratio (the translated protein maps)
+ Vector mappedrefs = new Vector();
+ DBRefEntry[] refs = dna.getDBRef();
+ for (int d = 0; d < refs.length; d++)
+ {
+ if (refs[d].getMap() != null && refs[d].getMap().getMap() != null
+ && refs[d].getMap().getMap().getFromRatio() == 3
+ && refs[d].getMap().getMap().getToRatio() == 1)
+ {
+ mappedrefs.addElement(refs[d]); // add translated protein maps
+ }
+ }
+ dnarefs = new DBRefEntry[mappedrefs.size()];
+ mappedrefs.copyInto(dnarefs);
+ for (int d = 0; d < dnarefs.length; d++)
+ {
+ Mapping mp = dnarefs[d].getMap();
+ if (mp != null)
+ {
+ for (int vc = 0; vc < viscontigs.length; vc += 2)
+ {
+ int[] mpr = mp.locateMappedRange(viscontigs[vc],
+ viscontigs[vc + 1]);
+ if (mpr != null)
+ {
+ return true;
+ }
+ }
+ }
+ }
+ }
+ }
+ return false;
+ }
+
+ /**
+ * generate a set of translated protein products from annotated sequenceI
+ *
+ * @param selection
+ * @param viscontigs
+ * @param gapCharacter
+ * @param dataset
+ * destination dataset for translated sequences
+ * @return alignment from the full CdnaTranslate call, made with null
+ * annotations and a locally computed width; null if selection
+ * is empty
+ */
+ public static AlignmentI CdnaTranslate(SequenceI[] selection,
+ int viscontigs[], char gapCharacter, Alignment dataset)
+ {
+ int alwidth = 0;
+ Vector cdnasqs = new Vector();
+ Vector cdnasqi = new Vector();
+ Vector cdnaprod = new Vector();
+ for (int gd = 0; gd < selection.length; gd++)
+ {
+ SequenceI dna = selection[gd];
+ jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils
+ .selectRefs(dna.getDBRef(),
+ jalview.datamodel.DBRefSource.DNACODINGDBS);
+ if (dnarefs != null)
+ {
+ // intersect with pep
+ Vector mappedrefs = new Vector();
+ DBRefEntry[] refs = dna.getDBRef();
+ for (int d = 0; d < refs.length; d++)
+ {
+ if (refs[d].getMap() != null && refs[d].getMap().getMap() != null
+ && refs[d].getMap().getMap().getFromRatio() == 3
+ && refs[d].getMap().getMap().getToRatio() == 1)
+ {
+ mappedrefs.addElement(refs[d]); // add translated protein maps
+ }
+ }
+ dnarefs = new DBRefEntry[mappedrefs.size()];
+ mappedrefs.copyInto(dnarefs);
+ for (int d = 0; d < dnarefs.length; d++)
+ {
+ Mapping mp = dnarefs[d].getMap();
+ StringBuffer sqstr = new StringBuffer();
+ if (mp != null)
+ {
+ Mapping intersect = mp.intersectVisContigs(viscontigs);
+ // TODO: generate seqstring for this sequence based on mapping
+ // (sqstr is never appended to here, so alwidth stays 0)
+ if (sqstr.length() > alwidth)
+ alwidth = sqstr.length();
+ cdnasqs.addElement(sqstr.toString());
+ cdnasqi.addElement(dna);
+ cdnaprod.addElement(intersect);
+ }
+ }
+ }
+ SequenceI[] cdna = new SequenceI[cdnasqs.size()];
+ DBRefEntry[] prods = new DBRefEntry[cdnaprod.size()];
+ String[] xons = new String[cdnasqs.size()];
+ cdnasqs.copyInto(xons);
+ cdnaprod.copyInto(prods);
+ cdnasqi.copyInto(cdna);
+ return CdnaTranslate(cdna, xons, prods, viscontigs, gapCharacter,
+ null, alwidth, dataset);
+ }
+ return null;
+ }
+
+ /**
* translate na alignment annotations onto translated amino acid alignment al
* using codon mapping codons
*
@@ -139,18 +317,22 @@ public class Dna
if (codons.codons[a] != null
&& codons.codons[a][0] == (codons.codons[a][2] - 2))
{
- pos = codons.codons[a][0];
- if (annotations[i].annotations[pos] == null
- || annotations[i].annotations[pos] == null)
- continue;
- // We just take the annotation in the first base in the codon
- anots[a] = new Annotation(annotations[i].annotations[pos]);
+ anots[a] = getCodonAnnotation(codons.codons[a],
+ annotations[i].annotations);
}
}
}
jalview.datamodel.AlignmentAnnotation aa = new jalview.datamodel.AlignmentAnnotation(
annotations[i].label, annotations[i].description, anots);
+ aa.graph = annotations[i].graph;
+ aa.graphGroup = annotations[i].graphGroup;
+ aa.graphHeight = annotations[i].graphHeight;
+ if (annotations[i].getThreshold() != null)
+ {
+ aa.setThreshold(new jalview.datamodel.GraphLine(annotations[i]
+ .getThreshold()));
+ }
if (annotations[i].hasScore)
{
aa.setScore(annotations[i].getScore());
@@ -165,7 +347,7 @@ public class Dna
// positioning
aa.setSequenceRef(aaSeq);
aa.createSequenceMapping(aaSeq, aaSeq.getStart(), true); // rebuild
- // mapping
+ // mapping
aa.adjustForAlignment();
aaSeq.addAlignmentAnnotation(aa);
}
@@ -176,31 +358,92 @@ public class Dna
}
}
+ private static Annotation getCodonAnnotation(int[] is,
+ Annotation[] annotations)
+ {
+ // Look at all three codon positions: copy the first annotation found, fill
+ // its unset fields from later ones, and average the values over all found.
+ int contrib = 0;
+ Annotation annot = null;
+ for (int p = 0; p < 3; p++)
+ {
+ if (annotations[is[p]] != null)
+ {
+ if (annot == null)
+ {
+ annot = new Annotation(annotations[is[p]]);
+ contrib = 1;
+ }
+ else
+ {
+ // merge with last
+ Annotation cpy = new Annotation(annotations[is[p]]);
+ if (annot.colour == null)
+ {
+ annot.colour = cpy.colour;
+ }
+ if (annot.description == null || annot.description.length() == 0)
+ {
+ annot.description = cpy.description;
+ }
+ if (annot.displayCharacter == null)
+ {
+ annot.displayCharacter = cpy.displayCharacter;
+ }
+ if (annot.secondaryStructure == 0)
+ {
+ annot.secondaryStructure = cpy.secondaryStructure;
+ }
+ annot.value += cpy.value;
+ contrib++;
+ }
+ }
+ }
+ if (contrib > 1)
+ {
+ annot.value /= (float) contrib;
+ }
+ return annot;
+ }
+
/**
* Translate a na sequence
*
* @param selection
+ * sequence displayed under viscontigs visible columns
* @param seqstring
+ * ORF read in some global alignment reference frame
* @param viscontigs
+ * mapping from global reference frame to visible seqstring ORF read
* @param codons
+ * Definition of global ORF alignment reference frame
* @param gapCharacter
* @param newSeq
* @return sequence ready to be added to alignment.
*/
public static SequenceI translateCodingRegion(SequenceI selection,
String seqstring, int[] viscontigs, AlignedCodonFrame codons,
- char gapCharacter)
+ char gapCharacter, DBRefEntry product)
{
+ Vector skip = new Vector();
+ int skipint[] = null;
ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring
- // intervals
+ // intervals
int vc, scontigs[] = new int[viscontigs.length];
int npos = 0;
for (vc = 0; vc < viscontigs.length; vc += 2)
{
- vismapping.addShift(npos, viscontigs[vc]);
- scontigs[vc] = npos;
- npos += viscontigs[vc + 1];
- scontigs[vc + 1] = npos;
+ if (vc == 0)
+ {
+ vismapping.addShift(npos, viscontigs[vc]);
+ }
+ else
+ {
+ // hidden region
+ vismapping.addShift(npos, viscontigs[vc] - viscontigs[vc - 1] + 1);
+ }
+ scontigs[vc] = viscontigs[vc];
+ scontigs[vc + 1] = viscontigs[vc + 1];
}
StringBuffer protein = new StringBuffer();
@@ -222,9 +465,45 @@ public class Dna
String aa = ResidueProperties.codonTranslate(new String(codon));
rf = 0;
if (aa == null)
+ {
aa = String.valueOf(gapCharacter);
+ if (skipint == null)
+ {
+ skipint = new int[]
+ { cdp[0], cdp[2] };
+ }
+ skipint[1] = cdp[2];
+ }
else
{
+ if (skipint != null)
+ {
+ // edit scontigs
+ skipint[0] = vismapping.shift(skipint[0]);
+ skipint[1] = vismapping.shift(skipint[1]);
+ for (vc = 0; vc < scontigs.length; vc += 2)
+ {
+ if (scontigs[vc + 1] < skipint[0])
+ {
+ continue;
+ }
+ if (scontigs[vc] <= skipint[0])
+ {
+ if (skipint[0] == scontigs[vc])
+ {
+
+ }
+ else
+ {
+ int[] t = new int[scontigs.length + 2];
+ System.arraycopy(scontigs, 0, t, 0, vc - 1);
+ // scontigs[vc]; //
+ }
+ }
+ }
+ skip.addElement(skipint);
+ skipint = null;
+ }
if (aa.equals("STOP"))
{
aa = "X";
@@ -250,8 +529,8 @@ public class Dna
// with a gap
aa = "" + gapCharacter + aa;
aspos++;
- if (aspos >= codons.aaWidth)
- codons.aaWidth = aspos + 1;
+ // if (aspos >= codons.aaWidth)
+ // codons.aaWidth = aspos + 1;
break; // check the next position for alignment
case 0:
// codon aligns at aspos position.
@@ -267,15 +546,21 @@ public class Dna
codons.codons[aspos] = new int[]
{ cdp[0], cdp[1], cdp[2] };
}
- aspos++;
if (aspos >= codons.aaWidth)
- codons.aaWidth = aspos + 1;
+ {
+ // update maximum alignment width
+ // (we can do this without calling checkCodonFrameWidth because it was
+ // already done above)
+ codons.setAaWidth(aspos);
+ }
+ // ready for next translated reading frame alignment position (if any)
+ aspos++;
}
}
if (resSize > 0)
{
- SequenceI newseq = new Sequence(selection.getName(), protein
- .toString());
+ SequenceI newseq = new Sequence(selection.getName(),
+ protein.toString());
if (rf != 0)
{
jalview.bin.Cache.log
@@ -283,8 +568,8 @@ public class Dna
// map and trim contigs to ORF region
vc = scontigs.length - 1;
lastnpos = vismapping.shift(lastnpos); // place npos in context of
- // whole dna alignment (rather
- // than visible contigs)
+ // whole dna alignment (rather
+ // than visible contigs)
// incomplete ORF could be broken over one or two visible contig
// intervals.
while (vc >= 0 && scontigs[vc] > lastnpos)
@@ -313,14 +598,11 @@ public class Dna
if (scontigs != null)
{
npos = 0;
- // Find sequence position for scontigs positions on the nucleotide
- // sequence string we were passed.
- for (vc = 0; vc < viscontigs.length; vc += 2)
+ // map scontigs to actual sequence positions on selection
+ for (vc = 0; vc < scontigs.length; vc += 2)
{
scontigs[vc] = selection.findPosition(scontigs[vc]); // not from 1!
- npos += viscontigs[vc];
- scontigs[vc + 1] = selection
- .findPosition(npos + scontigs[vc + 1]); // exclusive
+ scontigs[vc + 1] = selection.findPosition(scontigs[vc + 1]); // exclusive
if (scontigs[vc + 1] == selection.getEnd())
break;
}
@@ -331,22 +613,49 @@ public class Dna
System.arraycopy(scontigs, 0, t, 0, vc + 2);
scontigs = t;
}
-
+ /*
+ * delete intervals in scontigs which are not translated. 1. map skip
+ * into sequence position intervals 2. truncate existing ranges and add
+ * new ranges to exclude untranslated regions. if (skip.size()>0) {
+ * Vector narange = new Vector(); for (vc=0; vc=skipint[0] &&
+ * iv[0]<=skipint[1]) { if (iv[0]==skipint[0]) { // delete beginning of
+ * range } else { // truncate range and create new one if necessary iv =
+ * (int[]) narange.elementAt(vc+1); if (iv[0]<=skipint[1]) { // truncate
+ * range iv[0] = skipint[1]; } else { } } } else if (iv[0]