/*
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2)
- * Copyright (C) 2014 The Jalview Authors
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
*/
package jalview.analysis;
+import jalview.api.AlignViewportI;
import jalview.bin.Cache;
+import jalview.datamodel.AlignedCodon;
import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentAnnotation;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Hashtable;
+import java.util.Comparator;
import java.util.List;
+import java.util.Map;
public class Dna
{
+ private static final String STOP_X = "X";
+
+ private static final Comparator<AlignedCodon> comparator = new CodonComparator();
+
+ /*
+ * 'final' variables describe the inputs to the translation, which should not
+ * be modified.
+ */
+ final private List<SequenceI> selection;
+
+ final private String[] seqstring;
+
+ final private int[] contigs;
+
+ final private char gapChar;
+
+ final private AlignmentAnnotation[] annotations;
+
+ final private int dnaWidth;
+
+ final private Alignment dataset;
+
+ /*
+ * Working variables for the translation.
+ *
+ * The width of the translation-in-progress protein alignment.
+ */
+ private int aaWidth = 0;
+
+ /*
+ * This array will be built up so that position i holds the codon positions
+ * e.g. [7, 9, 10] that match column i (base 0) in the aligned translation.
+ * Note this implies a contract that if two codons do not align exactly, their
+ * translated products must occupy different column positions.
+ */
+ private AlignedCodon[] alignedCodons;
+
+ /**
+ * Constructor given a viewport and the visible contigs.
+ *
+ * @param viewport
+ * @param visibleContigs
+ */
+ public Dna(AlignViewportI viewport, int[] visibleContigs)
+ {
+ this.selection = Arrays.asList(viewport.getSequenceSelection());
+ this.seqstring = viewport.getViewAsString(true);
+ this.contigs = visibleContigs;
+ this.gapChar = viewport.getGapCharacter();
+ this.annotations = viewport.getAlignment().getAlignmentAnnotation();
+ this.dnaWidth = viewport.getAlignment().getWidth();
+ this.dataset = viewport.getAlignment().getDataset();
+ }
+
/**
* Test whether codon positions cdp1 should align before, with, or after cdp2.
* Returns zero if all positions match (or either argument is null). Returns
* <li>compareCodonPos([3,4,5], [2,5,6]) also returns -1</li>
* </ul>
*
- * @param cdp1
- * @param cdp2
+ * @param ac1
+ * @param ac2
+ * @return
+ */
+ public static final int compareCodonPos(AlignedCodon ac1, AlignedCodon ac2)
+ {
+ return comparator.compare(ac1, ac2);
+ // return jalview_2_8_2compare(ac1, ac2);
+ }
+
+ /**
+ * Codon comparison up to Jalview 2.8.2. This rule is sequence order dependent
+ * - see http://issues.jalview.org/browse/JAL-1635
+ *
+ * @param ac1
+ * @param ac2
* @return
*/
- public static int compareCodonPos(int[] cdp1, int[] cdp2)
+ private static int jalview_2_8_2compare(AlignedCodon ac1, AlignedCodon ac2)
{
- if (cdp1 == null
- || cdp2 == null
- || (cdp1[0] == cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2]))
+ if (ac1 == null || ac2 == null || (ac1.equals(ac2)))
{
return 0;
}
- if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2])
+ if (ac1.pos1 < ac2.pos1 || ac1.pos2 < ac2.pos2 || ac1.pos3 < ac2.pos3)
{
// one base in cdp1 precedes the corresponding base in the other codon
return -1;
}
/**
- * DNA->mapped protein sequence alignment translation given set of sequences
- * 1. id distinct coding regions within selected region for each sequence 2.
- * generate peptides based on inframe (or given) translation or (optionally
- * and where specified) out of frame translations (annotated appropriately) 3.
- * align peptides based on codon alignment
- */
- /**
- * id potential products from dna 1. search for distinct products within
- * selected region for each selected sequence 2. group by associated DB type.
- * 3. return as form for input into above function
- */
- /**
- *
- */
- /**
- * create a new alignment of protein sequences by an inframe translation of
- * the provided NA sequences
- *
- * @param selection
- * @param seqstring
- * @param viscontigs
- * @param gapCharacter
- * @param annotations
- * @param aWidth
- * @param dataset
- * destination dataset for translated sequences and mappings
- * @return
- */
- public static AlignmentI cdnaTranslate(SequenceI[] selection,
- String[] seqstring, int viscontigs[], char gapCharacter,
- AlignmentAnnotation[] annotations, int aWidth, Alignment dataset)
- {
- return cdnaTranslate(Arrays.asList(selection), seqstring, null,
- viscontigs, gapCharacter, annotations, aWidth, dataset);
- }
-
- /**
*
- * @param cdnaseqs
- * @param seqstring
- * @param product
- * - array of DbRefEntry objects from which exon map in seqstring is
- * derived
- * @param viscontigs
- * @param gapCharacter
- * @param annotations
- * @param aWidth
- * @param dataset
* @return
*/
- public static AlignmentI cdnaTranslate(List<SequenceI> cdnaseqs,
- String[] seqstring, DBRefEntry[] product, int viscontigs[],
- char gapCharacter, AlignmentAnnotation[] annotations, int aWidth,
- Alignment dataset)
+ public AlignmentI translateCdna()
{
- AlignedCodonFrame acf = new AlignedCodonFrame(aWidth);
+ AlignedCodonFrame acf = new AlignedCodonFrame();
- /*
- * This array will be built up so that position i holds the codon positions
- * e.g. [7, 9, 10] that match column i (base 0) in the aligned translation.
- * Note this implies a contract that if two codons do not align exactly,
- * their translated products must occupy different column positions.
- */
- int[][] alignedCodons = new int[aWidth][];
+ alignedCodons = new AlignedCodon[dnaWidth];
int s;
- int sSize = cdnaseqs.size();
+ int sSize = selection.size();
List<SequenceI> pepseqs = new ArrayList<SequenceI>();
for (s = 0; s < sSize; s++)
{
- SequenceI newseq = translateCodingRegion(cdnaseqs.get(s),
- seqstring[s], viscontigs, acf, alignedCodons, pepseqs,
- gapCharacter,
- false);
+ SequenceI newseq = translateCodingRegion(selection.get(s),
+ seqstring[s], acf, pepseqs);
if (newseq != null)
{
al.padGaps();
// link the protein translation to the DNA dataset
al.setDataset(dataset);
- translateAlignedAnnotations(annotations, al, acf, alignedCodons);
+ translateAlignedAnnotations(al, acf);
al.addCodonFrame(acf);
return al;
}
}
/**
- * Translate na alignment annotations onto translated amino acid alignment al
- * using codon mapping codons
+ * Translate nucleotide alignment annotations onto translated amino acid
+ * alignment using codon mapping codons
*
- * @param annotations
* @param al
- * @param acf
+ * the translated protein alignment
*/
- protected static void translateAlignedAnnotations(
- AlignmentAnnotation[] annotations, AlignmentI al,
- AlignedCodonFrame acf, int[][] codons)
+ protected void translateAlignedAnnotations(AlignmentI al,
+ AlignedCodonFrame acf)
{
// Can only do this for columns with consecutive codons, or where
// annotation is sequence associated.
continue;
}
- int aSize = acf.getaaWidth(); // aa alignment width.
+ int aSize = aaWidth;
Annotation[] anots = (annotation.annotations == null) ? null
: new Annotation[aSize];
if (anots != null)
for (int a = 0; a < aSize; a++)
{
// process through codon map.
- if (a < codons.length && codons[a] != null
- && codons[a][0] == (codons[a][2] - 2))
+ if (a < alignedCodons.length && alignedCodons[a] != null
+ && alignedCodons[a].pos1 == (alignedCodons[a].pos3 - 2))
{
- anots[a] = getCodonAnnotation(codons[a],
+ anots[a] = getCodonAnnotation(alignedCodons[a],
annotation.annotations);
}
}
}
}
- private static Annotation getCodonAnnotation(int[] is,
+ private static Annotation getCodonAnnotation(AlignedCodon is,
Annotation[] annotations)
{
// Have a look at all the codon positions for annotation and put the first
// one found into the translated annotation pos.
int contrib = 0;
Annotation annot = null;
- for (int p = 0; p < 3; p++)
+ for (int p = 1; p <= 3; p++)
{
- if (annotations[is[p]] != null)
+ int dnaCol = is.getBaseColumn(p);
+ if (annotations[dnaCol] != null)
{
if (annot == null)
{
- annot = new Annotation(annotations[is[p]]);
+ annot = new Annotation(annotations[dnaCol]);
contrib = 1;
}
else
{
// merge with last
- Annotation cpy = new Annotation(annotations[is[p]]);
+ Annotation cpy = new Annotation(annotations[dnaCol]);
if (annot.colour == null)
{
annot.colour = cpy.colour;
* sequence displayed under viscontigs visible columns
* @param seqstring
* ORF read in some global alignment reference frame
- * @param viscontigs
- * mapping from global reference frame to visible seqstring ORF read
* @param acf
* Definition of global ORF alignment reference frame
- * @param alignedCodons
* @param proteinSeqs
- * @param gapCharacter
- * @param starForStop
- * when true stop codons will translate as '*', otherwise as 'X'
* @return sequence ready to be added to alignment.
*/
- protected static SequenceI translateCodingRegion(SequenceI selection,
- String seqstring, int[] viscontigs, AlignedCodonFrame acf,
- int[][] alignedCodons, List<SequenceI> proteinSeqs,
- char gapCharacter,
- final boolean starForStop)
+ protected SequenceI translateCodingRegion(SequenceI selection,
+ String seqstring, AlignedCodonFrame acf,
+ List<SequenceI> proteinSeqs)
{
List<int[]> skip = new ArrayList<int[]>();
int skipint[] = null;
ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring
// intervals
- int vc, scontigs[] = new int[viscontigs.length];
+ int vc;
+ int[] scontigs = new int[contigs.length];
int npos = 0;
- for (vc = 0; vc < viscontigs.length; vc += 2)
+ for (vc = 0; vc < contigs.length; vc += 2)
{
if (vc == 0)
{
- vismapping.addShift(npos, viscontigs[vc]);
+ vismapping.addShift(npos, contigs[vc]);
}
else
{
// hidden region
- vismapping.addShift(npos, viscontigs[vc] - viscontigs[vc - 1] + 1);
+ vismapping.addShift(npos, contigs[vc] - contigs[vc - 1] + 1);
}
- scontigs[vc] = viscontigs[vc];
- scontigs[vc + 1] = viscontigs[vc + 1];
+ scontigs[vc] = contigs[vc];
+ scontigs[vc + 1] = contigs[vc + 1];
}
// allocate a roughly sized buffer for the protein sequence
StringBuilder protein = new StringBuilder(seqstring.length() / 2);
String seq = seqstring.replace('U', 'T').replace('u', 'T');
char codon[] = new char[3];
- int cdp[] = new int[3], rf = 0, lastnpos = 0, nend;
+ int cdp[] = new int[3];
+ int rf = 0;
+ int lastnpos = 0;
+ int nend;
int aspos = 0;
int resSize = 0;
for (npos = 0, nend = seq.length(); npos < nend; npos++)
/*
* Filled up a reading frame...
*/
+ AlignedCodon alignedCodon = new AlignedCodon(cdp[0], cdp[1], cdp[2]);
String aa = ResidueProperties.codonTranslate(new String(codon));
rf = 0;
- final String gapString = String.valueOf(gapCharacter);
+ final String gapString = String.valueOf(gapChar);
if (aa == null)
{
aa = gapString;
if (skipint == null)
{
skipint = new int[]
- { cdp[0], cdp[2] };
+ { alignedCodon.pos1, alignedCodon.pos3 /* cdp[0], cdp[2] */};
}
- skipint[1] = cdp[2];
+ skipint[1] = alignedCodon.pos3; // cdp[2];
}
else
{
}
if (aa.equals("STOP"))
{
- aa = starForStop ? "*" : "X";
+ aa = STOP_X;
}
resSize++;
}
- // insert gaps prior to this codon - if necessary
boolean findpos = true;
while (findpos)
{
- // expand the codons array if necessary
- alignedCodons = checkCodonFrameWidth(alignedCodons, aspos);
-
/*
* Compare this codon's base positions with those currently aligned to
* this column in the translation.
*/
- final int compareCodonPos = Dna.compareCodonPos(cdp,
+ final int compareCodonPos = compareCodonPos(alignedCodon,
alignedCodons[aspos]);
- // debug
- System.out.println(seq + "/" + aa + " codons: "
- + Arrays.deepToString(alignedCodons));
- System.out
- .println(("Compare " + Arrays.toString(cdp) + " at pos "
- + aspos + " with "
- + Arrays.toString(alignedCodons[aspos]) + " got " + compareCodonPos));
- // end debug
switch (compareCodonPos)
{
case -1:
* This codon should precede the mapped positions - need to insert a
* gap in all prior sequences.
*/
- insertAAGap(aspos, gapCharacter, alignedCodons, proteinSeqs);
+ insertAAGap(aspos, proteinSeqs);
findpos = false;
break;
if (alignedCodons[aspos] == null)
{
// mark this column as aligning to this aligned reading frame
- alignedCodons[aspos] = new int[]
- { cdp[0], cdp[1], cdp[2] };
+ alignedCodons[aspos] = alignedCodon;
}
- else if (!Arrays.equals(alignedCodons[aspos], cdp))
+ else if (!alignedCodons[aspos].equals(alignedCodon))
{
throw new IllegalStateException("Tried to coalign "
- + Arrays.asList(alignedCodons[aspos], cdp));
+ + alignedCodons[aspos].toString() + " with "
+ + alignedCodon.toString());
}
- if (aspos >= acf.aaWidth)
+ if (aspos >= aaWidth)
{
// update maximum alignment width
- // (we can do this without calling checkCodonFrameWidth because it was
- // already done above)
- System.out.println("aspos " + aspos + " >= " + acf.aaWidth);
- acf.setAaWidth(aspos);
+ aaWidth = aspos;
}
// ready for next translated reading frame alignment position (if any)
aspos++;
* Insert a gap into the aligned proteins and the codon mapping array.
*
* @param pos
- * @param gapCharacter
- * @param alignedCodons
* @param proteinSeqs
+ * @return
*/
- protected static void insertAAGap(int pos, char gapCharacter,
- int[][] alignedCodons, List<SequenceI> proteinSeqs)
+ protected void insertAAGap(int pos,
+ List<SequenceI> proteinSeqs)
{
- System.out.println("insertAAGap " + pos + "/" + proteinSeqs.size());
- // aaWidth++;
+ aaWidth++;
for (SequenceI seq : proteinSeqs)
{
- seq.insertCharAt(pos, gapCharacter);
+ seq.insertCharAt(pos, gapChar);
}
- // if (pos < aaWidth)
- // {
- // aaWidth++;
- System.arraycopy(alignedCodons, pos, alignedCodons, pos + 1,
- alignedCodons.length - pos - 1);
- alignedCodons[pos] = null; // clear so new codon position can be marked.
- // }
+ checkCodonFrameWidth();
+ if (pos < aaWidth)
+ {
+ aaWidth++;
+
+ /*
+ * Shift from [pos] to the end one to the right, and null out [pos]
+ */
+ System.arraycopy(alignedCodons, pos, alignedCodons, pos + 1,
+ alignedCodons.length - pos - 1);
+ alignedCodons[pos] = null;
+ }
}
/**
- * Check the codons array is big enough to accommodate the given position, if
- * not resize it.
- *
- * @param alignedCodons
- * @param aspos
- * @return the resized array (or the original if no resize needed)
+ * Check the codons array can accommodate a single insertion, if not resize
+ * it.
*/
- protected static int[][] checkCodonFrameWidth(int[][] alignedCodons,
- int aspos)
+ protected void checkCodonFrameWidth()
{
- // TODO why not codons.length < aspos ?
- // should codons expand if length is 2 or 3 and aslen==2 ?
- if (alignedCodons.length <= aspos + 1)
+ if (alignedCodons[alignedCodons.length - 1] != null)
{
- // probably never have to do this ?
- int[][] c = new int[alignedCodons.length + 10][];
- for (int i = 0; i < alignedCodons.length; i++)
- {
- c[i] = alignedCodons[i];
- }
- return c;
+ /*
+ * arraycopy insertion would bump a filled slot off the end, so expand.
+ */
+ AlignedCodon[] c = new AlignedCodon[alignedCodons.length + 10];
+ System.arraycopy(alignedCodons, 0, c, 0, alignedCodons.length);
+ alignedCodons = c;
}
- return alignedCodons;
}
/**
* @param pep
* @param map
* @param featureTypes
- * hash who's keys are the displayed feature type strings
+ * hash whose keys are the displayed feature type strings
* @param featureGroups
* hash where keys are feature groups and values are Boolean objects
* indicating if they are displayed.
*/
private static void transferCodedFeatures(SequenceI dna, SequenceI pep,
- MapList map, Hashtable featureTypes, Hashtable featureGroups)
+ MapList map, Map<String, Object> featureTypes,
+ Map<String, Boolean> featureGroups)
{
- SequenceFeature[] sf = (dna.getDatasetSequence() != null ? dna
+ SequenceFeature[] sfs = (dna.getDatasetSequence() != null ? dna
.getDatasetSequence() : dna).getSequenceFeatures();
Boolean fgstate;
DBRefEntry[] dnarefs = DBRefUtils.selectRefs(dna.getDBRef(),
}
}
}
- if (sf != null)
+ if (sfs != null)
{
- for (int f = 0; f < sf.length; f++)
+ for (SequenceFeature sf : sfs)
{
- fgstate = (featureGroups == null) ? null : ((Boolean) featureGroups
- .get(sf[f].featureGroup));
- if ((featureTypes == null || featureTypes.containsKey(sf[f]
- .getType())) && (fgstate == null || fgstate.booleanValue()))
+ fgstate = (featureGroups == null) ? null : featureGroups
+ .get(sf.featureGroup);
+ if ((featureTypes == null || featureTypes.containsKey(sf.getType()))
+ && (fgstate == null || fgstate.booleanValue()))
{
- if (FeatureProperties.isCodingFeature(null, sf[f].getType()))
+ if (FeatureProperties.isCodingFeature(null, sf.getType()))
{
// if (map.intersectsFrom(sf[f].begin, sf[f].end))
{