{
/**
+ * Represents the 3 possible results of trying to map one alignment to
+ * another:
+ * <ul>
+ * <li>Mapped - at least one sequence mapping was created</li>
+ * <li>NotMapped - no possible sequence mappings exist</li>
+ * <li>AlreadyMapped - all possible sequence mappings already exist</li>
+ * </ul>
+ */
+ public enum MappingResult
+ {
+ Mapped, NotMapped, AlreadyMapped
+ }
+
+ /**
* given an existing alignment, create a new alignment including all, or up to
* flankSize additional symbols from each sequence's dataset sequence
*
/**
* Build mapping of protein to cDNA alignment. Mappings are made between
- * sequences which have the same name and compatible lengths. Returns true if
- * at least one sequence mapping was made, else false.
+ * sequences which have the same name and compatible lengths. Has a 3-valued
+ * result: either Mapped (at least one sequence mapping was created),
+ * AlreadyMapped (all possible sequence mappings already exist), or NotMapped
+ * (no possible sequence mappings exist).
*
* @param proteinAlignment
* @param cdnaAlignment
* @return
*/
- public static boolean mapProteinToCdna(final AlignmentI proteinAlignment,
+ public static MappingResult mapProteinToCdna(
+ final AlignmentI proteinAlignment,
final AlignmentI cdnaAlignment)
{
- boolean mapped = false;
+ boolean mappingPossible = false;
+ boolean mappingPerformed = false;
+
List<SequenceI> thisSeqs = proteinAlignment.getSequences();
/*
if (candidates == null)
{
/*
- * No cDNA sequence with matching name, so no mapping for this protein
- * sequence
+ * No cDNA sequence with matching name, so no mapping possible for this
+ * protein sequence
*/
continue;
}
+ mappingPossible = true;
for (SequenceI cdnaSeq : candidates)
{
- MapList map = mapProteinToCdna(aaSeq, cdnaSeq);
- if (map != null)
+ if (!mappingExists(proteinAlignment.getCodonFrames(),
+ aaSeq.getDatasetSequence(), cdnaSeq.getDatasetSequence()))
{
- acf.addMap(cdnaSeq, aaSeq, map);
- mapped = true;
+ MapList map = mapProteinToCdna(aaSeq, cdnaSeq);
+ if (map != null)
+ {
+ acf.addMap(cdnaSeq, aaSeq, map);
+ mappingPerformed = true;
+ }
}
}
proteinAlignment.addCodonFrame(acf);
}
- return mapped;
+
+ /*
+ * If at least one mapping was possible but none was done, then the
+ * alignments are already as mapped as they can be.
+ */
+ if (mappingPossible && !mappingPerformed)
+ {
+ return MappingResult.AlreadyMapped;
+ }
+ else
+ {
+ return mappingPerformed ? MappingResult.Mapped
+ : MappingResult.NotMapped;
+ }
+ }
+
+ /**
+ * Answers true if the mappings include one between the given (dataset)
+ * sequences. The match is by object identity: the dna sequence that a codon
+ * frame returns for aaSeq must be the same instance as cdnaSeq.
+ *
+ * @param codonFrames
+ * the mappings to search; may be null, in which case the answer is
+ * false
+ * @param aaSeq
+ * the protein sequence to look up in each codon frame
+ * @param cdnaSeq
+ * the nucleotide sequence expected to be mapped to aaSeq
+ * @return true if any codon frame returns cdnaSeq as the dna for aaSeq, else
+ * false
+ */
+ public static boolean mappingExists(AlignedCodonFrame[] codonFrames,
+ SequenceI aaSeq, SequenceI cdnaSeq)
+ {
+ if (codonFrames != null)
+ {
+ for (AlignedCodonFrame acf : codonFrames)
+ {
+ if (cdnaSeq == acf.getDnaForAaSeq(aaSeq))
+ {
+ return true;
+ }
+ }
+ }
+ return false;
}
/**
--- /dev/null
+package jalview.analysis;
+
+import jalview.datamodel.AlignedCodon;
+
+import java.util.Comparator;
+
+/**
+ * Implements rules for comparing two aligned codons, i.e. determining whether
+ * they should occupy the same position in a translated protein alignment, or
+ * one or the other should 'follow' (be preceded by a gap).
+ *
+ * The comparison answers 0 if either codon is null or if the two codons are
+ * equal, a negative value if the first codon should precede the second, and a
+ * positive value if it should follow. The rules are applied in the order
+ * written; each later case relies on the earlier ones not having matched.
+ *
+ * @author gmcarstairs
+ *
+ */
+public final class CodonComparator implements Comparator<AlignedCodon>
+{
+
+ @Override
+ public int compare(AlignedCodon ac1, AlignedCodon ac2)
+ {
+ if (ac1 == null || ac2 == null || ac1.equals(ac2))
+ {
+ return 0;
+ }
+
+ /**
+ * <pre>
+ * Case 1: if one starts before the other, and doesn't end after it, then it
+ * precedes. We ignore the middle base position here.
+ * A--GT
+ * -CT-G
+ * </pre>
+ */
+ if (ac1.pos1 < ac2.pos1 && ac1.pos3 <= ac2.pos3)
+ {
+ return -1;
+ }
+ if (ac2.pos1 < ac1.pos1 && ac2.pos3 <= ac1.pos3)
+ {
+ return 1;
+ }
+
+ /**
+ * <pre>
+ * Case 2: if one ends after the other, and doesn't start before it, then it
+ * follows. We ignore the middle base position here.
+ * -TG-A
+ * G-TC
+ * NOTE(review): the pair shown above is already decided by Case 1 (the
+ * second codon starts first and does not end later). Because Case 1 is
+ * tested first, these branches are only reached when both codons start
+ * in the same column, e.g.
+ * AA-A
+ * GGG
+ * </pre>
+ */
+ if (ac1.pos3 > ac2.pos3 && ac1.pos1 >= ac2.pos1)
+ {
+ return 1;
+ }
+ if (ac2.pos3 > ac1.pos3 && ac2.pos1 >= ac1.pos1)
+ {
+ return -1;
+ }
+
+ /*
+ * Case 3: if start and end match, compare middle base positions.
+ */
+ if (ac1.pos1 == ac2.pos1 && ac1.pos3 == ac2.pos3)
+ {
+ return Integer.compare(ac1.pos2, ac2.pos2);
+ }
+
+ /*
+ * That just leaves the 'enclosing' case - one codon starts after but ends
+ * before the other. If the middle bases don't match, use their comparison
+ * (majority vote).
+ */
+ int compareMiddles = Integer.compare(ac1.pos2, ac2.pos2);
+ if (compareMiddles != 0)
+ {
+ return compareMiddles;
+ }
+
+ /**
+ * <pre>
+ * Finally just leaves the enclosing case with matching middle base, e.g.
+ * -AAA
+ * G-G-G
+ * In this case the choice is arbitrary whether to compare based on
+ * first or last base position. We pick the first. Note this preserves
+ * symmetry of the comparison: swapping the arguments negates the result.
+ * </pre>
+ */
+ return Integer.compare(ac1.pos1, ac2.pos1);
+ }
+}
import jalview.api.AlignViewportI;
import jalview.bin.Cache;
+import jalview.datamodel.AlignedCodon;
import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentAnnotation;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Hashtable;
+import java.util.Comparator;
import java.util.List;
+import java.util.Map;
public class Dna
{
private static final String STOP_X = "X";
+ private static final Comparator<AlignedCodon> comparator = new CodonComparator();
+
+ /*
+ * 'final' variables describe the inputs to the translation, which should not
+ * be modified.
+ */
final private List<SequenceI> selection;
final private String[] seqstring;
final private Alignment dataset;
+ /*
+ * Working variables for the translation.
+ *
+ * The width of the translation-in-progress protein alignment.
+ */
private int aaWidth = 0;
+ /*
+ * This array will be built up so that position i holds the codon positions
+ * e.g. [7, 9, 10] that match column i (base 0) in the aligned translation.
+ * Note this implies a contract that if two codons do not align exactly, their
+ * translated products must occupy different column positions.
+ */
+ private AlignedCodon[] alignedCodons;
+
/**
* Constructor given a viewport and the visible contigs.
*
* <li>compareCodonPos([3,4,5], [2,5,6]) also returns -1</li>
* </ul>
*
- * @param cdp1
- * @param cdp2
+ * @param ac1
+ * @param ac2
* @return
*/
- public static int compareCodonPos(int[] cdp1, int[] cdp2)
+ public static final int compareCodonPos(AlignedCodon ac1, AlignedCodon ac2)
{
- if (cdp1 == null
- || cdp2 == null
- || (cdp1[0] == cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2]))
+ return comparator.compare(ac1, ac2);
+ // return jalview_2_8_2compare(ac1, ac2);
+ }
+
+ /**
+ * Codon comparison up to Jalview 2.8.2. This rule is sequence order dependent
+ * - see http://issues.jalview.org/browse/JAL-1635
+ *
+ * @param ac1
+ * @param ac2
+ * @return
+ */
+ private static int jalview_2_8_2compare(AlignedCodon ac1, AlignedCodon ac2)
+ {
+ if (ac1 == null || ac2 == null || (ac1.equals(ac2)))
{
return 0;
}
- if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2])
+ if (ac1.pos1 < ac2.pos1 || ac1.pos2 < ac2.pos2 || ac1.pos3 < ac2.pos3)
{
// one base in cdp1 precedes the corresponding base in the other codon
return -1;
{
AlignedCodonFrame acf = new AlignedCodonFrame();
- /*
- * This array will be built up so that position i holds the codon positions
- * e.g. [7, 9, 10] that match column i (base 0) in the aligned translation.
- * Note this implies a contract that if two codons do not align exactly,
- * their translated products must occupy different column positions.
- */
- int[][] alignedCodons = new int[dnaWidth][];
+ alignedCodons = new AlignedCodon[dnaWidth];
int s;
int sSize = selection.size();
for (s = 0; s < sSize; s++)
{
SequenceI newseq = translateCodingRegion(selection.get(s),
- seqstring[s], acf, alignedCodons, pepseqs);
+ seqstring[s], acf, pepseqs);
if (newseq != null)
{
al.padGaps();
// link the protein translation to the DNA dataset
al.setDataset(dataset);
- translateAlignedAnnotations(annotations, al, acf, alignedCodons);
+ translateAlignedAnnotations(al, acf);
al.addCodonFrame(acf);
return al;
}
}
/**
- * Translate na alignment annotations onto translated amino acid alignment al
- * using codon mapping codons
+ * Translate nucleotide alignment annotations onto translated amino acid
+ * alignment using codon mapping codons
*
- * @param annotations
* @param al
- * @param acf
+ * the translated protein alignment
*/
- protected static void translateAlignedAnnotations(
- AlignmentAnnotation[] annotations, AlignmentI al,
- AlignedCodonFrame acf, int[][] codons)
+ protected void translateAlignedAnnotations(AlignmentI al,
+ AlignedCodonFrame acf)
{
// Can only do this for columns with consecutive codons, or where
// annotation is sequence associated.
continue;
}
- int aSize = acf.getaaWidth(); // aa alignment width.
+ int aSize = aaWidth;
Annotation[] anots = (annotation.annotations == null) ? null
: new Annotation[aSize];
if (anots != null)
for (int a = 0; a < aSize; a++)
{
// process through codon map.
- if (a < codons.length && codons[a] != null
- && codons[a][0] == (codons[a][2] - 2))
+ if (a < alignedCodons.length && alignedCodons[a] != null
+ && alignedCodons[a].pos1 == (alignedCodons[a].pos3 - 2))
{
- anots[a] = getCodonAnnotation(codons[a],
+ anots[a] = getCodonAnnotation(alignedCodons[a],
annotation.annotations);
}
}
}
}
- private static Annotation getCodonAnnotation(int[] is,
+ private static Annotation getCodonAnnotation(AlignedCodon is,
Annotation[] annotations)
{
// Have a look at all the codon positions for annotation and put the first
// one found into the translated annotation pos.
int contrib = 0;
Annotation annot = null;
- for (int p = 0; p < 3; p++)
+ for (int p = 1; p <= 3; p++)
{
- if (annotations[is[p]] != null)
+ int dnaCol = is.getBaseColumn(p);
+ if (annotations[dnaCol] != null)
{
if (annot == null)
{
- annot = new Annotation(annotations[is[p]]);
+ annot = new Annotation(annotations[dnaCol]);
contrib = 1;
}
else
{
// merge with last
- Annotation cpy = new Annotation(annotations[is[p]]);
+ Annotation cpy = new Annotation(annotations[dnaCol]);
if (annot.colour == null)
{
annot.colour = cpy.colour;
* sequence displayed under viscontigs visible columns
* @param seqstring
* ORF read in some global alignment reference frame
- * @param viscontigs
- * mapping from global reference frame to visible seqstring ORF read
* @param acf
* Definition of global ORF alignment reference frame
- * @param alignedCodons
* @param proteinSeqs
- * @param gapCharacter
- * @param starForStop
- * when true stop codons will translate as '*', otherwise as 'X'
* @return sequence ready to be added to alignment.
*/
protected SequenceI translateCodingRegion(SequenceI selection,
String seqstring, AlignedCodonFrame acf,
- int[][] alignedCodons, List<SequenceI> proteinSeqs)
+ List<SequenceI> proteinSeqs)
{
List<int[]> skip = new ArrayList<int[]>();
int skipint[] = null;
StringBuilder protein = new StringBuilder(seqstring.length() / 2);
String seq = seqstring.replace('U', 'T').replace('u', 'T');
char codon[] = new char[3];
- int cdp[] = new int[3], rf = 0, lastnpos = 0, nend;
+ int cdp[] = new int[3];
+ int rf = 0;
+ int lastnpos = 0;
+ int nend;
int aspos = 0;
int resSize = 0;
for (npos = 0, nend = seq.length(); npos < nend; npos++)
/*
* Filled up a reading frame...
*/
+ AlignedCodon alignedCodon = new AlignedCodon(cdp[0], cdp[1], cdp[2]);
String aa = ResidueProperties.codonTranslate(new String(codon));
rf = 0;
final String gapString = String.valueOf(gapChar);
if (skipint == null)
{
skipint = new int[]
- { cdp[0], cdp[2] };
+ { alignedCodon.pos1, alignedCodon.pos3 /* cdp[0], cdp[2] */};
}
- skipint[1] = cdp[2];
+ skipint[1] = alignedCodon.pos3; // cdp[2];
}
else
{
}
resSize++;
}
- // insert gaps prior to this codon - if necessary
boolean findpos = true;
while (findpos)
{
- // expand the codons array if necessary
- alignedCodons = checkCodonFrameWidth(alignedCodons, aspos);
-
/*
* Compare this codon's base positions with those currently aligned to
* this column in the translation.
*/
- final int compareCodonPos = Dna.compareCodonPos(cdp,
+ final int compareCodonPos = compareCodonPos(alignedCodon,
alignedCodons[aspos]);
- // debug
- System.out.println(seq + "/" + aa + " codons: "
- + Arrays.deepToString(alignedCodons));
- for (SequenceI s : proteinSeqs)
- {
- System.out.println(s.getSequenceAsString());
- }
- System.out
- .println(("Compare " + Arrays.toString(cdp) + " at pos "
- + aspos + " with "
- + Arrays.toString(alignedCodons[aspos]) + " got " + compareCodonPos));
- // end debug
switch (compareCodonPos)
{
case -1:
* This codon should precede the mapped positions - need to insert a
* gap in all prior sequences.
*/
- alignedCodons = insertAAGap(aspos, alignedCodons, proteinSeqs);
+ insertAAGap(aspos, proteinSeqs);
findpos = false;
break;
if (alignedCodons[aspos] == null)
{
// mark this column as aligning to this aligned reading frame
- alignedCodons[aspos] = new int[]
- { cdp[0], cdp[1], cdp[2] };
+ alignedCodons[aspos] = alignedCodon;
}
- else if (!Arrays.equals(alignedCodons[aspos], cdp))
+ else if (!alignedCodons[aspos].equals(alignedCodon))
{
throw new IllegalStateException("Tried to coalign "
- + Arrays.asList(alignedCodons[aspos], cdp));
+ + alignedCodons[aspos].toString() + " with "
+ + alignedCodon.toString());
}
- System.out.println(aspos + "/" + aaWidth);
if (aspos >= aaWidth)
{
// update maximum alignment width
- // (we can do this without calling checkCodonFrameWidth because it was
- // already done above)
aaWidth = aspos;
}
// ready for next translated reading frame alignment position (if any)
* Insert a gap into the aligned proteins and the codon mapping array.
*
* @param pos
- * @param alignedCodons
* @param proteinSeqs
* @return
*/
- protected int[][] insertAAGap(int pos, int[][] alignedCodons,
+ protected void insertAAGap(int pos,
List<SequenceI> proteinSeqs)
{
- System.out.println("insertAAGap " + pos + "/" + proteinSeqs.size());
aaWidth++;
for (SequenceI seq : proteinSeqs)
{
seq.insertCharAt(pos, gapChar);
}
- int[][] resized = checkCodonFrameWidth(alignedCodons, pos);
+ checkCodonFrameWidth();
if (pos < aaWidth)
{
aaWidth++;
- System.arraycopy(resized, pos, resized, pos + 1, resized.length - pos
- - 1);
- resized[pos] = null; // clear so new codon position can be marked.
+
+ /*
+ * Shift from [pos] to the end one to the right, and null out [pos]
+ */
+ System.arraycopy(alignedCodons, pos, alignedCodons, pos + 1,
+ alignedCodons.length - pos - 1);
+ alignedCodons[pos] = null;
}
- return resized;
}
/**
- * Check the codons array is big enough to accommodate the given position, if
- * not resize it.
- *
- * @param alignedCodons
- * @param aspos
- * @return the resized array (or the original if no resize needed)
+ * Check the codons array can accommodate a single insertion, if not resize
+ * it.
*/
- protected static int[][] checkCodonFrameWidth(int[][] alignedCodons,
- int aspos)
+ protected void checkCodonFrameWidth()
{
- // TODO why not codons.length < aspos ?
- // should codons expand if length is 2 or 3 and aslen==2 ?
- System.out.println("Checking " + alignedCodons.length + "/" + aspos);
- if (alignedCodons.length <= aspos + 1)
+ if (alignedCodons[alignedCodons.length - 1] != null)
{
- // probably never have to do this ?
- int[][] c = new int[alignedCodons.length + 10][];
- for (int i = 0; i < alignedCodons.length; i++)
- {
- c[i] = alignedCodons[i];
- }
- return c;
+ /*
+ * arraycopy insertion would bump a filled slot off the end, so expand.
+ */
+ AlignedCodon[] c = new AlignedCodon[alignedCodons.length + 10];
+ System.arraycopy(alignedCodons, 0, c, 0, alignedCodons.length);
+ alignedCodons = c;
}
- return alignedCodons;
}
/**
* @param pep
* @param map
* @param featureTypes
- * hash who's keys are the displayed feature type strings
+ * hash whose keys are the displayed feature type strings
* @param featureGroups
* hash where keys are feature groups and values are Boolean objects
* indicating if they are displayed.
*/
private static void transferCodedFeatures(SequenceI dna, SequenceI pep,
- MapList map, Hashtable featureTypes, Hashtable featureGroups)
+ MapList map, Map<String, Object> featureTypes,
+ Map<String, Boolean> featureGroups)
{
- SequenceFeature[] sf = (dna.getDatasetSequence() != null ? dna
+ SequenceFeature[] sfs = (dna.getDatasetSequence() != null ? dna
.getDatasetSequence() : dna).getSequenceFeatures();
Boolean fgstate;
DBRefEntry[] dnarefs = DBRefUtils.selectRefs(dna.getDBRef(),
}
}
}
- if (sf != null)
+ if (sfs != null)
{
- for (int f = 0; f < sf.length; f++)
+ for (SequenceFeature sf : sfs)
{
- fgstate = (featureGroups == null) ? null : ((Boolean) featureGroups
- .get(sf[f].featureGroup));
- if ((featureTypes == null || featureTypes.containsKey(sf[f]
- .getType())) && (fgstate == null || fgstate.booleanValue()))
+ fgstate = (featureGroups == null) ? null : featureGroups
+ .get(sf.featureGroup);
+ if ((featureTypes == null || featureTypes.containsKey(sf.getType()))
+ && (fgstate == null || fgstate.booleanValue()))
{
- if (FeatureProperties.isCodingFeature(null, sf[f].getType()))
+ if (FeatureProperties.isCodingFeature(null, sf.getType()))
{
// if (map.intersectsFrom(sf[f].begin, sf[f].end))
{
--- /dev/null
+package jalview.datamodel;
+
+/**
+ * Holds the aligned column positions (base 0) for one codon in a nucleotide
+ * sequence. The object is immutable once created.
+ *
+ * Example: in "G-AT-C-GA" the aligned codons are (0, 2, 3) and (5, 7, 8).
+ *
+ * @author gmcarstairs
+ *
+ */
+public final class AlignedCodon
+{
+ public final int pos1;
+
+ public final int pos2;
+
+ public final int pos3;
+
+ /**
+ * Constructor given the aligned column positions of the three bases.
+ *
+ * @param i
+ * column of the first base
+ * @param j
+ * column of the second base
+ * @param k
+ * column of the third base
+ */
+ public AlignedCodon(int i, int j, int k)
+ {
+ pos1 = i;
+ pos2 = j;
+ pos3 = k;
+ }
+
+ /**
+ * Returns the column position for the given base (1, 2, 3).
+ *
+ * @param base
+ * @return
+ * @throws IllegalArgumentException
+ * if an argument value other than 1, 2 or 3 is supplied
+ */
+ public int getBaseColumn(int base)
+ {
+ if (base < 1 || base > 3)
+ {
+ throw new IllegalArgumentException(Integer.toString(base));
+ }
+ return base == 1 ? pos1 : (base == 2 ? pos2 : pos3);
+ }
+
+ /**
+ * Two aligned codons are equal if all their base positions are the same.
+ *
+ * Note that, unlike the general contract of Object.equals, a null argument
+ * is treated as equal; this is deliberate (see the comment in the method
+ * body) and callers should not rely on equals to reject null.
+ */
+ @Override
+ public boolean equals(Object o)
+ {
+ /*
+ * Equality with null value required for consistency with
+ * Dna.compareCodonPos
+ */
+ if (o == null)
+ {
+ return true;
+ }
+ if (!(o instanceof AlignedCodon))
+ {
+ return false;
+ }
+ AlignedCodon ac = (AlignedCodon) o;
+ return (pos1 == ac.pos1 && pos2 == ac.pos2 && pos3 == ac.pos3);
+ }
+
+ /**
+ * Returns a hash code computed from all three base positions, so that
+ * codons which are equal also have equal hash codes. Without this, equal
+ * codons could not be used reliably as keys or members of hash-based
+ * collections.
+ */
+ @Override
+ public int hashCode()
+ {
+ return 31 * (31 * pos1 + pos2) + pos3;
+ }
+
+ @Override
+ public String toString()
+ {
+ return "[" + pos1 + ", " + pos2 + ", " + pos3 + "]";
+ }
+}
public class AlignedCodonFrame
{
- /**
- * <pre>
- * Aligned nucleotide positions for codons mapped to column positions of of aligned
- * proteins. e.g.
- * codons[3] = [12, 14, 15] means:
- * column 4 in the protein alignment translates cols 13, 15, 16 in cDNA
- * codons[5] = null means column 6 in the protein alignment is a gap
- * </pre>
- */
- public int[][] codons = null;
-
- /**
- * Width of protein sequence alignment (implicit assertion that codons.length
- * >= aaWidth)
- */
- public int aaWidth = 0;
-
/*
* TODO: not an ideal solution - we reference the aligned amino acid sequences
* in order to make insertions on them Better would be dnaAlignment and
/*
* tied array of Mappings to protein sequence Objects and SequenceI[]
- * aaSeqs=null; MapLists where eac maps from the corresponding dnaSeqs element
- * to corresponding aaSeqs element
+ * aaSeqs=null; MapLists where each maps from the corresponding dnaSeqs
+ * element to corresponding aaSeqs element
*/
private Mapping[] dnaToProt = null;
*/
public AlignedCodonFrame(AlignedCodonFrame acf, SequenceI[] alignment)
{
- this.codons = acf.codons;
this.dnaSeqs = acf.dnaSeqs;
this.dnaToProt = acf.dnaToProt;
}
/**
- * ensure that codons array is at least as wide as aslen residues
- *
- * @param aslen
- * @return (possibly newly expanded) codon array
- */
- public int[][] checkCodonFrameWidth(int aslen)
- {
- // TODO why not codons.length < aslen ?
- // should codons expand if length is 2 or 3 and aslen==2 ?
- if (codons.length <= aslen + 1)
- {
- // probably never have to do this ?
- int[][] c = new int[codons.length + 10][];
- for (int i = 0; i < codons.length; i++)
- {
- c[i] = codons[i];
- codons[i] = null;
- }
- codons = c;
- }
- return codons;
- }
-
- /**
- * @return width of aligned translated amino acid residues
- */
- public int getaaWidth()
- {
- return aaWidth;
- }
-
- /**
- * increase aaWidth by one and insert a new aligned codon position space at
- * aspos.
- *
- * @param aspos
- */
- public void insertAAGap(int aspos, char gapCharacter)
- {
- // this aa appears before the aligned codons at aspos - so shift them in
- // each pair of mapped sequences
- aaWidth++;
- // we actually have to modify the aligned sequences here, so use the
- // a_aaSeqs vector
- for (SequenceI seq : a_aaSeqs)
- {
- seq.insertCharAt(aspos, gapCharacter);
- }
-
- if (aspos < aaWidth)
- {
- aaWidth++;
- System.arraycopy(codons, aspos, codons, aspos + 1, codons.length
- - aspos - 1);
- codons[aspos] = null; // clear so new codon position can be marked.
- }
- }
-
- public void setAaWidth(int aapos)
- {
- aaWidth = aapos;
- }
-
- /**
* add a mapping between the dataset sequences for the associated dna and
* protein sequence objects
*
import jalview.analysis.AAFrequency;
import jalview.analysis.AlignmentSorter;
import jalview.analysis.AlignmentUtils;
+import jalview.analysis.AlignmentUtils.MappingResult;
import jalview.analysis.Conservation;
import jalview.analysis.CrossRef;
import jalview.analysis.Dna;
final AlignmentI thatAlignment = af.alignPanel.getAlignment();
if (thatAlignment.isNucleotide())
{
- // TODO exclude an AlignFrame which is already mapped to this one
- // temporary version: exclude if already a CommandListener (should
- // cover most cases but not all)
- final boolean alreadyMapped = this.viewport
- .getStructureSelectionManager().hasCommandListener(
- af.viewport);
- if (alreadyMapped)
+ MappingResult mapped = AlignmentUtils.mapProteinToCdna(
+ thisAlignment, thatAlignment);
+ if (mapped == MappingResult.AlreadyMapped)
{
alreadyLinkedCount++;
}
- else
+ else if (mapped == MappingResult.Mapped)
{
- boolean mapped = AlignmentUtils.mapProteinToCdna(thisAlignment,
- thatAlignment);
- if (mapped)
- {
- final StructureSelectionManager ssm = StructureSelectionManager
- .getStructureSelectionManager(Desktop.instance);
- ssm.addMappings(thisAlignment.getCodonFrames());
- ssm.addCommandListener(af.getViewport());
- linkedCount++;
- }
+ final StructureSelectionManager ssm = StructureSelectionManager
+ .getStructureSelectionManager(Desktop.instance);
+ ssm.addMappings(thisAlignment.getCodonFrames());
+ ssm.addCommandListener(af.getViewport());
+ linkedCount++;
}
}
}
package jalview.analysis;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
+import jalview.analysis.AlignmentUtils.MappingResult;
import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
AlignmentI cdna1 = loadAlignment(
dnaData,
"FASTA");
- boolean mapped = AlignmentUtils.mapProteinToCdna(protein, cdna1);
- assertTrue(mapped);
+ MappingResult mapped = AlignmentUtils.mapProteinToCdna(protein, cdna1);
+ assertEquals(mapped, MappingResult.Mapped);
/*
* Check two mappings (one for Mouse, one for Human)
AlignmentI cdna1 = loadAlignment(
dnaData,
"FASTA");
- boolean mapped = AlignmentUtils.mapProteinToCdna(protein, cdna1);
- assertTrue(mapped);
+ MappingResult mapped = AlignmentUtils.mapProteinToCdna(protein, cdna1);
+ assertEquals(mapped, MappingResult.Mapped);
/*
* Check two mappings (one for Mouse, one for Human)
--- /dev/null
+package jalview.analysis;
+
+import java.util.Random;
+
+/**
+ * Generates a random Fasta format DNA alignment for given sequence length and
+ * count.
+ *
+ * @author gmcarstairs
+ *
+ */
+public class DnaAlignmentGenerator
+{
+ private static final char GAP = '-';
+
+ private static final char[] BASES = new char[]
+ { 'G', 'T', 'C', 'A', GAP };
+
+ private Random random;
+
+ /**
+ * Given args for sequence length and count, output a DNA 'alignment' where
+ * each position is a random choice from 'GTCA-'. If fewer than three
+ * arguments are supplied, a usage message is written to stderr and nothing
+ * is generated.
+ *
+ * @param args
+ * the width (base count) and height (sequence count) to generate
+ * plus an integer random seed value
+ * @throws NumberFormatException
+ * if any of the arguments is not a valid integer
+ */
+ public static void main(String[] args)
+ {
+ if (args == null || args.length < 3)
+ {
+ System.err
+ .println("Usage: DnaAlignmentGenerator <width> <height> <randomSeed>");
+ return;
+ }
+ int width = Integer.parseInt(args[0]);
+ int height = Integer.parseInt(args[1]);
+ long randomSeed = Long.parseLong(args[2]);
+ new DnaAlignmentGenerator().generate(width, height, randomSeed);
+ }
+
+ /**
+ * Outputs a DNA 'alignment' of given width and height, where each position is
+ * a random choice from 'GTCA-'.
+ *
+ * @param width
+ * the number of real bases (excluding gaps) in each sequence
+ * @param height
+ * the number of sequences to output
+ * @param randomSeed
+ * seed for the random generator, so output is repeatable
+ */
+ private void generate(int width, int height, long randomSeed)
+ {
+ random = new Random(randomSeed);
+ for (int seqno = 0; seqno < height; seqno++)
+ {
+ generateSequence(seqno + 1, width);
+ }
+ }
+
+ /**
+ * Outputs a DNA 'sequence' of given length, with some random gaps included.
+ *
+ * @param seqno
+ * the (base 1) sequence number, used in the Fasta header line
+ * @param length
+ * the number of real bases (excluding gaps) to output
+ */
+ private void generateSequence(int seqno, int length)
+ {
+ System.out.println(">SEQ" + seqno);
+ StringBuilder seq = new StringBuilder(length);
+
+ /*
+ * Loop till we've output 'length' real bases (excluding gaps)
+ */
+ int count = 0;
+ while (count < length)
+ {
+ /*
+ * The draw-then-modulo form is kept (rather than a bounded nextInt) so
+ * that the output for a given seed is unchanged; the modulus is tied to
+ * the size of BASES so the two cannot drift apart.
+ */
+ char c = BASES[random.nextInt(Integer.MAX_VALUE) % BASES.length];
+ seq.append(c);
+ if (c != GAP)
+ {
+ count++;
+ }
+ }
+ System.out.println(seq.toString());
+ }
+}
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import jalview.api.AlignViewportI;
+import jalview.datamodel.AlignedCodon;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.ColumnSelection;
import jalview.gui.AlignViewport;
import jalview.io.FormatAdapter;
import java.io.IOException;
-import java.util.Arrays;
import org.junit.Test;
* @throws IOException
*/
@Test
- public void testTranslatCdna_hiddenColumns() throws IOException
+ public void testTranslateCdna_hiddenColumns() throws IOException
{
AlignmentI alf = new FormatAdapter().readFile(fasta,
FormatAdapter.PASTE, "FASTA");
}
/**
+ * Use this test to help debug into any cases of interest.
+ */
+ @Test
+ public void testCompareCodonPos_oneOnly()
+ {
+ assertFollows("-AA--A", "G--GG"); // 2 shifted seq2, 3 shifted seq1
+ }
+
+ /**
* Tests for method that compares 'alignment' of two codon position triplets.
*/
@Test
/*
* Returns 0 for any null argument
*/
- assertEquals(0, Dna.compareCodonPos(new int[]
- { 1, 2, 3 }, null));
- assertEquals(0, Dna.compareCodonPos(null, new int[]
- { 1, 2, 3 }));
+ assertEquals(0, Dna.compareCodonPos(new AlignedCodon(1, 2, 3), null));
+ assertEquals(0, Dna.compareCodonPos(null, new AlignedCodon(1, 2, 3)));
/*
* Work through 27 combinations. First 9 cases where first position matches.
assertPrecedes("AAA", "GG-G"); // 2 matches, 3 shifted seq2
assertFollows("A-AA", "GG-G"); // 2 shifted seq1, 3 matches
assertFollows("A-A-A", "GG-G"); // 2 shifted seq1, 3 shifted seq1
- // TODO is this right?
assertPrecedes("A-AA", "GG--G"); // 2 shifted seq1, 3 shifted seq2
assertPrecedes("AA-A", "G-GG"); // 2 shifted seq2, 3 matches
- assertPrecedes("AA--A", "G-GG"); // 2 shifted seq2, 3 shifted seq1
+ assertFollows("AA--A", "G-GG"); // 2 shifted seq2, 3 shifted seq1
assertPrecedes("AAA", "G-GG"); // 2 shifted seq2, 3 shifted seq2
/*
*/
assertFollows("-AAA", "G-GG"); // 2 and 3 match
assertFollows("-AA-A", "G-GG"); // 2 matches, 3 shifted seq1
- assertPrecedes("-AAA", "G-G-G"); // 2 matches, 3 shifted seq2
+ // 'enclosing' case: pick first to start precedes
+ assertFollows("-AAA", "G-G-G"); // 2 matches, 3 shifted seq2
assertFollows("-A-AA", "G-G-G"); // 2 shifted seq1, 3 matches
assertFollows("-A-A-A", "G-G-G"); // 2 shifted seq1, 3 shifted seq1
- // is this right? codon2 ends after codon1
- assertPrecedes("-A-AA", "G-G--G"); // 2 shifted seq1, 3 shifted seq2
- assertPrecedes("-AA-A", "G--GG"); // 2 shifted seq2, 3 matches
- assertPrecedes("-AA--A", "G--GG"); // 2 shifted seq2, 3 shifted seq1
+ // 'enclosing' case: pick first to start precedes
+ assertFollows("-A-AA", "G-G--G"); // 2 shifted seq1, 3 shifted seq2
+ assertFollows("-AA-A", "G--GG"); // 2 shifted seq2, 3 matches
+ assertFollows("-AA--A", "G--GG"); // 2 shifted seq2, 3 shifted seq1
assertPrecedes("-AAA", "G--GG"); // 2 shifted seq2, 3 shifted seq2
/*
assertPrecedes("A-A-A", "-GGG"); // 2 matches, 3 shifted seq1
assertPrecedes("A-AA", "-GG-G"); // 2 matches, 3 shifted seq2
assertPrecedes("A--AA", "-GG-G"); // 2 shifted seq1, 3 matches
- assertPrecedes("A--AA", "-GGG"); // 2 shifted seq1, 3 shifted seq1
+ // 'enclosing' case with middle base deciding:
+ assertFollows("A--AA", "-GGG"); // 2 shifted seq1, 3 shifted seq1
assertPrecedes("A--AA", "-GG--G"); // 2 shifted seq1, 3 shifted seq2
assertPrecedes("AA-A", "-GGG"); // 2 shifted seq2, 3 matches
assertPrecedes("AA--A", "-GGG"); // 2 shifted seq2, 3 shifted seq1
assertPrecedes("AAA", "-GGG"); // 2 shifted seq2, 3 shifted seq2
+ }
- /*
- * two codon positions can each 'precede' the other! the comparison is
- * biased to the first sequence
- */
- // TODO is this correct?
- assertPrecedes("-A--AA", "--GGG");
- assertPrecedes("--AAA", "-A--AA");
+ /**
+ * Test that all the cases in testCompareCodonPos have a 'symmetric'
+ * comparison (without checking the actual comparison result).
+ */
+ @Test
+ public void testCompareCodonPos_isSymmetric()
+ {
+ assertSymmetric("AAA", "GGG");
+ assertSymmetric("AA-A", "GGG");
+ assertSymmetric("AAA", "GG-G");
+ assertSymmetric("A-AA", "GG-G");
+ assertSymmetric("A-A-A", "GG-G");
+ assertSymmetric("A-AA", "GG--G");
+ assertSymmetric("AA-A", "G-GG");
+ assertSymmetric("AA--A", "G-GG");
+ assertSymmetric("AAA", "G-GG");
+ assertSymmetric("-AAA", "G-GG");
+ assertSymmetric("-AA-A", "G-GG");
+ assertSymmetric("-AAA", "G-G-G");
+ assertSymmetric("-A-AA", "G-G-G");
+ assertSymmetric("-A-A-A", "G-G-G");
+ assertSymmetric("-A-AA", "G-G--G");
+ assertSymmetric("-AA-A", "G--GG");
+ assertSymmetric("-AA--A", "G--GG");
+ assertSymmetric("-AAA", "G--GG");
+ assertSymmetric("A-AA", "-GGG");
+ assertSymmetric("A-A-A", "-GGG");
+ assertSymmetric("A-AA", "-GG-G");
+ assertSymmetric("A--AA", "-GG-G");
+ assertSymmetric("A--AA", "-GGG");
+ assertSymmetric("A--AA", "-GG--G");
+ assertSymmetric("AA-A", "-GGG");
+ assertSymmetric("AA--A", "-GGG");
+ assertSymmetric("AAA", "-GGG");
+ }
+
+ /**
+ * Assert that comparing the two codons in either order gives results of
+ * opposite sign (or zero both ways).
+ *
+ * @param codon1
+ * @param codon2
+ */
+ private void assertSymmetric(String codon1, String codon2)
+ {
+ assertEquals("Comparison of '" + codon1 + "' and '" + codon2
+ + "' not symmetric", Integer.signum(compare(codon1, codon2)),
+ -Integer.signum(compare(codon2, codon1)));
}
/**
* Assert that the first sequence should map to the same position as the
- * second in a translated alignment
+ * second in a translated alignment. Also checks that this is true if the
+ * order of the codons is reversed.
*
* @param codon1
* @param codon2
*/
private void assertMatches(String codon1, String codon2)
{
- assertEquals("Expected match (0)", 0, compare(codon1, codon2));
+ assertEquals("Expected '" + codon1 + "' matches '" + codon2 + "'", 0,
+ compare(codon1, codon2));
+ assertEquals("Expected '" + codon2 + "' matches '" + codon1 + "'", 0,
+ compare(codon2, codon1));
}
/**
*/
private void assertPrecedes(String codon1, String codon2)
{
- assertEquals("Expected precedes (-1)", -1, compare(codon1, codon2));
+ assertEquals("Expected '" + codon1 + "' precedes '" + codon2 + "'",
+ -1, compare(codon1, codon2));
}
/**
*/
private void assertFollows(String codon1, String codon2)
{
- assertEquals("Expected follows (1)", 1, compare(codon1, codon2));
+ assertEquals("Expected '" + codon1 + "' follows '" + codon2 + "'", 1,
+ compare(codon1, codon2));
}
/**
*/
private int compare(String s1, String s2)
{
- final int[] cd1 = convertCodon(s1);
- final int[] cd2 = convertCodon(s2);
- System.out.println("K: " + s1 + " " + Arrays.toString(cd1));
- System.out.println("G: " + s2 + " " + Arrays.toString(cd2));
+ final AlignedCodon cd1 = convertCodon(s1);
+ final AlignedCodon cd2 = convertCodon(s2);
+ System.out.println("K: " + s1 + " " + cd1.toString());
+ System.out.println("G: " + s2 + " " + cd2.toString());
System.out.println();
return Dna.compareCodonPos(cd1, cd2);
}
* @param s
* @return
*/
- private int[] convertCodon(String s)
+ private AlignedCodon convertCodon(String s)
{
- int[] result = new int[3];
+ int[] codon = new int[3];
int i = 0;
for (int j = 0; j < s.length(); j++)
{
if (s.charAt(j) != '-')
{
- result[i++] = j;
+ codon[i++] = j;
}
}
- return result;
+ return new AlignedCodon(codon[0], codon[1], codon[2]);
}
/**
@Test
public void testConvertCodon()
{
- assertEquals("[0, 1, 2]", Arrays.toString(convertCodon("AAA")));
- assertEquals("[0, 2, 5]", Arrays.toString(convertCodon("A-A--A")));
- assertEquals("[1, 3, 4]", Arrays.toString(convertCodon("-A-AA-")));
+ assertEquals("[0, 1, 2]", convertCodon("AAA").toString());
+ assertEquals("[0, 2, 5]", convertCodon("A-A--A").toString());
+ assertEquals("[1, 3, 4]", convertCodon("-A-AA-").toString());
}
}
package jalview.datamodel;
-import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertSame;
import jalview.util.MapList;
@Test
public void testConstructor_copyWithSequence()
{
- AlignedCodonFrame acf = new AlignedCodonFrame(0);
- acf.codons = new int[][]
- { new int[]
- { 1, 3 }, new int[]
- { 4, 6 } };
+ AlignedCodonFrame acf = new AlignedCodonFrame();
MapList map = new MapList(new int[]
{ 1, 3 }, new int[]
{ 1, 1 }, 3, 1);
newaligned[0] = new Sequence("", "-F-K-Q");
newaligned[0].setDatasetSequence(aaseq.getDatasetSequence());
AlignedCodonFrame copy = new AlignedCodonFrame(acf, newaligned);
- assertSame(copy.codons, acf.codons);
- assertEquals(copy.aaWidth, acf.aaWidth);
assertSame(copy.getdnaSeqs(), acf.getdnaSeqs());
assertSame(newaligned[0], copy.getAaForDnaSeq(dnaseq, false));
}
--- /dev/null
+package jalview.datamodel;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.junit.Test;
+
+public class AlignedCodonTest
+{
+
+ /**
+ * Equality holds for the same instance, for a codon with identical
+ * positions, and (by design of AlignedCodon) for null; it fails for other
+ * types and for codons that differ in any position.
+ */
+ @Test
+ public void testEquals()
+ {
+ final AlignedCodon codon = new AlignedCodon(1, 3, 4);
+ final AlignedCodon samePositions = new AlignedCodon(1, 3, 4);
+ final AlignedCodon differentEnd = new AlignedCodon(1, 3, 5);
+
+ assertTrue(codon.equals(null));
+ assertFalse(codon.equals("hello"));
+ assertFalse(codon.equals(differentEnd));
+ assertTrue(codon.equals(samePositions));
+ assertTrue(codon.equals(codon));
+ }
+
+ /**
+ * The string form lists the three positions in order inside square
+ * brackets.
+ */
+ @Test
+ public void testToString()
+ {
+ final String rendered = new AlignedCodon(1, 3, 4).toString();
+ assertEquals("[1, 3, 4]", rendered);
+ }
+}
* Make mappings between sequences. The 'aligned cDNA' is playing the role
* of what would normally be protein here.
*/
- AlignedCodonFrame acf = new AlignedCodonFrame(al1.getWidth());
+ AlignedCodonFrame acf = new AlignedCodonFrame();
MapList ml = new MapList(new int[]
{ 1, 12 }, new int[]
{ 1, 12 }, 1, 1);
*/
AlignmentI al1 = loadAlignment(CDNA_SEQS_1, "FASTA");
AlignmentI al2 = loadAlignment(AA_SEQS_1, "FASTA");
- AlignedCodonFrame acf = new AlignedCodonFrame(al2.getWidth());
+ AlignedCodonFrame acf = new AlignedCodonFrame();
MapList ml = new MapList(new int[]
{ 1, 12 }, new int[]
{ 1, 4 }, 3, 1);