From: gmungoc Date: Mon, 19 Jan 2015 17:18:20 +0000 (+0000) Subject: JAL-1619 refactored cDNA translation; modified codon comparison; tests X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=285b51fa6f49054040c72dfb2a64db637dc05c79;p=jalview.git JAL-1619 refactored cDNA translation; modified codon comparison; tests --- diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 8591d6a..b38ed30 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -43,6 +43,15 @@ public class AlignmentUtils { /** + * Represents the 3 possible results of trying to map one alignment to + * another. + */ + public enum MappingResult + { + Mapped, NotMapped, AlreadyMapped + } + + /** * given an existing alignment, create a new alignment including all, or up to * flankSize additional symbols from each sequence's dataset sequence * @@ -195,17 +204,22 @@ public class AlignmentUtils /** * Build mapping of protein to cDNA alignment. Mappings are made between - * sequences which have the same name and compatible lengths. Returns true if - * at least one sequence mapping was made, else false. + * sequences which have the same name and compatible lengths. Has a 3-valued + * result: either Mapped (at least one sequence mapping was created), + * AlreadyMapped (all possible sequence mappings already exist), or NotMapped + * (no possible sequence mappings exist). 
* * @param proteinAlignment * @param cdnaAlignment * @return */ - public static boolean mapProteinToCdna(final AlignmentI proteinAlignment, + public static MappingResult mapProteinToCdna( + final AlignmentI proteinAlignment, final AlignmentI cdnaAlignment) { - boolean mapped = false; + boolean mappingPossible = false; + boolean mappingPerformed = false; + List thisSeqs = proteinAlignment.getSequences(); /* @@ -221,23 +235,61 @@ public class AlignmentUtils if (candidates == null) { /* - * No cDNA sequence with matching name, so no mapping for this protein - * sequence + * No cDNA sequence with matching name, so no mapping possible for this + * protein sequence */ continue; } + mappingPossible = true; for (SequenceI cdnaSeq : candidates) { - MapList map = mapProteinToCdna(aaSeq, cdnaSeq); - if (map != null) + if (!mappingExists(proteinAlignment.getCodonFrames(), + aaSeq.getDatasetSequence(), cdnaSeq.getDatasetSequence())) { - acf.addMap(cdnaSeq, aaSeq, map); - mapped = true; + MapList map = mapProteinToCdna(aaSeq, cdnaSeq); + if (map != null) + { + acf.addMap(cdnaSeq, aaSeq, map); + mappingPerformed = true; + } } } proteinAlignment.addCodonFrame(acf); } - return mapped; + + /* + * If at least one mapping was possible but none was done, then the + * alignments are already as mapped as they can be. + */ + if (mappingPossible && !mappingPerformed) + { + return MappingResult.AlreadyMapped; + } + else + { + return mappingPerformed ? MappingResult.Mapped + : MappingResult.NotMapped; + } + } + + /** + * Answers true if the mappings include one between the given (dataset) + * sequences. 
+ */ + public static boolean mappingExists(AlignedCodonFrame[] codonFrames, + SequenceI aaSeq, SequenceI cdnaSeq) + { + if (codonFrames != null) + { + for (AlignedCodonFrame acf : codonFrames) + { + if (cdnaSeq == acf.getDnaForAaSeq(aaSeq)) + { + return true; + } + } + } + return false; } /** diff --git a/src/jalview/analysis/CodonComparator.java b/src/jalview/analysis/CodonComparator.java new file mode 100644 index 0000000..fc196de --- /dev/null +++ b/src/jalview/analysis/CodonComparator.java @@ -0,0 +1,91 @@ +package jalview.analysis; + +import jalview.datamodel.AlignedCodon; + +import java.util.Comparator; + +/** + * Implements rules for comparing two aligned codons, i.e. determining whether + * they should occupy the same position in a translated protein alignment, or + * one or the other should 'follow' (be preceded by a gap). + * + * @author gmcarstairs + * + */ +public final class CodonComparator implements Comparator +{ + + @Override + public int compare(AlignedCodon ac1, AlignedCodon ac2) + { + if (ac1 == null || ac2 == null || ac1.equals(ac2)) + { + return 0; + } + + /** + *
+     * Case 1: if one starts before the other, and doesn't end after it, then it
+     * precedes. We ignore the middle base position here.
+     * A--GT
+     * -CT-G
+     * 
+ */ + if (ac1.pos1 < ac2.pos1 && ac1.pos3 <= ac2.pos3) + { + return -1; + } + if (ac2.pos1 < ac1.pos1 && ac2.pos3 <= ac1.pos3) + { + return 1; + } + + /** + *
+     * Case 2: if one ends after the other, and doesn't start before it, then it
+     * follows. We ignore the middle base position here.
+     * -TG-A
+     * G-TC
+     * 
+ */ + if (ac1.pos3 > ac2.pos3 && ac1.pos1 >= ac2.pos1) + { + return 1; + } + if (ac2.pos3 > ac1.pos3 && ac2.pos1 >= ac1.pos1) + { + return -1; + } + + /* + * Case 3: if start and end match, compare middle base positions. + */ + if (ac1.pos1 == ac2.pos1 && ac1.pos3 == ac2.pos3) + { + return Integer.compare(ac1.pos2, ac2.pos2); + } + + /* + * That just leaves the 'enclosing' case - one codon starts after but ends + * before the other. If the middle bases don't match, use their comparison + * (majority vote). + */ + int compareMiddles = Integer.compare(ac1.pos2, ac2.pos2); + if (compareMiddles != 0) + { + return compareMiddles; + } + + /** + *
+     * Finally just leaves overlap with matching middle base, e.g. 
+     * -A-A-A
+     * G--GG 
+     * In this case the choice is arbitrary whether to compare based on
+     * first or last base position. We pick the first. Note this preserves
+     * symmetry of the comparison.
+     * 
+ */ + return Integer.compare(ac1.pos1, ac2.pos1); + } +} diff --git a/src/jalview/analysis/Dna.java b/src/jalview/analysis/Dna.java index ebaee8b..2ef63d4 100644 --- a/src/jalview/analysis/Dna.java +++ b/src/jalview/analysis/Dna.java @@ -22,6 +22,7 @@ package jalview.analysis; import jalview.api.AlignViewportI; import jalview.bin.Cache; +import jalview.datamodel.AlignedCodon; import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; @@ -43,13 +44,20 @@ import jalview.util.ShiftList; import java.util.ArrayList; import java.util.Arrays; -import java.util.Hashtable; +import java.util.Comparator; import java.util.List; +import java.util.Map; public class Dna { private static final String STOP_X = "X"; + private static final Comparator comparator = new CodonComparator(); + + /* + * 'final' variables describe the inputs to the translation, which should not + * be modified. + */ final private List selection; final private String[] seqstring; @@ -64,8 +72,21 @@ public class Dna final private Alignment dataset; + /* + * Working variables for the translation. + * + * The width of the translation-in-progress protein alignment. + */ private int aaWidth = 0; + /* + * This array will be built up so that position i holds the codon positions + * e.g. [7, 9, 10] that match column i (base 0) in the aligned translation. + * Note this implies a contract that if two codons do not align exactly, their + * translated products must occupy different column positions. + */ + private AlignedCodon[] alignedCodons; + /** * Constructor given a viewport and the visible contigs. * @@ -96,19 +117,31 @@ public class Dna *
  • compareCodonPos([3,4,5], [2,5,6]) also returns -1
  • * * - * @param cdp1 - * @param cdp2 + * @param ac1 + * @param ac2 * @return */ - public static int compareCodonPos(int[] cdp1, int[] cdp2) + public static final int compareCodonPos(AlignedCodon ac1, AlignedCodon ac2) { - if (cdp1 == null - || cdp2 == null - || (cdp1[0] == cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2])) + return comparator.compare(ac1, ac2); + // return jalview_2_8_2compare(ac1, ac2); + } + + /** + * Codon comparison up to Jalview 2.8.2. This rule is sequence order dependent + * - see http://issues.jalview.org/browse/JAL-1635 + * + * @param ac1 + * @param ac2 + * @return + */ + private static int jalview_2_8_2compare(AlignedCodon ac1, AlignedCodon ac2) + { + if (ac1 == null || ac2 == null || (ac1.equals(ac2))) { return 0; } - if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2]) + if (ac1.pos1 < ac2.pos1 || ac1.pos2 < ac2.pos2 || ac1.pos3 < ac2.pos3) { // one base in cdp1 precedes the corresponding base in the other codon return -1; @@ -125,13 +158,7 @@ public class Dna { AlignedCodonFrame acf = new AlignedCodonFrame(); - /* - * This array will be built up so that position i holds the codon positions - * e.g. [7, 9, 10] that match column i (base 0) in the aligned translation. - * Note this implies a contract that if two codons do not align exactly, - * their translated products must occupy different column positions. 
- */ - int[][] alignedCodons = new int[dnaWidth][]; + alignedCodons = new AlignedCodon[dnaWidth]; int s; int sSize = selection.size(); @@ -139,7 +166,7 @@ public class Dna for (s = 0; s < sSize; s++) { SequenceI newseq = translateCodingRegion(selection.get(s), - seqstring[s], acf, alignedCodons, pepseqs); + seqstring[s], acf, pepseqs); if (newseq != null) { @@ -162,7 +189,7 @@ public class Dna al.padGaps(); // link the protein translation to the DNA dataset al.setDataset(dataset); - translateAlignedAnnotations(annotations, al, acf, alignedCodons); + translateAlignedAnnotations(al, acf); al.addCodonFrame(acf); return al; } @@ -222,16 +249,14 @@ public class Dna } /** - * Translate na alignment annotations onto translated amino acid alignment al - * using codon mapping codons + * Translate nucleotide alignment annotations onto translated amino acid + * alignment using codon mapping codons * - * @param annotations * @param al - * @param acf + * the translated protein alignment */ - protected static void translateAlignedAnnotations( - AlignmentAnnotation[] annotations, AlignmentI al, - AlignedCodonFrame acf, int[][] codons) + protected void translateAlignedAnnotations(AlignmentI al, + AlignedCodonFrame acf) { // Can only do this for columns with consecutive codons, or where // annotation is sequence associated. @@ -252,7 +277,7 @@ public class Dna continue; } - int aSize = acf.getaaWidth(); // aa alignment width. + int aSize = aaWidth; Annotation[] anots = (annotation.annotations == null) ? null : new Annotation[aSize]; if (anots != null) @@ -260,10 +285,10 @@ public class Dna for (int a = 0; a < aSize; a++) { // process through codon map. 
- if (a < codons.length && codons[a] != null - && codons[a][0] == (codons[a][2] - 2)) + if (a < alignedCodons.length && alignedCodons[a] != null + && alignedCodons[a].pos1 == (alignedCodons[a].pos3 - 2)) { - anots[a] = getCodonAnnotation(codons[a], + anots[a] = getCodonAnnotation(alignedCodons[a], annotation.annotations); } } @@ -304,26 +329,27 @@ public class Dna } } - private static Annotation getCodonAnnotation(int[] is, + private static Annotation getCodonAnnotation(AlignedCodon is, Annotation[] annotations) { // Have a look at all the codon positions for annotation and put the first // one found into the translated annotation pos. int contrib = 0; Annotation annot = null; - for (int p = 0; p < 3; p++) + for (int p = 1; p <= 3; p++) { - if (annotations[is[p]] != null) + int dnaCol = is.getBaseColumn(p); + if (annotations[dnaCol] != null) { if (annot == null) { - annot = new Annotation(annotations[is[p]]); + annot = new Annotation(annotations[dnaCol]); contrib = 1; } else { // merge with last - Annotation cpy = new Annotation(annotations[is[p]]); + Annotation cpy = new Annotation(annotations[dnaCol]); if (annot.colour == null) { annot.colour = cpy.colour; @@ -359,20 +385,14 @@ public class Dna * sequence displayed under viscontigs visible columns * @param seqstring * ORF read in some global alignment reference frame - * @param viscontigs - * mapping from global reference frame to visible seqstring ORF read * @param acf * Definition of global ORF alignment reference frame - * @param alignedCodons * @param proteinSeqs - * @param gapCharacter - * @param starForStop - * when true stop codons will translate as '*', otherwise as 'X' * @return sequence ready to be added to alignment. 
*/ protected SequenceI translateCodingRegion(SequenceI selection, String seqstring, AlignedCodonFrame acf, - int[][] alignedCodons, List proteinSeqs) + List proteinSeqs) { List skip = new ArrayList(); int skipint[] = null; @@ -400,7 +420,10 @@ public class Dna StringBuilder protein = new StringBuilder(seqstring.length() / 2); String seq = seqstring.replace('U', 'T').replace('u', 'T'); char codon[] = new char[3]; - int cdp[] = new int[3], rf = 0, lastnpos = 0, nend; + int cdp[] = new int[3]; + int rf = 0; + int lastnpos = 0; + int nend; int aspos = 0; int resSize = 0; for (npos = 0, nend = seq.length(); npos < nend; npos++) @@ -415,6 +438,7 @@ public class Dna /* * Filled up a reading frame... */ + AlignedCodon alignedCodon = new AlignedCodon(cdp[0], cdp[1], cdp[2]); String aa = ResidueProperties.codonTranslate(new String(codon)); rf = 0; final String gapString = String.valueOf(gapChar); @@ -424,9 +448,9 @@ public class Dna if (skipint == null) { skipint = new int[] - { cdp[0], cdp[2] }; + { alignedCodon.pos1, alignedCodon.pos3 /* cdp[0], cdp[2] */}; } - skipint[1] = cdp[2]; + skipint[1] = alignedCodon.pos3; // cdp[2]; } else { @@ -525,31 +549,15 @@ public class Dna } resSize++; } - // insert gaps prior to this codon - if necessary boolean findpos = true; while (findpos) { - // expand the codons array if necessary - alignedCodons = checkCodonFrameWidth(alignedCodons, aspos); - /* * Compare this codon's base positions with those currently aligned to * this column in the translation. 
*/ - final int compareCodonPos = Dna.compareCodonPos(cdp, + final int compareCodonPos = compareCodonPos(alignedCodon, alignedCodons[aspos]); - // debug - System.out.println(seq + "/" + aa + " codons: " - + Arrays.deepToString(alignedCodons)); - for (SequenceI s : proteinSeqs) - { - System.out.println(s.getSequenceAsString()); - } - System.out - .println(("Compare " + Arrays.toString(cdp) + " at pos " - + aspos + " with " - + Arrays.toString(alignedCodons[aspos]) + " got " + compareCodonPos)); - // end debug switch (compareCodonPos) { case -1: @@ -558,7 +566,7 @@ public class Dna * This codon should precede the mapped positions - need to insert a * gap in all prior sequences. */ - alignedCodons = insertAAGap(aspos, alignedCodons, proteinSeqs); + insertAAGap(aspos, proteinSeqs); findpos = false; break; @@ -585,20 +593,17 @@ public class Dna if (alignedCodons[aspos] == null) { // mark this column as aligning to this aligned reading frame - alignedCodons[aspos] = new int[] - { cdp[0], cdp[1], cdp[2] }; + alignedCodons[aspos] = alignedCodon; } - else if (!Arrays.equals(alignedCodons[aspos], cdp)) + else if (!alignedCodons[aspos].equals(alignedCodon)) { throw new IllegalStateException("Tried to coalign " - + Arrays.asList(alignedCodons[aspos], cdp)); + + alignedCodons[aspos].toString() + " with " + + alignedCodon.toString()); } - System.out.println(aspos + "/" + aaWidth); if (aspos >= aaWidth) { // update maximum alignment width - // (we can do this without calling checkCodonFrameWidth because it was - // already done above) aaWidth = aspos; } // ready for next translated reading frame alignment position (if any) @@ -714,56 +719,47 @@ public class Dna * Insert a gap into the aligned proteins and the codon mapping array. 
* * @param pos - * @param alignedCodons * @param proteinSeqs * @return */ - protected int[][] insertAAGap(int pos, int[][] alignedCodons, + protected void insertAAGap(int pos, List proteinSeqs) { - System.out.println("insertAAGap " + pos + "/" + proteinSeqs.size()); aaWidth++; for (SequenceI seq : proteinSeqs) { seq.insertCharAt(pos, gapChar); } - int[][] resized = checkCodonFrameWidth(alignedCodons, pos); + checkCodonFrameWidth(); if (pos < aaWidth) { aaWidth++; - System.arraycopy(resized, pos, resized, pos + 1, resized.length - pos - - 1); - resized[pos] = null; // clear so new codon position can be marked. + + /* + * Shift from [pos] to the end one to the right, and null out [pos] + */ + System.arraycopy(alignedCodons, pos, alignedCodons, pos + 1, + alignedCodons.length - pos - 1); + alignedCodons[pos] = null; } - return resized; } /** - * Check the codons array is big enough to accommodate the given position, if - * not resize it. - * - * @param alignedCodons - * @param aspos - * @return the resized array (or the original if no resize needed) + * Check the codons array can accommodate a single insertion, if not resize + * it. */ - protected static int[][] checkCodonFrameWidth(int[][] alignedCodons, - int aspos) + protected void checkCodonFrameWidth() { - // TODO why not codons.length < aspos ? - // should codons expand if length is 2 or 3 and aslen==2 ? - System.out.println("Checking " + alignedCodons.length + "/" + aspos); - if (alignedCodons.length <= aspos + 1) + if (alignedCodons[alignedCodons.length - 1] != null) { - // probably never have to do this ? - int[][] c = new int[alignedCodons.length + 10][]; - for (int i = 0; i < alignedCodons.length; i++) - { - c[i] = alignedCodons[i]; - } - return c; + /* + * arraycopy insertion would bump a filled slot off the end, so expand. 
+ */ + AlignedCodon[] c = new AlignedCodon[alignedCodons.length + 10]; + System.arraycopy(alignedCodons, 0, c, 0, alignedCodons.length); + alignedCodons = c; } - return alignedCodons; } /** @@ -776,15 +772,16 @@ public class Dna * @param pep * @param map * @param featureTypes - * hash who's keys are the displayed feature type strings + * hash whose keys are the displayed feature type strings * @param featureGroups * hash where keys are feature groups and values are Boolean objects * indicating if they are displayed. */ private static void transferCodedFeatures(SequenceI dna, SequenceI pep, - MapList map, Hashtable featureTypes, Hashtable featureGroups) + MapList map, Map featureTypes, + Map featureGroups) { - SequenceFeature[] sf = (dna.getDatasetSequence() != null ? dna + SequenceFeature[] sfs = (dna.getDatasetSequence() != null ? dna .getDatasetSequence() : dna).getSequenceFeatures(); Boolean fgstate; DBRefEntry[] dnarefs = DBRefUtils.selectRefs(dna.getDBRef(), @@ -800,16 +797,16 @@ public class Dna } } } - if (sf != null) + if (sfs != null) { - for (int f = 0; f < sf.length; f++) + for (SequenceFeature sf : sfs) { - fgstate = (featureGroups == null) ? null : ((Boolean) featureGroups - .get(sf[f].featureGroup)); - if ((featureTypes == null || featureTypes.containsKey(sf[f] - .getType())) && (fgstate == null || fgstate.booleanValue())) + fgstate = (featureGroups == null) ? 
null : featureGroups + .get(sf.featureGroup); + if ((featureTypes == null || featureTypes.containsKey(sf.getType())) + && (fgstate == null || fgstate.booleanValue())) { - if (FeatureProperties.isCodingFeature(null, sf[f].getType())) + if (FeatureProperties.isCodingFeature(null, sf.getType())) { // if (map.intersectsFrom(sf[f].begin, sf[f].end)) { diff --git a/src/jalview/datamodel/AlignedCodon.java b/src/jalview/datamodel/AlignedCodon.java new file mode 100644 index 0000000..d0e62a1 --- /dev/null +++ b/src/jalview/datamodel/AlignedCodon.java @@ -0,0 +1,71 @@ +package jalview.datamodel; + +/** + * Holds the aligned column positions (base 0) for one codon in a nucleotide + * sequence. The object is immutable once created. + * + * Example: in "G-AT-C-GA" the aligned codons are (0, 2, 3) and (5, 7, 8). + * + * @author gmcarstairs + * + */ +public final class AlignedCodon +{ + public final int pos1; + + public final int pos2; + + public final int pos3; + + public AlignedCodon(int i, int j, int k) + { + pos1 = i; + pos2 = j; + pos3 = k; + } + + /** + * Returns the column position for the given base (1, 2, 3). + * + * @param base + * @return + * @throws IllegalArgumentException + * if an argument value other than 1, 2 or 3 is supplied + */ + public int getBaseColumn(int base) + { + if (base < 1 || base > 3) + { + throw new IllegalArgumentException(Integer.toString(base)); + } + return base == 1 ? pos1 : (base == 2 ? pos2 : pos3); + } + + /** + * Two aligned codons are equal if all their base positions are the same. 
+ */ + @Override + public boolean equals(Object o) + { + /* + * Equality with null value required for consistency with + * Dna.compareCodonPos + */ + if (o == null) + { + return true; + } + if (!(o instanceof AlignedCodon)) + { + return false; + } + AlignedCodon ac = (AlignedCodon) o; + return (pos1 == ac.pos1 && pos2 == ac.pos2 && pos3 == ac.pos3); + } + + @Override + public String toString() + { + return "[" + pos1 + ", " + pos2 + ", " + pos3 + "]"; + } +} diff --git a/src/jalview/datamodel/AlignedCodonFrame.java b/src/jalview/datamodel/AlignedCodonFrame.java index bfac717..417363a 100644 --- a/src/jalview/datamodel/AlignedCodonFrame.java +++ b/src/jalview/datamodel/AlignedCodonFrame.java @@ -32,23 +32,6 @@ import java.util.List; public class AlignedCodonFrame { - /** - *
    -   * Aligned nucleotide positions for codons mapped to column positions of of aligned
    -   * proteins. e.g.
    -   * codons[3] = [12, 14, 15] means:
    -   *     column 4 in the protein alignment translates cols 13, 15, 16 in cDNA
    -   * codons[5] = null means column 6 in the protein alignment is a gap
    -   * 
    - */ - public int[][] codons = null; - - /** - * Width of protein sequence alignment (implicit assertion that codons.length - * >= aaWidth) - */ - public int aaWidth = 0; - /* * TODO: not an ideal solution - we reference the aligned amino acid sequences * in order to make insertions on them Better would be dnaAlignment and @@ -63,8 +46,8 @@ public class AlignedCodonFrame /* * tied array of Mappings to protein sequence Objects and SequenceI[] - * aaSeqs=null; MapLists where eac maps from the corresponding dnaSeqs element - * to corresponding aaSeqs element + * aaSeqs=null; MapLists where each maps from the corresponding dnaSeqs + * element to corresponding aaSeqs element */ private Mapping[] dnaToProt = null; @@ -88,7 +71,6 @@ public class AlignedCodonFrame */ public AlignedCodonFrame(AlignedCodonFrame acf, SequenceI[] alignment) { - this.codons = acf.codons; this.dnaSeqs = acf.dnaSeqs; this.dnaToProt = acf.dnaToProt; @@ -118,70 +100,6 @@ public class AlignedCodonFrame } /** - * ensure that codons array is at least as wide as aslen residues - * - * @param aslen - * @return (possibly newly expanded) codon array - */ - public int[][] checkCodonFrameWidth(int aslen) - { - // TODO why not codons.length < aslen ? - // should codons expand if length is 2 or 3 and aslen==2 ? - if (codons.length <= aslen + 1) - { - // probably never have to do this ? - int[][] c = new int[codons.length + 10][]; - for (int i = 0; i < codons.length; i++) - { - c[i] = codons[i]; - codons[i] = null; - } - codons = c; - } - return codons; - } - - /** - * @return width of aligned translated amino acid residues - */ - public int getaaWidth() - { - return aaWidth; - } - - /** - * increase aaWidth by one and insert a new aligned codon position space at - * aspos. 
- * - * @param aspos - */ - public void insertAAGap(int aspos, char gapCharacter) - { - // this aa appears before the aligned codons at aspos - so shift them in - // each pair of mapped sequences - aaWidth++; - // we actually have to modify the aligned sequences here, so use the - // a_aaSeqs vector - for (SequenceI seq : a_aaSeqs) - { - seq.insertCharAt(aspos, gapCharacter); - } - - if (aspos < aaWidth) - { - aaWidth++; - System.arraycopy(codons, aspos, codons, aspos + 1, codons.length - - aspos - 1); - codons[aspos] = null; // clear so new codon position can be marked. - } - } - - public void setAaWidth(int aapos) - { - aaWidth = aapos; - } - - /** * add a mapping between the dataset sequences for the associated dna and * protein sequence objects * diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index 5fe97a4..918b156 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -23,6 +23,7 @@ package jalview.gui; import jalview.analysis.AAFrequency; import jalview.analysis.AlignmentSorter; import jalview.analysis.AlignmentUtils; +import jalview.analysis.AlignmentUtils.MappingResult; import jalview.analysis.Conservation; import jalview.analysis.CrossRef; import jalview.analysis.Dna; @@ -733,28 +734,19 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, final AlignmentI thatAlignment = af.alignPanel.getAlignment(); if (thatAlignment.isNucleotide()) { - // TODO exclude an AlignFrame which is already mapped to this one - // temporary version: exclude if already a CommandListener (should - // cover most cases but not all) - final boolean alreadyMapped = this.viewport - .getStructureSelectionManager().hasCommandListener( - af.viewport); - if (alreadyMapped) + MappingResult mapped = AlignmentUtils.mapProteinToCdna( + thisAlignment, thatAlignment); + if (mapped == MappingResult.AlreadyMapped) { alreadyLinkedCount++; } - else + else if (mapped == MappingResult.Mapped) { - boolean mapped = 
AlignmentUtils.mapProteinToCdna(thisAlignment, - thatAlignment); - if (mapped) - { - final StructureSelectionManager ssm = StructureSelectionManager - .getStructureSelectionManager(Desktop.instance); - ssm.addMappings(thisAlignment.getCodonFrames()); - ssm.addCommandListener(af.getViewport()); - linkedCount++; - } + final StructureSelectionManager ssm = StructureSelectionManager + .getStructureSelectionManager(Desktop.instance); + ssm.addMappings(thisAlignment.getCodonFrames()); + ssm.addCommandListener(af.getViewport()); + linkedCount++; } } } diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index f76362f..3ada6fa 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -21,8 +21,8 @@ package jalview.analysis; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +import jalview.analysis.AlignmentUtils.MappingResult; import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; @@ -161,8 +161,8 @@ public class AlignmentUtilsTests AlignmentI cdna1 = loadAlignment( dnaData, "FASTA"); - boolean mapped = AlignmentUtils.mapProteinToCdna(protein, cdna1); - assertTrue(mapped); + MappingResult mapped = AlignmentUtils.mapProteinToCdna(protein, cdna1); + assertEquals(mapped, MappingResult.Mapped); /* * Check two mappings (one for Mouse, one for Human) @@ -235,8 +235,8 @@ public class AlignmentUtilsTests AlignmentI cdna1 = loadAlignment( dnaData, "FASTA"); - boolean mapped = AlignmentUtils.mapProteinToCdna(protein, cdna1); - assertTrue(mapped); + MappingResult mapped = AlignmentUtils.mapProteinToCdna(protein, cdna1); + assertEquals(mapped, MappingResult.Mapped); /* * Check two mappings (one for Mouse, one for Human) diff --git a/test/jalview/analysis/DnaAlignmentGenerator.java b/test/jalview/analysis/DnaAlignmentGenerator.java 
new file mode 100644 index 0000000..59a08be --- /dev/null +++ b/test/jalview/analysis/DnaAlignmentGenerator.java @@ -0,0 +1,78 @@ +package jalview.analysis; + +import java.util.Random; + +/** + * Generates a random Fasta format DNA alignment for given sequence length and + * count. + * + * @author gmcarstairs + * + */ +public class DnaAlignmentGenerator +{ + private static final char GAP = '-'; + + private static final char[] BASES = new char[] + { 'G', 'T', 'C', 'A', GAP }; + + private Random random; + + /** + * Given args for sequence length and count, output a DNA 'alignment' where + * each position is a random choice from 'GTCA-'. + * + * @param args + * the width (base count) and height (sequence count) to generate + * plus an integer random seed value + */ + public static void main(String[] args) + { + int width = Integer.parseInt(args[0]); + int height = Integer.parseInt(args[1]); + long randomSeed = Long.valueOf(args[2]); + new DnaAlignmentGenerator().generate(width, height, randomSeed); + } + + /** + * Outputs a DNA 'alignment' of given width and height, where each position is + * a random choice from 'GTCA-'. + * + * @param width + * @param height + * @param randomSeed + */ + private void generate(int width, int height, long randomSeed) + { + random = new Random(randomSeed); + for (int seqno = 0; seqno < height; seqno++) + { + generateSequence(seqno + 1, width); + } + } + + /** + * Outputs a DNA 'sequence' of given length, with some random gaps included. 
+ * + * @param seqno + * @param length + */ + private void generateSequence(int seqno, int length) + { + System.out.println(">SEQ" + seqno); + StringBuilder seq = new StringBuilder(length); + + /* + * Loop till we've output 'length' real bases (excluding gaps) + */ + for (int count = 0 ; count < length ; ) { + char c = BASES[random.nextInt(Integer.MAX_VALUE) % 5]; + seq.append(c); + if (c != GAP) + { + count++; + } + } + System.out.println(seq.toString()); + } +} diff --git a/test/jalview/analysis/DnaTest.java b/test/jalview/analysis/DnaTest.java index 49a544d..43ee5f2 100644 --- a/test/jalview/analysis/DnaTest.java +++ b/test/jalview/analysis/DnaTest.java @@ -4,13 +4,13 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import jalview.api.AlignViewportI; +import jalview.datamodel.AlignedCodon; import jalview.datamodel.AlignmentI; import jalview.datamodel.ColumnSelection; import jalview.gui.AlignViewport; import jalview.io.FormatAdapter; import java.io.IOException; -import java.util.Arrays; import org.junit.Test; @@ -171,7 +171,7 @@ public class DnaTest * @throws IOException */ @Test - public void testTranslatCdna_hiddenColumns() throws IOException + public void testTranslateCdna_hiddenColumns() throws IOException { AlignmentI alf = new FormatAdapter().readFile(fasta, FormatAdapter.PASTE, "FASTA"); @@ -188,6 +188,15 @@ public class DnaTest } /** + * Use this test to help debug into any cases of interest. + */ + @Test + public void testCompareCodonPos_oneOnly() + { + assertFollows("-AA--A", "G--GG"); // 2 shifted seq2, 3 shifted seq1 + } + + /** * Tests for method that compares 'alignment' of two codon position triplets. 
*/ @Test @@ -196,10 +205,8 @@ public class DnaTest /* * Returns 0 for any null argument */ - assertEquals(0, Dna.compareCodonPos(new int[] - { 1, 2, 3 }, null)); - assertEquals(0, Dna.compareCodonPos(null, new int[] - { 1, 2, 3 })); + assertEquals(0, Dna.compareCodonPos(new AlignedCodon(1, 2, 3), null)); + assertEquals(0, Dna.compareCodonPos(null, new AlignedCodon(1, 2, 3))); /* * Work through 27 combinations. First 9 cases where first position matches. @@ -209,10 +216,9 @@ public class DnaTest assertPrecedes("AAA", "GG-G"); // 2 matches, 3 shifted seq2 assertFollows("A-AA", "GG-G"); // 2 shifted seq1, 3 matches assertFollows("A-A-A", "GG-G"); // 2 shifted seq1, 3 shifted seq1 - // TODO is this right? assertPrecedes("A-AA", "GG--G"); // 2 shifted seq1, 3 shifted seq2 assertPrecedes("AA-A", "G-GG"); // 2 shifted seq2, 3 matches - assertPrecedes("AA--A", "G-GG"); // 2 shifted seq2, 3 shifted seq1 + assertFollows("AA--A", "G-GG"); // 2 shifted seq2, 3 shifted seq1 assertPrecedes("AAA", "G-GG"); // 2 shifted seq2, 3 shifted seq2 /* @@ -220,13 +226,14 @@ public class DnaTest */ assertFollows("-AAA", "G-GG"); // 2 and 3 match assertFollows("-AA-A", "G-GG"); // 2 matches, 3 shifted seq1 - assertPrecedes("-AAA", "G-G-G"); // 2 matches, 3 shifted seq2 + // 'enclosing' case: pick first to start precedes + assertFollows("-AAA", "G-G-G"); // 2 matches, 3 shifted seq2 assertFollows("-A-AA", "G-G-G"); // 2 shifted seq1, 3 matches assertFollows("-A-A-A", "G-G-G"); // 2 shifted seq1, 3 shifted seq1 - // is this right? 
codon2 ends after codon1 - assertPrecedes("-A-AA", "G-G--G"); // 2 shifted seq1, 3 shifted seq2 - assertPrecedes("-AA-A", "G--GG"); // 2 shifted seq2, 3 matches - assertPrecedes("-AA--A", "G--GG"); // 2 shifted seq2, 3 shifted seq1 + // 'enclosing' case: pick first to start precedes + assertFollows("-A-AA", "G-G--G"); // 2 shifted seq1, 3 shifted seq2 + assertFollows("-AA-A", "G--GG"); // 2 shifted seq2, 3 matches + assertFollows("-AA--A", "G--GG"); // 2 shifted seq2, 3 shifted seq1 assertPrecedes("-AAA", "G--GG"); // 2 shifted seq2, 3 shifted seq2 /* @@ -236,31 +243,71 @@ public class DnaTest assertPrecedes("A-A-A", "-GGG"); // 2 matches, 3 shifted seq1 assertPrecedes("A-AA", "-GG-G"); // 2 matches, 3 shifted seq2 assertPrecedes("A--AA", "-GG-G"); // 2 shifted seq1, 3 matches - assertPrecedes("A--AA", "-GGG"); // 2 shifted seq1, 3 shifted seq1 + // 'enclosing' case with middle base deciding: + assertFollows("A--AA", "-GGG"); // 2 shifted seq1, 3 shifted seq1 assertPrecedes("A--AA", "-GG--G"); // 2 shifted seq1, 3 shifted seq2 assertPrecedes("AA-A", "-GGG"); // 2 shifted seq2, 3 matches assertPrecedes("AA--A", "-GGG"); // 2 shifted seq2, 3 shifted seq1 assertPrecedes("AAA", "-GGG"); // 2 shifted seq2, 3 shifted seq2 + } - /* - * two codon positions can each 'precede' the other! the comparison is - * biased to the first sequence - */ - // TODO is this correct? - assertPrecedes("-A--AA", "--GGG"); - assertPrecedes("--AAA", "-A--AA"); + /** + * Test that all the cases in testCompareCodonPos have a 'symmetric' + * comparison (without checking the actual comparison result). 
+ */ + @Test + public void testCompareCodonPos_isSymmetric() + { + assertSymmetric("AAA", "GGG"); + assertSymmetric("AA-A", "GGG"); + assertSymmetric("AAA", "GG-G"); + assertSymmetric("A-AA", "GG-G"); + assertSymmetric("A-A-A", "GG-G"); + assertSymmetric("A-AA", "GG--G"); + assertSymmetric("AA-A", "G-GG"); + assertSymmetric("AA--A", "G-GG"); + assertSymmetric("AAA", "G-GG"); + assertSymmetric("-AAA", "G-GG"); + assertSymmetric("-AA-A", "G-GG"); + assertSymmetric("-AAA", "G-G-G"); + assertSymmetric("-A-AA", "G-G-G"); + assertSymmetric("-A-A-A", "G-G-G"); + assertSymmetric("-A-AA", "G-G--G"); + assertSymmetric("-AA-A", "G--GG"); + assertSymmetric("-AA--A", "G--GG"); + assertSymmetric("-AAA", "G--GG"); + assertSymmetric("A-AA", "-GGG"); + assertSymmetric("A-A-A", "-GGG"); + assertSymmetric("A-AA", "-GG-G"); + assertSymmetric("A--AA", "-GG-G"); + assertSymmetric("A--AA", "-GGG"); + assertSymmetric("A--AA", "-GG--G"); + assertSymmetric("AA-A", "-GGG"); + assertSymmetric("AA--A", "-GGG"); + assertSymmetric("AAA", "-GGG"); + } + + private void assertSymmetric(String codon1, String codon2) + { + assertEquals("Comparison of '" + codon1 + "' and '" + codon2 + + "' not symmetric", Integer.signum(compare(codon1, codon2)), + -Integer.signum(compare(codon2, codon1))); } /** * Assert that the first sequence should map to the same position as the - * second in a translated alignment + * second in a translated alignment. Also checks that this is true if the + * order of the codons is reversed. 
* * @param codon1 * @param codon2 */ private void assertMatches(String codon1, String codon2) { - assertEquals("Expected match (0)", 0, compare(codon1, codon2)); + assertEquals("Expected '" + codon1 + "' matches '" + codon2 + "'", 0, + compare(codon1, codon2)); + assertEquals("Expected '" + codon2 + "' matches '" + codon1 + "'", 0, + compare(codon2, codon1)); } /** @@ -272,7 +319,8 @@ public class DnaTest */ private void assertPrecedes(String codon1, String codon2) { - assertEquals("Expected precedes (-1)", -1, compare(codon1, codon2)); + assertEquals("Expected '" + codon1 + "' precedes '" + codon2 + "'", + -1, compare(codon1, codon2)); } /** @@ -284,7 +332,8 @@ public class DnaTest */ private void assertFollows(String codon1, String codon2) { - assertEquals("Expected follows (1)", 1, compare(codon1, codon2)); + assertEquals("Expected '" + codon1 + "' follows '" + codon2 + "'", 1, + compare(codon1, codon2)); } /** @@ -297,10 +346,10 @@ public class DnaTest */ private int compare(String s1, String s2) { - final int[] cd1 = convertCodon(s1); - final int[] cd2 = convertCodon(s2); - System.out.println("K: " + s1 + " " + Arrays.toString(cd1)); - System.out.println("G: " + s2 + " " + Arrays.toString(cd2)); + final AlignedCodon cd1 = convertCodon(s1); + final AlignedCodon cd2 = convertCodon(s2); + System.out.println("K: " + s1 + " " + cd1.toString()); + System.out.println("G: " + s2 + " " + cd2.toString()); System.out.println(); return Dna.compareCodonPos(cd1, cd2); } @@ -312,18 +361,18 @@ public class DnaTest * @param s * @return */ - private int[] convertCodon(String s) + private AlignedCodon convertCodon(String s) { - int[] result = new int[3]; + int[] codon = new int[3]; int i = 0; for (int j = 0; j < s.length(); j++) { if (s.charAt(j) != '-') { - result[i++] = j; + codon[i++] = j; } } - return result; + return new AlignedCodon(codon[0], codon[1], codon[2]); } /** @@ -332,8 +381,8 @@ public class DnaTest @Test public void testConvertCodon() { - assertEquals("[0, 1, 
2]", Arrays.toString(convertCodon("AAA"))); - assertEquals("[0, 2, 5]", Arrays.toString(convertCodon("A-A--A"))); - assertEquals("[1, 3, 4]", Arrays.toString(convertCodon("-A-AA-"))); + assertEquals("[0, 1, 2]", convertCodon("AAA").toString()); + assertEquals("[0, 2, 5]", convertCodon("A-A--A").toString()); + assertEquals("[1, 3, 4]", convertCodon("-A-AA-").toString()); } } diff --git a/test/jalview/datamodel/AlignedCodonFrameTest.java b/test/jalview/datamodel/AlignedCodonFrameTest.java index 9f1d1e0..c73eb0b 100644 --- a/test/jalview/datamodel/AlignedCodonFrameTest.java +++ b/test/jalview/datamodel/AlignedCodonFrameTest.java @@ -1,6 +1,5 @@ package jalview.datamodel; -import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertSame; import jalview.util.MapList; @@ -15,11 +14,7 @@ public class AlignedCodonFrameTest @Test public void testConstructor_copyWithSequence() { - AlignedCodonFrame acf = new AlignedCodonFrame(0); - acf.codons = new int[][] - { new int[] - { 1, 3 }, new int[] - { 4, 6 } }; + AlignedCodonFrame acf = new AlignedCodonFrame(); MapList map = new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 3, 1); @@ -30,8 +25,6 @@ public class AlignedCodonFrameTest newaligned[0] = new Sequence("", "-F-K-Q"); newaligned[0].setDatasetSequence(aaseq.getDatasetSequence()); AlignedCodonFrame copy = new AlignedCodonFrame(acf, newaligned); - assertSame(copy.codons, acf.codons); - assertEquals(copy.aaWidth, acf.aaWidth); assertSame(copy.getdnaSeqs(), acf.getdnaSeqs()); assertSame(newaligned[0], copy.getAaForDnaSeq(dnaseq, false)); } diff --git a/test/jalview/datamodel/AlignedCodonTest.java b/test/jalview/datamodel/AlignedCodonTest.java new file mode 100644 index 0000000..60368b1 --- /dev/null +++ b/test/jalview/datamodel/AlignedCodonTest.java @@ -0,0 +1,28 @@ +package jalview.datamodel; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import org.junit.Test; 
+ +public class AlignedCodonTest +{ + + @Test + public void testEquals() + { + AlignedCodon ac = new AlignedCodon(1, 3, 4); + assertTrue(ac.equals(null)); + assertFalse(ac.equals("hello")); + assertFalse(ac.equals(new AlignedCodon(1, 3, 5))); + assertTrue(ac.equals(new AlignedCodon(1, 3, 4))); + assertTrue(ac.equals(ac)); + } + + @Test + public void testToString() { + AlignedCodon ac = new AlignedCodon(1, 3, 4); + assertEquals("[1, 3, 4]", ac.toString()); + } +} diff --git a/test/jalview/datamodel/AlignmentTest.java b/test/jalview/datamodel/AlignmentTest.java index 2b1fc72..8912155 100644 --- a/test/jalview/datamodel/AlignmentTest.java +++ b/test/jalview/datamodel/AlignmentTest.java @@ -125,7 +125,7 @@ public class AlignmentTest * Make mappings between sequences. The 'aligned cDNA' is playing the role * of what would normally be protein here. */ - AlignedCodonFrame acf = new AlignedCodonFrame(al1.getWidth()); + AlignedCodonFrame acf = new AlignedCodonFrame(); MapList ml = new MapList(new int[] { 1, 12 }, new int[] { 1, 12 }, 1, 1); @@ -171,7 +171,7 @@ public class AlignmentTest */ AlignmentI al1 = loadAlignment(CDNA_SEQS_1, "FASTA"); AlignmentI al2 = loadAlignment(AA_SEQS_1, "FASTA"); - AlignedCodonFrame acf = new AlignedCodonFrame(al2.getWidth()); + AlignedCodonFrame acf = new AlignedCodonFrame(); MapList ml = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3, 1);