From 93de2a3b535decd5f5d7b24e5618806f4e99c46e Mon Sep 17 00:00:00 2001 From: gmungoc Date: Fri, 16 Jan 2015 15:09:06 +0000 Subject: [PATCH] JAL-1619 refactoring in progress for Dna translation --- resources/lang/Messages.properties | 2 + src/jalview/analysis/Dna.java | 352 +++++++++----------- src/jalview/datamodel/AlignedCodonFrame.java | 21 +- src/jalview/gui/AlignFrame.java | 47 +-- src/jalview/gui/Jalview2XML.java | 38 +-- src/jalview/io/AppletFormatAdapter.java | 32 +- src/jalview/jbgui/GAlignFrame.java | 38 ++- .../structure/StructureSelectionManager.java | 119 ++----- test/jalview/analysis/DnaTest.java | 155 ++++++++- test/jalview/analysis/DnaTranslation.java | 4 +- test/jalview/datamodel/AlignedCodonFrameTest.java | 38 +++ 11 files changed, 444 insertions(+), 402 deletions(-) create mode 100644 test/jalview/datamodel/AlignedCodonFrameTest.java diff --git a/resources/lang/Messages.properties b/resources/lang/Messages.properties index 1652eb7..46b424c 100644 --- a/resources/lang/Messages.properties +++ b/resources/lang/Messages.properties @@ -697,6 +697,8 @@ label.cdna_all_linked = All {0} compatible cDNA alignments are already linked label.align_cdna = Align linked cDNA label.align_cdna_tip = Any linked cDNA sequences will be realigned to match this alignment. 
label.cdna_aligned = {0} sequences in {1} alignments were realigned +label.view_as_cdna = Show aligned cDNA +label.view_as_cdna_tip = Open a new alignment of the related cDNA sequences label.align = Align label.extract_scores = Extract Scores label.get_cross_refs = Get Cross References diff --git a/src/jalview/analysis/Dna.java b/src/jalview/analysis/Dna.java index 0c020dd..cf6f83e 100644 --- a/src/jalview/analysis/Dna.java +++ b/src/jalview/analysis/Dna.java @@ -20,6 +20,7 @@ */ package jalview.analysis; +import jalview.bin.Cache; import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; @@ -34,41 +35,49 @@ import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.schemes.ResidueProperties; import jalview.util.Comparison; +import jalview.util.DBRefUtils; import jalview.util.MapList; import jalview.util.ShiftList; import java.util.ArrayList; +import java.util.Arrays; import java.util.Hashtable; import java.util.List; -import java.util.Vector; - -import java.util.ArrayList; -import java.util.Hashtable; -import java.util.Vector; public class Dna { /** + * Test whether codon positions cdp1 should align before, with, or after cdp2. + * Returns zero if all positions match (or either argument is null). Returns + * -1 if any position in the first codon precedes the corresponding position + * in the second codon. Else returns +1 (some position in the second codon + * precedes the corresponding position in the first). 
+ * + * Note this is not necessarily symmetric, for example: + * * * @param cdp1 * @param cdp2 - * @return -1 if cdp1 aligns before cdp2, 0 if in the same column or cdp2 is - * null, +1 if after cdp2 + * @return */ - private static int compare_codonpos(int[] cdp1, int[] cdp2) + public static int compareCodonPos(int[] cdp1, int[] cdp2) { - if (cdp2 == null + if (cdp1 == null + || cdp2 == null || (cdp1[0] == cdp2[0] && cdp1[1] == cdp2[1] && cdp1[2] == cdp2[2])) { return 0; } if (cdp1[0] < cdp2[0] || cdp1[1] < cdp2[1] || cdp1[2] < cdp2[2]) - { - return -1; // one base in cdp1 precedes the corresponding base in the + { + // one base in cdp1 precedes the corresponding base in the other codon + return -1; } - // other codon - return 1; // one base in cdp1 appears after the corresponding base in the - // other codon. + // one base in cdp1 appears after the corresponding base in the other codon. + return 1; } /** @@ -100,17 +109,17 @@ public class Dna * destination dataset for translated sequences and mappings * @return */ - public static AlignmentI CdnaTranslate(SequenceI[] selection, + public static AlignmentI cdnaTranslate(SequenceI[] selection, String[] seqstring, int viscontigs[], char gapCharacter, AlignmentAnnotation[] annotations, int aWidth, Alignment dataset) { - return CdnaTranslate(selection, seqstring, null, viscontigs, - gapCharacter, annotations, aWidth, dataset); + return cdnaTranslate(Arrays.asList(selection), seqstring, null, + viscontigs, gapCharacter, annotations, aWidth, dataset); } /** * - * @param selection + * @param cdnaseqs * @param seqstring * @param product * - array of DbRefEntry objects from which exon map in seqstring is @@ -122,29 +131,33 @@ public class Dna * @param dataset * @return */ - public static AlignmentI CdnaTranslate(SequenceI[] selection, + public static AlignmentI cdnaTranslate(List cdnaseqs, String[] seqstring, DBRefEntry[] product, int viscontigs[], char gapCharacter, AlignmentAnnotation[] annotations, int aWidth, Alignment 
dataset) { - AlignedCodonFrame codons = new AlignedCodonFrame(aWidth); // stores hash of - // subsequent - // positions for - // each codon - // start position - // in alignment - int s, sSize = selection.length; - Vector pepseqs = new Vector(); + AlignedCodonFrame acf = new AlignedCodonFrame(aWidth); + + /* + * This array will be built up so that position i holds the codon positions + * e.g. [7, 9, 10] that match column i (base 0) in the aligned translation. + * Note this implies a contract that if two codons do not align exactly, + * their translated products must occupy different column positions. + */ + int[][] alignedCodons = new int[aWidth][]; + + int s; + int sSize = cdnaseqs.size(); + List pepseqs = new ArrayList(); for (s = 0; s < sSize; s++) { - SequenceI newseq = translateCodingRegion(selection[s], seqstring[s], - viscontigs, codons, gapCharacter, - (product != null) ? product[s] : null, false); // possibly - // anonymous - // product + SequenceI newseq = translateCodingRegion(cdnaseqs.get(s), + seqstring[s], viscontigs, acf, alignedCodons, gapCharacter, + false); + if (newseq != null) { - pepseqs.addElement(newseq); + pepseqs.add(newseq); SequenceI ds = newseq; if (dataset != null) { @@ -156,17 +169,15 @@ public class Dna } } } - if (codons.aaWidth == 0) - { - return null; - } - SequenceI[] newseqs = new SequenceI[pepseqs.size()]; - pepseqs.copyInto(newseqs); + + SequenceI[] newseqs = pepseqs.toArray(new SequenceI[pepseqs.size()]); AlignmentI al = new Alignment(newseqs); - al.padGaps(); // ensure we look aligned. + // ensure we look aligned. 
+ al.padGaps(); + // link the protein translation to the DNA dataset al.setDataset(dataset); - // translateAlignedAnnotations(annotations, al, codons); - al.addCodonFrame(codons); + translateAlignedAnnotations(annotations, al, acf, alignedCodons); + al.addCodonFrame(acf); return al; } @@ -185,14 +196,13 @@ public class Dna for (int gd = 0; gd < selection.length; gd++) { SequenceI dna = selection[gd]; - jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils + DBRefEntry[] dnarefs = DBRefUtils .selectRefs(dna.getDBRef(), jalview.datamodel.DBRefSource.DNACODINGDBS); if (dnarefs != null) { // intersect with pep - // intersect with pep - Vector mappedrefs = new Vector(); + List mappedrefs = new ArrayList(); DBRefEntry[] refs = dna.getDBRef(); for (int d = 0; d < refs.length; d++) { @@ -200,11 +210,10 @@ public class Dna && refs[d].getMap().getMap().getFromRatio() == 3 && refs[d].getMap().getMap().getToRatio() == 1) { - mappedrefs.addElement(refs[d]); // add translated protein maps + mappedrefs.add(refs[d]); // add translated protein maps } } - dnarefs = new DBRefEntry[mappedrefs.size()]; - mappedrefs.copyInto(dnarefs); + dnarefs = mappedrefs.toArray(new DBRefEntry[mappedrefs.size()]); for (int d = 0; d < dnarefs.length; d++) { Mapping mp = dnarefs[d].getMap(); @@ -227,88 +236,16 @@ public class Dna } /** - * generate a set of translated protein products from annotated sequenceI - * - * @param selection - * @param viscontigs - * @param gapCharacter - * @param dataset - * destination dataset for translated sequences - * @param annotations - * @param aWidth - * @return - */ - public static AlignmentI CdnaTranslate(SequenceI[] selection, - int viscontigs[], char gapCharacter, Alignment dataset) - { - int alwidth = 0; - Vector cdnasqs = new Vector(); - Vector cdnasqi = new Vector(); - Vector cdnaprod = new Vector(); - for (int gd = 0; gd < selection.length; gd++) - { - SequenceI dna = selection[gd]; - jalview.datamodel.DBRefEntry[] dnarefs = jalview.util.DBRefUtils 
- .selectRefs(dna.getDBRef(), - jalview.datamodel.DBRefSource.DNACODINGDBS); - if (dnarefs != null) - { - // intersect with pep - Vector mappedrefs = new Vector(); - DBRefEntry[] refs = dna.getDBRef(); - for (int d = 0; d < refs.length; d++) - { - if (refs[d].getMap() != null && refs[d].getMap().getMap() != null - && refs[d].getMap().getMap().getFromRatio() == 3 - && refs[d].getMap().getMap().getToRatio() == 1) - { - mappedrefs.addElement(refs[d]); // add translated protein maps - } - } - dnarefs = new DBRefEntry[mappedrefs.size()]; - mappedrefs.copyInto(dnarefs); - for (int d = 0; d < dnarefs.length; d++) - { - Mapping mp = dnarefs[d].getMap(); - StringBuffer sqstr = new StringBuffer(); - if (mp != null) - { - Mapping intersect = mp.intersectVisContigs(viscontigs); - // generate seqstring for this sequence based on mapping - - if (sqstr.length() > alwidth) - { - alwidth = sqstr.length(); - } - cdnasqs.addElement(sqstr.toString()); - cdnasqi.addElement(dna); - cdnaprod.addElement(intersect); - } - } - } - SequenceI[] cdna = new SequenceI[cdnasqs.size()]; - DBRefEntry[] prods = new DBRefEntry[cdnaprod.size()]; - String[] xons = new String[cdnasqs.size()]; - cdnasqs.copyInto(xons); - cdnaprod.copyInto(prods); - cdnasqi.copyInto(cdna); - return CdnaTranslate(cdna, xons, prods, viscontigs, gapCharacter, - null, alwidth, dataset); - } - return null; - } - - /** * Translate na alignment annotations onto translated amino acid alignment al * using codon mapping codons * * @param annotations * @param al - * @param codons + * @param acf */ - public static void translateAlignedAnnotations( + protected static void translateAlignedAnnotations( AlignmentAnnotation[] annotations, AlignmentI al, - AlignedCodonFrame codons) + AlignedCodonFrame acf, int[][] codons) { // Can only do this for columns with consecutive codons, or where // annotation is sequence associated. @@ -329,7 +266,7 @@ public class Dna continue; } - int aSize = codons.getaaWidth(); // aa alignment width. 
+ int aSize = acf.getaaWidth(); // aa alignment width. Annotation[] anots = (annotation.annotations == null) ? null : new Annotation[aSize]; if (anots != null) @@ -337,10 +274,10 @@ public class Dna for (int a = 0; a < aSize; a++) { // process through codon map. - if (a < codons.codons.length && codons.codons[a] != null - && codons.codons[a][0] == (codons.codons[a][2] - 2)) + if (a < codons.length && codons[a] != null + && codons[a][0] == (codons[a][2] - 2)) { - anots[a] = getCodonAnnotation(codons.codons[a], + anots[a] = getCodonAnnotation(codons[a], annotation.annotations); } } @@ -364,7 +301,7 @@ public class Dna final SequenceI seqRef = annotation.sequenceRef; if (seqRef != null) { - SequenceI aaSeq = codons.getAaForDnaSeq(seqRef); + SequenceI aaSeq = acf.getAaForDnaSeq(seqRef); if (aaSeq != null) { // aa.compactAnnotationArray(); // throw away alignment annotation @@ -438,42 +375,18 @@ public class Dna * ORF read in some global alignment reference frame * @param viscontigs * mapping from global reference frame to visible seqstring ORF read - * @param codons - * Definition of global ORF alignment reference frame - * @param gapCharacter - * @return sequence ready to be added to alignment. 
- * @deprecated Use - * {@link #translateCodingRegion(SequenceI,String,int[],AlignedCodonFrame,char,DBRefEntry,boolean)} - * instead - */ - @Deprecated - public static SequenceI translateCodingRegion(SequenceI selection, - String seqstring, int[] viscontigs, AlignedCodonFrame codons, - char gapCharacter, DBRefEntry product) - { - return translateCodingRegion(selection, seqstring, viscontigs, codons, - gapCharacter, product, false); - } - - /** - * Translate a na sequence - * - * @param selection - * sequence displayed under viscontigs visible columns - * @param seqstring - * ORF read in some global alignment reference frame - * @param viscontigs - * mapping from global reference frame to visible seqstring ORF read - * @param codons + * @param acf * Definition of global ORF alignment reference frame + * @param alignedCodons * @param gapCharacter * @param starForStop * when true stop codons will translate as '*', otherwise as 'X' * @return sequence ready to be added to alignment. */ - public static SequenceI translateCodingRegion(SequenceI selection, - String seqstring, int[] viscontigs, AlignedCodonFrame codons, - char gapCharacter, DBRefEntry product, final boolean starForStop) + protected static SequenceI translateCodingRegion(SequenceI selection, + String seqstring, int[] viscontigs, AlignedCodonFrame acf, + int[][] alignedCodons, char gapCharacter, + final boolean starForStop) { List skip = new ArrayList(); int skipint[] = null; @@ -498,7 +411,7 @@ public class Dna // allocate a roughly sized buffer for the protein sequence StringBuilder protein = new StringBuilder(seqstring.length() / 2); - String seq = seqstring.replace('U', 'T'); + String seq = seqstring.replace('U', 'T').replace('u', 'T'); char codon[] = new char[3]; int cdp[] = new int[3], rf = 0, lastnpos = 0, nend; int aspos = 0; @@ -517,9 +430,10 @@ public class Dna */ String aa = ResidueProperties.codonTranslate(new String(codon)); rf = 0; + final String gapString = String.valueOf(gapCharacter); if (aa 
== null) { - aa = String.valueOf(gapCharacter); + aa = gapString; if (skipint == null) { skipint = new int[] @@ -624,48 +538,77 @@ public class Dna } resSize++; } - // insert/delete gaps prior to this codon - if necessary + // insert gaps prior to this codon - if necessary boolean findpos = true; while (findpos) { - // first ensure that the codons array is long enough. - codons.checkCodonFrameWidth(aspos); - // now check to see if we place the aa at the current aspos in the - // protein alignment - switch (Dna.compare_codonpos(cdp, codons.codons[aspos])) + // expand the codons array if necessary + alignedCodons = checkCodonFrameWidth(alignedCodons, aspos); + + /* + * Compare this codon's base positions with those currently aligned to + * this column in the translation. + */ + final int compareCodonPos = Dna.compareCodonPos(cdp, + alignedCodons[aspos]); + // debug + // System.out.println(seq + "/" + aa + " codons: " + // + Arrays.deepToString(alignedCodons)); + // System.out + // .println(("Compare " + Arrays.toString(cdp) + " at pos " + // + aspos + " with " + // + Arrays.toString(alignedCodons[aspos]) + " got " + + // compareCodonPos)); + // end debug + switch (compareCodonPos) { case -1: - codons.insertAAGap(aspos, gapCharacter); + + /* + * This codon should precede the mapped positions - need to insert a + * gap in all prior sequences. + */ + acf.insertAAGap(aspos, gapCharacter); findpos = false; break; + case +1: - // this aa appears after the aligned codons at aspos, so prefix it - // with a gap - aa = "" + gapCharacter + aa; + + /* + * This codon belongs after the aligned codons at aspos. Prefix it + * with a gap and try the next position. + */ + aa = gapString + aa; aspos++; - // if (aspos >= codons.aaWidth) - // codons.aaWidth = aspos + 1; - break; // check the next position for alignment + break; + case 0: - // codon aligns at aspos position. + + /* + * Exact match - codon 'belongs' at this translated position. 
+ */ findpos = false; } } - // codon aligns with all other sequence residues found at aspos protein.append(aa); lastnpos = npos; - if (codons.codons[aspos] == null) + if (alignedCodons[aspos] == null) { // mark this column as aligning to this aligned reading frame - codons.codons[aspos] = new int[] + alignedCodons[aspos] = new int[] { cdp[0], cdp[1], cdp[2] }; } - if (aspos >= codons.aaWidth) + else if (!Arrays.equals(alignedCodons[aspos], cdp)) + { + throw new IllegalStateException("Tried to coalign " + + Arrays.asList(alignedCodons[aspos], cdp)); + } + if (aspos >= acf.aaWidth) { // update maximum alignment width // (we can do this without calling checkCodonFrameWidth because it was // already done above) - codons.setAaWidth(aspos); + acf.setAaWidth(aspos); } // ready for next translated reading frame alignment position (if any) aspos++; @@ -677,15 +620,14 @@ public class Dna protein.toString()); if (rf != 0) { - if (jalview.bin.Cache.log != null) + final String errMsg = "trimming contigs for incomplete terminal codon."; + if (Cache.log != null) { - jalview.bin.Cache.log - .debug("trimming contigs for incomplete terminal codon."); + Cache.log.debug(errMsg); } else { - System.err - .println("trimming contigs for incomplete terminal codon."); + System.err.println(errMsg); } // map and trim contigs to ORF region vc = scontigs.length - 1; @@ -756,27 +698,13 @@ public class Dna MapList map = new MapList(scontigs, new int[] { 1, resSize }, 3, 1); - // update newseq as if it was generated as mapping from product - - if (product != null) - { - newseq.setName(product.getSource() + "|" - + product.getAccessionId()); - if (product.getMap() != null) - { - // Mapping mp = product.getMap(); - // newseq.setStart(mp.getPosition(scontigs[0])); - // newseq.setEnd(mp - // .getPosition(scontigs[scontigs.length - 1])); - } - } transferCodedFeatures(selection, newseq, map, null, null); SequenceI rseq = newseq.deriveSequence(); // construct a dataset // sequence for our new // peptide, 
regardless. // store a mapping (this actually stores a mapping between the dataset // sequences for the two sequences - codons.addMap(selection, rseq, map); + acf.addMap(selection, rseq, map); return rseq; } } @@ -786,6 +714,32 @@ public class Dna } /** + * Check the codons array is big enough to accommodate the given position, if + * not resize it. + * + * @param alignedCodons + * @param aspos + * @return the resized array (or the original if no resize needed) + */ + protected static int[][] checkCodonFrameWidth(int[][] alignedCodons, + int aspos) + { + // TODO why not codons.length < aspos ? + // should codons expand if length is 2 or 3 and aslen==2 ? + if (alignedCodons.length <= aspos + 1) + { + // probably never have to do this ? + int[][] c = new int[alignedCodons.length + 10][]; + for (int i = 0; i < alignedCodons.length; i++) + { + c[i] = alignedCodons[i]; + } + return c; + } + return alignedCodons; + } + + /** * Given a peptide newly translated from a dna sequence, copy over and set any * features on the peptide from the DNA. If featureTypes is null, all features * on the dna sequence are searched (rather than just the displayed ones), and diff --git a/src/jalview/datamodel/AlignedCodonFrame.java b/src/jalview/datamodel/AlignedCodonFrame.java index f65f068..6048808 100644 --- a/src/jalview/datamodel/AlignedCodonFrame.java +++ b/src/jalview/datamodel/AlignedCodonFrame.java @@ -33,14 +33,19 @@ public class AlignedCodonFrame { /** - * array of nucleotide positions for aligned codons at column of aligned - * proteins. + *
+   * Aligned nucleotide positions for codons mapped to column positions of aligned
+   * proteins. e.g.
+   * codons[3] = [12, 14, 15] means:
+   *     column 4 in the protein alignment translates cols 13, 15, 16 in cDNA
+   * codons[5] = null means column 6 in the protein alignment is a gap
+   * 
*/ public int[][] codons = null; /** - * width of protein sequence alignement implicit assertion that codons.length - * >= aaWidth + * Width of protein sequence alignment (implicit assertion that codons.length + * >= aaWidth) */ public int aaWidth = 0; @@ -100,6 +105,11 @@ public class AlignedCodonFrame for (SequenceI seq : acf.a_aaSeqs) { boolean found = false; + // TODO may not correctly handle the case where the same sequence appears + // twice in the source alignment i.e. same dataset sequence + // the copy will reference the first aligned sequence for both + // ?not solvable if realignment may reorder the sequences + // or check on sequence name as well???? for (SequenceI newseq : alignment) { if (seq.getDatasetSequence() == newseq.getDatasetSequence()) @@ -125,6 +135,8 @@ public class AlignedCodonFrame */ public int[][] checkCodonFrameWidth(int aslen) { + // TODO why not codons.length < aslen ? + // should codons expand if length is 2 or 3 and aslen==2 ? if (codons.length <= aslen + 1) { // probably never have to do this ? 
@@ -165,7 +177,6 @@ public class AlignedCodonFrame seq.insertCharAt(aspos, gapCharacter); } - checkCodonFrameWidth(aspos); if (aspos < aaWidth) { aaWidth++; diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index 8ff9eda..cc7839e 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -1122,7 +1122,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, if (value == JalviewFileChooser.APPROVE_OPTION) { currentFileFormat = chooser.getSelectedFormat(); - if (currentFileFormat == null) + while (currentFileFormat == null) { JOptionPane .showInternalMessageDialog( @@ -1132,8 +1132,12 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, MessageManager .getString("label.file_format_not_specified"), JOptionPane.WARNING_MESSAGE); + currentFileFormat = chooser.getSelectedFormat(); value = chooser.showSaveDialog(this); - return; + if (value != JalviewFileChooser.APPROVE_OPTION) + { + return; + } } fileName = chooser.getSelectedFile().getPath(); @@ -4865,43 +4869,6 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, } } - @Override - public void showProducts_actionPerformed(ActionEvent e) - { - // ///////////////////////////// - // Collect Data to be translated/transferred - - SequenceI[] selection = viewport.getSequenceSelection(); - AlignmentI al = null; - try - { - al = jalview.analysis.Dna.CdnaTranslate(selection, viewport - .getViewAsVisibleContigs(true), viewport.getGapCharacter(), - viewport.getAlignment().getDataset()); - } catch (Exception ex) - { - al = null; - jalview.bin.Cache.log.debug("Exception during translation.", ex); - } - if (al == null) - { - JOptionPane - .showMessageDialog( - Desktop.desktop, - MessageManager - .getString("label.select_at_least_three_bases_in_at_least_one_sequence_to_cDNA_translation"), - MessageManager.getString("label.translation_failed"), - JOptionPane.WARNING_MESSAGE); - } - else - { - AlignFrame af = 
new AlignFrame(al, DEFAULT_WIDTH, DEFAULT_HEIGHT); - Desktop.addInternalFrame(af, MessageManager.formatMessage( - "label.translation_of_params", new String[] - { this.getTitle() }), DEFAULT_WIDTH, DEFAULT_HEIGHT); - } - } - /** * Construct and display a new frame containing the translation of this * frame's cDNA sequences to their protein (amino acid) equivalents. @@ -4917,7 +4884,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, AlignmentI al = null; try { - al = jalview.analysis.Dna.CdnaTranslate(selection, seqstring, + al = jalview.analysis.Dna.cdnaTranslate(selection, seqstring, viewport.getViewAsVisibleContigs(true), viewport .getGapCharacter(), viewport.getAlignment() .getAlignmentAnnotation(), viewport.getAlignment() diff --git a/src/jalview/gui/Jalview2XML.java b/src/jalview/gui/Jalview2XML.java index 8b74620..452d4f9 100644 --- a/src/jalview/gui/Jalview2XML.java +++ b/src/jalview/gui/Jalview2XML.java @@ -22,12 +22,12 @@ package jalview.gui; import jalview.api.structures.JalviewStructureDisplayI; import jalview.bin.Cache; +import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.SequenceI; import jalview.schemabinding.version2.AlcodMap; -import jalview.schemabinding.version2.Alcodon; import jalview.schemabinding.version2.AlcodonFrame; import jalview.schemabinding.version2.Annotation; import jalview.schemabinding.version2.AnnotationColours; @@ -869,19 +869,6 @@ public class Jalview2XML { AlcodonFrame alc = new AlcodonFrame(); vamsasSet.addAlcodonFrame(alc); - for (int p = 0; p < jac[i].aaWidth; p++) - { - Alcodon cmap = new Alcodon(); - if (jac[i].codons[p] != null) - { - // Null codons indicate a gapped column in the translated peptide - // alignment. 
- cmap.setPos1(jac[i].codons[p][0]); - cmap.setPos2(jac[i].codons[p][1]); - cmap.setPos3(jac[i].codons[p][2]); - } - alc.addAlcodon(cmap); - } if (jac[i].getProtMappings() != null && jac[i].getProtMappings().length > 0) { @@ -2397,28 +2384,7 @@ public class Jalview2XML AlcodonFrame[] alc = vamsasSet.getAlcodonFrame(); for (int i = 0; i < alc.length; i++) { - jalview.datamodel.AlignedCodonFrame cf = new jalview.datamodel.AlignedCodonFrame( - alc[i].getAlcodonCount()); - if (alc[i].getAlcodonCount() > 0) - { - Alcodon[] alcods = alc[i].getAlcodon(); - for (int p = 0; p < cf.codons.length; p++) - { - if (alcods[p].hasPos1() && alcods[p].hasPos2() - && alcods[p].hasPos3()) - { - // translated codons require three valid positions - cf.codons[p] = new int[3]; - cf.codons[p][0] = (int) alcods[p].getPos1(); - cf.codons[p][1] = (int) alcods[p].getPos2(); - cf.codons[p][2] = (int) alcods[p].getPos3(); - } - else - { - cf.codons[p] = null; - } - } - } + AlignedCodonFrame cf = new AlignedCodonFrame(0); if (alc[i].getAlcodMapCount() > 0) { AlcodMap[] maps = alc[i].getAlcodMap(); diff --git a/src/jalview/io/AppletFormatAdapter.java b/src/jalview/io/AppletFormatAdapter.java index d3b19fa..7265b5c 100755 --- a/src/jalview/io/AppletFormatAdapter.java +++ b/src/jalview/io/AppletFormatAdapter.java @@ -50,6 +50,22 @@ public class AppletFormatAdapter "PDB", "JnetFile", "RNAML", PhylipFile.FILE_DESC }; // , "SimpleBLAST" }; /** + * List of readable format file extensions by application in order + * corresponding to READABLE_FNAMES + */ + public static final String[] READABLE_EXTENSIONS = new String[] + { "fa, fasta, mfa, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa", + "sto,stk", "xml,rnaml", PhylipFile.FILE_EXT, "jar,jvp" }; + + /** + * List of readable formats by application in order corresponding to + * READABLE_EXTENSIONS + */ + public static final String[] READABLE_FNAMES = new String[] + { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Stockholm", + "RNAML", 
PhylipFile.FILE_DESC, "Jalview" }; + + /** * List of valid format strings for use by callers of the formatSequences * method */ @@ -73,22 +89,6 @@ public class AppletFormatAdapter { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview", "STH", "Jalview", PhylipFile.FILE_DESC }; - /** - * List of readable format file extensions by application in order - * corresponding to READABLE_FNAMES - */ - public static final String[] READABLE_EXTENSIONS = new String[] - { "fa, fasta, mfa, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa", - "jar,jvp", "sto,stk", "xml,rnaml", PhylipFile.FILE_EXT }; // ".blast" - - /** - * List of readable formats by application in order corresponding to - * READABLE_EXTENSIONS - */ - public static final String[] READABLE_FNAMES = new String[] - { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview", - "Stockholm", "RNAML", PhylipFile.FILE_DESC };// , - // "SimpleBLAST" // }; diff --git a/src/jalview/jbgui/GAlignFrame.java b/src/jalview/jbgui/GAlignFrame.java index 8dba34d..f633bf6 100755 --- a/src/jalview/jbgui/GAlignFrame.java +++ b/src/jalview/jbgui/GAlignFrame.java @@ -1816,6 +1816,7 @@ public class GAlignFrame extends JInternalFrame * cDNA menu options */ cdna.setText(MessageManager.getString("label.cdna")); + // link to available cDNA JMenuItem linkCdna = new JMenuItem( MessageManager.getString("label.link_cdna")); linkCdna.setToolTipText(JvSwingUtils.wrapTooltip(true, @@ -1829,6 +1830,7 @@ public class GAlignFrame extends JInternalFrame } }); cdna.add(linkCdna); + // align linked cDNA JMenuItem alignCdna = new JMenuItem( MessageManager.getString("label.align_cdna")); alignCdna.setToolTipText(JvSwingUtils.wrapTooltip(true, @@ -1843,6 +1845,20 @@ public class GAlignFrame extends JInternalFrame }); cdna.add(alignCdna); + // view alignment as cDNA (when known) + JMenuItem viewAsCdna = new JMenuItem( + MessageManager.getString("label.view_as_cdna")); + viewAsCdna.setToolTipText(JvSwingUtils.wrapTooltip(true, + 
MessageManager.getString("label.view_as_cdna_tip"))); + viewAsCdna.addActionListener(new ActionListener() { + @Override + public void actionPerformed(ActionEvent e) + { + viewAsCdna_actionPerformed(); + } + }); + cdna.add(viewAsCdna); + extractScores.setText(MessageManager.getString("label.extract_scores") + "..."); extractScores.addActionListener(new ActionListener() @@ -1853,15 +1869,11 @@ public class GAlignFrame extends JInternalFrame extractScores_actionPerformed(e); } }); - extractScores.setVisible(true); // JBPNote: TODO: make gui for regex based - // score extraction + extractScores.setVisible(true); + // JBPNote: TODO: make gui for regex based score extraction + + // for show products actions see AlignFrame.canShowProducts showProducts.setText(MessageManager.getString("label.get_cross_refs")); - /* - * showProducts.addActionListener(new ActionListener() { - * - * public void actionPerformed(ActionEvent e) { - * showProducts_actionPerformed(e); } }); - */ openFeatureSettings.setText(MessageManager .getString("label.feature_settings")); openFeatureSettings.addActionListener(new ActionListener() @@ -2435,6 +2447,12 @@ public class GAlignFrame extends JInternalFrame // selectMenu.add(listenToViewSelections); } + protected void viewAsCdna_actionPerformed() + { + // TODO Auto-generated method stub + + } + protected void alignCdna_actionPerformed() { // TODO Auto-generated method stub @@ -2613,10 +2631,6 @@ public class GAlignFrame extends JInternalFrame { } - protected void showProducts_actionPerformed(ActionEvent e) - { - } - protected void buildSortByAnnotationScoresMenu() { } diff --git a/src/jalview/structure/StructureSelectionManager.java b/src/jalview/structure/StructureSelectionManager.java index 3fead00..5a3beb4 100644 --- a/src/jalview/structure/StructureSelectionManager.java +++ b/src/jalview/structure/StructureSelectionManager.java @@ -43,8 +43,10 @@ import java.util.Enumeration; import java.util.HashMap; import java.util.IdentityHashMap; import 
java.util.Iterator; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.Vector; import MCview.Atom; @@ -56,12 +58,13 @@ public class StructureSelectionManager StructureMapping[] mappings; - private boolean processSecondaryStructure = false, - secStructServices = false, addTempFacAnnot = false; + private boolean processSecondaryStructure = false; - List seqmappings = null; + private boolean secStructServices = false; - private int[] seqmappingrefs = null; // refcount for seqmappings elements + private boolean addTempFacAnnot = false; + + private Set seqmappings = new LinkedHashSet(); private List commandListeners = new ArrayList(); @@ -890,74 +893,37 @@ public class StructureSelectionManager return sb.toString(); } - private synchronized void modifySeqMappingList(boolean add, - AlignedCodonFrame[] codonFrames) + /** + * Remove each of the given codonFrames from the stored set (if present). + * + * @param codonFrames + */ + public void removeMappings(AlignedCodonFrame[] codonFrames) { - if (!add && (seqmappings == null || seqmappings.size() == 0)) - { - return; - } - if (seqmappings == null) - { - seqmappings = new ArrayList(); - } - if (codonFrames != null && codonFrames.length > 0) + if (codonFrames != null) { - for (int cf = 0; cf < codonFrames.length; cf++) + for (AlignedCodonFrame acf : codonFrames) { - if (seqmappings.contains(codonFrames[cf])) - { - if (add) - { - seqmappingrefs[seqmappings.indexOf(codonFrames[cf])]++; - } - else - { - if (--seqmappingrefs[seqmappings.indexOf(codonFrames[cf])] <= 0) - { - int pos = seqmappings.indexOf(codonFrames[cf]); - int[] nr = new int[seqmappingrefs.length - 1]; - if (pos > 0) - { - System.arraycopy(seqmappingrefs, 0, nr, 0, pos); - } - if (pos < seqmappingrefs.length - 1) - { - System.arraycopy(seqmappingrefs, pos + 1, nr, 0, - seqmappingrefs.length - pos - 2); - } - } - } - } - else - { - if (add) - { - seqmappings.add(codonFrames[cf]); - - int[] nsr = 
new int[(seqmappingrefs == null) ? 1 - : seqmappingrefs.length + 1]; - if (seqmappingrefs != null && seqmappingrefs.length > 0) - { - System.arraycopy(seqmappingrefs, 0, nsr, 0, - seqmappingrefs.length); - } - nsr[(seqmappingrefs == null) ? 0 : seqmappingrefs.length] = 1; - seqmappingrefs = nsr; - } - } + seqmappings.remove(acf); } } } - public void removeMappings(AlignedCodonFrame[] codonFrames) - { - modifySeqMappingList(false, codonFrames); - } - + /** + * Add each of the given codonFrames to the stored set (if not already + * present). + * + * @param codonFrames + */ public void addMappings(AlignedCodonFrame[] codonFrames) { - modifySeqMappingList(true, codonFrames); + if (codonFrames != null) + { + for (AlignedCodonFrame acf : codonFrames) + { + seqmappings.add(acf); + } + } } Vector sel_listeners = new Vector(); @@ -1021,32 +987,6 @@ public class StructureSelectionManager } } - public void finalize() throws Throwable - { - if (listeners != null) - { - listeners.clear(); - listeners = null; - } - if (pdbIdFileName != null) - { - pdbIdFileName.clear(); - pdbIdFileName = null; - } - if (sel_listeners != null) - { - sel_listeners.clear(); - sel_listeners = null; - } - if (view_listeners != null) - { - view_listeners.clear(); - view_listeners = null; - } - mappings = null; - seqmappingrefs = null; - } - /** * release all references associated with this manager provider * @@ -1070,7 +1010,6 @@ public class StructureSelectionManager } catch (Throwable x) { } - ; } } } diff --git a/test/jalview/analysis/DnaTest.java b/test/jalview/analysis/DnaTest.java index b1ce141..4a8d4d0 100644 --- a/test/jalview/analysis/DnaTest.java +++ b/test/jalview/analysis/DnaTest.java @@ -6,6 +6,7 @@ import jalview.datamodel.ColumnSelection; import jalview.io.FormatAdapter; import java.io.IOException; +import java.util.Arrays; import org.junit.Test; @@ -34,7 +35,7 @@ public class DnaTest FormatAdapter.PASTE, "FASTA"); final String sequenceAsString = alf 
.getSequenceAt(0).getSequenceAsString(); - AlignmentI translated = Dna.CdnaTranslate(alf.getSequencesArray(), + AlignmentI translated = Dna.cdnaTranslate(alf.getSequencesArray(), new String[] { sequenceAsString }, new int[] { 0, alf.getWidth() - 1 }, alf.getGapCharacter(), null, @@ -59,7 +60,7 @@ public class DnaTest cs.hideColumns(6, 14); // hide codons 3/4/5 cs.hideColumns(24, 35); // hide codons 9-12 cs.hideColumns(177, 191); // hide codons 60-64 - AlignmentI translated = Dna.CdnaTranslate(alf.getSequencesArray(), + AlignmentI translated = Dna.cdnaTranslate(alf.getSequencesArray(), cs.getVisibleSequenceStrings(0, alf.getWidth(), alf.getSequencesArray()), new int[] { 0, alf.getWidth() - 1 }, alf.getGapCharacter(), null, @@ -67,4 +68,154 @@ public class DnaTest String aa = translated.getSequenceAt(0).getSequenceAsString(); assertEquals("AACDDGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVW", aa); } + + /** + * Tests for method that compares 'alignment' of two codon position triplets. + */ + @Test + public void testCompareCodonPos() + { + /* + * Returns 0 for any null argument + */ + assertEquals(0, Dna.compareCodonPos(new int[] + { 1, 2, 3 }, null)); + assertEquals(0, Dna.compareCodonPos(null, new int[] + { 1, 2, 3 })); + + /* + * Work through 27 combinations. First 9 cases where first position matches. + */ + assertMatches("AAA", "GGG"); // 2 and 3 match + assertFollows("AA-A", "GGG"); // 2 matches, 3 shifted seq1 + assertPrecedes("AAA", "GG-G"); // 2 matches, 3 shifted seq2 + assertFollows("A-AA", "GG-G"); // 2 shifted seq1, 3 matches + assertFollows("A-A-A", "GG-G"); // 2 shifted seq1, 3 shifted seq1 + // TODO is this right? + assertPrecedes("A-AA", "GG--G"); // 2 shifted seq1, 3 shifted seq2 + assertPrecedes("AA-A", "G-GG"); // 2 shifted seq2, 3 matches + assertPrecedes("AA--A", "G-GG"); // 2 shifted seq2, 3 shifted seq1 + assertPrecedes("AAA", "G-GG"); // 2 shifted seq2, 3 shifted seq2 + + /* + * 9 cases where first position is shifted in first sequence. 
+ */ + assertFollows("-AAA", "G-GG"); // 2 and 3 match + assertFollows("-AA-A", "G-GG"); // 2 matches, 3 shifted seq1 + assertPrecedes("-AAA", "G-G-G"); // 2 matches, 3 shifted seq2 + assertFollows("-A-AA", "G-G-G"); // 2 shifted seq1, 3 matches + assertFollows("-A-A-A", "G-G-G"); // 2 shifted seq1, 3 shifted seq1 + // is this right? codon2 ends after codon1 + assertPrecedes("-A-AA", "G-G--G"); // 2 shifted seq1, 3 shifted seq2 + assertPrecedes("-AA-A", "G--GG"); // 2 shifted seq2, 3 matches + assertPrecedes("-AA--A", "G--GG"); // 2 shifted seq2, 3 shifted seq1 + assertPrecedes("-AAA", "G--GG"); // 2 shifted seq2, 3 shifted seq2 + + /* + * 9 cases where first position is shifted in second sequence. + */ + assertPrecedes("A-AA", "-GGG"); // 2 and 3 match + assertPrecedes("A-A-A", "-GGG"); // 2 matches, 3 shifted seq1 + assertPrecedes("A-AA", "-GG-G"); // 2 matches, 3 shifted seq2 + assertPrecedes("A--AA", "-GG-G"); // 2 shifted seq1, 3 matches + assertPrecedes("A--AA", "-GGG"); // 2 shifted seq1, 3 shifted seq1 + assertPrecedes("A--AA", "-GG--G"); // 2 shifted seq1, 3 shifted seq2 + assertPrecedes("AA-A", "-GGG"); // 2 shifted seq2, 3 matches + assertPrecedes("AA--A", "-GGG"); // 2 shifted seq2, 3 shifted seq1 + assertPrecedes("AAA", "-GGG"); // 2 shifted seq2, 3 shifted seq2 + + /* + * two codon positions can each 'precede' the other! the comparison is + * biased to the first sequence + */ + // TODO is this correct? 
+ assertPrecedes("-A--AA", "--GGG"); + assertPrecedes("--AAA", "-A--AA"); + } + + /** + * Assert that the first sequence should map to the same position as the + * second in a translated alignment + * + * @param codon1 + * @param codon2 + */ + private void assertMatches(String codon1, String codon2) + { + assertEquals("Expected match (0)", 0, compare(codon1, codon2)); + } + + /** + * Assert that the first sequence should precede the second in a translated + * alignment + * + * @param codon1 + * @param codon2 + */ + private void assertPrecedes(String codon1, String codon2) + { + assertEquals("Expected precedes (-1)", -1, compare(codon1, codon2)); + } + + /** + * Assert that the first sequence should follow the second in a translated + * alignment + * + * @param codon1 + * @param codon2 + */ + private void assertFollows(String codon1, String codon2) + { + assertEquals("Expected follows (1)", 1, compare(codon1, codon2)); + } + + /** + * Convert two nucleotide strings to base positions and pass to + * Dna.compareCodonPos, return the result. + * + * @param s1 + * @param s2 + * @return + */ + private int compare(String s1, String s2) + { + final int[] cd1 = convertCodon(s1); + final int[] cd2 = convertCodon(s2); + System.out.println("K: " + s1 + " " + Arrays.toString(cd1)); + System.out.println("G: " + s2 + " " + Arrays.toString(cd2)); + System.out.println(); + return Dna.compareCodonPos(cd1, cd2); + } + + /** + * Convert a string e.g. "-GC-T" to base positions e.g. [1, 2, 4]. The string + * should have exactly 3 non-gap characters, and use '-' for gaps. + * + * @param s + * @return + */ + private int[] convertCodon(String s) + { + int[] result = new int[3]; + int i = 0; + for (int j = 0; j < s.length(); j++) + { + if (s.charAt(j) != '-') + { + result[i++] = j; + } + } + return result; + } + + /** + * Weirdly, maybe worth a test to prove the helper method of this test class. 
+ */ + @Test + public void testConvertCodon() + { + assertEquals("[0, 1, 2]", Arrays.toString(convertCodon("AAA"))); + assertEquals("[0, 2, 5]", Arrays.toString(convertCodon("A-A--A"))); + assertEquals("[1, 3, 4]", Arrays.toString(convertCodon("-A-AA-"))); + } } diff --git a/test/jalview/analysis/DnaTranslation.java b/test/jalview/analysis/DnaTranslation.java index 708ee21..8e6e5ae 100644 --- a/test/jalview/analysis/DnaTranslation.java +++ b/test/jalview/analysis/DnaTranslation.java @@ -110,7 +110,7 @@ public class DnaTranslation // full translation ColumnSelection cs = new jalview.datamodel.ColumnSelection(); assertNotNull("Couldn't do a full width translation of test data.", - jalview.analysis.Dna.CdnaTranslate( + jalview.analysis.Dna.cdnaTranslate( alf.getSequencesArray(), cs.getVisibleSequenceStrings(0, alf.getWidth(), alf.getSequencesArray()), new int[] @@ -130,7 +130,7 @@ public class DnaTranslation String[] sel = cs.getVisibleSequenceStrings(0, alf.getWidth(), alf.getSequencesArray()); jalview.datamodel.AlignmentI transAlf = jalview.analysis.Dna - .CdnaTranslate(alf.getSequencesArray(), sel, vcontigs, + .cdnaTranslate(alf.getSequencesArray(), sel, vcontigs, alf.getGapCharacter(), null, alf.getWidth(), null); assertTrue("Translation failed (ipos=" + ipos diff --git a/test/jalview/datamodel/AlignedCodonFrameTest.java b/test/jalview/datamodel/AlignedCodonFrameTest.java new file mode 100644 index 0000000..9f1d1e0 --- /dev/null +++ b/test/jalview/datamodel/AlignedCodonFrameTest.java @@ -0,0 +1,38 @@ +package jalview.datamodel; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertSame; +import jalview.util.MapList; + +import org.junit.Test; + +public class AlignedCodonFrameTest +{ + + /** + * Test the constructor which copies all except the aligned protein sequences. 
+ */ + @Test + public void testConstructor_copyWithSequence() + { + AlignedCodonFrame acf = new AlignedCodonFrame(0); + acf.codons = new int[][] + { new int[] + { 1, 3 }, new int[] + { 4, 6 } }; + MapList map = new MapList(new int[] + { 1, 3 }, new int[] + { 1, 1 }, 3, 1); + SequenceI aaseq = new Sequence("", "FKQ"); + SequenceI dnaseq = new Sequence("", "ATTCGTACGGAC"); + acf.addMap(dnaseq, aaseq, map); + SequenceI[] newaligned = new SequenceI[1]; + newaligned[0] = new Sequence("", "-F-K-Q"); + newaligned[0].setDatasetSequence(aaseq.getDatasetSequence()); + AlignedCodonFrame copy = new AlignedCodonFrame(acf, newaligned); + assertSame(copy.codons, acf.codons); + assertEquals(copy.aaWidth, acf.aaWidth); + assertSame(copy.getdnaSeqs(), acf.getdnaSeqs()); + assertSame(newaligned[0], copy.getAaForDnaSeq(dnaseq, false)); + } +} -- 1.7.10.2