From cf82c19425ee457a01c3f1a04aabfb0d9817af6f Mon Sep 17 00:00:00 2001 From: gmungoc Date: Mon, 12 Jan 2015 17:13:45 +0000 Subject: [PATCH] JAL-1619 load/align cDNA for protein (wip) --- resources/lang/Messages.properties | 13 +- src/jalview/analysis/AlignmentUtils.java | 148 +++++++++++++ src/jalview/datamodel/AlignedCodonFrame.java | 59 ++++- src/jalview/datamodel/Alignment.java | 212 +++++++++++------- src/jalview/datamodel/AlignmentI.java | 25 +-- src/jalview/gui/AlignFrame.java | 279 ++++++------------------ src/jalview/jbgui/GAlignFrame.java | 45 +++- src/jalview/schemes/ResidueProperties.java | 2 + src/jalview/util/MapList.java | 1 + test/jalview/analysis/AlignmentUtilsTests.java | 266 +++++++++++++++++++++- test/jalview/datamodel/AlignmentTest.java | 151 ++++++------- 11 files changed, 805 insertions(+), 396 deletions(-) diff --git a/resources/lang/Messages.properties b/resources/lang/Messages.properties index ef303bc..1652eb7 100644 --- a/resources/lang/Messages.properties +++ b/resources/lang/Messages.properties @@ -688,8 +688,15 @@ label.load_tree_for_sequence_set = Load a tree for this sequence set label.export_image = Export Image label.vamsas_store = VAMSAS store label.translate_cDNA = Translate cDNA -label.cDNA = cDNA -label.associate = Associate +label.cdna = cDNA +label.link_cdna = Link cDNA +label.link_cdna_tip = Link to any compatible cDNA alignments.
Sequences are linked that have the same name and compatible lengths. +label.no_cdna = No compatible cDNA was found +label.linked_cdna = {0} cDNA alignments linked +label.cdna_all_linked = All {0} compatible cDNA alignments are already linked +label.align_cdna = Align linked cDNA +label.align_cdna_tip = Any linked cDNA sequences will be realigned to match this alignment. +label.cdna_aligned = {0} sequences in {1} alignments were realigned label.align = Align label.extract_scores = Extract Scores label.get_cross_refs = Get Cross References @@ -1181,5 +1188,3 @@ label.no_colour_selection_in_scheme = Please, make a colour selection before to label.no_colour_selection_warn = Error saving colour scheme label.nonstandard_translation = Non-standard translation warn.nonstandard_translation = Non-standard translation(s) detected at {0}.
Do you wish to proceed? -label.cdna_realign = Warning -warn.cdna_realign = cDNA will be realigned if necessary to match the protein alignment.
Do you wish to proceed? diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 6385fa7..741e5e4 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -20,12 +20,17 @@ */ package jalview.analysis; +import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.SequenceI; +import jalview.schemes.ResidueProperties; +import jalview.util.MapList; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; /** * grab bag of useful alignment manipulation operations Expect these to be @@ -159,4 +164,147 @@ public class AlignmentUtils } return result; } + + /** + * Returns a map of lists of sequences in the alignment, keyed by sequence + * name. For use in mapping between different alignment views of the same + * sequences. + * + * @see jalview.datamodel.AlignmentI#getSequencesByName() + */ + public static Map> getSequencesByName( + AlignmentI al) + { + Map> theMap = new LinkedHashMap>(); + for (SequenceI seq : al.getSequences()) + { + String name = seq.getName(); + if (name != null) + { + List seqs = theMap.get(name); + if (seqs == null) + { + seqs = new ArrayList(); + theMap.put(name, seqs); + } + seqs.add(seq); + } + } + return theMap; + } + + /** + * Build mapping of protein to cDNA alignment. Mappings are made between + * sequences which have the same name and compatible lengths. + * + * @param proteinAlignment + * @param cdnaAlignment + * @return + */ + public static boolean mapProteinToCdna(final AlignmentI proteinAlignment, + final AlignmentI cdnaAlignment) + { + boolean mapped = false; + List thisSeqs = proteinAlignment.getSequences(); + + /* + * Build a look-up of cDNA sequences by name, for matching purposes. 
+ */ + Map> cdnaSeqs = cdnaAlignment + .getSequencesByName(); + + for (SequenceI aaSeq : thisSeqs) + { + AlignedCodonFrame acf = new AlignedCodonFrame( + proteinAlignment.getWidth()); + List candidates = cdnaSeqs.get(aaSeq.getName()); + if (candidates == null) + { + /* + * No cDNA sequence with matching name, so no mapping for this protein + * sequence + */ + continue; + } + for (SequenceI cdnaSeq : candidates) + { + MapList map = mapProteinToCdna(aaSeq, cdnaSeq); + if (map != null) + { + acf.addMap(cdnaSeq, aaSeq, map); + mapped = true; + } + } + proteinAlignment.addCodonFrame(acf); + } + return mapped; + } + + /** + * Build a mapping (if possible) of a protein to a cDNA sequence. The cDNA + * must be three times the length of the protein, possibly after ignoring + * start and/or stop codons. Returns null if no mapping is determined. + * + * @param proteinSeq the protein sequence; its dataset sequence is read + * @param cdnaSeq the cDNA sequence; its dataset sequence is read + * @return the mapping, or null if the lengths cannot be reconciled + */ + public static MapList mapProteinToCdna(SequenceI proteinSeq, + SequenceI cdnaSeq) + { + String aaSeqString = proteinSeq.getDatasetSequence() + .getSequenceAsString(); + String cdnaSeqString = cdnaSeq.getDatasetSequence() + .getSequenceAsString(); + if (aaSeqString == null || cdnaSeqString == null) + { + return null; + } + + final int mappedLength = 3 * aaSeqString.length(); + int cdnaLength = cdnaSeqString.length(); + int cdnaStart = 1; + int cdnaEnd = cdnaLength; + final int proteinStart = 1; + final int proteinEnd = aaSeqString.length(); + + /* + * If lengths don't match, try ignoring stop codon. + */ + if (cdnaLength != mappedLength) + { + for (Object stop : ResidueProperties.STOP) + { + if (cdnaSeqString.toUpperCase().endsWith((String) stop)) + { + cdnaEnd -= 3; + cdnaLength -= 3; + break; + } + } + } + + /* + * If lengths still don't match, try ignoring start codon.
+ */ + if (cdnaLength != mappedLength + && cdnaSeqString.toUpperCase().startsWith( + ResidueProperties.START)) + { + cdnaStart += 3; + cdnaLength -= 3; + } + + if (cdnaLength == mappedLength) + { + MapList map = new MapList(new int[] + { cdnaStart, cdnaEnd }, new int[] + { proteinStart, proteinEnd }, 3, 1); + return map; + } + else + { + return null; + } + } } diff --git a/src/jalview/datamodel/AlignedCodonFrame.java b/src/jalview/datamodel/AlignedCodonFrame.java index 0740795..4016ee5 100644 --- a/src/jalview/datamodel/AlignedCodonFrame.java +++ b/src/jalview/datamodel/AlignedCodonFrame.java @@ -226,10 +226,24 @@ public class AlignedCodonFrame /** * * @param sequenceRef - * @return null or corresponding aaSeq entry for dnaSeq entry + * @return null or corresponding aaSeq dataset sequence for dnaSeq entry */ public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef) { + return getAaForDnaSeq(dnaSeqRef, true); + } + + /** + * Return the corresponding aligned or dataset aa sequence for given dna + * sequence, null if not found. + * + * @param sequenceRef + * @param returnDataset + * if true, return the aa dataset, else the aligned sequence + * @return + */ + public SequenceI getAaForDnaSeq(SequenceI dnaSeqRef, boolean returnDataset) + { if (dnaSeqs == null) { return null; @@ -239,7 +253,16 @@ public class AlignedCodonFrame { if (dnaSeqs[ds] == dnaSeqRef || dnaSeqs[ds] == dnads) { - return dnaToProt[ds].to; + if (returnDataset) + { + return dnaToProt[ds].to; + } + else + { + // TODO very fragile - depends on dnaSeqs, dnaToProt, a_aaSeqs moving + // in parallel; revise data model to guarantee this + return (SequenceI) a_aaSeqs.elementAt(ds); + } } } return null; @@ -329,4 +352,36 @@ public class AlignedCodonFrame } } } + + /** + * Returns the DNA codon positions (base 1) for the given position (base 1) in + * a mapped protein sequence, or null if no mapping is found. + * + * Intended for use in aligning cDNA to match aligned protein. 
Only the first + * mapping found is returned, so not suitable for use if multiple protein + * sequences are mapped to the same cDNA (but aligning cDNA as protein is + * ill-defined for this case anyway). + * + * @param seq + * the DNA dataset sequence + * @param aaPos + * residue position (base 1) in a protein sequence + * @return the mapped DNA positions (base 1), or null if no mapping is found + */ + public int[] getDnaPosition(SequenceI seq, int aaPos) + { + /* + * Adapted from markMappedRegion(); the mapping arrays may be null (getAaForDnaSeq guards the same case). + */ + MapList ml = null; + for (int i = 0; dnaSeqs != null && dnaToProt != null && i < dnaToProt.length; i++) + { + if (dnaSeqs[i] == seq) + { + ml = getdnaToProt()[i]; + break; + } + } + return ml == null ? null : ml.locateInFrom(aaPos, aaPos); + } } diff --git a/src/jalview/datamodel/Alignment.java b/src/jalview/datamodel/Alignment.java index 91108bc..5d11a20 100755 --- a/src/jalview/datamodel/Alignment.java +++ b/src/jalview/datamodel/Alignment.java @@ -20,6 +20,7 @@ */ package jalview.datamodel; +import jalview.analysis.AlignmentUtils; import jalview.util.MessageManager; import java.util.ArrayList; @@ -157,6 +158,17 @@ public class Alignment implements AlignmentI } /** + * Returns a map of lists of sequences keyed by sequence name. + * + * @return + */ + @Override + public Map> getSequencesByName() + { + return AlignmentUtils.getSequencesByName(this); + } + + /** * DOCUMENT ME! * * @param i @@ -1596,114 +1608,156 @@ public class Alignment implements AlignmentI } /** - * Answers true if the supplied alignment has the same number of sequences, - * and they are of equivalent length, ignoring gaps. Alignments should be of - * the same type (protein/nucleotide) or different types with 3:1 length - * scaling. + * Align this alignment 'the same as' the given one. Mapped sequences only are + * realigned. If both of the same type (nucleotide/protein) then align both + * identically. If this is nucleotide and the other is protein, make 3 gaps + * for each gap in the protein sequences.
If this is protein and the other is + * nucleotide, insert a gap for each 3 gaps (or part thereof) between + * nucleotide bases. Does nothing if alignment of protein from cDNA is + * requested (not yet implemented). * * @param al */ @Override - public boolean isMappableTo(AlignmentI al) + public int alignAs(AlignmentI al) { - int thisCodonScale = this.isNucleotide() ? 1 : 3; - int thatCodonScale = al.isNucleotide() ? 1 : 3; - if (this == al || this.getHeight() != al.getHeight()) + int count = 0; + boolean thisIsNucleotide = this.isNucleotide(); + boolean thatIsProtein = !al.isNucleotide(); + if (!thatIsProtein && !thisIsNucleotide) { - return false; + System.err + .println("Alignment of protein from cDNA not yet implemented"); + return 0; + // todo: build it - a variant of Dna.CdnaTranslate() } + char thisGapChar = this.getGapCharacter(); + char thatGapChar = al.getGapCharacter(); + String gap = thisIsNucleotide && thatIsProtein ? String + .valueOf(new char[] + { thisGapChar, thisGapChar, thisGapChar }) : String + .valueOf(thisGapChar); + int ratio = thisIsNucleotide && thatIsProtein ? 3 : 1; - // TODO: match sequence ids, allow different sequence ordering? - // TODO: allow for stop/start codons? - // TODO: exclude introns - int i = 0; - for (SequenceI seq : this.getSequences()) - { - final int thisSequenceDnaLength = seq.getDatasetSequence() - .getLength() * thisCodonScale; - final int thatSequenceDnaLength = al.getSequenceAt(i) - .getDatasetSequence().getLength() - * thatCodonScale; - if (thisSequenceDnaLength != thatSequenceDnaLength) + /* + * Get mappings from 'that' alignment's sequences to this. + */ + for (SequenceI alignTo : getSequences()) + { + AlignedCodonFrame[] mappings = al.getCodonFrame(alignTo); + if (mappings != null) { - return false; + for (AlignedCodonFrame mapping : mappings) + { + count += alignSequenceAs(alignTo, mapping, thatGapChar, gap, + ratio) ? 
1 : 0; + } } - i++; } - return true; + return count; } /** - * Align this alignment the same as the given one. If both of the same type - * (nucleotide/protein) then align both identically. If this is nucleotide and - * the other is protein, make 3 gaps for each gap in the protein sequences. If - * this is protein and the other is nucleotide, insert a gap for each 3 gaps - * (or part thereof) between nucleotide bases. The two alignments should be - * compatible in height and lengths, but if not, then discrepancies will be - * ignored with unpredictable results. + * Align sequence 'seq' the same way as 'other'. Note this currently assumes + * that we are aligned cDNA to match protein. * - * @param al - * @throws UnsupportedOperation - * if alignment of protein from cDNA is requested (not yet - * implemented) + * @param seq + * the sequence to be realigned + * @param mapping + * holds mapping from the sequence whose alignment is to be 'copied' + * @param thatGapChar + * gap character used in the 'other' sequence + * @param gap + * character string represent a gap in the realigned sequence + * @param ratio + * the number of positions in the realigned sequence corresponding to + * one in the 'other' + * @return true if the sequence was realigned, false if it could not be */ - @Override - public void alignAs(AlignmentI al) - { - boolean thisIsNucleotide = this.isNucleotide(); - boolean thatIsProtein = !al.isNucleotide(); - if (!thatIsProtein && !thisIsNucleotide) + protected boolean alignSequenceAs(SequenceI seq, + AlignedCodonFrame mapping, + char thatGapChar, + String gap, int ratio) + { + char myGapChar = gap.charAt(0); + // TODO rework this to use the mapping to match 'this' to 'that' residue + // position, to handle introns and exons correctly. 
+ // TODO generalise to work for Protein-Protein, dna-dna, dna-protein + SequenceI alignFrom = mapping.getAaForDnaSeq(seq, false); + if (alignFrom == null) { - throw new UnsupportedOperationException( - "Alignment of protein from cDNA not implemented"); + return false; } - char thisGapChar = this.getGapCharacter(); - char thatGapChar = al.getGapCharacter(); - String gap = thisIsNucleotide && thatIsProtein ? String - .valueOf(new char[] - { thisGapChar, thisGapChar, thisGapChar }) : String - .valueOf(thisGapChar); - int ratio = thisIsNucleotide && thatIsProtein ? 3 : 1; - int i = 0; - for (SequenceI seq : this.getSequences()) + final char[] thisSeq = seq.getSequence(); + final char[] thisDs = seq.getDatasetSequence().getSequence(); + final char[] thatAligned = alignFrom.getSequence(); + StringBuilder thisAligned = new StringBuilder(2 * thisDs.length); + + /* + * Find the DNA dataset position that corresponds to the first protein + * residue (e.g. ignoring start codon in cDNA). + */ + int[] dnaStart = mapping.getDnaPosition(seq.getDatasetSequence(), 1); + int thisDsPosition = dnaStart == null ? 0 : dnaStart[0] - 1; + int thisSeqPos = 0; + + /* + * Copy aligned cDNA up to (excluding) the first mapped base. + */ + int basesWritten = 0; + while (basesWritten < thisDsPosition && thisSeqPos < thisSeq.length) + { + char c = thisSeq[thisSeqPos++]; + thisAligned.append(c); + if (c != myGapChar) + { + basesWritten++; + } + } + + /* + * Now traverse the aligned protein mirroring its gaps in cDNA. 
+ */ + for (char thatChar : thatAligned) { - SequenceI other = al.getSequenceAt(i++); - if (other == null) + if (thatChar == thatGapChar) { - continue; + /* + * Add (equivalent of) a gap + */ + thisAligned.append(gap); } - char[] thisDs = seq.getDatasetSequence().getSequence(); - char[] thatDs = other.getSequence(); - StringBuilder thisAligned = new StringBuilder(2 * thisDs.length); - int thisDsPosition = 0; - for (char thatChar : thatDs) + else { - if (thatChar == thatGapChar) - { - /* - * Add (equivalent of) a gap - */ - thisAligned.append(gap); - } - else + /* + * Add (equivalent of) a residue + */ + for (int j = 0; j < ratio && thisDsPosition < thisDs.length; j++) { + thisAligned.append(thisDs[thisDsPosition++]); + /* - * Add (equivalent of) a residue + * Also advance over any gaps and the next residue in the old aligned + * sequence; the bounds test must precede the array read */ - for (int j = 0; j < ratio && thisDsPosition < thisDs.length; j++) + while (thisSeqPos < thisSeq.length + && thisSeq[thisSeqPos] == myGapChar) { - thisAligned.append(thisDs[thisDsPosition++]); + thisSeqPos++; } + thisSeqPos++; } } - /* - * Include any 'extra' residues (there shouldn't be). - */ - while (thisDsPosition < thisDs.length) - { - thisAligned.append(thisDs[thisDsPosition++]); - } - seq.setSequence(new String(thisAligned)); } + + /* + * Finally copy any 'extra' aligned cDNA (e.g. stop codon, introns). + */ + while (thisSeqPos < thisSeq.length) + { + thisAligned.append(thisSeq[thisSeqPos++]); + } + seq.setSequence(new String(thisAligned)); + return true; } } diff --git a/src/jalview/datamodel/AlignmentI.java b/src/jalview/datamodel/AlignmentI.java index 130a073..bd9ba9e 100755 --- a/src/jalview/datamodel/AlignmentI.java +++ b/src/jalview/datamodel/AlignmentI.java @@ -37,6 +37,7 @@ public interface AlignmentI extends AnnotatedCollectionI public int getHeight(); /** + * * Calculates the maximum width of the alignment, including gaps. * * @return Greatest sequence length within alignment.
@@ -87,6 +88,13 @@ public interface AlignmentI extends AnnotatedCollectionI public SequenceI getSequenceAt(int i); /** + * Returns a map of lists of sequences keyed by sequence name. + * + * @return + */ + public Map> getSequencesByName(); + + /** * Add a new sequence to this alignment. * * @param seq @@ -484,24 +492,15 @@ public interface AlignmentI extends AnnotatedCollectionI public void validateAnnotation(AlignmentAnnotation alignmentAnnotation); /** - * Answers true if the two alignments residues could be put into - * correspondence, i.e. the supplied alignment has the same number of - * sequences, and they are of equivalent length, ignoring gaps. Alignments - * should be of the same type (protein/nucleotide) or different types with 3:1 - * length scaling. - * - * @param al - */ - public boolean isMappableTo(AlignmentI al); - - /** * Align this alignment the same as the given one. If both of the same type * (nucleotide/protein) then align both identically. If this is nucleotide and * the other is protein, make 3 gaps for each gap in the protein sequences. If * this is protein and the other is nucleotide, insert a gap for each 3 gaps - * (or part thereof) between nucleotide bases. + * (or part thereof) between nucleotide bases. Returns the number of mapped + * sequences that were realigned . 
* * @param al + * @return */ - public void alignAs(AlignmentI al); + public int alignAs(AlignmentI al); } diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index b839a38..7a36754 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -85,7 +85,6 @@ import jalview.schemes.TurnColourScheme; import jalview.schemes.UserColourScheme; import jalview.schemes.ZappoColourScheme; import jalview.structure.StructureSelectionManager; -import jalview.util.MapList; import jalview.util.MessageManager; import jalview.ws.jws1.Discoverer; import jalview.ws.jws2.Jws2Discoverer; @@ -121,7 +120,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Enumeration; import java.util.Hashtable; -import java.util.Iterator; import java.util.List; import java.util.Vector; @@ -707,7 +705,6 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, { showTranslation.setVisible(nucleotide); cdna.setVisible(!nucleotide); - configureCdnaMenu(); conservationMenuItem.setEnabled(!nucleotide); modifyConservation.setEnabled(!nucleotide); showGroupConservation.setEnabled(!nucleotide); @@ -716,241 +713,99 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, } /** - * Add any suitable options to the 'cDNA' sub-menu. Options may be to - * associate a cDNA alignment, or to align an associated alignment. To be - * suitable for association, an AlignFrame has to be nucleotide, and have the - * right number of sequences of corresponding length to this one. + * Builds codon mappings from this (protein) alignment to any compatible + * nucleotide alignments. Mappings are built between sequences with the same + * name and compatible lengths. Also makes the cDNA alignment a + * CommandListener for the protein alignment so that edits are mirrored. 
*/ - protected void configureCdnaMenu() + @Override + protected void linkCdna_actionPerformed() { - cdna.removeAll(); + int linkedCount = 0; + int alreadyLinkedCount = 0; + final AlignmentI thisAlignment = this.alignPanel.getAlignment(); - /* - * Identify candidates for 'associate cDNA', add to menu. - */ - List candidates = getCdnaCandidates(); - for (final AlignFrame candidate : candidates) + for (AlignFrame af : Desktop.getAlignframes()) { - final String text = MessageManager.getString("label.associate") + " " - + candidate.getTitle(); - JMenuItem option = new JMenuItem(text); - option.addActionListener(new ActionListener() + if (af.alignPanel != null) { - @Override - public void actionPerformed(ActionEvent e) + final AlignmentI thatAlignment = af.alignPanel.getAlignment(); + if (thatAlignment.isNucleotide()) { - associateCdna(candidate); - } - }); - cdna.add(option); - } - - /* - * Identify candidates for 'align cDNA', add to menu. - */ - final AlignFrame[] alignframes = Desktop.getAlignframes(); - if (alignframes != null) - { - for (final AlignFrame af : alignframes) - { - if (af != this) - { - if (this.viewport.getStructureSelectionManager() - .hasCommandListener(af.viewport)) + // TODO exclude an AlignFrame which is already mapped to this one + // temporary version: exclude if already a CommandListener (should + // cover most cases but not all) + final boolean alreadyMapped = this.viewport + .getStructureSelectionManager().hasCommandListener( + af.viewport); + if (alreadyMapped) + { + alreadyLinkedCount++; + } + else { - final String text = MessageManager.getString("label.align") - + " " + af.getTitle(); - JMenuItem option = new JMenuItem(text); - option.addActionListener(new ActionListener() + boolean mapped = AlignmentUtils.mapProteinToCdna(thisAlignment, + thatAlignment); + if (mapped) { - @Override - public void actionPerformed(ActionEvent e) - { - af.alignPanel.getAlignment().alignAs( - AlignFrame.this.alignPanel.getAlignment()); - 
af.viewport.alignmentChanged(af.alignPanel); - } - }); - cdna.add(option); + final StructureSelectionManager ssm = StructureSelectionManager + .getStructureSelectionManager(Desktop.instance); + ssm.addMappings(thisAlignment.getCodonFrames()); + ssm.addCommandListener(af.getViewport()); + linkedCount++; + } } } } } - - cdna.setEnabled(cdna.getMenuComponentCount() > 0); + String msg = ""; + if (linkedCount == 0 && alreadyLinkedCount == 0) + { + msg = MessageManager.getString("label.no_cdna"); + } + else if (linkedCount > 0) + { + msg = MessageManager.formatMessage("label.linked_cdna", linkedCount); + } + else + { + msg = MessageManager.formatMessage("label.cdna_all_linked", + alreadyLinkedCount); + } + setStatus(msg); } /** - * Returns a list of AlignFrame which are valid candidates for being the cDNA - * to map to this (protein) alignment. Valid means a nucleotide alignment with - * matching number of sequences and sequence lengths (excluding gaps). - * - * @return + * Align any linked cDNA to match the alignment of this (protein) alignment. + * Any mapped sequence regions will be realigned, unmapped sequences are not + * affected. 
*/ - protected List getCdnaCandidates() + @Override + protected void alignCdna_actionPerformed() { - List result = new ArrayList(); - if (this.alignPanel != null) + int seqCount = 0; + int alignCount = 0; + final AlignmentI thisAlignment = this.alignPanel.getAlignment(); + for (AlignFrame af : Desktop.getAlignframes()) { - AlignmentI thisAlignment = this.alignPanel.getAlignment(); - if (thisAlignment == null || thisAlignment.isNucleotide()) + if (af.alignPanel != null) { - return result; - } - final AlignFrame[] alignframes = Desktop.getAlignframes(); - if (alignframes != null) - { - for (AlignFrame af : alignframes) + final AlignmentI thatAlignment = af.alignPanel.getAlignment(); + if (thatAlignment.isNucleotide()) { - if (af.alignPanel != null) + int seqsAligned = thatAlignment.alignAs(thisAlignment); + seqCount += seqsAligned; + if (seqsAligned > 0) { - final AlignmentI thatAlignment = af.alignPanel.getAlignment(); - if (thatAlignment.isNucleotide() - && thisAlignment.isMappableTo(thatAlignment)) - { - // TODO exclude an AlignFrame which is already mapped to this one - // simple version: exclude if already a CommandListener (should - // cover most cases but not all) - if (!this.viewport.getStructureSelectionManager() - .hasCommandListener(af.viewport)) - { - result.add(af); - } - } + af.viewport.alignmentChanged(af.alignPanel); + alignCount++; } } } } - return result; - } - - /** - * Build the codon mappings between the given (nucleotide) alignment and this - * (protein) alignment. Also make the cDNA alignment a CommandListener for the - * protein alignment so that edits are mirrored. The alignments must have the - * same number, and equivalent lengths, of (unaligned) sequence. 
- * - * @param cdna - * @throws IllegalStateException - * if sequence counts or lengths are incompatible - */ - protected void associateCdna(AlignFrame cdna) - { - /* - * Warn that cDNA may be realigned to match protein - */ - // int confirm = JOptionPane.showConfirmDialog( - // this, - // JvSwingUtils.wrapTooltip(true, - // MessageManager.getString("warn.cdna_realign")), - // MessageManager.getString("label.cdna_realign"), - // JOptionPane.OK_CANCEL_OPTION); - // if (confirm == JOptionPane.CANCEL_OPTION - // || confirm == JOptionPane.CLOSED_OPTION) - // { - // return; - // } - - final AlignmentI aaAlignment = this.alignPanel.getAlignment(); - Iterator thisSeqs = aaAlignment - .getSequences().iterator(); - Iterator cdnaSeqs = cdna.alignPanel.getAlignment() - .getSequences().iterator(); - AlignedCodonFrame acf = new AlignedCodonFrame(aaAlignment.getWidth()); - while (thisSeqs.hasNext()) - { - if (!cdnaSeqs.hasNext()) - { - throw new IllegalStateException("Too few sequences to map"); - } - final SequenceI aaSeq = thisSeqs.next(); - String aaSeqString = aaSeq.getDatasetSequence() - .getSequenceAsString(); - final SequenceI cdnaSeq = cdnaSeqs.next(); - String cdnaSeqString = cdnaSeq.getDatasetSequence() - .getSequenceAsString(); - final int aaLength = aaSeqString.length(); - final int cdnaLength = cdnaSeqString.length(); - if (cdnaLength != 3 * aaLength) - { - throw new IllegalStateException( - "Protein/cDNA lengths don't match: " + aaLength + "/" - + cdnaLength); - } - - /* - * Warn if mapping includes non-standard translations - */ - if (!doTranslationWarningCheck(aaSeq.getName(), aaSeqString, - cdnaSeqString)) - { - return; - } - - MapList map = new MapList(new int[] - { 1, cdnaLength }, new int[] - { 1, aaLength }, 3, 1); - acf.addMap(cdnaSeq, aaSeq, map); - - aaAlignment.addCodonFrame(acf); - - final StructureSelectionManager ssm = StructureSelectionManager - .getStructureSelectionManager(Desktop.instance); - ssm.addMappings(aaAlignment.getCodonFrames()); - 
ssm.addCommandListener(cdna.getViewport()); - - /* - * Rebuild 'associate cDna' menu so it now excludes the one just - * associated. - */ - configureCdnaMenu(); - } - } - - /** - * Show a warning if any non-standard cDNA to protein would result from - * mapping the sequences. - * - * @param aaSeqName - * @param aaSeqString - * @param aaSeqString - * @return true if no warning, or it is accepted, false if user chooses not to - * proceed. - */ - protected boolean doTranslationWarningCheck(String aaSeqName, - String aaSeqString, String cdnaSeqString) - { - final int aaLength = aaSeqString.length(); - boolean warning = false; - String msg = aaSeqName; - for (int i = 0; i < aaLength; i++) - { - String codon = cdnaSeqString.substring(i * 3, i * 3 + 3); - String aa = ResidueProperties.codonTranslate(codon); - if (!(aa.charAt(0) == aaSeqString.charAt(i))) - { - warning = true; - msg += ":" + (i + 1) + ":" + aaSeqString.charAt(i) + "/" + codon - + ":" + aa; - break; - } - } - if (warning) - { - final String txt = JvSwingUtils.wrapTooltip(true, MessageManager - .formatMessage("warn.nonstandard_translation", msg)); - int confirm = JOptionPane.showConfirmDialog(this, txt, - MessageManager.getString("label.nonstandard_translation"), - JOptionPane.OK_CANCEL_OPTION); - if (confirm == JOptionPane.CANCEL_OPTION - || confirm == JOptionPane.CLOSED_OPTION) - { - return false; - } - } - return true; + setStatus(MessageManager.formatMessage("label.cdna_aligned", seqCount, + alignCount)); } - /** * set up menus for the current viewport. 
This may be called after any * operation that affects the data in the current view (selection changed, diff --git a/src/jalview/jbgui/GAlignFrame.java b/src/jalview/jbgui/GAlignFrame.java index edb4ac5..8dba34d 100755 --- a/src/jalview/jbgui/GAlignFrame.java +++ b/src/jalview/jbgui/GAlignFrame.java @@ -1811,7 +1811,38 @@ public class GAlignFrame extends JInternalFrame showTranslation_actionPerformed(e); } }); - cdna.setText(MessageManager.getString("label.cDNA")); + + /* + * cDNA menu options + */ + cdna.setText(MessageManager.getString("label.cdna")); + JMenuItem linkCdna = new JMenuItem( + MessageManager.getString("label.link_cdna")); + linkCdna.setToolTipText(JvSwingUtils.wrapTooltip(true, + MessageManager.getString("label.link_cdna_tip"))); + linkCdna.addActionListener(new ActionListener() + { + @Override + public void actionPerformed(ActionEvent e) + { + linkCdna_actionPerformed(); + } + }); + cdna.add(linkCdna); + JMenuItem alignCdna = new JMenuItem( + MessageManager.getString("label.align_cdna")); + alignCdna.setToolTipText(JvSwingUtils.wrapTooltip(true, + MessageManager.getString("label.align_cdna_tip"))); + alignCdna.addActionListener(new ActionListener() + { + @Override + public void actionPerformed(ActionEvent e) + { + alignCdna_actionPerformed(); + } + }); + cdna.add(alignCdna); + extractScores.setText(MessageManager.getString("label.extract_scores") + "..."); extractScores.addActionListener(new ActionListener() @@ -2404,6 +2435,18 @@ public class GAlignFrame extends JInternalFrame // selectMenu.add(listenToViewSelections); } + protected void alignCdna_actionPerformed() + { + // TODO Auto-generated method stub + + } + + protected void linkCdna_actionPerformed() + { + // TODO Auto-generated method stub + + } + /** * Action on clicking sort annotations by type. 
* diff --git a/src/jalview/schemes/ResidueProperties.java b/src/jalview/schemes/ResidueProperties.java index 8acf1f2..8801f50 100755 --- a/src/jalview/schemes/ResidueProperties.java +++ b/src/jalview/schemes/ResidueProperties.java @@ -675,6 +675,8 @@ public class ResidueProperties public static Vector STOP = new Vector(); + public static String START = "ATG"; + static { codonHash.put("K", Lys); diff --git a/src/jalview/util/MapList.java b/src/jalview/util/MapList.java index 4c62500..5fbc956 100644 --- a/src/jalview/util/MapList.java +++ b/src/jalview/util/MapList.java @@ -39,6 +39,7 @@ public class MapList */ public boolean equals(MapList obj) { + // TODO should have @Override and arg0 of type Object if (obj == this) { return true; diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 18b4252..f76362f 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -20,19 +20,63 @@ */ package jalview.analysis; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; - -import org.junit.Test; - +import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; +import jalview.datamodel.Mapping; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; import jalview.io.AppletFormatAdapter; +import jalview.io.FormatAdapter; +import jalview.util.MapList; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import org.junit.Test; public class AlignmentUtilsTests { + // @formatter:off + private static final String TEST_DATA = + "# STOCKHOLM 1.0\n" + + "#=GS D.melanogaster.1 AC AY119185.1/838-902\n" + + "#=GS D.melanogaster.2 AC AC092237.1/57223-57161\n" + + "#=GS D.melanogaster.3 AC AY060611.1/560-627\n" + + "D.melanogaster.1 G.AGCC.CU...AUGAUCGA\n" + + "#=GR 
D.melanogaster.1 SS ................((((\n" + + "D.melanogaster.2 C.AUUCAACU.UAUGAGGAU\n" + + "#=GR D.melanogaster.2 SS ................((((\n" + + "D.melanogaster.3 G.UGGCGCU..UAUGACGCA\n" + + "#=GR D.melanogaster.3 SS (.(((...(....(((((((\n" + + "//"; + + private static final String AA_SEQS_1 = + ">Seq1Name\n" + + "K-QY--L\n" + + ">Seq2Name\n" + + "-R-FP-W-\n"; + + private static final String CDNA_SEQS_1 = + ">Seq1Name\n" + + "AC-GG--CUC-CAA-CT\n" + + ">Seq2Name\n" + + "-CG-TTA--ACG---AAGT\n"; + + private static final String CDNA_SEQS_2 = + ">Seq1Name\n" + + "GCTCGUCGTACT\n" + + ">Seq2Name\n" + + "GGGTCAGGCAGT\n"; + // @formatter:on + public static Sequence ts=new Sequence("short","ASDASDASDASDASDASDASDASDASDASDASDASDASD"); + @Test public void testExpandFlanks() { @@ -55,6 +99,222 @@ public class AlignmentUtilsTests assertTrue("Flanking sequence not the same as original dataset sequence.\n"+ung+"\n"+sq.getDatasetSequence().getSequenceAsString(),ung.equalsIgnoreCase(sq.getDatasetSequence().getSequenceAsString())); } } + } } + + /** + * Test method that returns a map of lists of sequences by sequence name. + * + * @throws IOException + */ + @Test + public void testGetSequencesByName() throws IOException + { + final String data = ">Seq1Name\nKQYL\n" + ">Seq2Name\nRFPW\n" + + ">Seq1Name\nABCD\n"; + AlignmentI al = loadAlignment(data, "FASTA"); + Map> map = AlignmentUtils + .getSequencesByName(al); + assertEquals(2, map.keySet().size()); + assertEquals(2, map.get("Seq1Name").size()); + assertEquals("KQYL", map.get("Seq1Name").get(0).getSequenceAsString()); + assertEquals("ABCD", map.get("Seq1Name").get(1).getSequenceAsString()); + assertEquals(1, map.get("Seq2Name").size()); + assertEquals("RFPW", map.get("Seq2Name").get(0).getSequenceAsString()); + } + /** + * Helper method to load an alignment and ensure dataset sequences are set up. 
+ * + * @param data + * @param format TODO + * @return + * @throws IOException + */ + protected AlignmentI loadAlignment(final String data, String format) throws IOException + { + Alignment a = new FormatAdapter().readFile(data, + AppletFormatAdapter.PASTE, format); + a.setDataset(null); + return a; + } + /** + * Test mapping of protein to cDNA. + * + * @throws IOException + */ + @Test + public void testMapProteinToCdna() throws IOException + { + // protein: Human + Mouse, 3 residues + AlignmentI protein = loadAlignment( + ">Human\nKQY\n>Mouse\nAFP\n>Worm\nRST\n", + "FASTA"); + // cDNA: Mouse, Human, Mouse, 9 bases + // @formatter:off + String dnaData = + ">Mouse\nGAAATCCAG\n" + + ">Human\nTTCGATTAC\n" + + ">Mouse\nGTCGTTTGC\n" + + ">Mouse\nGTCGTTTGCgac\n" + // not mapped - wrong length + ">Fly\nGTCGTTTGC\n"; // not mapped - no name match + // @formatter:on + AlignmentI cdna1 = loadAlignment( + dnaData, + "FASTA"); + boolean mapped = AlignmentUtils.mapProteinToCdna(protein, cdna1); + assertTrue(mapped); + + /* + * Check two mappings (one for Mouse, one for Human) + */ + assertEquals(2, protein.getCodonFrames().length); + assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).length); + assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).length); + + /* + * Inspect mapping for Human protein + */ + AlignedCodonFrame humanMapping = protein.getCodonFrame(protein + .getSequenceAt(0))[0]; + assertEquals(1, humanMapping.getdnaSeqs().length); + assertEquals(cdna1.getSequenceAt(1).getDatasetSequence(), + humanMapping.getdnaSeqs()[0]); + Mapping[] protMappings = humanMapping.getProtMappings(); + assertEquals(1, protMappings.length); + MapList mapList = protMappings[0].getMap(); + assertEquals(3, mapList.getFromRatio()); + assertEquals(1, mapList.getToRatio()); + assertTrue(Arrays.equals(new int[] + { 1, 9 }, mapList.getFromRanges())); + assertTrue(Arrays.equals(new int[] + { 1, 3 }, mapList.getToRanges())); + + /* + * Inspect mappings for Mouse protein + 
*/ + AlignedCodonFrame mouseMapping1 = protein.getCodonFrame(protein + .getSequenceAt(1))[0]; + assertEquals(2, mouseMapping1.getdnaSeqs().length); + assertEquals(cdna1.getSequenceAt(0).getDatasetSequence(), + mouseMapping1.getdnaSeqs()[0]); + assertEquals(cdna1.getSequenceAt(2).getDatasetSequence(), + mouseMapping1.getdnaSeqs()[1]); + protMappings = mouseMapping1.getProtMappings(); + assertEquals(2, protMappings.length); + for (int i = 0; i < 2; i++) + { + mapList = protMappings[i].getMap(); + assertEquals(3, mapList.getFromRatio()); + assertEquals(1, mapList.getToRatio()); + assertTrue(Arrays.equals(new int[] + { 1, 9 }, mapList.getFromRanges())); + assertTrue(Arrays.equals(new int[] + { 1, 3 }, mapList.getToRanges())); + } + } + + /** + * Test mapping of protein to cDNA which may include start and/or stop codons. + * + * @throws IOException + */ + @Test + public void testMapProteinToCdna_stopStartCodons() throws IOException + { + // protein: Human + Mouse, 3 residues + AlignmentI protein = loadAlignment( + ">Human\nKQY\n>Mouse\nAFP\n>Worm\nRST\n", "FASTA"); + // @formatter:off + String dnaData = + ">Mouse\natgGAAATCCAG\n" + // Mouse with start codon + ">Human\nTTCGATtactaa\n" + // Human with stop codon TAA + ">Mouse\nGTCGTTTGctaG\n" + // Mouse with stop codon TAG + ">Human\nGTCGTTTgctGa\n" + // Human with stop codon TGA + ">Mouse\nATGGTCGTTTGCtag\n"; // Mouse with start and stop codons + // @formatter:on + AlignmentI cdna1 = loadAlignment( + dnaData, + "FASTA"); + boolean mapped = AlignmentUtils.mapProteinToCdna(protein, cdna1); + assertTrue(mapped); + + /* + * Check two mappings (one for Mouse, one for Human) + */ + assertEquals(2, protein.getCodonFrames().length); + assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(0)).length); + assertEquals(1, protein.getCodonFrame(protein.getSequenceAt(1)).length); + + /* + * Inspect mapping for Human protein - should map to 2nd and 4th cDNA seqs + */ + AlignedCodonFrame humanMapping = 
protein.getCodonFrame(protein + .getSequenceAt(0))[0]; + assertEquals(2, humanMapping.getdnaSeqs().length); + assertEquals(cdna1.getSequenceAt(1).getDatasetSequence(), + humanMapping.getdnaSeqs()[0]); + assertEquals(cdna1.getSequenceAt(3).getDatasetSequence(), + humanMapping.getdnaSeqs()[1]); + Mapping[] protMappings = humanMapping.getProtMappings(); + // two mappings, both to cDNA with stop codon + assertEquals(2, protMappings.length); + MapList mapList = protMappings[0].getMap(); + assertEquals(3, mapList.getFromRatio()); + assertEquals(1, mapList.getToRatio()); + assertTrue(Arrays.equals(new int[] + { 1, 9 }, mapList.getFromRanges())); + assertTrue(Arrays.equals(new int[] + { 1, 3 }, mapList.getToRanges())); + mapList = protMappings[1].getMap(); + assertEquals(3, mapList.getFromRatio()); + assertEquals(1, mapList.getToRatio()); + assertTrue(Arrays.equals(new int[] + { 1, 9 }, mapList.getFromRanges())); + assertTrue(Arrays.equals(new int[] + { 1, 3 }, mapList.getToRanges())); + + /* + * Inspect mapping for Mouse protein - should map to 1st/3rd/5th cDNA seqs + */ + AlignedCodonFrame mouseMapping = protein.getCodonFrame(protein + .getSequenceAt(1))[0]; + assertEquals(3, mouseMapping.getdnaSeqs().length); + assertEquals(cdna1.getSequenceAt(0).getDatasetSequence(), + mouseMapping.getdnaSeqs()[0]); + assertEquals(cdna1.getSequenceAt(2).getDatasetSequence(), + mouseMapping.getdnaSeqs()[1]); + assertEquals(cdna1.getSequenceAt(4).getDatasetSequence(), + mouseMapping.getdnaSeqs()[2]); + + // three mappings + protMappings = mouseMapping.getProtMappings(); + assertEquals(3, protMappings.length); + + // first mapping to cDNA with start codon + mapList = protMappings[0].getMap(); + assertEquals(3, mapList.getFromRatio()); + assertEquals(1, mapList.getToRatio()); + assertTrue(Arrays.equals(new int[] + { 4, 12 }, mapList.getFromRanges())); + assertTrue(Arrays.equals(new int[] + { 1, 3 }, mapList.getToRanges())); + + // second mapping to cDNA with stop codon + mapList = 
protMappings[1].getMap(); + assertEquals(3, mapList.getFromRatio()); + assertEquals(1, mapList.getToRatio()); + assertTrue(Arrays.equals(new int[] + { 1, 9 }, mapList.getFromRanges())); + assertTrue(Arrays.equals(new int[] + { 1, 3 }, mapList.getToRanges())); + + // third mapping to cDNA with start and stop codon + mapList = protMappings[2].getMap(); + assertEquals(3, mapList.getFromRatio()); + assertEquals(1, mapList.getToRatio()); + assertTrue(Arrays.equals(new int[] + { 4, 12 }, mapList.getFromRanges())); + assertTrue(Arrays.equals(new int[] + { 1, 3 }, mapList.getToRanges())); } } diff --git a/test/jalview/datamodel/AlignmentTest.java b/test/jalview/datamodel/AlignmentTest.java index 1713bc6..2b1fc72 100644 --- a/test/jalview/datamodel/AlignmentTest.java +++ b/test/jalview/datamodel/AlignmentTest.java @@ -3,9 +3,9 @@ package jalview.datamodel; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; import jalview.io.AppletFormatAdapter; import jalview.io.FormatAdapter; +import jalview.util.MapList; import java.io.IOException; import java.util.Iterator; @@ -35,26 +35,42 @@ public class AlignmentTest "#=GR D.melanogaster.3 SS (.(((...(....(((((((\n" + "//"; - private static final String TEST_DATA2 = - ">TEST21 test21\n" + - "AC-GG--CUC-CAA-CT\n" + - ">TEST22 test22\n" + - "-CG-TTA--ACG---AAGT\n"; - - private static final String TEST_DATA3 = - ">TEST31 test31\n" + + private static final String AA_SEQS_1 = + ">Seq1Name\n" + "K-QY--L\n" + - ">TEST32 test32\n" + + ">Seq2Name\n" + "-R-FP-W-\n"; - private static final String TEST_DATA4 = - ">TEST41 test41\n" + + private static final String CDNA_SEQS_1 = + ">Seq1Name\n" + + "AC-GG--CUC-CAA-CT\n" + + ">Seq2Name\n" + + "-CG-TTA--ACG---AAGT\n"; + + private static final String CDNA_SEQS_2 = + ">Seq1Name\n" + "GCTCGUCGTACT\n" + - ">TEST42 test42\n" + + ">Seq2Name\n" + "GGGTCAGGCAGT\n"; // @formatter:on - private 
Alignment al; + private AlignmentI al; + + /** + * Helper method to load an alignment and ensure dataset sequences are set up. + * + * @param data + * @param format TODO + * @return + * @throws IOException + */ + protected AlignmentI loadAlignment(final String data, String format) throws IOException + { + Alignment a = new FormatAdapter().readFile(data, + AppletFormatAdapter.PASTE, format); + a.setDataset(null); + return a; + } /* * Read in Stockholm format test data including secondary structure @@ -63,8 +79,7 @@ public class AlignmentTest @Before public void setUp() throws IOException { - al = new FormatAdapter().readFile(TEST_DATA, - AppletFormatAdapter.PASTE, "STH"); + al = loadAlignment(TEST_DATA, "STH"); int i = 0; for (AlignmentAnnotation ann : al.getAlignmentAnnotation()) { @@ -90,42 +105,11 @@ public class AlignmentTest } /** - * Tests for method that checks for alignment 'mappability'. + * Tests for realigning as per a supplied alignment: Dna as Dna. * - * @throws IOException - */ - @Test - public void testIsMappableTo() throws IOException - { - al = new FormatAdapter().readFile(TEST_DATA2, - AppletFormatAdapter.PASTE, "FASTA"); - al.setDataset(null); - - // not mappable to self - assertFalse(al.isMappableTo(al)); - - // dna mappable to protein and vice versa - AlignmentI alp = new FormatAdapter().readFile(TEST_DATA3, - AppletFormatAdapter.PASTE, "FASTA"); - alp.setDataset(null); - assertTrue(al.isMappableTo(alp)); - assertTrue(alp.isMappableTo(al)); - assertFalse(alp.isMappableTo(alp)); - - // not mappable if any sequence length mismatch - alp.getSequenceAt(1).setSequence("-R--FP-"); - alp.getSequenceAt(1).setDatasetSequence(new Sequence("", "RFP")); - assertFalse(alp.isMappableTo(al)); - assertFalse(al.isMappableTo(alp)); - - // not mappable if number of sequences differs - alp.deleteSequence(1); - assertFalse(alp.isMappableTo(al)); - assertFalse(al.isMappableTo(alp)); - } - - /** - * Tests for realigning as per a supplied alignment. 
+ * Note: AlignedCodonFrame's state variables are named for protein-to-cDNA + * mapping, but can be exploited for a general 'sequence-to-sequence' mapping + * as here. * * @throws IOException */ @@ -133,13 +117,21 @@ public class AlignmentTest public void testAlignAs_dnaAsDna() throws IOException { // aligned cDNA: - Alignment al1 = new FormatAdapter().readFile(TEST_DATA2, - AppletFormatAdapter.PASTE, "FASTA"); - al1.setDataset(null); + AlignmentI al1 = loadAlignment(CDNA_SEQS_1, "FASTA"); // unaligned cDNA: - Alignment al2 = new FormatAdapter().readFile(TEST_DATA4, - AppletFormatAdapter.PASTE, "FASTA"); - al2.setDataset(null); + AlignmentI al2 = loadAlignment(CDNA_SEQS_2, "FASTA"); + + /* + * Make mappings between sequences. The 'aligned cDNA' is playing the role + * of what would normally be protein here. + */ + AlignedCodonFrame acf = new AlignedCodonFrame(al1.getWidth()); + MapList ml = new MapList(new int[] + { 1, 12 }, new int[] + { 1, 12 }, 1, 1); + acf.addMap(al2.getSequenceAt(0), al1.getSequenceAt(0), ml); + acf.addMap(al2.getSequenceAt(1), al1.getSequenceAt(1), ml); + al1.addCodonFrame(acf); al2.alignAs(al1); assertEquals("GC-TC--GUC-GTA-CT", al2.getSequenceAt(0) @@ -149,30 +141,21 @@ public class AlignmentTest } /** - * Aligning protein from cDNA yet to be implemented. + * Aligning protein from cDNA yet to be implemented, does nothing. 
* * @throws IOException */ @Test public void testAlignAs_proteinAsCdna() throws IOException { - // aligned cDNA: - Alignment al1 = new FormatAdapter().readFile(TEST_DATA2, - AppletFormatAdapter.PASTE, "FASTA"); - al1.setDataset(null); - // unaligned cDNA: - Alignment al2 = new FormatAdapter().readFile(TEST_DATA3, - AppletFormatAdapter.PASTE, "FASTA"); - al2.setDataset(null); + AlignmentI al1 = loadAlignment(CDNA_SEQS_1, "FASTA"); + AlignmentI al2 = loadAlignment(AA_SEQS_1, "FASTA"); + String before0 = al2.getSequenceAt(0).getSequenceAsString(); + String before1 = al2.getSequenceAt(1).getSequenceAsString(); - try - { - al2.alignAs(al1); - fail("No exception thrown"); - } catch (UnsupportedOperationException e) - { - // expected; - } + al2.alignAs(al1); + assertEquals(before0, al2.getSequenceAt(0).getSequenceAsString()); + assertEquals(before1, al2.getSequenceAt(1).getSequenceAsString()); } /** @@ -183,14 +166,18 @@ public class AlignmentTest @Test public void testAlignAs_cdnaAsProtein() throws IOException { - // aligned cDNA: - Alignment al1 = new FormatAdapter().readFile(TEST_DATA2, - AppletFormatAdapter.PASTE, "FASTA"); - al1.setDataset(null); - // unaligned cDNA: - Alignment al2 = new FormatAdapter().readFile(TEST_DATA3, - AppletFormatAdapter.PASTE, "FASTA"); - al2.setDataset(null); + /* + * Load alignments and add mappings for cDNA to protein + */ + AlignmentI al1 = loadAlignment(CDNA_SEQS_1, "FASTA"); + AlignmentI al2 = loadAlignment(AA_SEQS_1, "FASTA"); + AlignedCodonFrame acf = new AlignedCodonFrame(al2.getWidth()); + MapList ml = new MapList(new int[] + { 1, 12 }, new int[] + { 1, 4 }, 3, 1); + acf.addMap(al1.getSequenceAt(0), al2.getSequenceAt(0), ml); + acf.addMap(al1.getSequenceAt(1), al2.getSequenceAt(1), ml); + al2.addCodonFrame(acf); al1.alignAs(al2); assertEquals("ACG---GCUCCA------ACT", al1.getSequenceAt(0) -- 1.7.10.2