From: gmungoc Date: Sun, 4 Jan 2015 17:33:20 +0000 (+0000) Subject: JAL-1619 first draft of 'linked protein and cDNA' X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=adab53c4c18c70862109c1fdc33904e71aa005cf;p=jalview.git JAL-1619 first draft of 'linked protein and cDNA' --- diff --git a/resources/lang/Messages.properties b/resources/lang/Messages.properties index fce8470..ef303bc 100644 --- a/resources/lang/Messages.properties +++ b/resources/lang/Messages.properties @@ -688,6 +688,9 @@ label.load_tree_for_sequence_set = Load a tree for this sequence set label.export_image = Export Image label.vamsas_store = VAMSAS store label.translate_cDNA = Translate cDNA +label.cDNA = cDNA +label.associate = Associate +label.align = Align label.extract_scores = Extract Scores label.get_cross_refs = Get Cross References label.sort_alignment_new_tree = Sort Alignment With New Tree @@ -1176,3 +1179,7 @@ label.show_logo = Show Logo label.normalise_logo = Normalise Logo label.no_colour_selection_in_scheme = Please, make a colour selection before to apply colour scheme label.no_colour_selection_warn = Error saving colour scheme +label.nonstandard_translation = Non-standard translation +warn.nonstandard_translation = Non-standard translation(s) detected at {0}.
Do you wish to proceed? +label.cdna_realign = Warning +warn.cdna_realign = cDNA will be realigned if necessary to match the protein alignment.
Do you wish to proceed? diff --git a/src/jalview/analysis/Dna.java b/src/jalview/analysis/Dna.java index 172a910..0c020dd 100644 --- a/src/jalview/analysis/Dna.java +++ b/src/jalview/analysis/Dna.java @@ -42,6 +42,10 @@ import java.util.Hashtable; import java.util.List; import java.util.Vector; +import java.util.ArrayList; +import java.util.Hashtable; +import java.util.Vector; + public class Dna { /** @@ -161,7 +165,7 @@ public class Dna AlignmentI al = new Alignment(newseqs); al.padGaps(); // ensure we look aligned. al.setDataset(dataset); - translateAlignedAnnotations(annotations, al, codons); + // translateAlignedAnnotations(annotations, al, codons); al.addCodonFrame(codons); return al; } diff --git a/src/jalview/datamodel/Alignment.java b/src/jalview/datamodel/Alignment.java index 9c5914f..f31fcd8 100755 --- a/src/jalview/datamodel/Alignment.java +++ b/src/jalview/datamodel/Alignment.java @@ -1594,4 +1594,112 @@ public class Alignment implements AlignmentI { return dataset; } + + /** + * Answers true if the supplied alignment has the same number of sequences, + * and they are of equivalent length, ignoring gaps. Alignments should be of + * the same type (protein/nucleotide) or different types with 3:1 length + * scaling. + * + * @param al + */ + @Override + public boolean isMappableTo(AlignmentI al) + { + int thisCodonScale = this.isNucleotide() ? 1 : 3; + int thatCodonScale = al.isNucleotide() ? 1 : 3; + if (this == al || this.getHeight() != al.getHeight()) + { + return false; + } + int i = 0; + for (SequenceI seq : this.getSequences()) + { + final int thisSequenceDnaLength = seq.getDatasetSequence() + .getLength() * thisCodonScale; + final int thatSequenceDnaLength = al.getSequenceAt(i) + .getDatasetSequence().getLength() + * thatCodonScale; + if (thisSequenceDnaLength != thatSequenceDnaLength) + { + return false; + } + i++; + } + return true; + } + + /** + * Align this alignment the same as the given one. 
If both of the same type + * (nucleotide/protein) then align both identically. If this is nucleotide and + * the other is protein, make 3 gaps for each gap in the protein sequences. If + * this is protein and the other is nucleotide, insert a gap for each 3 gaps + * (or part thereof) between nucleotide bases. The two alignments should be + * compatible in height and lengths, but if not, then discrepancies will be + * ignored with unpredictable results. + * + * @param al + * @throws UnsupportedOperationException + * if alignment of protein from cDNA is requested (not yet + * implemented) + */ + @Override + public void alignAs(AlignmentI al) + { + boolean thisIsNucleotide = this.isNucleotide(); + boolean thatIsProtein = !al.isNucleotide(); + if (!thatIsProtein && !thisIsNucleotide) + { + throw new UnsupportedOperationException( + "Alignment of protein from cDNA not implemented"); + } + char thisGapChar = this.getGapCharacter(); + char thatGapChar = al.getGapCharacter(); + String gap = thisIsNucleotide && thatIsProtein ? String + .valueOf(new char[] + { thisGapChar, thisGapChar, thisGapChar }) : String + .valueOf(thisGapChar); + int ratio = thisIsNucleotide && thatIsProtein ? 3 : 1; + int i = 0; + for (SequenceI seq : this.getSequences()) + { + SequenceI other = al.getSequenceAt(i++); + if (other == null) + { + continue; + } + char[] thisDs = seq.getDatasetSequence().getSequence(); + char[] thatDs = other.getSequence(); + StringBuilder thisAligned = new StringBuilder(2 * thisDs.length); + int thisDsPosition = 0; + for (char thatChar : thatDs) + { + if (thatChar == thatGapChar) + { + /* + * Add (equivalent of) a gap + */ + thisAligned.append(gap); + } + else + { + /* + * Add (equivalent of) a residue + */ + for (int j = 0; j < ratio && thisDsPosition < thisDs.length; j++) + { + thisAligned.append(thisDs[thisDsPosition++]); + } + } + } + /* + * Include any 'extra' residues (there shouldn't be). 
+ */ + while (thisDsPosition < thisDs.length) + { + thisAligned.append(thisDs[thisDsPosition++]); + } + seq.setSequence(new String(thisAligned)); + } + } } diff --git a/src/jalview/datamodel/AlignmentI.java b/src/jalview/datamodel/AlignmentI.java index c7e30a4..130a073 100755 --- a/src/jalview/datamodel/AlignmentI.java +++ b/src/jalview/datamodel/AlignmentI.java @@ -482,4 +482,26 @@ public interface AlignmentI extends AnnotatedCollectionI * @param alignmentAnnotation */ public void validateAnnotation(AlignmentAnnotation alignmentAnnotation); + + /** + * Answers true if the two alignments residues could be put into + * correspondence, i.e. the supplied alignment has the same number of + * sequences, and they are of equivalent length, ignoring gaps. Alignments + * should be of the same type (protein/nucleotide) or different types with 3:1 + * length scaling. + * + * @param al + */ + public boolean isMappableTo(AlignmentI al); + + /** + * Align this alignment the same as the given one. If both of the same type + * (nucleotide/protein) then align both identically. If this is nucleotide and + * the other is protein, make 3 gaps for each gap in the protein sequences. If + * this is protein and the other is nucleotide, insert a gap for each 3 gaps + * (or part thereof) between nucleotide bases. 
+ * + * @param al + */ + public void alignAs(AlignmentI al); } diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index 865a9aa..b839a38 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -84,6 +84,8 @@ import jalview.schemes.TaylorColourScheme; import jalview.schemes.TurnColourScheme; import jalview.schemes.UserColourScheme; import jalview.schemes.ZappoColourScheme; +import jalview.structure.StructureSelectionManager; +import jalview.util.MapList; import jalview.util.MessageManager; import jalview.ws.jws1.Discoverer; import jalview.ws.jws2.Jws2Discoverer; @@ -119,6 +121,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Enumeration; import java.util.Hashtable; +import java.util.Iterator; import java.util.List; import java.util.Vector; @@ -703,21 +706,253 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, public void setGUINucleotide(boolean nucleotide) { showTranslation.setVisible(nucleotide); + cdna.setVisible(!nucleotide); + configureCdnaMenu(); conservationMenuItem.setEnabled(!nucleotide); modifyConservation.setEnabled(!nucleotide); showGroupConservation.setEnabled(!nucleotide); rnahelicesColour.setEnabled(nucleotide); purinePyrimidineColour.setEnabled(nucleotide); - // Remember AlignFrame always starts as protein - // if (!nucleotide) + } + + /** + * Add any suitable options to the 'cDNA' sub-menu. Options may be to + * associate a cDNA alignment, or to align an associated alignment. To be + * suitable for association, an AlignFrame has to be nucleotide, and have the + * right number of sequences of corresponding length to this one. + */ + protected void configureCdnaMenu() + { + cdna.removeAll(); + + /* + * Identify candidates for 'associate cDNA', add to menu. 
+ */ + List candidates = getCdnaCandidates(); + for (final AlignFrame candidate : candidates) + { + final String text = MessageManager.getString("label.associate") + " " + + candidate.getTitle(); + JMenuItem option = new JMenuItem(text); + option.addActionListener(new ActionListener() + { + @Override + public void actionPerformed(ActionEvent e) + { + associateCdna(candidate); + } + }); + cdna.add(option); + } + + /* + * Identify candidates for 'align cDNA', add to menu. + */ + final AlignFrame[] alignframes = Desktop.getAlignframes(); + if (alignframes != null) + { + for (final AlignFrame af : alignframes) + { + if (af != this) + { + if (this.viewport.getStructureSelectionManager() + .hasCommandListener(af.viewport)) + { + final String text = MessageManager.getString("label.align") + + " " + af.getTitle(); + JMenuItem option = new JMenuItem(text); + option.addActionListener(new ActionListener() + { + @Override + public void actionPerformed(ActionEvent e) + { + af.alignPanel.getAlignment().alignAs( + AlignFrame.this.alignPanel.getAlignment()); + af.viewport.alignmentChanged(af.alignPanel); + } + }); + cdna.add(option); + } + } + } + } + + cdna.setEnabled(cdna.getMenuComponentCount() > 0); + } + + /** + * Returns a list of AlignFrame which are valid candidates for being the cDNA + * to map to this (protein) alignment. Valid means a nucleotide alignment with + * matching number of sequences and sequence lengths (excluding gaps). 
+ * + * @return + */ + protected List getCdnaCandidates() + { + List result = new ArrayList(); + if (this.alignPanel != null) + { + AlignmentI thisAlignment = this.alignPanel.getAlignment(); + if (thisAlignment == null || thisAlignment.isNucleotide()) + { + return result; + } + final AlignFrame[] alignframes = Desktop.getAlignframes(); + if (alignframes != null) + { + for (AlignFrame af : alignframes) + { + if (af.alignPanel != null) + { + final AlignmentI thatAlignment = af.alignPanel.getAlignment(); + if (thatAlignment.isNucleotide() + && thisAlignment.isMappableTo(thatAlignment)) + { + // TODO exclude an AlignFrame which is already mapped to this one + // simple version: exclude if already a CommandListener (should + // cover most cases but not all) + if (!this.viewport.getStructureSelectionManager() + .hasCommandListener(af.viewport)) + { + result.add(af); + } + } + } + } + } + } + return result; + } + + /** + * Build the codon mappings between the given (nucleotide) alignment and this + * (protein) alignment. Also make the cDNA alignment a CommandListener for the + * protein alignment so that edits are mirrored. The alignments must have the + * same number, and equivalent lengths, of (unaligned) sequence. 
+ * + * @param cdna + * @throws IllegalStateException + * if sequence counts or lengths are incompatible + */ + protected void associateCdna(AlignFrame cdna) + { + /* + * Warn that cDNA may be realigned to match protein + */ + // int confirm = JOptionPane.showConfirmDialog( + // this, + // JvSwingUtils.wrapTooltip(true, + // MessageManager.getString("warn.cdna_realign")), + // MessageManager.getString("label.cdna_realign"), + // JOptionPane.OK_CANCEL_OPTION); + // if (confirm == JOptionPane.CANCEL_OPTION + // || confirm == JOptionPane.CLOSED_OPTION) // { - // showTr - // calculateMenu.remove(calculateMenu.getItemCount() - 2); + // return; // } + + final AlignmentI aaAlignment = this.alignPanel.getAlignment(); + Iterator thisSeqs = aaAlignment + .getSequences().iterator(); + Iterator cdnaSeqs = cdna.alignPanel.getAlignment() + .getSequences().iterator(); + AlignedCodonFrame acf = new AlignedCodonFrame(aaAlignment.getWidth()); + while (thisSeqs.hasNext()) + { + if (!cdnaSeqs.hasNext()) + { + throw new IllegalStateException("Too few sequences to map"); + } + final SequenceI aaSeq = thisSeqs.next(); + String aaSeqString = aaSeq.getDatasetSequence() + .getSequenceAsString(); + final SequenceI cdnaSeq = cdnaSeqs.next(); + String cdnaSeqString = cdnaSeq.getDatasetSequence() + .getSequenceAsString(); + final int aaLength = aaSeqString.length(); + final int cdnaLength = cdnaSeqString.length(); + if (cdnaLength != 3 * aaLength) + { + throw new IllegalStateException( + "Protein/cDNA lengths don't match: " + aaLength + "/" + + cdnaLength); + } + + /* + * Warn if mapping includes non-standard translations + */ + if (!doTranslationWarningCheck(aaSeq.getName(), aaSeqString, + cdnaSeqString)) + { + return; + } + + MapList map = new MapList(new int[] + { 1, cdnaLength }, new int[] + { 1, aaLength }, 3, 1); + acf.addMap(cdnaSeq, aaSeq, map); + + aaAlignment.addCodonFrame(acf); + + final StructureSelectionManager ssm = StructureSelectionManager + 
.getStructureSelectionManager(Desktop.instance); + ssm.addMappings(aaAlignment.getCodonFrames()); + ssm.addCommandListener(cdna.getViewport()); + + /* + * Rebuild 'associate cDna' menu so it now excludes the one just + * associated. + */ + configureCdnaMenu(); + } + } + + /** + * Show a warning if any non-standard cDNA to protein translation would + * result from mapping the sequences. + * + * @param aaSeqName + * @param aaSeqString + * @param cdnaSeqString + * @return true if no warning, or it is accepted, false if user chooses not to + * proceed. + */ + protected boolean doTranslationWarningCheck(String aaSeqName, + String aaSeqString, String cdnaSeqString) + { + final int aaLength = aaSeqString.length(); + boolean warning = false; + String msg = aaSeqName; + for (int i = 0; i < aaLength; i++) + { + String codon = cdnaSeqString.substring(i * 3, i * 3 + 3); + String aa = ResidueProperties.codonTranslate(codon); + if (!(aa.charAt(0) == aaSeqString.charAt(i))) + { + warning = true; + msg += ":" + (i + 1) + ":" + aaSeqString.charAt(i) + "/" + codon + + ":" + aa; + break; + } + } + if (warning) + { + final String txt = JvSwingUtils.wrapTooltip(true, MessageManager + .formatMessage("warn.nonstandard_translation", msg)); + int confirm = JOptionPane.showConfirmDialog(this, txt, + MessageManager.getString("label.nonstandard_translation"), + JOptionPane.OK_CANCEL_OPTION); + if (confirm == JOptionPane.CANCEL_OPTION + || confirm == JOptionPane.CLOSED_OPTION) + { + return false; + } + } + return true; } /** - * set up menus for the currently viewport. This may be called after any + * set up menus for the current viewport. This may be called after any * operation that affects the data in the current view (selection changed, * etc) to update the menus to reflect the new state. 
*/ diff --git a/src/jalview/gui/Desktop.java b/src/jalview/gui/Desktop.java index b8f629a..a1d4492 100644 --- a/src/jalview/gui/Desktop.java +++ b/src/jalview/gui/Desktop.java @@ -21,6 +21,7 @@ package jalview.gui; import jalview.bin.Cache; +import jalview.datamodel.AlignmentI; import jalview.io.FileLoader; import jalview.io.FormatAdapter; import jalview.io.IdentifyFile; @@ -68,6 +69,7 @@ import java.lang.reflect.Constructor; import java.net.URL; import java.util.ArrayList; import java.util.Hashtable; +import java.util.List; import java.util.StringTokenizer; import java.util.Vector; import java.util.concurrent.ExecutorService; @@ -159,6 +161,8 @@ public class Desktop extends jalview.jbgui.GDesktop implements static final int yOffset = 30; + private static final int THREE = 3; + public static jalview.ws.jws1.Discoverer discoverer; public static Object[] jalviewClipboard; diff --git a/src/jalview/jbgui/GAlignFrame.java b/src/jalview/jbgui/GAlignFrame.java index 387bb7f..edb4ac5 100755 --- a/src/jalview/jbgui/GAlignFrame.java +++ b/src/jalview/jbgui/GAlignFrame.java @@ -254,6 +254,8 @@ public class GAlignFrame extends JInternalFrame protected JMenuItem showTranslation = new JMenuItem(); + protected JMenu cdna = new JMenu(); + protected JMenuItem extractScores = new JMenuItem(); protected JMenuItem expandAlignment = new JMenuItem(); @@ -1809,6 +1811,7 @@ public class GAlignFrame extends JInternalFrame showTranslation_actionPerformed(e); } }); + cdna.setText(MessageManager.getString("label.cDNA")); extractScores.setText(MessageManager.getString("label.extract_scores") + "..."); extractScores.addActionListener(new ActionListener() @@ -2335,6 +2338,7 @@ public class GAlignFrame extends JInternalFrame calculateMenu.add(PCAMenuItem); calculateMenu.addSeparator(); calculateMenu.add(showTranslation); + calculateMenu.add(cdna); calculateMenu.add(showProducts); calculateMenu.add(autoCalculate); calculateMenu.add(sortByTree); diff --git 
a/test/jalview/datamodel/AlignmentTest.java b/test/jalview/datamodel/AlignmentTest.java index 93170b7..1713bc6 100644 --- a/test/jalview/datamodel/AlignmentTest.java +++ b/test/jalview/datamodel/AlignmentTest.java @@ -3,7 +3,9 @@ package jalview.datamodel; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import jalview.io.AppletFormatAdapter; +import jalview.io.FormatAdapter; import java.io.IOException; import java.util.Iterator; @@ -32,8 +34,25 @@ public class AlignmentTest "D.melanogaster.3 G.UGGCGCU..UAUGACGCA\n" + "#=GR D.melanogaster.3 SS (.(((...(....(((((((\n" + "//"; - // @formatter:on + private static final String TEST_DATA2 = + ">TEST21 test21\n" + + "AC-GG--CUC-CAA-CT\n" + + ">TEST22 test22\n" + + "-CG-TTA--ACG---AAGT\n"; + + private static final String TEST_DATA3 = + ">TEST31 test31\n" + + "K-QY--L\n" + + ">TEST32 test32\n" + + "-R-FP-W-\n"; + + private static final String TEST_DATA4 = + ">TEST41 test41\n" + + "GCTCGUCGTACT\n" + + ">TEST42 test42\n" + + "GGGTCAGGCAGT\n"; + // @formatter:on private Alignment al; @@ -44,13 +63,14 @@ public class AlignmentTest @Before public void setUp() throws IOException { - al = new jalview.io.FormatAdapter().readFile(TEST_DATA, + al = new FormatAdapter().readFile(TEST_DATA, AppletFormatAdapter.PASTE, "STH"); - for (int i = 0; i < al.getSequencesArray().length; ++i) + int i = 0; + for (AlignmentAnnotation ann : al.getAlignmentAnnotation()) { - al.addAnnotation(al.getSequenceAt(i).getAnnotation()[0]); - al.getSequenceAt(i).getAnnotation()[0].setCalcId("CalcIdFor" + ann.setCalcId("CalcIdFor" + al.getSequenceAt(i).getName()); + i++; } } @@ -68,4 +88,114 @@ public class AlignmentTest assertEquals("D.melanogaster.2", ann.sequenceRef.getName()); assertFalse(iter.hasNext()); } + + /** + * Tests for method that checks for alignment 'mappability'. 
+ * + * @throws IOException + */ + @Test + public void testIsMappableTo() throws IOException + { + al = new FormatAdapter().readFile(TEST_DATA2, + AppletFormatAdapter.PASTE, "FASTA"); + al.setDataset(null); + + // not mappable to self + assertFalse(al.isMappableTo(al)); + + // dna mappable to protein and vice versa + AlignmentI alp = new FormatAdapter().readFile(TEST_DATA3, + AppletFormatAdapter.PASTE, "FASTA"); + alp.setDataset(null); + assertTrue(al.isMappableTo(alp)); + assertTrue(alp.isMappableTo(al)); + assertFalse(alp.isMappableTo(alp)); + + // not mappable if any sequence length mismatch + alp.getSequenceAt(1).setSequence("-R--FP-"); + alp.getSequenceAt(1).setDatasetSequence(new Sequence("", "RFP")); + assertFalse(alp.isMappableTo(al)); + assertFalse(al.isMappableTo(alp)); + + // not mappable if number of sequences differs + alp.deleteSequence(1); + assertFalse(alp.isMappableTo(al)); + assertFalse(al.isMappableTo(alp)); + } + + /** + * Tests for realigning as per a supplied alignment. + * + * @throws IOException + */ + @Test + public void testAlignAs_dnaAsDna() throws IOException + { + // aligned cDNA: + Alignment al1 = new FormatAdapter().readFile(TEST_DATA2, + AppletFormatAdapter.PASTE, "FASTA"); + al1.setDataset(null); + // unaligned cDNA: + Alignment al2 = new FormatAdapter().readFile(TEST_DATA4, + AppletFormatAdapter.PASTE, "FASTA"); + al2.setDataset(null); + + al2.alignAs(al1); + assertEquals("GC-TC--GUC-GTA-CT", al2.getSequenceAt(0) + .getSequenceAsString()); + assertEquals("-GG-GTC--AGG---CAGT", al2.getSequenceAt(1) + .getSequenceAsString()); + } + + /** + * Aligning protein from cDNA yet to be implemented. 
+ * + * @throws IOException + */ + @Test + public void testAlignAs_proteinAsCdna() throws IOException + { + // aligned cDNA: + Alignment al1 = new FormatAdapter().readFile(TEST_DATA2, + AppletFormatAdapter.PASTE, "FASTA"); + al1.setDataset(null); + // protein (to be realigned): + Alignment al2 = new FormatAdapter().readFile(TEST_DATA3, + AppletFormatAdapter.PASTE, "FASTA"); + al2.setDataset(null); + + try + { + al2.alignAs(al1); + fail("No exception thrown"); + } catch (UnsupportedOperationException e) + { + // expected; + } + } + + /** + * Test aligning cdna as per protein alignment. + * + * @throws IOException + */ + @Test + public void testAlignAs_cdnaAsProtein() throws IOException + { + // cDNA (to be realigned): + Alignment al1 = new FormatAdapter().readFile(TEST_DATA2, + AppletFormatAdapter.PASTE, "FASTA"); + al1.setDataset(null); + // aligned protein: + Alignment al2 = new FormatAdapter().readFile(TEST_DATA3, + AppletFormatAdapter.PASTE, "FASTA"); + al2.setDataset(null); + + al1.alignAs(al2); + assertEquals("ACG---GCUCCA------ACT", al1.getSequenceAt(0) + .getSequenceAsString()); + assertEquals("---CGT---TAACGA---AGT---", al1.getSequenceAt(1) + .getSequenceAsString()); + } }