/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see http://www.gnu.org/licenses/.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
package jalview.analysis;

import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertNull;

import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.Annotation;
import jalview.datamodel.HiddenMarkovModel;
import jalview.datamodel.Profile;
import jalview.datamodel.ProfileI;
import jalview.datamodel.Profiles;
import jalview.datamodel.ProfilesI;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
import jalview.gui.JvOptionPane;
import jalview.io.DataSourceType;
import jalview.io.FileParse;
import jalview.io.HMMFile;

import java.io.IOException;
import java.net.MalformedURLException;

import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

/**
 * Functional tests for {@link AAFrequency}: column profile calculation,
 * consensus annotation generation, and the HMM-based profile / information
 * content helpers.
 */
public class AAFrequencyTest
{
  /*
   * HMM loaded once per class by setUp(); shared by the HMM-related tests
   */
  HiddenMarkovModel hmm;

  /**
   * Puts JvOptionPane into non-interactive mode with a mock CANCEL response
   * before any test in this class runs.
   */
  @BeforeClass(alwaysRun = true)
  public void setUpJvOptionPane()
  {
    JvOptionPane.setInteractiveMode(false);
    JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
  }

  /**
   * Loads the test HMM file into {@link #hmm}.
   * 
   * @throws IOException
   *           if the HMM file cannot be read
   * @throws MalformedURLException
   *           declared for compatibility with the file parsing API
   */
  @BeforeClass(alwaysRun = true)
  public void setUp() throws IOException, MalformedURLException
  {
    /*
     * load a dna (ACGT) HMM file to a HiddenMarkovModel
     */
    HMMFile hmmFile = new HMMFile(new FileParse(
            "test/jalview/io/test_MADE1_hmm.txt", DataSourceType.FILE));
    hmm = hmmFile.getHMM();
  }

  /**
   * Tests AAFrequency.calculate with the 'save full profile' flag off: only
   * the summary values (percentage identity, max count, modal residue) are
   * checked, and no residue counts are retained.
   */
  @Test(groups = { "Functional" })
  public void testCalculate_noProfile()
  {
    SequenceI seq1 = new Sequence("Seq1", "CAG-T");
    SequenceI seq2 = new Sequence("Seq2", "CAC-T");
    SequenceI seq3 = new Sequence("Seq3", "C---G");
    SequenceI seq4 = new Sequence("Seq4", "CA--t");
    SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
    int width = seq1.getLength();
    ProfilesI result = AAFrequency.calculate(seqs, width, 0, width,
            false);

    // col 0 is 100% C
    ProfileI col = result.get(0);
    assertEquals(100f, col.getPercentageIdentity(false));
    assertEquals(100f, col.getPercentageIdentity(true));
    assertEquals(4, col.getMaxCount());
    assertEquals("C", col.getModalResidue());
    assertNull(col.getCounts());

    // col 1 is 75% A
    col = result.get(1);
    assertEquals(75f, col.getPercentageIdentity(false));
    assertEquals(100f, col.getPercentageIdentity(true));
    assertEquals(3, col.getMaxCount());
    assertEquals("A", col.getModalResidue());

    // col 2 is 50% G 50% C or 25/25 counting gaps
    col = result.get(2);
    assertEquals(25f, col.getPercentageIdentity(false));
    assertEquals(50f, col.getPercentageIdentity(true));
    assertEquals(1, col.getMaxCount());
    assertEquals("CG", col.getModalResidue());

    // col 3 is all gaps
    col = result.get(3);
    assertEquals(0f, col.getPercentageIdentity(false));
    assertEquals(0f, col.getPercentageIdentity(true));
    assertEquals(0, col.getMaxCount());
    assertEquals("", col.getModalResidue());

    // col 4 is 75% T 25% G
    col = result.get(4);
    assertEquals(75f, col.getPercentageIdentity(false));
    assertEquals(75f, col.getPercentageIdentity(true));
    assertEquals(3, col.getMaxCount());
    assertEquals("T", col.getModalResidue());
  }

  /**
   * Tests AAFrequency.calculate with the 'save full profile' flag on: per
   * residue counts, profile height and non-gapped count are checked for each
   * column.
   */
  @Test(groups = { "Functional" })
  public void testCalculate_withProfile()
  {
    SequenceI seq1 = new Sequence("Seq1", "CAGT");
    SequenceI seq2 = new Sequence("Seq2", "CACT");
    SequenceI seq3 = new Sequence("Seq3", "C--G");
    SequenceI seq4 = new Sequence("Seq4", "CA-t");
    SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
    int width = seq1.getLength();
    ProfilesI result = AAFrequency.calculate(seqs, width, 0, width,
            true);

    ProfileI profile = result.get(0);
    assertEquals(4, profile.getCounts().getCount('C'));
    assertEquals(4, profile.getHeight());
    assertEquals(4, profile.getNonGapped());

    profile = result.get(1);
    assertEquals(3, profile.getCounts().getCount('A'));
    assertEquals(4, profile.getHeight());
    assertEquals(3, profile.getNonGapped());

    profile = result.get(2);
    assertEquals(1, profile.getCounts().getCount('C'));
    assertEquals(1, profile.getCounts().getCount('G'));
    assertEquals(4, profile.getHeight());
    assertEquals(2, profile.getNonGapped());

    profile = result.get(3);
    assertEquals(3, profile.getCounts().getCount('T'));
    assertEquals(1, profile.getCounts().getCount('G'));
    assertEquals(4, profile.getHeight());
    assertEquals(4, profile.getNonGapped());
  }

  /**
   * Crude timing harness for AAFrequency.calculate with full profiles; it
   * prints the elapsed milliseconds for 100000 repetitions. Disabled, so it
   * does not run as part of the normal test suite.
   */
  @Test(groups = { "Functional" }, enabled = false)
  public void testCalculate_withProfileTiming()
  {
    SequenceI seq1 = new Sequence("Seq1", "CAGT");
    SequenceI seq2 = new Sequence("Seq2", "CACT");
    SequenceI seq3 = new Sequence("Seq3", "C--G");
    SequenceI seq4 = new Sequence("Seq4", "CA-t");
    SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };

    // ensure class loaded and initialised
    int width = seq1.getLength();
    AAFrequency.calculate(seqs, width, 0, width, true);

    int reps = 100000;
    long start = System.currentTimeMillis();
    for (int i = 0; i < reps; i++)
    {
      AAFrequency.calculate(seqs, width, 0, width, true);
    }
    System.out.println(System.currentTimeMillis() - start);
  }

  /**
   * Test generation of consensus annotation with options 'include gaps'
   * (profile percentages are of all sequences, whether gapped or not), and
   * 'show logo' (the full profile with all residue percentages is reported in
   * the description for the tooltip)
   */
  @Test(groups = { "Functional" })
  public void testCompleteConsensus_includeGaps_showLogo()
  {
    /*
     * first compute the profiles
     */
    SequenceI seq1 = new Sequence("Seq1", "CAG-T");
    SequenceI seq2 = new Sequence("Seq2", "CAC-T");
    SequenceI seq3 = new Sequence("Seq3", "C---G");
    SequenceI seq4 = new Sequence("Seq4", "CA--t");
    SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
    int width = seq1.getLength();
    ProfilesI profiles = AAFrequency.calculate(seqs, width, 0, width,
            true);

    AlignmentAnnotation consensus = new AlignmentAnnotation("Consensus",
            "PID", new Annotation[width]);
    AAFrequency
            .completeConsensus(consensus, profiles, 0, 5, false, true, 4);

    Annotation ann = consensus.annotations[0];
    assertEquals("C 100%", ann.description);
    assertEquals("C", ann.displayCharacter);
    ann = consensus.annotations[1];
    assertEquals("A 75%", ann.description);
    assertEquals("A", ann.displayCharacter);
    ann = consensus.annotations[2];
    assertEquals("C 25%; G 25%", ann.description);
    assertEquals("+", ann.displayCharacter);
    ann = consensus.annotations[3];
    assertEquals("", ann.description);
    assertEquals("-", ann.displayCharacter);
    ann = consensus.annotations[4];
    assertEquals("T 75%; G 25%", ann.description);
    assertEquals("T", ann.displayCharacter);
  }

  /**
   * Test generation of consensus annotation with options 'ignore gaps' (profile
   * percentages are of the non-gapped sequences) and 'no logo' (only the modal
   * residue[s] percentage is reported in the description for the tooltip)
   */
  @Test(groups = { "Functional" })
  public void testCompleteConsensus_ignoreGaps_noLogo()
  {
    /*
     * first compute the profiles
     */
    SequenceI seq1 = new Sequence("Seq1", "CAG-T");
    SequenceI seq2 = new Sequence("Seq2", "CAC-T");
    SequenceI seq3 = new Sequence("Seq3", "C---G");
    SequenceI seq4 = new Sequence("Seq4", "CA--t");
    SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
    int width = seq1.getLength();
    ProfilesI profiles = AAFrequency.calculate(seqs, width, 0, width,
            true);

    AlignmentAnnotation consensus = new AlignmentAnnotation("Consensus",
            "PID", new Annotation[width]);
    AAFrequency
            .completeConsensus(consensus, profiles, 0, 5, true, false, 4);

    Annotation ann = consensus.annotations[0];
    assertEquals("C 100%", ann.description);
    assertEquals("C", ann.displayCharacter);
    ann = consensus.annotations[1];
    assertEquals("A 100%", ann.description);
    assertEquals("A", ann.displayCharacter);
    ann = consensus.annotations[2];
    assertEquals("[CG] 50%", ann.description);
    assertEquals("+", ann.displayCharacter);
    ann = consensus.annotations[3];
    assertEquals("", ann.description);
    assertEquals("-", ann.displayCharacter);
    ann = consensus.annotations[4];
    assertEquals("T 75%", ann.description);
    assertEquals("T", ann.displayCharacter);
  }

  /**
   * Tests AAFrequency.extractHMMProfile for two columns of the test HMM (one
   * with and one without the 'remove below background' flag set), and checks
   * that a null HMM yields a null profile.
   * 
   * @throws MalformedURLException
   *           declared for compatibility; not expected
   * @throws IOException
   *           declared for compatibility; not expected
   */
  @Test(groups = { "Functional" })
  public void testExtractHMMProfile()
          throws MalformedURLException, IOException
  {
    int[] expected = { 0, 4, 100, 'T', 71, 'C', 12, 'G', 9, 'A', 9 };
    int[] actual = AAFrequency.extractHMMProfile(hmm, 17, false, false);
    assertProfileEquals(expected, actual);

    /*
     * NOTE(review): this case previously re-compared 'actual' with
     * 'expected', so expected2 had never been verified against actual2;
     * if this assertion fails, confirm the expected2 values against the
     * HMM file rather than reverting the comparison
     */
    int[] expected2 = { 0, 4, 100, 'A', 85, 'C', 0, 'G', 0, 'T', 0 };
    int[] actual2 = AAFrequency.extractHMMProfile(hmm, 2, true, false);
    assertProfileEquals(expected2, actual2);

    assertNull(AAFrequency.extractHMMProfile(null, 98978867, true, false));
  }

  /**
   * Asserts that two profile arrays have the same length and the same value
   * at every index, reporting the failing index in the assertion message.
   * 
   * @param expected
   *          the expected profile values
   * @param actual
   *          the profile values returned by the code under test
   */
  private static void assertProfileEquals(int[] expected, int[] actual)
  {
    // check the length first so that a truncated result cannot pass
    assertEquals("profile length", expected.length, actual.length);
    for (int i = 0; i < expected.length; i++)
    {
      assertEquals("profile[" + i + "]", expected[i], actual[i]);
    }
  }

  /**
   * Tests AAFrequency.getAnalogueCount for selected columns and symbols of
   * the test HMM, with and without the 'remove below background' and 'use
   * info height' options.
   */
  @Test(groups = { "Functional" })
  public void testGetAnalogueCount()
  {
    /*
     * 'T' in column 0 has emission probability 0.7859, scales to 7859
     */
    int count = AAFrequency.getAnalogueCount(hmm, 0, 'T', false, false);
    assertEquals(7859, count);

    /*
     * same with 'use info height': value is multiplied by log ratio
     * log(value / background) / log(2) = log(0.7859/0.25)/0.693
     * = log(3.1)/0.693 = 1.145/0.693 = 1.66
     * so value becomes 1.2987 and scales up to 12987
     */
    count = AAFrequency.getAnalogueCount(hmm, 0, 'T', false, true);
    assertEquals(12987, count);

    /*
     * 'G' in column 20 has emission probability 0.75457, scales to 7546
     */
    count = AAFrequency.getAnalogueCount(hmm, 20, 'G', false, false);
    assertEquals(7546, count);

    /*
     * 'G' in column 1077 has emission probability 0.0533, here
     * ignored (set to 0) since below background of 0.25
     */
    count = AAFrequency.getAnalogueCount(hmm, 1077, 'G', true, false);
    assertEquals(0, count);
  }

  /**
   * Tests AAFrequency.completeInformation: after the call, the annotation
   * values for a two-column annotation row (whose sequence reference carries
   * the test HMM) are checked against the expected information content.
   */
  @Test(groups = { "Functional" })
  public void testCompleteInformation()
  {
    ProfileI prof1 = new Profile(1, 0, 100, "A");
    ProfileI prof2 = new Profile(1, 0, 100, "-");

    ProfilesI profs = new Profiles(new ProfileI[] { prof1, prof2 });
    Annotation ann1 = new Annotation(6.5f);
    Annotation ann2 = new Annotation(0f);
    Annotation[] annots = new Annotation[] { ann1, ann2 };
    SequenceI seq = new Sequence("", "AA", 0, 0);
    seq.setHMM(hmm);
    AlignmentAnnotation annot = new AlignmentAnnotation("", "", annots);
    annot.setSequenceRef(seq);
    AAFrequency.completeInformation(annot, profs, 0, 1);

    float ic = annot.annotations[0].value;
    assertEquals(0.91532f, ic, 0.0001f);
    ic = annot.annotations[1].value;
    assertEquals(0f, ic, 0.0001f);
  }
}