import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.Annotation;
+import jalview.datamodel.HiddenMarkovModel;
import jalview.datamodel.Profile;
+import jalview.datamodel.ProfileI;
+import jalview.datamodel.Profiles;
+import jalview.datamodel.ProfilesI;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
+import jalview.gui.JvOptionPane;
+import jalview.io.DataSourceType;
+import jalview.io.FileParse;
+import jalview.io.HMMFile;
+import java.io.IOException;
+import java.net.MalformedURLException;
+
+import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
public class AAFrequencyTest
{
+ HiddenMarkovModel hmm;
+
+ @BeforeClass(alwaysRun = true)
+ public void setUpJvOptionPane()
+ {
+ JvOptionPane.setInteractiveMode(false);
+ JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
+ }
+
+ @BeforeClass(alwaysRun = true)
+ public void setUp() throws IOException, MalformedURLException
+ {
+ /*
+ * load a dna (ACGT) HMM file to a HiddenMarkovModel
+ */
+ HMMFile hmmFile = new HMMFile(new FileParse(
+ "test/jalview/io/test_MADE1_hmm.txt", DataSourceType.FILE));
+ hmm = hmmFile.getHMM();
+ }
+
@Test(groups = { "Functional" })
public void testCalculate_noProfile()
{
SequenceI seq3 = new Sequence("Seq3", "C---G");
SequenceI seq4 = new Sequence("Seq4", "CA--t");
SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
- Profile[] result = new Profile[seq1.getLength()];
-
- AAFrequency.calculate(seqs, 0, seq1.getLength(), result, false);
+ int width = seq1.getLength();
+ ProfilesI result = AAFrequency.calculate(seqs, width, 0, width,
+ false);
// col 0 is 100% C
- Profile col = result[0];
+ ProfileI col = result.get(0);
assertEquals(100f, col.getPercentageIdentity(false));
assertEquals(100f, col.getPercentageIdentity(true));
assertEquals(4, col.getMaxCount());
assertNull(col.getCounts());
// col 1 is 75% A
- col = result[1];
+ col = result.get(1);
assertEquals(75f, col.getPercentageIdentity(false));
assertEquals(100f, col.getPercentageIdentity(true));
assertEquals(3, col.getMaxCount());
assertEquals("A", col.getModalResidue());
// col 2 is 50% G 50% C or 25/25 counting gaps
- col = result[2];
+ col = result.get(2);
assertEquals(25f, col.getPercentageIdentity(false));
assertEquals(50f, col.getPercentageIdentity(true));
assertEquals(1, col.getMaxCount());
assertEquals("CG", col.getModalResidue());
// col 3 is all gaps
- col = result[3];
+ col = result.get(3);
assertEquals(0f, col.getPercentageIdentity(false));
assertEquals(0f, col.getPercentageIdentity(true));
assertEquals(0, col.getMaxCount());
assertEquals("", col.getModalResidue());
// col 4 is 75% T 25% G
- col = result[4];
+ col = result.get(4);
assertEquals(75f, col.getPercentageIdentity(false));
assertEquals(75f, col.getPercentageIdentity(true));
assertEquals(3, col.getMaxCount());
SequenceI seq3 = new Sequence("Seq3", "C--G");
SequenceI seq4 = new Sequence("Seq4", "CA-t");
SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
- Profile[] result = new Profile[seq1.getLength()];
+ int width = seq1.getLength();
+ ProfilesI result = AAFrequency.calculate(seqs, width, 0, width,
+ true);
- AAFrequency.calculate(seqs, 0, seq1.getLength(), result, true);
- Profile profile = result[0];
+ ProfileI profile = result.get(0);
assertEquals(4, profile.getCounts().getCount('C'));
assertEquals(4, profile.getHeight());
assertEquals(4, profile.getNonGapped());
- profile = result[1];
+ profile = result.get(1);
assertEquals(3, profile.getCounts().getCount('A'));
assertEquals(4, profile.getHeight());
assertEquals(3, profile.getNonGapped());
- profile = result[2];
+ profile = result.get(2);
assertEquals(1, profile.getCounts().getCount('C'));
assertEquals(1, profile.getCounts().getCount('G'));
assertEquals(4, profile.getHeight());
assertEquals(2, profile.getNonGapped());
- profile = result[3];
+ profile = result.get(3);
assertEquals(3, profile.getCounts().getCount('T'));
assertEquals(1, profile.getCounts().getCount('G'));
assertEquals(4, profile.getHeight());
SequenceI seq3 = new Sequence("Seq3", "C--G");
SequenceI seq4 = new Sequence("Seq4", "CA-t");
SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
- Profile[] result = new Profile[seq1.getLength()];
- // ensure class loaded and initialized
- AAFrequency.calculate(seqs, 0, seq1.getLength(), result, true);
+ // ensure class loaded and initialised
+ int width = seq1.getLength();
+ AAFrequency.calculate(seqs, width, 0, width, true);
+
int reps = 100000;
long start = System.currentTimeMillis();
for (int i = 0; i < reps; i++)
{
- AAFrequency.calculate(seqs, 0, seq1.getLength(), result, true);
+ AAFrequency.calculate(seqs, width, 0, width, true);
}
System.out.println(System.currentTimeMillis() - start);
}
SequenceI seq3 = new Sequence("Seq3", "C---G");
SequenceI seq4 = new Sequence("Seq4", "CA--t");
SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
- Profile[] profiles = new Profile[seq1.getLength()];
- AAFrequency.calculate(seqs, 0, seq1.getLength(), profiles, true);
+ int width = seq1.getLength();
+ ProfilesI profiles = AAFrequency.calculate(seqs, width, 0, width, true);
AlignmentAnnotation consensus = new AlignmentAnnotation("Consensus",
- "PID", new Annotation[seq1.getLength()]);
+ "PID", new Annotation[width]);
AAFrequency
.completeConsensus(consensus, profiles, 0, 5, false, true, 4);
SequenceI seq3 = new Sequence("Seq3", "C---G");
SequenceI seq4 = new Sequence("Seq4", "CA--t");
SequenceI[] seqs = new SequenceI[] { seq1, seq2, seq3, seq4 };
- Profile[] profiles = new Profile[seq1.getLength()];
- AAFrequency.calculate(seqs, 0, seq1.getLength(), profiles, true);
+ int width = seq1.getLength();
+ ProfilesI profiles = AAFrequency.calculate(seqs, width, 0, width, true);
AlignmentAnnotation consensus = new AlignmentAnnotation("Consensus",
- "PID", new Annotation[seq1.getLength()]);
+ "PID", new Annotation[width]);
AAFrequency
.completeConsensus(consensus, profiles, 0, 5, true, false, 4);
assertEquals("T 75%", ann.description);
assertEquals("T", ann.displayCharacter);
}
+
+ @Test(groups = { "Functional" })
+ public void testExtractHMMProfile()
+ throws MalformedURLException, IOException
+ {
+ int[] expected = { 0, 4, 100, 'T', 71, 'C', 12, 'G', 9, 'A', 9 };
+ int[] actual = AAFrequency.extractHMMProfile(hmm, 17, false, false);
+ for (int i = 0; i < actual.length; i++)
+ {
+ if (i == 2)
+ {
+ assertEquals(actual[i], expected[i]);
+ }
+ else
+ {
+ assertEquals(actual[i], expected[i]);
+ }
+ }
+
+ int[] expected2 = { 0, 4, 100, 'A', 85, 'C', 0, 'G', 0, 'T', 0 };
+ int[] actual2 = AAFrequency.extractHMMProfile(hmm, 2, true, false);
+ for (int i = 0; i < actual2.length; i++)
+ {
+ if (i == 2)
+ {
+ assertEquals(actual[i], expected[i]);
+ }
+ else
+ {
+ assertEquals(actual[i], expected[i]);
+ }
+ }
+
+ assertNull(AAFrequency.extractHMMProfile(null, 98978867, true, false));
+ }
+
+ @Test(groups = { "Functional" })
+ public void testGetAnalogueCount()
+ {
+ /*
+ * 'T' in column 0 has emission probability 0.7859, scales to 7859
+ */
+ int count = AAFrequency.getAnalogueCount(hmm, 0, 'T', false, false);
+ assertEquals(7859, count);
+
+ /*
+ * same with 'use info height': value is multiplied by log ratio
+ * log(value / background) / log(2) = log(0.7859/0.25)/0.693
+ * = log(3.1)/0.693 = 1.145/0.693 = 1.66
+ * so value becomes 1.2987 and scales up to 12987
+ */
+ count = AAFrequency.getAnalogueCount(hmm, 0, 'T', false, true);
+ assertEquals(12987, count);
+
+ /*
+ * 'G' in column 20 has emission probability 0.75457, scales to 7546
+ */
+ count = AAFrequency.getAnalogueCount(hmm, 20, 'G', false, false);
+ assertEquals(7546, count);
+
+ /*
+ * 'G' in column 1077 has emission probability 0.0533, here
+ * ignored (set to 0) since below background of 0.25
+ */
+ count = AAFrequency.getAnalogueCount(hmm, 1077, 'G', true, false);
+ assertEquals(0, count);
+ }
+
+ @Test(groups = { "Functional" })
+ public void testCompleteInformation()
+ {
+ ProfileI prof1 = new Profile(1, 0, 100, "A");
+ ProfileI prof2 = new Profile(1, 0, 100, "-");
+
+ ProfilesI profs = new Profiles(new ProfileI[] { prof1, prof2 });
+ Annotation ann1 = new Annotation(6.5f);
+ Annotation ann2 = new Annotation(0f);
+ Annotation[] annots = new Annotation[] { ann1, ann2 };
+ SequenceI seq = new Sequence("", "AA", 0, 0);
+ seq.setHMM(hmm);
+ AlignmentAnnotation annot = new AlignmentAnnotation("", "", annots);
+ annot.setSequenceRef(seq);
+ AAFrequency.completeInformation(annot, profs, 0, 1);
+ float ic = annot.annotations[0].value;
+ assertEquals(0.91532f, ic, 0.0001f);
+ ic = annot.annotations[1].value;
+ assertEquals(0f, ic, 0.0001f);
+ int i = 0;
+ }
+
}