2 * Sun Jul 5 13:42:41 1998
4 * Test driver for P7ViterbiAlignAlignment().
7 * 1) read an alignment and a corresponding HMM
8 * 2) align the alignment to the HMM to get a master trace
9 * 3) map the alignment to the HMM to get another master trace
10 * 4) Test that the two master traces are identical; if not, fail.
11 * This doesn't have to be true always, but it's true for the
13 * 5) Get imposed traces for each sequence
14 * 6) Viterbi align individual seqs to the model;
15 * compare the imposed trace with the Viterbi trace;
16 * 7) If an excessive number of individual traces differ from
17 * those imposed by master, fail.
19 * CVS $Id: alignalign_test.c,v 1.1.1.1 2005/03/22 08:34:49 cmzmasek Exp $
29 static char banner[] = "\
30 alignalign_test : testing of P7ViterbiAlignAlignment() code";
32 static char usage[] = "\
33 Usage: alignalign_test [-options]\n\
34 Available options are:\n\
35 -h : help; display this usage info\n\
39 static char experts[] = "\
40 --ali <f> : read alignment from <f>\n\
41 --hmm <f> : read HMM from <f>\n\
44 static struct opt_s OPTIONS[] = {
45 { "-h", TRUE, sqdARG_NONE },
46 { "-v", TRUE, sqdARG_NONE },
47 { "--ali", FALSE, sqdARG_STRING },
48 { "--hmm", FALSE, sqdARG_STRING },
50 #define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))
53 main(int argc, char **argv)
55 char *hmmfile; /* file to read HMM(s) from */
56 HMMFILE *hmmfp; /* opened hmmfile for reading */
57 struct plan7_s *hmm; /* HMM to search with */
58 char *afile; /* file to read alignment from */
59 int format; /* format determined for afile */
60 MSAFILE *afp; /* afile, open for reading */
61 MSA *msa; /* multiple sequence alignment from afile */
62 char **rseq; /* raw, dealigned aseq */
63 char *dsq; /* digitized target sequence */
64 struct p7trace_s *mtr; /* master traceback from alignment */
65 struct p7trace_s *maptr; /* master traceback from mapping */
66 struct p7trace_s **tr; /* individual tracebacks imposed by mtr */
67 struct p7trace_s **itr; /* individual trace from P7Viterbi() */
68 int idx; /* counter for seqs */
69 int ndiff; /* number of differing traces */
70 int rlen; /* length of an unaligned sequence */
73 int be_standard; /* TRUE when running standard test */
75 char *optname; /* name of option found by Getopt() */
76 char *optarg; /* argument found by Getopt() */
77 int optind; /* index in argv[] */
79 /***********************************************
81 ***********************************************/
85 format = MSAFILE_STOCKHOLM;
89 while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,
90 &optind, &optname, &optarg)) {
91 if (strcmp(optname, "-v") == 0) be_verbose = TRUE;
92 else if (strcmp(optname, "--ali") == 0) { afile = optarg; be_standard = FALSE; }
93 else if (strcmp(optname, "--hmm") == 0) { hmmfile = optarg; be_standard = FALSE; }
94 else if (strcmp(optname, "-h") == 0) {
95 Banner(stdout, banner);
101 if (argc - optind != 0)
102 Die("Incorrect number of arguments.\n%s\n", usage);
104 /***********************************************
105 * Get one alignment from test file: must be Stockholm format.
106 ***********************************************/
108 if ((afp = MSAFileOpen(afile, format, NULL)) == NULL)
109 Die("Alignment file %s could not be opened for reading", afile);
110 if ((msa = MSAFileRead(afp)) == NULL)
111 Die("Didn't read an alignment from %s", afile);
114 for (idx = 0; idx < msa->nseq; idx++)
115 s2upper(msa->aseq[idx]);
116 DealignAseqs(msa->aseq, msa->nseq, &rseq);
118 /***********************************************
120 * Read a single HMM from it.
121 ***********************************************/
123 if ((hmmfp = HMMFileOpen(hmmfile, NULL)) == NULL)
124 Die("Failed to open HMM file %s\n", hmmfile);
125 if (!HMMFileRead(hmmfp, &hmm))
126 Die("Failed to read any HMMs from %s\n", hmmfile);
128 Die("HMM file %s corrupt or in incorrect format? Parse failed", hmmfile);
129 P7Logoddsify(hmm, TRUE);
131 if (! (hmm->flags & PLAN7_MAP))
132 Die("HMM in %s has no map", hmmfile);
133 if (GCGMultchecksum(msa->aseq, msa->nseq) != hmm->checksum)
134 Die("Checksum for alignment in %s does not match that in HMM (%d != %d)",
135 afile, GCGMultchecksum(msa->aseq, msa->nseq), hmm->checksum);
137 /***********************************************
139 * mapped alignment should match re-aligned alignment:
140 * obtain and compare the two master traces
141 ***********************************************/
143 mtr = P7ViterbiAlignAlignment(msa, hmm);
144 maptr = MasterTraceFromMap(hmm->map, hmm->M, msa->alen);
145 if (! TraceVerify(mtr, hmm->M, msa->alen))
146 Die("Trace verify on P7ViterbiAlignAlignment() result failed\n");
147 if (! TraceVerify(maptr, hmm->M, msa->alen))
148 Die("Trace verify on MasterTraceFromMap() result failed\n");
149 if (! TraceCompare(mtr, maptr))
150 Die("Master traces differ for alignment versus map\n");
152 /**************************************************
154 * seq traces implied by mapped alignment should generally match
155 * re-aligned individual sequences.
156 ***************************************************/
158 ImposeMasterTrace(msa->aseq, msa->nseq, mtr, &tr);
160 itr = MallocOrDie(sizeof(struct p7trace_s *) * msa->nseq);
161 /* align individuals, compare traces */
163 for (idx = 0; idx < msa->nseq; idx++)
165 rlen = strlen(rseq[idx]);
166 dsq = DigitizeSequence(rseq[idx], rlen);
167 P7Viterbi(dsq, rlen, hmm, &(itr[idx]));
169 if (! TraceCompare(itr[idx], tr[idx]))
174 /* Determine success/failure.
176 if (ndiff > msa->nseq / 2)
177 Die("alignalign: Test FAILED; %d/%d differ\n", ndiff, msa->nseq);
181 Die("alignalign: Test FAILED; %d traces differ, should be 12\n", ndiff);
182 if (msa->nseq != 109)
183 Die("alignalign: Test FAILED; %d seqs read, should be 109\n", msa->nseq);
186 if (be_verbose) printf("alignalign: Test passed; %d/%d differ, as expected\n",
193 for (idx = 0; idx < msa->nseq; idx++)
195 P7FreeTrace(tr[idx]);
196 P7FreeTrace(itr[idx]);
200 Free2DArray((void **) rseq, msa->nseq);