5 # Suck in the regression data on our file format test suite.
# The record pattern below expects ten whitespace-separated fields; each
# accepted record is stored in parallel arrays at index $nfiles.
8 print "Format test suite...\t";
10 open(DAT,"regression.dat") || die "failed to open regression.dat";
# Fields 1-3 and 8-10 are non-blank tokens, fields 4-7 are unsigned integers.
# The match is applied to $_ (presumably one line read from DAT -- the read
# loop is not visible in this excerpt).
14 if (/^(\S+)\s+(\S+)\s+(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\S+)/) {
15 $filename[$nfiles] = $1;
16 $format[$nfiles] = $2;
17 $seqtype[$nfiles] = $3;
# NOTE(review): captures $4 and $5 are not assigned in the lines visible here;
# presumably they fill $nseq[] and a residue total -- confirm.
20 $shortest[$nfiles] = $6;
21 $longest[$nfiles] = $7;
# The last three fields are yes/no flags; anything other than the exact
# string "yes" is stored as 0.
22 if ($8 eq "yes") { $autodetect[$nfiles] = 1; } else { $autodetect[$nfiles] = 0; }
23 if ($9 eq "yes") { $is_alignment[$nfiles] = 1; } else { $is_alignment[$nfiles] = 0; }
24 if ($10 eq "yes") { $is_singleseq[$nfiles] = 1; } else { $is_singleseq[$nfiles] = 0; }
31 # Run seqstat on every file in two modes;
32 # autodetecting (if format allows it), then forcing a format with --informat.
# In both modes a non-zero exit status ($?) or a mismatch between the parsed
# statistics and the expected per-file values aborts the run with die.
34 for ($i = 0; $i < $nfiles; $i++) {
# Mode 1: only for files whose autodetect flag is set.
35 if ($autodetect[$i]) {
36 $output = `$binpath/seqstat $filename[$i]`;    # no --informat given
37 if ($? != 0) { die "seqstat failed, autodetecting, on $filename[$i]"; }
38 ($ns, $nr, $fr, $to) = &parse_seqstat($output);
# Compare parsed values with the expected ones (only part of the condition
# is visible in this excerpt).
39 if ($ns != $nseq[$i] ||
41 $fr != $shortest[$i] ||
43 { die "seqstat regression failed, autodetecting, on $filename[$i]"; }
# Mode 2: same check, but with the format named explicitly.
45 $output = `$binpath/seqstat --informat $format[$i] $filename[$i]`;
46 if ($? != 0) { die "seqstat failed, using --informat, on $filename[$i]"; }
47 ($ns, $nr, $fr, $to) = &parse_seqstat($output);
48 if ($ns != $nseq[$i] ||
50 $fr != $shortest[$i] ||
52 { die "seqstat regression failed, using --informat, on $filename[$i]"; }
58 for ($i = 0; $i < $nfiles; $i++) {    # $i: source file and its format
59 for ($j = 0; $j < $nfiles; $j++) {    # $j: entry supplying the target format
60 if (! $is_alignment[$i] && $is_alignment[$j]) { next; } # can't convert unaligned to aligned
61 if (! $is_singleseq[$i] && $is_singleseq[$j]) { next; } # can't convert multiple seqs to single seq format
# Convert file $i to format $j; the shell redirect writes the result to the
# scratch file formattest.tmp in the current directory.
63 `$binpath/sreformat --informat $format[$i] $format[$j] $filename[$i] > formattest.tmp`;
64 if ($? != 0) { die "sreformat failed ($format[$i] to $format[$j]) on $filename[$i]"; }
# Read the converted file back, forcing format $j, and check its statistics
# against the values expected for the ORIGINAL file $i (not $j).
65 $output = `$binpath/seqstat --informat $format[$j] formattest.tmp`;
66 if ($? != 0) { die "seqstat failed after sreformat ($format[$i] to $format[$j]) on $filename[$i]"; }
67 ($ns, $nr, $fr, $to) = &parse_seqstat($output);
68 if ($ns != $nseq[$i] ||
70 $fr != $shortest[$i] ||
72 { die "seqstat regression failed after sreformat ($format[$i] to $format[$j]) on $filename[$i]"; }
77 unlink "formattest.tmp";    # remove the scratch file
80 # Function: parse_seqstat(output)
82 # Takes the text printed by seqstat (not a filename) and returns the list
83 # (number of sequences, total residues, smallest value, largest value).
84 # The visible body neither runs seqstat nor dies; a missing label yields undef.
87 local($output) = shift;    # local, not my: shadows the package $output during the call
88 my ($nseq, $nres, $fromlen, $tolen);
# Each value is the first run of digits following its label in the text; a
# variable whose label does not match is left undefined.
90 if ($output =~ /Number of sequences:\s+(\d+)/) {$nseq = $1; }
91 if ($output =~ /Total # residues:\s+(\d+)/) {$nres = $1; }
92 if ($output =~ /Smallest:\s+(\d+)/) {$fromlen = $1; }
93 if ($output =~ /Largest:\s+(\d+)/) {$tolen = $1; }
94 ($nseq, $nres, $fromlen, $tolen);    # final expression; callers unpack it as a 4-element list