6 # Copyright (C) 2001 Washington University School of Medicine
7 # and Howard Hughes Medical Institute
10 # Author: Christian M. Zmasek
11 # zmasek@genetics.wustl.edu
12 # http://www.genetics.wustl.edu/eddy/people/zmasek/
16 # Last modified 08/16/01
19 # Objective. Determines the distribution of top orthology bootstrap values
20 # of an Xrio.pl output file.
22 # Usage. "bootstrapCounter.pl <infile = Xrio.pl-output> <outfile>"
24 # Important. The result of this is meaningful ONLY if the thresholds
25 # for output of the RIO analysis are set to zero (L=0 R=0).
30 # # ############################################################################
31 # # Annotation: B0511.6 CE17345 helicase (ST.LOUIS) TR:O61815 protein_id:AAC17654.1
35 # # Query has not been aligned (score lower than gathering cutoff).
36 # # ############################################################################
39 # # ############################################################################
40 # # Annotation: B0511.7 CE17346 (ST.LOUIS) TR:O61817 protein_id:AAC17655.1
44 # RIO - Resampled Inference of Orthologs
46 # ------------------------------------------------------------------------------
47 # Alignment file: /tmp/Xriopl9846081980/Full-FHA
48 # Alignment : FHA domain
50 # Query file : /tmp/Xriopl9846081980/__queryfile__
51 # ==============================================================================
53 # Query : CE17346.FHA_CAEEL/45-114
55 # Number (in %) of observed orthologies (o) and super orthologies (s) to query
56 # in bootstrapped trees, evolutionary distance to query:
58 # Sequence Description # o[%] s[%] distance
59 # -------- ----------- ---- ---- --------
60 # YC67_MYCTU/308-372 - 20 14 1.577840
61 # FRAH_ANASP/204-277 FRAH PROTEIN. 17 16 1.532670
62 # ABA2_NICPL/557-633 ZEAXANTHIN EPOXIDASE PRECURSOR (EC 1.14.-.-). 14 11 1.885700
63 # ABA2_LYCES/563-639 ZEAXANTHIN EPOXIDASE PRECURSOR (EC 1.14.-.-). 14 11 2.140000
67 # Distance values (based on ML branch length values on consensus tree)
68 # --------------------------------------------------------------------
69 # Given the thresholds for distance calculations:
70 # No sequence is considered orthologous to query.
# Main script body. Tallies orthology (o) and super-orthology (s) bootstrap
# values read from $infile into two histograms and writes both histograms,
# together with a short report header, to $outfile.

# Histograms: the array index is a bootstrap value, the element is the number
# of times that value was seen.
86 my @o_bootstraps_array = ();
87 my @s_bootstraps_array = ();
# Prints the usage message.
# NOTE(review): the condition guarding this call is not visible here --
# presumably a check on the number of command-line arguments; confirm.
93 &errorInCommandLine();
# The second command-line argument names the output file.
98 $outfile = $ARGV[ 1 ];
# NOTE(review): judging by the message, this aborts instead of overwriting an
# existing output file; the guarding test is not visible here -- confirm.
101 die "\n$0: <<$outfile>> already exists.\n";
# The input must be non-empty (-s), a plain file (-f) and look like text (-T).
103 unless ( ( -s $infile ) && ( -f $infile ) && ( -T $infile ) ) {
104 die "\n$0: <<$infile>> does not exist, is empty, or is not a plain textfile.\n";
# Two-argument open on a bareword handle: the open mode is derived from the
# contents of $infile itself.
# NOTE(review): consider three-argument open with a lexical handle.
108 open( IN, "$infile" ) || die "\n$0: Cannot open file <<$infile>>: $!\n";
# Pre-fill slots 0..100 of both histograms with zero so that every value in
# that range is reported below, even when it never occurs.
111 for ( $i = 0; $i <= 100; ++$i ) {
112 $s_bootstraps_array[ $i ] = $o_bootstraps_array[ $i ] = 0;
# Scan the input one line at a time.
115 while ( $return_line = <IN> ) {
# Matches a line that begins (after optional whitespace) with exactly eight
# dashes followed by whitespace, e.g. the underline of the result table shown
# in the format sample in the file header.
# NOTE(review): the branch body is not visible here; presumably it sets
# $results -- confirm.
117 if ( $return_line =~ /^\s*--------\s+/ ) {
# Matches the "Distance values" heading (case-insensitive).
# NOTE(review): the branch body is not visible here -- confirm what it resets.
120 elsif ( $return_line =~ /^\s*Distance\s+values\s+/i ) {
# While $results == 1, a "!NO ORTHOLOGS" line is counted as bootstrap value 0
# in both histograms.
123 elsif ( $results == 1 && $return_line =~ /^\s*!NO\s+ORTHOLOGS/ ) {
124 $o_bootstraps_array[ 0 ]++;
125 $s_bootstraps_array[ 0 ]++;
# While $results == 1, any line ending in three whitespace-separated fields
# matches; the first two of those fields are captured as $1 and $2.
# NOTE(review): presumably $1/$2 are the o[%] and s[%] columns and are copied
# into $o_bootstraps/$s_bootstraps -- the assignment is not visible here.
129 elsif ( $results == 1 && $return_line =~ /(\S+)\s+(\S+)\s+\S+\s*$/ ) {
# Refuse values that fall outside the histogram range.
# NOTE(review): $s_bootstraps < 0 is not tested; a negative value would index
# the s histogram from its end instead of being rejected.
# NOTE(review): the die message below spells "Boostrap".
133 if ( $o_bootstraps > 100 || $s_bootstraps > 100
134 || $o_bootstraps < 0 ) {
135 print "o bootstraps: $o_bootstraps\n";
136 print "s bootstraps: $s_bootstraps\n";
137 die "\n\n$0: Error: Boostrap value(s) out of range.\n\n";
# Tally the two values in their respective histograms.
141 $o_bootstraps_array[ $o_bootstraps ]++;
142 $s_bootstraps_array[ $s_bootstraps ]++;
# Write the report. Two-argument open with a leading ">" opens $outfile for
# writing.
150 open( OUT, ">$outfile" ) || die "\n$0: Cannot create file \"$outfile\": $!\n";
# Report header: script version, file names, timestamp and total count.
152 print OUT "bootstrapCounter.pl version: $VERSION\n\n";
153 print OUT "Distribution of top bootstrap values\n\n";
154 print OUT "Input file : $infile\n";
155 print OUT "Output file: $outfile\n";
# The timestamp is whatever the external `date` command prints (run via
# backticks).
156 print OUT "Date : ".`date`."\n";
# NOTE(review): $total is not updated in any line visible here; confirm where
# it is incremented.
157 print OUT "Total: $total\n\n";
# One "<value> <count>" line per index of the orthology histogram.
158 print OUT "top-orthology-bootstraps vs. count:\n";
159 for ( $i = 0; $i < @o_bootstraps_array; ++$i ) {
160 print OUT "$i $o_bootstraps_array[ $i ]\n";
# Same layout for the super-orthology histogram.
162 print OUT "\n\ntop-super-orthology-bootstraps vs. count:\n";
163 for ( $i = 0; $i < @s_bootstraps_array; ++$i ) {
164 print OUT "$i $s_bootstraps_array[ $i ]\n";
# Completion message on standard output.
168 print( "\nDone.\n\n" );
174 sub errorInCommandLine {
176 print " bootstrapCounter.pl version: $VERSION\n";
177 print " Usage: \"bootstrapCounter.pl <infile = Xrio.pl-output> <outfile>\"\n";
178 print " Important: The result of this is meaningful ONLY if the thresholds\n";
179 print " for output of the RIO analysis are set to zero (L=0 R=0).\n";