6 # Copyright (C) 2001 Washington University School of Medicine
7 # and Howard Hughes Medical Institute
10 # Author: Christian M. Zmasek
11 # zmasek@genetics.wustl.edu
12 # http://www.genetics.wustl.edu/eddy/people/zmasek/
16 # Last modified 09/24/01
19 # Objective. Selection of RIO analysis results with top orthology
20 # bootstrap values greater or less than a threshold.
22 # Usage: "bootstrapSelector.pl <threshold options> <infile = Xrio.pl-output> <outfile>"
23 # Options: "l" for "less or equal" ("greater or equal" is default)
24 # "c" for "all hits must meet threshold in case of
25 # multiple copies of the same domain in the query"
26 # (default: "at least one")
27 # Example: "bootstrapSelector.pl 95lc OUTFILE_At_1 At_1_out"
29 # Important. The result of this is meaningful ONLY if the thresholds
30 # for output of the RIO analysis are set to zero (L=0 R=0).
37 # # ############################################################################
38 # # Annotation: B0511.6 CE17345 helicase (ST.LOUIS) TR:O61815 protein_id:AAC17654.1
42 # # Query has not been aligned (score lower than gathering cutoff).
43 # # ############################################################################
46 # # ############################################################################
47 # # Annotation: B0511.7 CE17346 (ST.LOUIS) TR:O61817 protein_id:AAC17655.1
51 # RIO - Resampled Inference of Orthologs
53 # ------------------------------------------------------------------------------
54 # Alignment file: /tmp/Xriopl9846081980/Full-FHA
55 # Alignment : FHA domain
57 # Query file : /tmp/Xriopl9846081980/__queryfile__
58 # ==============================================================================
60 # Query : CE17346.FHA_CAEEL/45-114
62 # Number (in %) of observed orthologies (o) and super orthologies (s) to query
63 # in bootstrapped trees, evolutionary distance to query:
65 # Sequence Description # o[%] s[%] distance
66 # -------- ----------- ---- ---- --------
67 # YC67_MYCTU/308-372 - 20 14 1.577840
68 # FRAH_ANASP/204-277 FRAH PROTEIN. 17 16 1.532670
69 # ABA2_NICPL/557-633 ZEAXANTHIN EPOXIDASE PRECURSOR (EC 1.14.-.-). 14 11 1.885700
70 # ABA2_LYCES/563-639 ZEAXANTHIN EPOXIDASE PRECURSOR (EC 1.14.-.-). 14 11 2.140000
74 # Distance values (based on ML branch length values on consensus tree)
75 # --------------------------------------------------------------------
76 # Given the thresholds for distance calculations:
77 # No sequence is considered orthologous to query.
# Main driver (fragment: several original lines, including guards, branch
# bodies and closing braces, are not visible in this listing).
# Visible behaviour: takes <threshold+options>, <infile> and <outfile> from
# @ARGV, writes a report header to OUT, then scans the input line by line,
# buffering lines into @lines, and finally prints a total to OUT and STDOUT.
89 my $summary_outfile = "";
93 my $analysis_performed = 0;
# Usage helper; the condition guarding this call is not visible here.
102 &errorInCommandLine();
# Positional arguments. The first one carries the numeric threshold and the
# option letters together (see the usage example "95lc" in the file header).
106 $threshold = $ARGV[ 0 ];
107 $infile = $ARGV[ 1 ];
108 $outfile = $ARGV[ 2 ];
# The short summary is written next to the main output, with a ".short" suffix.
109 $summary_outfile = $outfile.".short";
# NOTE(review): the test guarding this die is not visible; presumably an
# existence check on $outfile -- confirm.
112 die "\n$0: <<$outfile>> already exists.\n";
# Input must be non-empty (-s), a plain file (-f) and look like text (-T).
114 unless ( ( -s $infile ) && ( -f $infile ) && ( -T $infile ) ) {
115 die "\n$0: <<$infile>> does not exist, is empty, or is not a plain textfile.\n";
# Option letters are detected inside the threshold argument. The branch bodies
# are not visible; presumably they set $larger / $complete and strip the
# letter from $threshold -- TODO confirm.
119 if ( $threshold =~ /l/ ) {
123 if ( $threshold =~ /c/ ) {
# NOTE(review): 2-argument open with bareword handles; the file names come
# straight from @ARGV. A 3-argument open with lexical handles would be safer.
128 open( IN, "$infile" ) || die "\n$0: Cannot open file <<$infile>>: $!\n";
130 open( OUT, ">$outfile" ) || die "\n$0: Cannot create file \"$outfile\": $!\n";
131 open( OUT_SUMMARY, ">$summary_outfile" ) || die "\n$0: Cannot create file \"$summary_outfile\": $!\n";
# Report header written to the main output file.
# NOTE(review): the emitted text contains the typos "ortholgy" and "Grater";
# they are runtime strings and are left unchanged here.
133 print OUT "bootstrapSelector.pl version: $VERSION\n\n";
134 print OUT "Selection of RIO analysis results with top ortholgy\n";
135 print OUT "bootstrap values greater or less than a threshold.\n\n";
136 if ( $larger == 1 ) {
137 print OUT "Threshold : Grater than or equal to $threshold\n";
140 print OUT "Threshold : Less than or equal to $threshold\n";
142 print OUT "In case of multiple copies of the same domain in the query:\n";
143 if ( $complete == 1 ) {
144 print OUT "All hits must meet threshold.\n";
147 print OUT "At least one hit must meet threshold.\n";
149 print OUT "Input file : $infile\n";
150 print OUT "Output file : $outfile\n";
151 print OUT "Output file short: $summary_outfile\n";
# The timestamp is obtained by running the external "date" command.
152 print OUT "Date : ".`date`."\n\n\n";
# Scan the input one line at a time.
154 while ( $return_line = <IN> ) {
# A "# Annotation:" line resets the per-record state. The identifier is cut
# to at most 60 characters; the statement that first assigns $identifier
# (presumably from the capture group) is not visible.
156 if ( $return_line =~ /^\s*# Annotation:\s*(.+)/ ) {
158 $identifier = substr( $identifier, 0, 60);
159 $analysis_performed = 0;
# While reading a record, a line starting with "RIO" marks that an analysis
# was actually performed for it.
165 if ( $reading == 1 && $return_line =~ /^\s*RIO/ ) {
166 $analysis_performed = 1;
170 && $return_line =~ /^\s*# ####################################/ ) {
# The '#' rule line is tested together with a condition that is not visible;
# the branch body for a performed analysis is not visible either.
171 if ( $analysis_performed == 1 ) {
# Buffer every line seen while $reading is set; $i counts the buffered lines.
177 if ( $reading == 1 ) {
178 $lines[ $i++ ] = $return_line;
# Final tally goes to both the output file and STDOUT.
184 print OUT "\n\nTotal: $total\n";
187 close( OUT_SUMMARY );
189 print "\nTotal: $total\n";
# Evaluation of one buffered record (fragment: the enclosing routine's header,
# most branch bodies and the closing braces are not visible in this listing).
# Visible behaviour: walks the $i lines held in @lines, range-checks the
# orthology bootstrap value, compares it with $threshold according to $larger
# and $complete, and prints the record to OUT plus one line to OUT_SUMMARY.
198 my $o_bootstraps = 0;
201 for ( $j = 0; $j < $i; $j++ ) {
# A line beginning with a run of dashes; per the sample output in the file
# header this is the rule under the "Sequence Description ..." column titles.
# Branch body not visible -- presumably it switches $results on; confirm.
203 if ( $lines[ $j ] =~ /^\s*--------\s+/ ) {
# A "Distance values" line (case-insensitive). Branch body not visible.
206 elsif ( $lines[ $j ] =~ /^\s*Distance\s+values\s+/i ) {
209 elsif ( $results == 1
210 && ( $lines[ $j ] =~ /\S+\s+\S+\s+\S+\s*$/
211 || $lines[ $j ] =~ /^\s*!NO\s+ORTHOLOGS/ ) ) {
# Inside the results section: either a row ending in three whitespace-separated
# fields, or the "!NO ORTHOLOGS" marker.
213 if ( $lines[ $j ] =~ /^\s*!NO\s+ORTHOLOGS/ ) {
# Captures the third field from the end of the row (the o[%] column in the
# sample output). The statement that stores the capture is not visible.
217 $lines[ $j ] =~ /(\S+)\s+\S+\s+\S+\s*$/;
# Keep the row for the summary, collapsing runs of two or more whitespace
# characters into a single space.
220 $top1 = $lines[ $j ];
222 $top1 =~ s/\s{2,}/ /g;
# Bootstrap percentages must lie within 0..100; anything else is fatal.
# NOTE(review): the message spells "Boostrap"; runtime string left unchanged.
228 if ( $o_bootstraps > 100 || $o_bootstraps < 0 ) {
229 print "o bootstraps: $o_bootstraps\n";
230 die "\n\n$0: Error: Boostrap value(s) out of range.\n\n";
# "Greater or equal" mode: without "complete", a value >= threshold is tested
# for; with "complete", a value < threshold is tested for. Bodies not visible.
233 if ( $larger == 1 ) {
234 if ( $complete != 1 && $o_bootstraps >= $threshold ) {
239 elsif ( $complete == 1 && $o_bootstraps < $threshold ) {
# Mirror-image comparisons (<= and >), presumably the "less or equal" mode
# selected by the "l" option; the enclosing else is not visible.
244 if ( $complete != 1 && $o_bootstraps <= $threshold ) {
249 elsif ( $complete == 1 && $o_bootstraps > $threshold ) {
255 if ( $complete == 1 ) {
# Emit the buffered record between two '#' rule lines, then a one-line summary
# (identifier plus top hit) to the short output file. The condition that
# guards this output is not visible.
266 print OUT "# ############################################################################\n";
267 for ( $j = 0; $j < $i; ++$j ) {
268 print OUT "$lines[ $j ]";
270 print OUT "# ############################################################################\n\n\n";
271 print OUT_SUMMARY "$identifier [top 1: $top1]\n\n";
276 sub errorInCommandLine {
278 print " bootstrapCounter.pl version: $VERSION\n";
279 print " Usage: \"bootstrapSelector.pl <threshold options> <infile = Xrio.pl-output> <outfile>\"\n";
280 print " Options: \"l\" for \"less or equal\" (\"grater or equal\" is default)\n";
281 print " \"c\" for \"all hits must meet threshold in case of\n";
282 print " multiple copies of the same domain in the query\"\n";
283 print " (default: \"at least one\")\n";
285 print " \"bootstrapSelector.pl 95lc OUTFILE_At_1 At_1_out\"\n\n";
286 print " Important: The result of this is meaningful ONLY if the thresholds\n";
287 print " for output of the RIO analysis are set to zero (L=0 R=0).\n\n";