5 arco_stats.pl - script to collate SGE job stats from ARCo
18 # path for nicer fonts for the graph labels
19 my $FONTPATH = "/homes/www-jpred/live/public_html/fonts/";
20 my @month = qw( Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec );
21 my $user = 'www-jpred';
36 'plot!' => \$showPlot,
38 'run-stats=i' => \$runStats,
45 pod2usage( -verbose => 2 ) if ($man);
46 pod2usage( -verbose => 1 ) if ($help);
48 my $dbh = DBI->connect( "dbi:Pg:host=postgres.compbio.dundee.ac.uk;dbname=arco", 'account', 'saffron' ) or die "ERROR - can't connect to db: ", $DBI::errstr;
50 # generate output prefix name unless already specified
51 $out = "${user}_stats" unless ($out);
53 # find out the current month and year. Correct year to real 4-digit number.
54 my ( $currDate, $currMon, $currYr ) = ( localtime() )[ 3 .. 5 ];
57 my $monthly = get_monthly($dbh);
58 warn "Warning - no monthly data found\n" unless ( scalar @{$monthly} );
59 my $daily = get_daily( $dbh, $currMon, $currYr );
60 warn "Warning - no daily data found for $month[$currMon] $currYr\n" unless ( scalar @{$daily} );
61 print_run_stats( $dbh, $runStats ) if ($runStats);
65 print "Drawing plots...\n" unless ($quiet);
66 draw_graph( $monthly, 'Month', "Monthly Totals", "${out}_monthly.png" );
67 draw_graph( $daily, 'Date', "Daily Totals for $month[$currMon]", "${out}_daily.png" );
71 print "Writing CSV files...\n" unless ($quiet);
72 print_data( $daily, "${out}_daily.csv" );
73 print_data( $monthly, "${out}_monthly.csv" );
75 print "Finished!\n" unless ($quiet);
78 #########################################################################################################
79 # count the number of jobs per month since records began
83 # retrieve all jobs run and give the epoch time they started.
84 # has a kludge to remove some extraneous run info for Aug 2008
85 my $sth = $dbh->prepare(
86 "SELECT EXTRACT(EPOCH FROM start_time) AS epoch
88 WHERE username = '$user'
89 AND submission_time > '2008-sep-01'::Date
91 ) or die "ERROR - unable to prepare SELECT statement: ", $dbh->errstr();
95 # foreach epoch time retrieve month and year
96 # and count the number of jobs run per month
98 while ( my @row = $sth->fetchrow_array ) {
99 my ( $mnth, $year ) = ( localtime( $row[0] ) )[ 4 .. 5 ];
101 $data{$year}{$mnth}++;
104 # convert month counts into data structure readable by GD::Graph
107 foreach my $year ( sort keys %data ) {
108 foreach my $mon ( sort { $a <=> $b } keys %{ $data{$year} } ) {
109 my $date = sprintf "%s %02d", $month[$mon], $year - 2000; # convert into 2-digit version (not Y2K compatible)
110 $sortedData[0][$i] = $date;
111 $sortedData[1][$i] = $data{$year}{$mon};
115 return ( \@sortedData );
118 #########################################################################################################
119 # count the number of jobs per day of current month
125 # retrieve the number of jobs run per day during this month
126 my $sth = $dbh->prepare(
127 "SELECT DISTINCT(CAST(start_time AS DATE)) AS start_date, COUNT(CAST(start_time AS DATE))
129 WHERE username = '$user'
130 AND start_time >= '$year-$month[$mnth]-01'::Date
132 ORDER BY start_date ASC"
133 ) or die "ERROR - unable to prepare SELECT statement: ", $dbh->errstr();
137 # generate data structure for GD::Graph with day counts
140 while ( my @row = $sth->fetchrow_array ) {
141 # $row[0] is the date
142 # $row[1] is the count
143 my $date = ( split( /-/, $row[0] ) )[2];
145 #print "$date: $row[1]\n";
146 $data[0][$i] = $date;
147 $data[1][$i] = $row[1];
155 #########################################################################################################
156 # print out specific stats relating to run time, queuing time and exit status
157 sub print_run_stats {
161 # get the date n days ago
162 my $secsInDays = 86400 * $days; # num seconds in a day * number of days
163 my $daysAgo = ( time() - $secsInDays );
164 my ( $date, $mnth, $year ) = ( localtime($daysAgo) )[ 3 .. 5 ];
167 #print "$days days ago was: $year-$month[$mnth]-$date\n";
169 # retrieve run-specific stats for the user
170 my $sth = $dbh->prepare(
171 "SELECT wallclock_time, maxvmem, exit_status, EXTRACT(EPOCH FROM start_time - submission_time) AS wait_time
173 WHERE username = '$user'
174 AND submission_time >= '$year-$month[$mnth]-$date'::Date"
175 ) or die "ERROR - unable to prepare SELECT statement: ", $dbh->errstr();
176 $sth->execute() or die;
177 my $nRows = $sth->rows();
180 my $file = 'run_stats.csv';
181 open( my $OUT, ">>", $file ) or die "ERROR - unable to open '$file' for write: ${!}\nDied";
182 print $OUT "$currYr-$month[$currMon]-$currDate,$nRows,";
184 # if no jobs run in time frame warn, set everything to zero and return
186 warn "Warning - no jobs found for user '$user' in the last $days days\n";
187 print $OUT "0,0,0,0,0\n";
192 # collate useful data
194 while ( my @row = $sth->fetchrow_array ) {
195 $data{runtime} += $row[0];
196 $data{vmem} += $row[1];
198 if ( $row[2] == 4 ) {
204 $data{waittime} += $row[3];
208 # define potentially undefined variables
209 $data{timeouts} = 0 unless ( $data{timeouts} );
210 $data{errors} = 0 unless ( $data{errors} );
212 # write out data to file
213 foreach my $k qw(runtime vmem waittime) {
214 printf $OUT "%.0f,", $data{$k} / $nRows;
216 print $OUT "$data{timeouts},$data{errors}\n";
219 #########################################################################################################
221 my ( $dataref, $x_label, $title, $outFile ) = @_;
223 my $graph = GD::Graph::bars->new( 700, 400 );
225 $graph->set_title_font ( "$FONTPATH/VeraBd.ttf", 12 );
226 $graph->set_x_label_font ( "$FONTPATH/VeraBd.ttf", 8 );
227 $graph->set_y_label_font ( "$FONTPATH/VeraBd.ttf", 8 );
228 $graph->set_x_axis_font ( "$FONTPATH/Vera.ttf", 6 );
229 $graph->set_y_axis_font ( "$FONTPATH/Vera.ttf", 8 );
232 y_label => 'No. Jpred Submissions',
235 shadowclr => 'lgray',
236 x_labels_vertical => 1,
238 # borderclrs => undef,
241 ) or die $graph->error;
243 my $gd = $graph->plot($dataref) or die $graph->error;
245 open( my $IMG, ">", $outFile ) or die "ERROR - unable to open '$outFile' for write: ${!}\nDied";
252 #########################################################################################################
257 if ( !scalar @{$data} ) {
258 warn "Warning - no data to print out. Nothing to do.\n";
262 my $total = scalar @{ $data->[0] };
264 open( my $OUT, ">", $file ) or die "ERROR - unable to open '$file' for write: ${!}\nDied";
265 print $OUT "Date,nRuns\n";
266 for ( my $i = 0 ; $i < $total ; ++$i ) {
267 print $OUT "$data->[0][$i],$data->[1][$i]\n";
271 #########################################################################################################
274 arco_stats.pl --user <sge_user> [--out <file_prefix> --x <pixels> --y <pixels>] [--csv] [--plot] [--run-stats <days>] [--quiet] [--man] [--help]
278 Script to collate run statistics from the SGE ARCo system.
280 The script will retrieve all historical data for the specified user, count the number of jobs run and collate them by month. For the current month, data will be broken down by day.
282 With no options, the script will get stats for the www-jpred user and output CSV-formatted data only. Filenames will take the form <username>_stats_[daily|monthly].csv, unless specified with the --out switch.
290 Specify SGE user. [Default: www-jpred]
294 Output filename prefix.
298 Specify the X-dimension of the plot figure in pixels. [Default: 700]
302 Specify the Y-dimension of the plot figure in pixels. [Default: 400]
304 =item B<--csv>,B<--nocsv>
306 Toggle for CSV output. [Default: on]
308 =item B<--plot>,B<--noplot>
310 Toggle for plotting of data. [Default: off]
314 Set the number of days for collating run statistics (e.g. mean run time, mean wait time). [Default: 0]
318 Switch off progress messages. Useful if running in cron.
326 Full manpage of program.
332 Script assumes it won't run in the past and therefore is not Y2K compliant.
336 Chris Cole <christian@cole.name>