1 /* Copyright (c) 2013 Alexander Sherstnev
\r
2 * Copyright (c) 2011 Peter Troshin
\r
4 * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0
\r
6 * This library is free software; you can redistribute it and/or modify it under the terms of the
\r
7 * Apache License version 2 as published by the Apache Software Foundation
\r
9 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
\r
10 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache
\r
11 * License for more details.
\r
13 * A copy of the license is in apache_license.txt. It is also available here:
\r
14 * @see: http://www.apache.org/licenses/LICENSE-2.0.txt
\r
16 * Any republication or derived work distributed in source code form
\r
17 * must include this copyright and license notice.
\r
19 package compbio.stat.collector;
\r
21 import java.io.File;
\r
22 import java.io.FileFilter;
\r
23 import java.io.IOException;
\r
24 import java.sql.SQLException;
\r
25 import java.text.SimpleDateFormat;
\r
26 import java.util.ArrayList;
\r
27 import java.util.Date;
\r
28 import java.util.HashSet;
\r
29 import java.util.List;
\r
30 import java.util.Set;
\r
32 import org.apache.log4j.Logger;
\r
34 import compbio.engine.client.PathValidator;
\r
35 import compbio.engine.client.SkeletalExecutable;
\r
36 import compbio.util.FileUtil;
\r
39 * Class assumptions:
\r
40 * 1. Number of runs of each WS = number of folders with name
\r
41 * 2. Number of successful runs = all runs with no result file
\r
42 * 3. Per period of time = limit per file creating time
\r
43 * 4. Runtime (avg/max) = finish time - start time
\r
44 * 5. Task & result size = result.size
\r
46 * Abandoned runs - not collected runs
\r
48 * Cancelled runs - cancelled
\r
50 * Cluster vs local runs
\r
52 * Reasons for failure = look in the err out?
\r
55 * Metadata required:
\r
57 * work directory for local and cluster tasks = from Helper or cmd parameter. WS
\r
58 * names - enumeration. Status file names and content.
\r
60 * @author Peter Troshin
\r
61 * @author Alexander Sherstnev
\r
64 public class ExecutionStatCollector implements Runnable {
\r
65 static final int UNDEFINED = -1;
\r
66 static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");
\r
67 private static final Logger log = Logger.getLogger(ExecutionStatCollector.class);
\r
69 final private File workingDirectory;
\r
70 final private List<JobStat> stats;
\r
72 * Consider the job that has been working for longer than timeOutInHours
\r
73 * completed, whatever the outcome
\r
75 final private int timeOutInHours;
\r
78 * List subdirectories in the job directory
\r
80 * @param workingDirectory
\r
81 * @param timeOutInHours
\r
83 public ExecutionStatCollector(String workingDirectory, int timeOutInHours) {
\r
84 log.info("Starting stat collector for directory: " + workingDirectory);
\r
85 log.info("Maximum allowed runtime(h): " + timeOutInHours);
\r
86 if (!PathValidator.isValidDirectory(workingDirectory)) {
\r
87 throw new IllegalArgumentException("workingDirectory '" + workingDirectory + "' does not exist!");
\r
89 this.workingDirectory = new File(workingDirectory);
\r
90 stats = new ArrayList<JobStat>();
\r
91 if (timeOutInHours <= 0) {
\r
92 throw new IllegalArgumentException(
\r
93 "Timeout value must be greater than 0! Given value: " + timeOutInHours);
\r
95 this.timeOutInHours = timeOutInHours;
\r
98 boolean hasCompleted(JobDirectory jd) {
\r
99 JobStat jstat = jd.getJobStat();
\r
100 if (jstat.hasResult() || jstat.getIsCancelled()
\r
101 || jstat.getIsFinished() || hasTimedOut(jd)) {
\r
107 boolean hasTimedOut(JobDirectory jd) {
\r
108 return ((System.currentTimeMillis() - jd.jobdir.lastModified()) / (1000 * 60 * 60)) > timeOutInHours;
\r
112 * Make sure that collectStatistics methods was called prior to calling
\r
113 * this! TODO consider running collectStatistics from here on the first call
\r
115 StatProcessor getStats() {
\r
116 if (stats.isEmpty()) {
\r
117 log.info("Please make sure collectStatistics method was called prior to calling getStats()!");
\r
119 return new StatProcessor(stats);
\r
122 void writeStatToDB() throws SQLException {
\r
123 Set<JobStat> rjobs = new HashSet<JobStat>(stats);
\r
124 StatDB statdb = new StatDB();
\r
125 log.debug("Removing records that has already been recorded");
\r
127 statdb.removeRecordedJobs(rjobs);
\r
128 log.debug("New records left: " + rjobs.size());
\r
129 statdb.insertData(rjobs);
\r
135 public static void main(String[] args) throws IOException, SQLException {
\r
137 // updateTime(new File(
\r
138 // "D:\\workspace\\JABA2\\jobsout\\AACon#170462904473672\\STARTED"));
\r
140 File[] files = FileUtil.getFiles("Y:\\fc\\www-jws2\\jaba\\jobsout",
\r
142 List<JobStat> stats = new ArrayList<JobStat>();
\r
143 for (File file : files) {
\r
144 JobDirectory jd = new JobDirectory(file);
\r
145 stats.add(jd.getJobStat());
\r
146 // System.out.println(jd.getJobStat().getJobReportTabulated());
\r
148 StatProcessor sp = new StatProcessor(stats);
\r
149 System.out.println(sp.reportStat());
\r
150 System.out.println();
\r
151 System.out.println("!!!!!!!!!!!!!!!!!!");
\r
152 System.out.println();
\r
154 Set<JobStat> rjobs = new HashSet<JobStat>(sp.stats);
\r
155 StatDB statdb = new StatDB();
\r
156 statdb.removeRecordedJobs(rjobs);
\r
157 statdb.insertData(rjobs);
\r
160 static FileFilter directories = new FileFilter() {
\r
162 public boolean accept(File pathname) {
\r
163 return pathname.isDirectory() && !pathname.getName().startsWith(".");
\r
168 void collectStatistics() {
\r
169 // clear stats array;
\r
171 File[] dirs = workingDirectory.listFiles(directories);
\r
172 for (File dir : dirs) {
\r
173 // skip work directory with test inputas
\r
174 log.debug("check directory: " + dir.getName() + "...");
\r
175 if (InputFilter.accept(new File(dir.getPath() + File.separator + SkeletalExecutable.INPUT))) {
\r
176 JobDirectory jd = new JobDirectory(dir);
\r
177 JobStat jstat = jd.getJobStat();
\r
178 // Do not record stats on the job that has not completed yet
\r
179 if (hasCompleted(jd)) {
\r
182 log.debug("Skipping the job: " + jstat + " as it has not completed yet");
\r
185 log.trace("training input: " + dir.getName() + File.separator + SkeletalExecutable.INPUT);
\r
191 public void run() {
\r
192 log.info("Started updating statistics at " + new Date());
\r
193 log.info("For directory: " + workingDirectory.getAbsolutePath());
\r
195 collectStatistics();
\r
197 StatProcessor local_stats = getStats();
\r
198 log.info("Found " + local_stats.getJobNumber() + " jobs!");
\r
201 } catch (SQLException e) {
\r
202 log.error("Fails to update jobs statistics database!");
\r
203 log.error(e.getLocalizedMessage(), e);
\r
205 log.info("Finished updating statistics at " + new Date());
\r