package compbio.stat.collector;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.log4j.Logger;

import compbio.engine.client.Executable;
import compbio.engine.client.PathValidator;
import compbio.engine.client.SkeletalExecutable;
import compbio.metadata.JobStatus;
import compbio.util.FileUtil;
import compbio.ws.client.Services;
\r
/**
 * Number of runs of each WS = number of folders with name
 *
 * Number of successful runs = all runs with no result file
 *
 * Per period of time = limit per file creating time. Runtime (avg/max) =
 * started time - finished time
 *
 * Task &amp; result size = result.size
 *
 * Abandoned runs - not collected runs
 *
 * Cancelled runs - cancelled
 *
 * Cluster vs local runs
 *
 * Reasons for failure = look in the err out?
 *
 * Metadata required:
 *
 * work directory for local and cluster tasks = from Helper or cmd parameter. WS
 * names - enumeration. Status file names and content.
 */
\r
53 public class ExecutionStatCollector implements Runnable {
\r
55 static final int UNDEFINED = -1;
\r
57 private static final Logger log = Logger
\r
58 .getLogger(ExecutionStatCollector.class);
\r
60 static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");
\r
62 final private File workDirectory;
\r
63 final private List<JobStat> stats;
\r
65 * Consider the job that has been working for longer than timeOutInHours
\r
66 * completed, whatever the outcome
\r
68 final private int timeOutInHours;
\r
71 * List subdirectories in the job directory
\r
73 * @param workDirectory
\r
74 * @param timeOutInHours
\r
76 public ExecutionStatCollector(String workDirectory, int timeOutInHours) {
\r
77 log.info("Starting stat collector for directory: " + workDirectory);
\r
78 log.info("Maximum allowed runtime(h): " + timeOutInHours);
\r
79 if (!PathValidator.isValidDirectory(workDirectory)) {
\r
80 throw new IllegalArgumentException("workDirectory '"
\r
81 + workDirectory + "' does not exist!");
\r
83 this.workDirectory = new File(workDirectory);
\r
84 stats = new ArrayList<JobStat>();
\r
85 if (timeOutInHours <= 0) {
\r
86 throw new IllegalArgumentException(
\r
87 "Timeout value must be greater than 0! Given value: "
\r
90 this.timeOutInHours = timeOutInHours;
\r
93 boolean hasCompleted(JobDirectory jd) {
\r
94 JobStat jstat = jd.getJobStat();
\r
95 if (jstat.hasResult() || jstat.getIsCancelled()
\r
96 || jstat.getIsFinished() || hasTimedOut(jd)) {
\r
102 boolean hasTimedOut(JobDirectory jd) {
\r
103 return ((System.currentTimeMillis() - jd.jobdir.lastModified()) / (1000 * 60 * 60)) > timeOutInHours;
\r
107 * Make sure that collectStatistics methods was called prior to calling
\r
108 * this! TODO consider running collectStatistics from here on the first call
\r
110 StatProcessor getStats() {
\r
111 if (stats.isEmpty()) {
\r
112 log.info("Please make sure collectStatistics method was called prior to calling getStats()!");
\r
114 return new StatProcessor(stats);
\r
117 void writeStatToDB() throws SQLException {
\r
118 Set<JobStat> rjobs = new HashSet<JobStat>(stats);
\r
119 StatDB statdb = new StatDB();
\r
120 log.debug("Removing records that has already been recorded");
\r
122 statdb.removeRecordedJobs(rjobs);
\r
123 log.debug("New records left: " + rjobs.size());
\r
124 statdb.insertData(rjobs);
\r
128 * static void updateTime(File statFile) throws IOException { long lastMod =
\r
129 * statFile.lastModified(); FileWriter fw = new FileWriter(statFile);
\r
130 * fw.write(new Long(lastMod).toString()); fw.close(); }
\r
136 public static void main(String[] args) throws IOException, SQLException {
\r
138 // updateTime(new File(
\r
139 // "D:\\workspace\\JABA2\\jobsout\\AACon#170462904473672\\STARTED"));
\r
141 File[] files = FileUtil.getFiles("Y:\\fc\\www-jws2\\jaba\\jobsout",
\r
143 List<JobStat> stats = new ArrayList<JobStat>();
\r
144 for (File file : files) {
\r
145 JobDirectory jd = new JobDirectory(file);
\r
146 stats.add(jd.getJobStat());
\r
147 // System.out.println(jd.getJobStat().getJobReportTabulated());
\r
149 StatProcessor sp = new StatProcessor(stats);
\r
150 System.out.println(sp.reportStat());
\r
151 System.out.println();
\r
152 System.out.println("!!!!!!!!!!!!!!!!!!");
\r
153 System.out.println();
\r
155 Set<JobStat> rjobs = new HashSet<JobStat>(sp.stats);
\r
156 StatDB statdb = new StatDB();
\r
157 statdb.removeRecordedJobs(rjobs);
\r
158 statdb.insertData(rjobs);
\r
161 static FileFilter directories = new FileFilter() {
\r
163 public boolean accept(File pathname) {
\r
164 return pathname.isDirectory()
\r
165 && !pathname.getName().startsWith(".");
\r
169 static class JobDirectory {
\r
172 Map<String, File> files = new HashMap<String, File>();
\r
174 JobDirectory(File directory) {
\r
175 this.jobdir = directory;
\r
176 for (File f : jobdir.listFiles()) {
\r
177 files.put(f.getName(), f);
\r
181 boolean hasStatus(JobStatus status) {
\r
182 return files.containsKey(status.toString());
\r
185 boolean isCollected() {
\r
186 return hasStatus(JobStatus.COLLECTED);
\r
189 boolean isCancelled() {
\r
190 return hasStatus(JobStatus.CANCELLED);
\r
193 long getStartTime() {
\r
194 long starttime = UNDEFINED;
\r
195 File startfile = files.get(JobStatus.STARTED.toString());
\r
196 if (startfile == null) {
\r
197 startfile = files.get(JobStatus.SUBMITTED.toString());
\r
200 if (startfile != null) {
\r
201 String start = FileUtil.readFileToString(startfile);
\r
202 starttime = Long.parseLong(start.trim());
\r
204 } catch (IOException ignore) {
\r
206 "IOException while reading STARTED status file! Ignoring...",
\r
209 starttime = startfile.lastModified();
\r
210 } catch (NumberFormatException ignore) {
\r
212 "NumberFormatException while reading STARTED status file! Ignoring...",
\r
215 starttime = startfile.lastModified();
\r
221 String getClusterJobID() {
\r
222 String clustjobId = "";
\r
223 File jobid = files.get("JOBID");
\r
225 if (jobid != null) {
\r
226 clustjobId = FileUtil.readFileToString(jobid);
\r
228 } catch (IOException ioe) {
\r
230 "IO Exception while reading the content of JOBID file for job "
\r
233 return clustjobId.trim();
\r
236 long getFinishedTime() {
\r
237 long ftime = UNDEFINED;
\r
238 File finished = files.get(JobStatus.FINISHED.toString());
\r
239 if (finished != null) {
\r
241 if (finished != null) {
\r
242 String start = FileUtil.readFileToString(finished);
\r
243 ftime = Long.parseLong(start.trim());
\r
245 } catch (IOException ignore) {
\r
247 "IOException while reading FINISHED status file! Ignoring...",
\r
250 ftime = finished.lastModified();
\r
251 } catch (NumberFormatException ignore) {
\r
253 "NumberFormatException while reading FINISHED status file! Ignoring...",
\r
256 ftime = finished.lastModified();
\r
262 private Services getService() {
\r
263 return Services.getServiceByJobDirectory(jobdir);
\r
266 long getResultSize() {
\r
267 Class<? extends Executable<?>> name = Services
\r
268 .getRunnerByJobDirectory(jobdir);
\r
271 if (name.getSimpleName().equalsIgnoreCase("IUPred")) {
\r
272 f = files.get("out.glob");
\r
274 f = files.get("out.short");
\r
276 f = files.get("out.long");
\r
278 f = files.get(SkeletalExecutable.OUTPUT);
\r
286 long getInputSize() {
\r
287 Class<? extends Executable<?>> name = Services
\r
288 .getRunnerByJobDirectory(jobdir);
\r
290 File input = files.get(SkeletalExecutable.INPUT);
\r
291 if (input != null) {
\r
292 return input.length();
\r
297 JobStat getJobStat() {
\r
298 return JobStat.newInstance(getService(), getClusterJobID(),
\r
299 jobdir.getName(), getStartTime(), getFinishedTime(),
\r
300 getInputSize(), getResultSize(), isCancelled(),
\r
305 public int hashCode() {
\r
306 final int prime = 31;
\r
308 result = prime * result
\r
309 + ((jobdir == null) ? 0 : jobdir.hashCode());
\r
314 public boolean equals(Object obj) {
\r
319 if (getClass() != obj.getClass())
\r
321 JobDirectory other = (JobDirectory) obj;
\r
322 if (jobdir == null) {
\r
323 if (other.jobdir != null)
\r
325 } else if (!jobdir.equals(other.jobdir))
\r
331 void collectStatistics() {
\r
332 File[] files = workDirectory.listFiles(directories);
\r
333 for (File file : files) {
\r
334 JobDirectory jd = new JobDirectory(file);
\r
335 JobStat jstat = jd.getJobStat();
\r
336 // Do not record stats on the job that has not completed yet
\r
337 if (hasCompleted(jd)) {
\r
340 log.debug("Skipping the job: " + jstat);
\r
341 log.debug("As it has not completed yet");
\r
343 // System.out.println(jd.getJobStat().getJobReportTabulated());
\r
348 public void run() {
\r
349 log.info("Started updating statistics at " + new Date());
\r
350 log.info("For directory: " + workDirectory.getAbsolutePath());
\r
352 collectStatistics();
\r
354 StatProcessor local_stats = getStats();
\r
355 log.info("Found " + local_stats.getJobNumber() + " jobs!");
\r
358 } catch (SQLException e) {
\r
359 log.error("Fails to update jobs statistics database!");
\r
360 log.error(e.getLocalizedMessage(), e);
\r
362 log.info("Finished updating statistics at " + new Date());
\r