/* Copyright (c) 2011 Peter Troshin
 *
 * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0
 *
 * This library is free software; you can redistribute it and/or modify it under the terms of the
 * Apache License version 2 as published by the Apache Software Foundation
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache
 * License for more details.
 *
 * A copy of the license is in apache_license.txt. It is also available here:
 * @see: http://www.apache.org/licenses/LICENSE-2.0.txt
 *
 * Any republication or derived work distributed in source code form
 * must include this copyright and license notice.
 */
\r
18 package compbio.stat.collector;
\r
20 import java.io.File;
\r
21 import java.io.FileFilter;
\r
22 import java.io.IOException;
\r
23 import java.sql.SQLException;
\r
24 import java.text.SimpleDateFormat;
\r
25 import java.util.ArrayList;
\r
26 import java.util.Date;
\r
27 import java.util.HashMap;
\r
28 import java.util.HashSet;
\r
29 import java.util.List;
\r
30 import java.util.Map;
\r
31 import java.util.Set;
\r
33 import org.apache.log4j.Logger;
\r
35 import compbio.engine.client.Executable;
\r
36 import compbio.engine.client.PathValidator;
\r
37 import compbio.engine.client.SkeletalExecutable;
\r
38 import compbio.metadata.JobStatus;
\r
39 import compbio.util.FileUtil;
\r
40 import compbio.ws.client.Services;
\r
41 import compbio.ws.client.ServicesUtil;
\r
44 * Number of runs of each WS = number of folders with name
\r
46 * Number of successful runs = all runs with no result file
\r
48 * Per period of time = limit per file creating time Runtime (avg/max) =
\r
50 * started time - finished time
\r
52 * Task & result size = result.size
\r
54 * Abandoned runs - not collected runs
\r
56 * Cancelled runs - cancelled
\r
58 * Cluster vs local runs
\r
60 * Reasons for failure = look in the err out?
\r
63 * Metadata required:
\r
65 * work directory for local and cluster tasks = from Helper or cmd parameter. WS
\r
66 * names - enumeration. Status file names and content.
\r
71 public class ExecutionStatCollector implements Runnable {
\r
73 static final int UNDEFINED = -1;
\r
75 private static final Logger log = Logger
\r
76 .getLogger(ExecutionStatCollector.class);
\r
78 static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");
\r
80 final private File workDirectory;
\r
81 final private List<JobStat> stats;
\r
83 * Consider the job that has been working for longer than timeOutInHours
\r
84 * completed, whatever the outcome
\r
86 final private int timeOutInHours;
\r
89 * List subdirectories in the job directory
\r
91 * @param workDirectory
\r
92 * @param timeOutInHours
\r
94 public ExecutionStatCollector(String workDirectory, int timeOutInHours) {
\r
95 log.info("Starting stat collector for directory: " + workDirectory);
\r
96 log.info("Maximum allowed runtime(h): " + timeOutInHours);
\r
97 if (!PathValidator.isValidDirectory(workDirectory)) {
\r
98 throw new IllegalArgumentException("workDirectory '"
\r
99 + workDirectory + "' does not exist!");
\r
101 this.workDirectory = new File(workDirectory);
\r
102 stats = new ArrayList<JobStat>();
\r
103 if (timeOutInHours <= 0) {
\r
104 throw new IllegalArgumentException(
\r
105 "Timeout value must be greater than 0! Given value: "
\r
108 this.timeOutInHours = timeOutInHours;
\r
111 boolean hasCompleted(JobDirectory jd) {
\r
112 JobStat jstat = jd.getJobStat();
\r
113 if (jstat.hasResult() || jstat.getIsCancelled()
\r
114 || jstat.getIsFinished() || hasTimedOut(jd)) {
\r
120 boolean hasTimedOut(JobDirectory jd) {
\r
121 return ((System.currentTimeMillis() - jd.jobdir.lastModified()) / (1000 * 60 * 60)) > timeOutInHours;
\r
125 * Make sure that collectStatistics methods was called prior to calling
\r
126 * this! TODO consider running collectStatistics from here on the first call
\r
128 StatProcessor getStats() {
\r
129 if (stats.isEmpty()) {
\r
130 log.info("Please make sure collectStatistics method was called prior to calling getStats()!");
\r
132 return new StatProcessor(stats);
\r
135 void writeStatToDB() throws SQLException {
\r
136 Set<JobStat> rjobs = new HashSet<JobStat>(stats);
\r
137 StatDB statdb = new StatDB();
\r
138 log.debug("Removing records that has already been recorded");
\r
140 statdb.removeRecordedJobs(rjobs);
\r
141 log.debug("New records left: " + rjobs.size());
\r
142 statdb.insertData(rjobs);
\r
146 * static void updateTime(File statFile) throws IOException { long lastMod =
\r
147 * statFile.lastModified(); FileWriter fw = new FileWriter(statFile);
\r
148 * fw.write(new Long(lastMod).toString()); fw.close(); }
\r
154 public static void main(String[] args) throws IOException, SQLException {
\r
156 // updateTime(new File(
\r
157 // "D:\\workspace\\JABA2\\jobsout\\AACon#170462904473672\\STARTED"));
\r
159 File[] files = FileUtil.getFiles("Y:\\fc\\www-jws2\\jaba\\jobsout",
\r
161 List<JobStat> stats = new ArrayList<JobStat>();
\r
162 for (File file : files) {
\r
163 JobDirectory jd = new JobDirectory(file);
\r
164 stats.add(jd.getJobStat());
\r
165 // System.out.println(jd.getJobStat().getJobReportTabulated());
\r
167 StatProcessor sp = new StatProcessor(stats);
\r
168 System.out.println(sp.reportStat());
\r
169 System.out.println();
\r
170 System.out.println("!!!!!!!!!!!!!!!!!!");
\r
171 System.out.println();
\r
173 Set<JobStat> rjobs = new HashSet<JobStat>(sp.stats);
\r
174 StatDB statdb = new StatDB();
\r
175 statdb.removeRecordedJobs(rjobs);
\r
176 statdb.insertData(rjobs);
\r
179 static FileFilter directories = new FileFilter() {
\r
181 public boolean accept(File pathname) {
\r
182 return pathname.isDirectory()
\r
183 && !pathname.getName().startsWith(".");
\r
187 static class JobDirectory {
\r
190 Map<String, File> files = new HashMap<String, File>();
\r
192 JobDirectory(File directory) {
\r
193 this.jobdir = directory;
\r
194 for (File f : jobdir.listFiles()) {
\r
195 files.put(f.getName(), f);
\r
199 boolean hasStatus(JobStatus status) {
\r
200 return files.containsKey(status.toString());
\r
203 boolean isCollected() {
\r
204 return hasStatus(JobStatus.COLLECTED);
\r
207 boolean isCancelled() {
\r
208 return hasStatus(JobStatus.CANCELLED);
\r
211 long getStartTime() {
\r
212 long starttime = UNDEFINED;
\r
213 File startfile = files.get(JobStatus.STARTED.toString());
\r
214 if (startfile == null) {
\r
215 startfile = files.get(JobStatus.SUBMITTED.toString());
\r
218 if (startfile != null) {
\r
219 String start = FileUtil.readFileToString(startfile);
\r
220 starttime = Long.parseLong(start.trim());
\r
222 } catch (IOException ignore) {
\r
224 "IOException while reading STARTED status file! Ignoring...",
\r
227 starttime = startfile.lastModified();
\r
228 } catch (NumberFormatException ignore) {
\r
230 "NumberFormatException while reading STARTED status file! Ignoring...",
\r
233 starttime = startfile.lastModified();
\r
239 String getClusterJobID() {
\r
240 String clustjobId = "";
\r
241 File jobid = files.get("JOBID");
\r
243 if (jobid != null) {
\r
244 clustjobId = FileUtil.readFileToString(jobid);
\r
246 } catch (IOException ioe) {
\r
248 "IO Exception while reading the content of JOBID file for job "
\r
251 return clustjobId.trim();
\r
254 long getFinishedTime() {
\r
255 long ftime = UNDEFINED;
\r
256 File finished = files.get(JobStatus.FINISHED.toString());
\r
257 if (finished != null) {
\r
259 if (finished != null) {
\r
260 String start = FileUtil.readFileToString(finished);
\r
261 ftime = Long.parseLong(start.trim());
\r
263 } catch (IOException ignore) {
\r
265 "IOException while reading FINISHED status file! Ignoring...",
\r
268 ftime = finished.lastModified();
\r
269 } catch (NumberFormatException ignore) {
\r
271 "NumberFormatException while reading FINISHED status file! Ignoring...",
\r
274 ftime = finished.lastModified();
\r
280 private Services getService() {
\r
281 return ServicesUtil.getServiceByJobDirectory(jobdir);
\r
284 long getResultSize() {
\r
285 Class<? extends Executable<?>> name = ServicesUtil
\r
286 .getRunnerByJobDirectory(jobdir);
\r
289 if (name.getSimpleName().equalsIgnoreCase("IUPred")) {
\r
290 f = files.get("out.glob");
\r
292 f = files.get("out.short");
\r
294 f = files.get("out.long");
\r
296 f = files.get(SkeletalExecutable.OUTPUT);
\r
304 long getInputSize() {
\r
305 Class<? extends Executable<?>> name = ServicesUtil
\r
306 .getRunnerByJobDirectory(jobdir);
\r
308 File input = files.get(SkeletalExecutable.INPUT);
\r
309 if (input != null) {
\r
310 return input.length();
\r
315 JobStat getJobStat() {
\r
316 return JobStat.newInstance(getService(), getClusterJobID(),
\r
317 jobdir.getName(), getStartTime(), getFinishedTime(),
\r
318 getInputSize(), getResultSize(), isCancelled(),
\r
323 public int hashCode() {
\r
324 final int prime = 31;
\r
326 result = prime * result
\r
327 + ((jobdir == null) ? 0 : jobdir.hashCode());
\r
332 public boolean equals(Object obj) {
\r
337 if (getClass() != obj.getClass())
\r
339 JobDirectory other = (JobDirectory) obj;
\r
340 if (jobdir == null) {
\r
341 if (other.jobdir != null)
\r
343 } else if (!jobdir.equals(other.jobdir))
\r
350 void collectStatistics() {
\r
351 File[] files = workDirectory.listFiles(directories);
\r
352 for (File file : files) {
\r
353 if (!InputFilter.accept(new File(file.getPath() + File.separator
\r
354 + SkeletalExecutable.INPUT))) {
\r
355 // skip work directory with test input
\r
358 JobDirectory jd = new JobDirectory(file);
\r
359 JobStat jstat = jd.getJobStat();
\r
360 // Do not record stats on the job that has not completed yet
\r
361 if (hasCompleted(jd)) {
\r
364 log.debug("Skipping the job: " + jstat);
\r
365 log.debug("As it has not completed yet");
\r
367 // System.out.println(jd.getJobStat().getJobReportTabulated());
\r
371 public void run() {
\r
372 log.info("Started updating statistics at " + new Date());
\r
373 log.info("For directory: " + workDirectory.getAbsolutePath());
\r
375 collectStatistics();
\r
377 StatProcessor local_stats = getStats();
\r
378 log.info("Found " + local_stats.getJobNumber() + " jobs!");
\r
381 } catch (SQLException e) {
\r
382 log.error("Fails to update jobs statistics database!");
\r
383 log.error(e.getLocalizedMessage(), e);
\r
385 log.info("Finished updating statistics at " + new Date());
\r