1 /* Copyright (c) 2011 Peter Troshin
\r
3 * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0
\r
5 * This library is free software; you can redistribute it and/or modify it under the terms of the
\r
6 * Apache License version 2 as published by the Apache Software Foundation
\r
8 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
\r
9 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache
\r
10 * License for more details.
\r
12 * A copy of the license is in apache_license.txt. It is also available here:
\r
13 * @see: http://www.apache.org/licenses/LICENSE-2.0.txt
\r
15 * Any republication or derived work distributed in source code form
\r
16 * must include this copyright and license notice.
\r
18 package compbio.stat.collector;
\r
20 import java.io.File;
\r
21 import java.io.FileFilter;
\r
22 import java.io.IOException;
\r
23 import java.sql.SQLException;
\r
24 import java.text.SimpleDateFormat;
\r
25 import java.util.ArrayList;
\r
26 import java.util.Date;
\r
27 import java.util.HashMap;
\r
28 import java.util.HashSet;
\r
29 import java.util.List;
\r
30 import java.util.Map;
\r
31 import java.util.Set;
\r
33 import org.apache.log4j.Logger;
\r
35 import compbio.engine.client.Executable;
\r
36 import compbio.engine.client.PathValidator;
\r
37 import compbio.engine.client.SkeletalExecutable;
\r
38 import compbio.metadata.JobStatus;
\r
39 import compbio.util.FileUtil;
\r
40 import compbio.ws.client.Services;
\r
41 import compbio.ws.client.ServicesUtil;
\r
/**
 * Collects job execution statistics from the job output directories.
 *
 * Number of runs of each WS = number of folders with name
 *
 * Number of successful runs = all runs with no result file
 *
 * Per period of time = limit per file creating time Runtime (avg/max) =
 * started time - finished time
 *
 * Task &amp; result size = result.size
 *
 * Abandoned runs - not collected runs
 *
 * Cancelled runs - cancelled
 *
 * Cluster vs local runs
 *
 * Reasons for failure = look in the err out?
 *
 * Metadata required:
 *
 * work directory for local and cluster tasks = from Helper or cmd parameter. WS
 * names - enumeration. Status file names and content.
 */
\r
71 public class ExecutionStatCollector implements Runnable {
\r
73 static final int UNDEFINED = -1;
\r
75 private static final Logger log = Logger.getLogger(ExecutionStatCollector.class);
\r
77 static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");
\r
79 final private File workDirectory;
\r
80 final private List<JobStat> stats;
\r
82 * Consider the job that has been working for longer than timeOutInHours
\r
83 * completed, whatever the outcome
\r
85 final private int timeOutInHours;
\r
/**
 * List subdirectories in the job directory.
 *
 * @param workDirectory
 *            the directory holding one subdirectory per executed job
 * @param timeOutInHours
 *            a job running for longer than this many hours is considered
 *            completed, whatever the outcome; must be positive
 * @throws IllegalArgumentException
 *             if workDirectory does not exist or timeOutInHours is not
 *             greater than zero
 */
public ExecutionStatCollector(String workDirectory, int timeOutInHours) {
	log.info("Starting stat collector for directory: " + workDirectory);
	log.info("Maximum allowed runtime(h): " + timeOutInHours);
	// Validate all arguments before assigning any state (the original
	// checked the timeout only after fields had been initialised).
	if (!PathValidator.isValidDirectory(workDirectory)) {
		throw new IllegalArgumentException("workDirectory '" + workDirectory + "' does not exist!");
	}
	if (timeOutInHours <= 0) {
		throw new IllegalArgumentException(
				"Timeout value must be greater than 0! Given value: " + timeOutInHours);
	}
	this.workDirectory = new File(workDirectory);
	this.stats = new ArrayList<JobStat>();
	this.timeOutInHours = timeOutInHours;
}
\r
108 boolean hasCompleted(JobDirectory jd) {
\r
109 JobStat jstat = jd.getJobStat();
\r
110 if (jstat.hasResult() || jstat.getIsCancelled()
\r
111 || jstat.getIsFinished() || hasTimedOut(jd)) {
\r
117 boolean hasTimedOut(JobDirectory jd) {
\r
118 return ((System.currentTimeMillis() - jd.jobdir.lastModified()) / (1000 * 60 * 60)) > timeOutInHours;
\r
122 * Make sure that collectStatistics methods was called prior to calling
\r
123 * this! TODO consider running collectStatistics from here on the first call
\r
125 StatProcessor getStats() {
\r
126 if (stats.isEmpty()) {
\r
127 log.info("Please make sure collectStatistics method was called prior to calling getStats()!");
\r
129 return new StatProcessor(stats);
\r
132 void writeStatToDB() throws SQLException {
\r
133 Set<JobStat> rjobs = new HashSet<JobStat>(stats);
\r
134 StatDB statdb = new StatDB();
\r
135 log.debug("Removing records that has already been recorded");
\r
137 statdb.removeRecordedJobs(rjobs);
\r
138 log.debug("New records left: " + rjobs.size());
\r
139 statdb.insertData(rjobs);
\r
143 * static void updateTime(File statFile) throws IOException { long lastMod =
\r
144 * statFile.lastModified(); FileWriter fw = new FileWriter(statFile);
\r
145 * fw.write(new Long(lastMod).toString()); fw.close(); }
\r
151 public static void main(String[] args) throws IOException, SQLException {
\r
153 // updateTime(new File(
\r
154 // "D:\\workspace\\JABA2\\jobsout\\AACon#170462904473672\\STARTED"));
\r
156 File[] files = FileUtil.getFiles("Y:\\fc\\www-jws2\\jaba\\jobsout",
\r
158 List<JobStat> stats = new ArrayList<JobStat>();
\r
159 for (File file : files) {
\r
160 JobDirectory jd = new JobDirectory(file);
\r
161 stats.add(jd.getJobStat());
\r
162 // System.out.println(jd.getJobStat().getJobReportTabulated());
\r
164 StatProcessor sp = new StatProcessor(stats);
\r
165 System.out.println(sp.reportStat());
\r
166 System.out.println();
\r
167 System.out.println("!!!!!!!!!!!!!!!!!!");
\r
168 System.out.println();
\r
170 Set<JobStat> rjobs = new HashSet<JobStat>(sp.stats);
\r
171 StatDB statdb = new StatDB();
\r
172 statdb.removeRecordedJobs(rjobs);
\r
173 statdb.insertData(rjobs);
\r
176 static FileFilter directories = new FileFilter() {
\r
178 public boolean accept(File pathname) {
\r
179 return pathname.isDirectory()
\r
180 && !pathname.getName().startsWith(".");
\r
184 static class JobDirectory {
\r
187 Map<String, File> files = new HashMap<String, File>();
\r
189 JobDirectory(File directory) {
\r
190 this.jobdir = directory;
\r
191 for (File f : jobdir.listFiles()) {
\r
192 files.put(f.getName(), f);
\r
196 boolean hasStatus(JobStatus status) {
\r
197 return files.containsKey(status.toString());
\r
200 boolean isCollected() {
\r
201 return hasStatus(JobStatus.COLLECTED);
\r
204 boolean isCancelled() {
\r
205 return hasStatus(JobStatus.CANCELLED);
\r
208 long getStartTime() {
\r
209 long starttime = UNDEFINED;
\r
210 File startfile = files.get(JobStatus.STARTED.toString());
\r
211 if (startfile == null) {
\r
212 startfile = files.get(JobStatus.SUBMITTED.toString());
\r
215 if (startfile != null) {
\r
216 String start = FileUtil.readFileToString(startfile);
\r
217 starttime = Long.parseLong(start.trim());
\r
219 } catch (IOException ignore) {
\r
221 "IOException while reading STARTED status file! Ignoring...",
\r
224 starttime = startfile.lastModified();
\r
225 } catch (NumberFormatException ignore) {
\r
227 "NumberFormatException while reading STARTED status file! Ignoring...",
\r
230 starttime = startfile.lastModified();
\r
236 String getClusterJobID() {
\r
237 String clustjobId = "";
\r
238 File jobid = files.get("JOBID");
\r
240 if (jobid != null) {
\r
241 clustjobId = FileUtil.readFileToString(jobid);
\r
243 } catch (IOException ioe) {
\r
245 "IO Exception while reading the content of JOBID file for job "
\r
248 return clustjobId.trim();
\r
251 long getFinishedTime() {
\r
252 long ftime = UNDEFINED;
\r
253 File finished = files.get(JobStatus.FINISHED.toString());
\r
254 if (finished != null) {
\r
256 if (finished != null) {
\r
257 String start = FileUtil.readFileToString(finished);
\r
258 ftime = Long.parseLong(start.trim());
\r
260 } catch (IOException ignore) {
\r
262 "IOException while reading FINISHED status file! Ignoring...",
\r
265 ftime = finished.lastModified();
\r
266 } catch (NumberFormatException ignore) {
\r
268 "NumberFormatException while reading FINISHED status file! Ignoring...",
\r
271 ftime = finished.lastModified();
\r
277 private Services getService() {
\r
278 return ServicesUtil.getServiceByJobDirectory(jobdir);
\r
281 long getResultSize() {
\r
282 Class<? extends Executable<?>> name = ServicesUtil
\r
283 .getRunnerByJobDirectory(jobdir);
\r
286 if (name.getSimpleName().equalsIgnoreCase("IUPred")) {
\r
287 f = files.get("out.glob");
\r
289 f = files.get("out.short");
\r
291 f = files.get("out.long");
\r
293 f = files.get(SkeletalExecutable.OUTPUT);
\r
301 long getInputSize() {
\r
302 Class<? extends Executable<?>> name = ServicesUtil
\r
303 .getRunnerByJobDirectory(jobdir);
\r
305 File input = files.get(SkeletalExecutable.INPUT);
\r
306 if (input != null) {
\r
307 return input.length();
\r
312 JobStat getJobStat() {
\r
313 return JobStat.newInstance(getService(), getClusterJobID(),
\r
314 jobdir.getName(), getStartTime(), getFinishedTime(),
\r
315 getInputSize(), getResultSize(), isCancelled(),
\r
320 public int hashCode() {
\r
321 final int prime = 31;
\r
323 result = prime * result
\r
324 + ((jobdir == null) ? 0 : jobdir.hashCode());
\r
329 public boolean equals(Object obj) {
\r
334 if (getClass() != obj.getClass())
\r
336 JobDirectory other = (JobDirectory) obj;
\r
337 if (jobdir == null) {
\r
338 if (other.jobdir != null)
\r
340 } else if (!jobdir.equals(other.jobdir))
\r
347 void collectStatistics() {
\r
348 // clear stats array;
\r
350 File[] files = workDirectory.listFiles(directories);
\r
351 for (File file : files) {
\r
352 // skip work directory with test input
\r
353 if (InputFilter.accept(new File(file.getPath() + File.separator + SkeletalExecutable.INPUT))) {
\r
354 JobDirectory jd = new JobDirectory(file);
\r
355 JobStat jstat = jd.getJobStat();
\r
356 // Do not record stats on the job that has not completed yet
\r
357 if (hasCompleted(jd)) {
\r
359 System.out.println("added: id = " + jd);
\r
361 log.debug("Skipping the job: " + jstat);
\r
362 log.debug("As it has not completed yet");
\r
364 // System.out.println(jd.getJobStat().getJobReportTabulated());
\r
366 log.trace("training input: " + file.getPath() + File.separator + SkeletalExecutable.INPUT);
\r
371 public void run() {
\r
372 log.info("Started updating statistics at " + new Date());
\r
373 log.info("For directory: " + workDirectory.getAbsolutePath());
\r
375 collectStatistics();
\r
377 StatProcessor local_stats = getStats();
\r
378 log.info("Found " + local_stats.getJobNumber() + " jobs!");
\r
381 } catch (SQLException e) {
\r
382 log.error("Fails to update jobs statistics database!");
\r
383 log.error(e.getLocalizedMessage(), e);
\r
385 log.info("Finished updating statistics at " + new Date());
\r