1 /* Copyright (c) 2013 Alexander Sherstnev
\r
2 * Copyright (c) 2011 Peter Troshin
\r
4 * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0
\r
6 * This library is free software; you can redistribute it and/or modify it under the terms of the
\r
7 * Apache License version 2 as published by the Apache Software Foundation
\r
9 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
\r
10 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache
\r
11 * License for more details.
\r
13 * A copy of the license is in apache_license.txt. It is also available here:
\r
14 * @see: http://www.apache.org/licenses/LICENSE-2.0.txt
\r
16 * Any republication or derived work distributed in source code form
\r
17 * must include this copyright and license notice.
\r
19 package compbio.stat.collector;
\r
21 import java.io.File;
\r
22 import java.io.FileFilter;
\r
23 import java.io.IOException;
\r
24 import java.sql.SQLException;
\r
25 import java.text.SimpleDateFormat;
\r
26 import java.util.ArrayList;
\r
27 import java.util.Date;
\r
28 import java.util.HashSet;
\r
29 import java.util.List;
\r
30 import java.util.Set;
\r
32 import com.beust.jcommander.JCommander;
\r
33 import com.beust.jcommander.Parameter;
\r
35 import org.apache.log4j.Logger;
\r
37 import compbio.engine.client.PathValidator;
\r
38 import compbio.engine.client.SkeletalExecutable;
\r
39 import compbio.util.FileUtil;
\r
42 * Class assumptions: 1. Number of runs of each WS = number of folders with name
\r
43 * 2. Number of successful runs = all runs with no result file 3. Per period of
\r
44 * time = limit per file creating time 4. Runtime (avg/max) = finish time -
\r
45 * start time 5. Task & result size = result.size
\r
47 * Abandoned runs - not collected runs
\r
49 * Cancelled runs - cancelled
\r
51 * Cluster vs local runs
\r
53 * Reasons for failure = look in the err out?
\r
56 * Metadata required:
\r
58 * work directory for local and cluster tasks = from Helper or cmd parameter. WS
\r
59 * names - enumeration. Status file names and content.
\r
61 * @author Peter Troshin
\r
62 * @author Alexander Sherstnev
\r
66 class mainJCommander {
\r
68 private List<String> parameters = new ArrayList<String>();
\r
70 @Parameter(names = { "-log", "-verbose" }, description = "Level of verbosity")
\r
71 Integer verbose = 1;
\r
73 @Parameter(names = "-start", description = "Start time")
\r
76 @Parameter(names = "-end", description = "Start time")
\r
79 @Parameter(names = "-db", description = "Path to database")
\r
83 public class ExecutionStatUpdater {
\r
84 static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");
\r
85 private static final Logger log = Logger.getLogger(ExecutionStatUpdater.class);
\r
87 final private File workingDirectory;
\r
88 final private List<JobStat> stats;
\r
90 * Consider the job that has been working for longer than timeOutInHours
\r
91 * completed, whatever the outcome
\r
93 final private int timeOutInHours;
\r
96 * List subdirectories in the job directory
\r
98 * @param workingDirectory
\r
99 * @param timeOutInHours
\r
101 public ExecutionStatUpdater(String workingDirectory, int timeOutInHours) {
\r
102 log.info("Starting stat collector for directory: " + workingDirectory);
\r
103 log.info("Maximum allowed runtime(h): " + timeOutInHours);
\r
104 if (!PathValidator.isValidDirectory(workingDirectory)) {
\r
105 throw new IllegalArgumentException("workingDirectory '" + workingDirectory + "' does not exist!");
\r
107 this.workingDirectory = new File(workingDirectory);
\r
108 stats = new ArrayList<JobStat>();
\r
109 if (timeOutInHours <= 0) {
\r
110 throw new IllegalArgumentException("Timeout value must be greater than 0! Given value: " + timeOutInHours);
\r
112 this.timeOutInHours = timeOutInHours;
\r
115 boolean hasCompleted(JobDirectory jd) {
\r
116 JobStat jstat = jd.getJobStat();
\r
117 if (jstat.hasResult() || jstat.getIsCancelled() || jstat.getIsFinished() || hasTimedOut(jd)) {
\r
123 boolean hasTimedOut(JobDirectory jd) {
\r
124 return ((System.currentTimeMillis() - jd.jobdir.lastModified()) / (1000 * 60 * 60)) > timeOutInHours;
\r
128 * Make sure that collectStatistics methods was called prior to calling
\r
129 * this! TODO consider running collectStatistics from here on the first call
\r
131 StatProcessor getStats() {
\r
132 if (stats.isEmpty()) {
\r
133 log.info("Please make sure collectStatistics method was called prior to calling getStats()!");
\r
135 return new StatProcessor(stats);
\r
138 void writeStatToDB() throws SQLException {
\r
139 Set<JobStat> rjobs = new HashSet<JobStat>(stats);
\r
140 StatDB statdb = new StatDB();
\r
141 log.debug("Removing records that has already been recorded");
\r
142 statdb.removeRecordedJobs(rjobs);
\r
143 log.debug("New records left: " + rjobs.size());
\r
144 statdb.insertData(rjobs);
\r
150 public static void main(String[] args) throws IOException, SQLException {
\r
151 mainJCommander jct = new mainJCommander();
\r
152 new JCommander(jct, args);
\r
153 String WorkingDir = "jobout";
\r
154 String StartTime = jct.starttime;
\r
155 String EndTime = jct.endtime;
\r
156 String dbname = jct.dbname;
\r
157 long StartTimeLong = 0;
\r
158 long EndTimeLong = 0;
\r
160 ExecutionStatUpdater esu = new ExecutionStatUpdater(WorkingDir, 1);
\r
161 esu.collectStatistics(StartTimeLong, EndTimeLong);
\r
162 esu.writeStatToDB();
\r
165 static FileFilter directories = new FileFilter() {
\r
167 public boolean accept(File pathname) {
\r
168 return pathname.isDirectory() && !pathname.getName().startsWith(".");
\r
173 private void collectStatistics(long StartTime, long EndTime) {
\r
174 // clear stats array;
\r
176 File[] dirs = workingDirectory.listFiles(directories);
\r
177 for (File dir : dirs) {
\r
178 // skip work directory with test inputs and out of ordered time
\r
180 log.debug("check directory: " + dir.getName() + "...");
\r
181 if (InputFilter.accept(new File(dir.getPath() + File.separator + SkeletalExecutable.INPUT)) && StartTime < dir.lastModified()
\r
182 && dir.lastModified() < EndTime) {
\r
183 JobDirectory jd = new JobDirectory(dir);
\r
184 JobStat jstat = jd.getJobStat();
\r
185 // Do not record stats on the job that has not completed yet
\r
186 if (hasCompleted(jd)) {
\r
189 log.debug("Skipping the job: " + jstat + " as it has not completed yet");
\r
192 log.trace("training input: " + dir.getName() + File.separator + SkeletalExecutable.INPUT);
\r