1 /* Copyright (c) 2013 Alexander Sherstnev
\r
2 * Copyright (c) 2011 Peter Troshin
\r
4 * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0
\r
6 * This library is free software; you can redistribute it and/or modify it under the terms of the
\r
7 * Apache License version 2 as published by the Apache Software Foundation
\r
9 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
\r
10 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache
\r
11 * License for more details.
\r
13 * A copy of the license is in apache_license.txt. It is also available here:
\r
14 * @see: http://www.apache.org/licenses/LICENSE-2.0.txt
\r
16 * Any republication or derived work distributed in source code form
\r
17 * must include this copyright and license notice.
\r
19 package compbio.stat.collector;
\r
21 import java.io.File;
\r
22 import java.io.FileFilter;
\r
23 import java.io.IOException;
\r
24 import java.sql.SQLException;
\r
25 import java.text.ParseException;
\r
26 import java.text.SimpleDateFormat;
\r
27 import java.util.ArrayList;
\r
28 import java.util.Date;
\r
29 import java.util.HashSet;
\r
30 import java.util.List;
\r
31 import java.util.Set;
\r
33 import com.beust.jcommander.JCommander;
\r
34 import com.beust.jcommander.Parameter;
\r
36 import org.apache.log4j.Logger;
\r
38 import compbio.engine.client.PathValidator;
\r
39 import compbio.engine.client.SkeletalExecutable;
\r
42 * Class assumptions: 1. Number of runs of each WS = number of folders with name
\r
43 * 2. Number of successful runs = all runs with no result file 3. Per period of
\r
44 * time = limit per file creating time 4. Runtime (avg/max) = finish time -
\r
45 * start time 5. Task & result size = result.size
\r
47 * Abandoned runs - not collected runs
\r
49 * Cancelled runs - cancelled
\r
51 * Cluster vs local runs
\r
53 * Reasons for failure = look in the err out?
\r
56 * Metadata required:
\r
58 * work directory for local and cluster tasks = from Helper or cmd parameter. WS
\r
59 * names - enumeration. Status file names and content.
\r
61 * @author Peter Troshin
\r
62 * @author Alexander Sherstnev
\r
66 class mainJCommander {
\r
68 private List<String> parameters = new ArrayList<String>();
\r
70 @Parameter(names = { "-log", "-verbose" }, description = "Level of verbosity")
\r
71 public Integer verbose = 1;
\r
73 @Parameter(names = "-start", description = "Start time")
\r
74 public String starttime;
\r
76 @Parameter(names = "-end", description = "Start time")
\r
77 public String endtime;
\r
79 @Parameter(names = "-db", description = "Path to database")
\r
80 public String dbname;
\r
82 @Parameter(names = "-dir", description = "Path to job directory")
\r
83 public String workingdir;
\r
86 public class ExecutionStatUpdater {
\r
87 static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");
\r
88 static SimpleDateFormat shortDF = new SimpleDateFormat("dd/MM/yyyy");
\r
89 private static final Logger log = Logger.getLogger(ExecutionStatUpdater.class);
\r
91 final private File workingDirectory;
\r
92 final private List<JobStat> stats;
\r
94 * Consider the job that has been working for longer than timeOutInHours
\r
95 * completed, whatever the outcome
\r
97 final private int timeOutInHours;
\r
100 * List subdirectories in the job directory
\r
102 * @param workingDirectory
\r
103 * @param timeOutInHours
\r
105 public ExecutionStatUpdater(String workingDirectory, int timeOutInHours) {
\r
106 log.info("Starting stat collector for directory: " + workingDirectory);
\r
107 log.info("Maximum allowed runtime(h): " + timeOutInHours);
\r
108 if (!PathValidator.isValidDirectory(workingDirectory)) {
\r
109 throw new IllegalArgumentException("workingDirectory '" + workingDirectory + "' does not exist!");
\r
111 this.workingDirectory = new File(workingDirectory);
\r
112 stats = new ArrayList<JobStat>();
\r
113 if (timeOutInHours <= 0) {
\r
114 throw new IllegalArgumentException("Timeout value must be greater than 0! Given value: " + timeOutInHours);
\r
116 this.timeOutInHours = timeOutInHours;
\r
119 boolean hasCompleted(JobDirectory jd) {
\r
120 JobStat jstat = jd.getJobStat();
\r
121 if (jstat.hasResult() || jstat.getIsCancelled() || jstat.getIsFinished() || hasTimedOut(jd)) {
\r
127 boolean hasTimedOut(JobDirectory jd) {
\r
128 return ((System.currentTimeMillis() - jd.jobdir.lastModified()) / (1000 * 60 * 60)) > timeOutInHours;
\r
132 * Make sure that collectStatistics methods was called prior to calling
\r
133 * this! TODO consider running collectStatistics from here on the first call
\r
135 StatProcessor getStats() {
\r
136 if (stats.isEmpty()) {
\r
137 log.info("Please make sure collectStatistics method was called prior to calling getStats()!");
\r
139 return new StatProcessor(stats);
\r
142 void writeStatToDB() throws SQLException {
\r
143 Set<JobStat> rjobs = new HashSet<JobStat>(stats);
\r
144 StatDB statdb = new StatDB();
\r
145 log.debug("Removing records that has already been recorded");
\r
146 statdb.removeRecordedJobs(rjobs);
\r
147 log.debug("New records left: " + rjobs.size());
\r
148 statdb.insertData(rjobs);
\r
153 * @throws ParseException
\r
155 public static void main(String[] args) throws IOException, SQLException, ParseException {
\r
156 mainJCommander jct = new mainJCommander();
\r
157 new JCommander(jct, args);
\r
158 String WorkingDir = jct.workingdir;
\r
159 String dbname = jct.dbname;
\r
160 Date ST = shortDF.parse(jct.starttime);
\r
161 Date ET = shortDF.parse(jct.endtime);
\r
162 long StartTime = 0;
\r
164 StartTime = ST.getTime();
\r
166 Date currDate = new Date();
\r
167 long EndTime = currDate.getTime();
\r
169 EndTime = ET.getTime();
\r
172 System.out.println("Start time: " + jct.starttime + " = " + StartTime);
\r
173 System.out.println("End time: " + jct.endtime + " = " + EndTime);
\r
175 ExecutionStatUpdater esu = new ExecutionStatUpdater(WorkingDir, 1);
\r
176 esu.collectStatistics(StartTime, EndTime);
\r
177 esu.writeStatToDB();
\r
180 static FileFilter directories = new FileFilter() {
\r
182 public boolean accept(File pathname) {
\r
183 return pathname.isDirectory() && !pathname.getName().startsWith(".");
\r
188 private void collectStatistics(long StartTime, long EndTime) {
\r
189 // clear stats array;
\r
191 File[] dirs = workingDirectory.listFiles(directories);
\r
192 for (File dir : dirs) {
\r
193 // skip work directory with test inputs and out of ordered time
\r
195 log.debug("check directory: " + dir.getName() + "...");
\r
196 if (InputFilter.accept(new File(dir.getPath() + File.separator + SkeletalExecutable.INPUT)) && StartTime < dir.lastModified()
\r
197 && dir.lastModified() < EndTime) {
\r
198 JobDirectory jd = new JobDirectory(dir);
\r
199 JobStat jstat = jd.getJobStat();
\r
200 // Do not record stats on the job that has not completed yet
\r
201 if (hasCompleted(jd)) {
\r
204 log.debug("Skipping the job: " + jstat + " as it has not completed yet");
\r
207 log.trace("training input: " + dir.getName() + File.separator + SkeletalExecutable.INPUT);
\r