--- /dev/null
+/* Copyright (c) 2013 Alexander Sherstnev\r
+ * Copyright (c) 2011 Peter Troshin\r
+ * \r
+ * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0 \r
+ * \r
+ * This library is free software; you can redistribute it and/or modify it under the terms of the\r
+ * Apache License version 2 as published by the Apache Software Foundation\r
+ * \r
+ * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without\r
+ * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache \r
+ * License for more details.\r
+ * \r
+ * A copy of the license is in apache_license.txt. It is also available here:\r
+ * @see: http://www.apache.org/licenses/LICENSE-2.0.txt\r
+ * \r
+ * Any republication or derived work distributed in source code form\r
+ * must include this copyright and license notice.\r
+ */\r
+package compbio.stat.collector;\r
+\r
+import java.io.File;\r
+import java.io.FileFilter;\r
+import java.io.IOException;\r
+import java.sql.SQLException;\r
+import java.text.SimpleDateFormat;\r
+import java.util.ArrayList;\r
+import java.util.Date;\r
+import java.util.HashSet;\r
+import java.util.List;\r
+import java.util.Set;\r
+\r
+import com.beust.jcommander.JCommander;\r
+import com.beust.jcommander.Parameter;\r
+\r
+import org.apache.log4j.Logger;\r
+\r
+import compbio.engine.client.PathValidator;\r
+import compbio.engine.client.SkeletalExecutable;\r
+import compbio.util.FileUtil;\r
+\r
+/**\r
+ * Class assumptions:\r
+ * \r
+ * 1. Number of runs of each WS = number of folders with name\r
+ * \r
+ * 2. Number of successful runs = all runs with no result file\r
+ * \r
+ * 3. Per period of time = limit per file creating time\r
+ * \r
+ * 4. Runtime (avg/max) = finish time - start time\r
+ * \r
+ * 5. Task and result size = result.size\r
+ * \r
+ * Abandoned runs - runs that were not collected\r
+ * \r
+ * Cancelled runs - cancelled\r
+ * \r
+ * Cluster vs local runs\r
+ * \r
+ * Reasons for failure = look in the error output?\r
+ * \r
+ * \r
+ * Metadata required:\r
+ * \r
+ * Work directory for local and cluster tasks = from Helper or cmd parameter.\r
+ * WS names - enumeration. Status file names and content.\r
+ * \r
+ * @author Peter Troshin\r
+ * @author Alexander Sherstnev\r
+ * \r
+ */\r
+\r
+/**\r
+ * Holder of the command line options of the statistics updater. An instance\r
+ * is handed to JCommander together with the raw arguments, and JCommander\r
+ * fills the annotated fields.\r
+ */\r
+class mainJCommander {\r
+    /** Arguments that are not attached to any named option. */\r
+    @Parameter\r
+    private List<String> parameters = new ArrayList<String>();\r
+\r
+    /** Verbosity level; defaults to 1 when the option is not given. */\r
+    @Parameter(names = { "-log", "-verbose" }, description = "Level of verbosity")\r
+    Integer verbose = 1;\r
+\r
+    /** Raw value of the -start option; null when the option is not given. */\r
+    @Parameter(names = "-start", description = "Start time")\r
+    String starttime;\r
+\r
+    /** Raw value of the -end option; null when the option is not given. */\r
+    // The description used to read "Start time" (copy-paste from -start).\r
+    @Parameter(names = "-end", description = "End time")\r
+    String endtime;\r
+\r
+    /** Raw value of the -db option; null when the option is not given. */\r
+    @Parameter(names = "-db", description = "Path to database")\r
+    String dbname;\r
+}\r
+\r
+/**\r
+ * Collects statistics of the jobs found in the subdirectories of a working\r
+ * directory and stores the statistics of the completed jobs via StatDB.\r
+ */\r
+public class ExecutionStatUpdater {\r
+    /**\r
+     * Date/time format shared within the package. The hour uses HH (0-23):\r
+     * the former hh (1-12) pattern had no AM/PM marker, which made afternoon\r
+     * times ambiguous and parsed "12:xx" as "00:xx".\r
+     * \r
+     * NOTE: SimpleDateFormat is not thread-safe; do not share this instance\r
+     * between threads without synchronization.\r
+     */\r
+    static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss");\r
+    private static final Logger log = Logger.getLogger(ExecutionStatUpdater.class);\r
+\r
+    /** Number of milliseconds in one hour. */\r
+    private static final long MILLIS_PER_HOUR = 60L * 60L * 1000L;\r
+\r
+    final private File workingDirectory;\r
+    final private List<JobStat> stats;\r
+    /**\r
+     * Consider the job that has been working for longer than timeOutInHours\r
+     * completed, whatever the outcome\r
+     */\r
+    final private int timeOutInHours;\r
+\r
+    /**\r
+     * Creates an updater for the given job directory.\r
+     * \r
+     * @param workingDirectory\r
+     *            path to the directory whose subdirectories are examined\r
+     * @param timeOutInHours\r
+     *            runtime in hours after which a job is regarded as completed;\r
+     *            must be greater than 0\r
+     * @throws IllegalArgumentException\r
+     *             if the directory is rejected by PathValidator or the\r
+     *             timeout is not positive\r
+     */\r
+    public ExecutionStatUpdater(String workingDirectory, int timeOutInHours) {\r
+        log.info("Starting stat collector for directory: " + workingDirectory);\r
+        log.info("Maximum allowed runtime(h): " + timeOutInHours);\r
+        if (!PathValidator.isValidDirectory(workingDirectory)) {\r
+            throw new IllegalArgumentException("workingDirectory '" + workingDirectory + "' does not exist!");\r
+        }\r
+        this.workingDirectory = new File(workingDirectory);\r
+        stats = new ArrayList<JobStat>();\r
+        if (timeOutInHours <= 0) {\r
+            throw new IllegalArgumentException("Timeout value must be greater than 0! Given value: " + timeOutInHours);\r
+        }\r
+        this.timeOutInHours = timeOutInHours;\r
+    }\r
+\r
+    /**\r
+     * Tells whether statistics of the job can be recorded.\r
+     * \r
+     * @param jd\r
+     *            the job directory to examine\r
+     * @return true if the job has a result, was cancelled, has finished or\r
+     *         has timed out\r
+     */\r
+    boolean hasCompleted(JobDirectory jd) {\r
+        JobStat jstat = jd.getJobStat();\r
+        return jstat.hasResult() || jstat.getIsCancelled() || jstat.getIsFinished() || hasTimedOut(jd);\r
+    }\r
+\r
+    /**\r
+     * Tells whether the job directory was last modified more than\r
+     * timeOutInHours hours ago.\r
+     * \r
+     * The comparison is done in milliseconds. The previous implementation\r
+     * truncated the elapsed time to whole hours before comparing with ">",\r
+     * so a timeout of N hours fired only after N + 1 hours.\r
+     * \r
+     * @param jd\r
+     *            the job directory to examine\r
+     * @return true if the time since the last modification exceeds the\r
+     *         timeout\r
+     */\r
+    boolean hasTimedOut(JobDirectory jd) {\r
+        long elapsedMillis = System.currentTimeMillis() - jd.jobdir.lastModified();\r
+        return elapsedMillis > timeOutInHours * MILLIS_PER_HOUR;\r
+    }\r
+\r
+    /*\r
+     * Make sure that collectStatistics methods was called prior to calling\r
+     * this! TODO consider running collectStatistics from here on the first call\r
+     */\r
+    StatProcessor getStats() {\r
+        if (stats.isEmpty()) {\r
+            log.info("Please make sure collectStatistics method was called prior to calling getStats()!");\r
+        }\r
+        return new StatProcessor(stats);\r
+    }\r
+\r
+    /**\r
+     * Stores the collected statistics: duplicates are dropped by copying the\r
+     * list into a set, StatDB removes the jobs it has already recorded from\r
+     * that set, and the remaining jobs are inserted.\r
+     * \r
+     * @throws SQLException\r
+     *             propagated from StatDB\r
+     */\r
+    void writeStatToDB() throws SQLException {\r
+        Set<JobStat> rjobs = new HashSet<JobStat>(stats);\r
+        StatDB statdb = new StatDB();\r
+        log.debug("Removing records that has already been recorded");\r
+        statdb.removeRecordedJobs(rjobs);\r
+        log.debug("New records left: " + rjobs.size());\r
+        statdb.insertData(rjobs);\r
+    }\r
+\r
+    /**\r
+     * Converts the value of a time option into milliseconds since the epoch.\r
+     * \r
+     * @param value\r
+     *            raw option value, may be null\r
+     * @param optionName\r
+     *            option name used in the error message\r
+     * @param defaultValue\r
+     *            returned when the option is absent or blank\r
+     * @return the parsed time, or defaultValue\r
+     * @throws IllegalArgumentException\r
+     *             if the value does not match the pattern of DF\r
+     */\r
+    private static long parseTime(String value, String optionName, long defaultValue) {\r
+        if (value == null || value.trim().isEmpty()) {\r
+            return defaultValue;\r
+        }\r
+        // A private strict parser: the shared DF instance is not thread-safe\r
+        // and lenient parsing would silently accept values like 32/13/2013.\r
+        SimpleDateFormat parser = new SimpleDateFormat(DF.toPattern());\r
+        parser.setLenient(false);\r
+        try {\r
+            Date parsed = parser.parse(value.trim());\r
+            return parsed.getTime();\r
+        } catch (java.text.ParseException e) {\r
+            throw new IllegalArgumentException("Cannot parse " + optionName + " value '" + value\r
+                    + "', expected format: " + DF.toPattern(), e);\r
+        }\r
+    }\r
+\r
+    /**\r
+     * Command line entry point: collects the statistics of the jobs in the\r
+     * "jobout" directory and writes them to the database.\r
+     * \r
+     * The -start and -end options are parsed with the pattern of DF. A\r
+     * missing -start means no lower bound and a missing -end means no upper\r
+     * bound. Previously both bounds were hard-coded to 0, which excluded\r
+     * every job directory, so nothing was ever recorded.\r
+     * \r
+     * @param args\r
+     *            command line arguments, see mainJCommander\r
+     * @throws IllegalArgumentException\r
+     *             if -start or -end cannot be parsed\r
+     */\r
+    public static void main(String[] args) throws IOException, SQLException {\r
+        mainJCommander jct = new mainJCommander();\r
+        new JCommander(jct, args);\r
+        String workingDir = "jobout";\r
+        long startTime = parseTime(jct.starttime, "-start", 0L);\r
+        long endTime = parseTime(jct.endtime, "-end", Long.MAX_VALUE);\r
+        if (jct.dbname != null) {\r
+            // StatDB is created with its no-argument constructor below, so\r
+            // the value cannot be passed on from here.\r
+            log.warn("Option -db is currently ignored, given value: " + jct.dbname);\r
+        }\r
+\r
+        ExecutionStatUpdater esu = new ExecutionStatUpdater(workingDir, 1);\r
+        esu.collectStatistics(startTime, endTime);\r
+        esu.writeStatToDB();\r
+    }\r
+\r
+    /** Accepts directories whose name does not start with a dot. */\r
+    static FileFilter directories = new FileFilter() {\r
+        @Override\r
+        public boolean accept(File pathname) {\r
+            return pathname.isDirectory() && !pathname.getName().startsWith(".");\r
+        }\r
+    };\r
+\r
+    /**\r
+     * Rebuilds the list of statistics from the subdirectories of the working\r
+     * directory. A directory contributes if its input file is accepted by\r
+     * InputFilter, its last modification time lies strictly between the two\r
+     * bounds, and its job has completed.\r
+     * \r
+     * @param StartTime\r
+     *            exclusive lower bound, milliseconds since the epoch\r
+     * @param EndTime\r
+     *            exclusive upper bound, milliseconds since the epoch\r
+     */\r
+    // TODO test!\r
+    private void collectStatistics(long StartTime, long EndTime) {\r
+        // clear stats array;\r
+        stats.clear();\r
+        File[] dirs = workingDirectory.listFiles(directories);\r
+        if (dirs == null) {\r
+            // listFiles returns null on an I/O error or when the directory\r
+            // has disappeared since the constructor validated it\r
+            log.warn("Cannot list the content of the working directory: " + workingDirectory);\r
+            return;\r
+        }\r
+        for (File dir : dirs) {\r
+            log.debug("check directory: " + dir.getName() + "...");\r
+            // skip work directory with test inputs\r
+            // NOTE(review): presumably InputFilter rejects the training/test\r
+            // inputs - confirm against InputFilter\r
+            if (!InputFilter.accept(new File(dir.getPath() + File.separator + SkeletalExecutable.INPUT))) {\r
+                log.trace("training input: " + dir.getName() + File.separator + SkeletalExecutable.INPUT);\r
+                continue;\r
+            }\r
+            // skip work directory out of the ordered time range\r
+            long modified = dir.lastModified();\r
+            if (modified <= StartTime || modified >= EndTime) {\r
+                log.trace("out of the time range: " + dir.getName());\r
+                continue;\r
+            }\r
+            JobDirectory jd = new JobDirectory(dir);\r
+            JobStat jstat = jd.getJobStat();\r
+            // Do not record stats on the job that has not completed yet\r
+            if (hasCompleted(jd)) {\r
+                stats.add(jstat);\r
+            } else {\r
+                log.debug("Skipping the job: " + jstat + " as it has not completed yet");\r
+            }\r
+        }\r
+    }\r
+\r
+}\r