From: Sasha Sherstnev Date: Mon, 16 Jun 2014 06:50:05 +0000 (+0100) Subject: JWS-96 initial code for the stat crawler X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=6896ff23dcbc23498894f7e16d8d91a6b61e0a7d;p=jabaws.git JWS-96 initial code for the stat crawler --- diff --git a/.classpath b/.classpath index 46b788d..794a7ca 100644 --- a/.classpath +++ b/.classpath @@ -17,5 +17,6 @@ + diff --git a/lib/jcommander-1.30.jar b/lib/jcommander-1.30.jar new file mode 100644 index 0000000..ec6c420 Binary files /dev/null and b/lib/jcommander-1.30.jar differ diff --git a/webservices/compbio/stat/collector/ExecutionStatUpdater.java b/webservices/compbio/stat/collector/ExecutionStatUpdater.java new file mode 100644 index 0000000..8e9a6e4 --- /dev/null +++ b/webservices/compbio/stat/collector/ExecutionStatUpdater.java @@ -0,0 +1,197 @@ +/* Copyright (c) 2013 Alexander Sherstnev + * Copyright (c) 2011 Peter Troshin + * + * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0 + * + * This library is free software; you can redistribute it and/or modify it under the terms of the + * Apache License version 2 as published by the Apache Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without + * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache + * License for more details. + * + * A copy of the license is in apache_license.txt. It is also available here: + * @see: http://www.apache.org/licenses/LICENSE-2.0.txt + * + * Any republication or derived work distributed in source code form + * must include this copyright and license notice. 
+ */ +package compbio.stat.collector; + +import java.io.File; +import java.io.FileFilter; +import java.io.IOException; +import java.sql.SQLException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; + +import org.apache.log4j.Logger; + +import compbio.engine.client.PathValidator; +import compbio.engine.client.SkeletalExecutable; +import compbio.util.FileUtil; + +/** + * Class assumptions: 1. Number of runs of each WS = number of folders with name + * 2. Number of successful runs = all runs with no result file 3. Per period of + * time = limit per file creating time 4. Runtime (avg/max) = finish time - + * start time 5. Task & result size = result.size + * + * Abandoned runs - not collected runs + * + * Cancelled runs - cancelled + * + * Cluster vs local runs + * + * Reasons for failure = look in the err out? + * + * + * Metadata required: + * + * work directory for local and cluster tasks = from Helper or cmd parameter. WS + * names - enumeration. Status file names and content. 
+ *
+ * @author Peter Troshin
+ * @author Alexander Sherstnev
+ *
+ */
+
+class mainJCommander {
+    // Holder of the command-line options; JCommander fills the annotated fields.
+
+    /** Arguments that are not bound to any named option. */
+    @Parameter
+    private List<String> parameters = new ArrayList<String>();
+
+    @Parameter(names = { "-log", "-verbose" }, description = "Level of verbosity")
+    Integer verbose = 1;
+
+    /** Lower bound of the time window, in the format of ExecutionStatUpdater.DF. */
+    @Parameter(names = "-start", description = "Start time")
+    String starttime;
+
+    /** Upper bound of the time window, in the format of ExecutionStatUpdater.DF. */
+    @Parameter(names = "-end", description = "End time")
+    String endtime;
+
+    @Parameter(names = "-db", description = "Path to database")
+    String dbname;
+}
+
+/**
+ * Command-line stat crawler: scans the sub-directories of a job working
+ * directory, keeps the statistics of the completed jobs whose directory was
+ * modified inside a time window and stores the records that are not yet in the
+ * statistics database.
+ */
+public class ExecutionStatUpdater {
+    /**
+     * Format of the -start and -end command-line values. HH is the 24-hour
+     * field; the 12-hour hh used without an am/pm marker cannot tell morning
+     * from afternoon.
+     */
+    static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss");
+    private static final Logger log = Logger.getLogger(ExecutionStatUpdater.class);
+
+    final private File workingDirectory;
+    final private List<JobStat> stats;
+    /**
+     * Consider the job that has been working for longer than timeOutInHours
+     * completed, whatever the outcome
+     */
+    final private int timeOutInHours;
+
+    /**
+     * Creates a crawler for the given job directory.
+     *
+     * @param workingDirectory
+     *            directory whose sub-directories are examined
+     * @param timeOutInHours
+     *            age in hours after which a job is treated as completed, must
+     *            be greater than 0
+     * @throws IllegalArgumentException
+     *             if the directory is not valid or the timeout is not positive
+     */
+    public ExecutionStatUpdater(String workingDirectory, int timeOutInHours) {
+        log.info("Starting stat collector for directory: " + workingDirectory);
+        log.info("Maximum allowed runtime(h): " + timeOutInHours);
+        if (!PathValidator.isValidDirectory(workingDirectory)) {
+            throw new IllegalArgumentException("workingDirectory '" + workingDirectory + "' does not exist!");
+        }
+        this.workingDirectory = new File(workingDirectory);
+        stats = new ArrayList<JobStat>();
+        if (timeOutInHours <= 0) {
+            throw new IllegalArgumentException("Timeout value must be greater than 0! Given value: " + timeOutInHours);
+        }
+        this.timeOutInHours = timeOutInHours;
+    }
+
+    /**
+     * @return true if the job has a result, was cancelled, is marked as
+     *         finished or has timed out
+     */
+    boolean hasCompleted(JobDirectory jd) {
+        JobStat jstat = jd.getJobStat();
+        return jstat.hasResult() || jstat.getIsCancelled() || jstat.getIsFinished() || hasTimedOut(jd);
+    }
+
+    /**
+     * @return true if the number of whole hours elapsed since the job
+     *         directory was last modified exceeds timeOutInHours
+     */
+    boolean hasTimedOut(JobDirectory jd) {
+        return ((System.currentTimeMillis() - jd.jobdir.lastModified()) / (1000 * 60 * 60)) > timeOutInHours;
+    }
+
+    /*
+     * Make sure that collectStatistics methods was called prior to calling
+     * this! TODO consider running collectStatistics from here on the first call
+     */
+    StatProcessor getStats() {
+        if (stats.isEmpty()) {
+            log.info("Please make sure collectStatistics method was called prior to calling getStats()!");
+        }
+        return new StatProcessor(stats);
+    }
+
+    /**
+     * Stores the collected records in the default statistics database.
+     */
+    void writeStatToDB() throws SQLException {
+        writeStatToDB(null);
+    }
+
+    /**
+     * Stores the collected records that the database does not contain yet.
+     *
+     * @param dbname
+     *            database to write to, null selects the default database
+     */
+    private void writeStatToDB(String dbname) throws SQLException {
+        Set<JobStat> rjobs = new HashSet<JobStat>(stats);
+        StatDB statdb = (dbname == null) ? new StatDB() : new StatDB(dbname);
+        log.debug("Removing records that has already been recorded");
+        statdb.removeRecordedJobs(rjobs);
+        log.debug("New records left: " + rjobs.size());
+        statdb.insertData(rjobs);
+    }
+
+    /**
+     * Converts a -start/-end command-line value into epoch milliseconds.
+     *
+     * @param value
+     *            text in the format of DF, may be null or blank
+     * @param fallback
+     *            result for a null or blank value
+     * @throws IllegalArgumentException
+     *             if the value does not match the format of DF
+     */
+    private static long parseTime(String value, long fallback) {
+        if (value == null || value.trim().length() == 0) {
+            return fallback;
+        }
+        try {
+            return DF.parse(value.trim()).getTime();
+        } catch (java.text.ParseException e) {
+            throw new IllegalArgumentException("Cannot parse time '" + value + "', expected format: " + DF.toPattern(), e);
+        }
+    }
+
+    /**
+     * Entry point of the crawler. Collects the jobs of the "jobout" directory
+     * that were modified between -start and -end and writes them to the
+     * database given with -db. A missing -start or -end leaves that side of
+     * the time window open; a missing -db selects the default database.
+     */
+    public static void main(String[] args) throws IOException, SQLException {
+        mainJCommander jct = new mainJCommander();
+        new JCommander(jct, args);
+        String workingDir = "jobout";
+        // the bounds used to be hard-coded to 0, which excluded every job
+        long startTime = parseTime(jct.starttime, 0L);
+        long endTime = parseTime(jct.endtime, Long.MAX_VALUE);
+
+        ExecutionStatUpdater esu = new ExecutionStatUpdater(workingDir, 1);
+        esu.collectStatistics(startTime, endTime);
+        esu.writeStatToDB(jct.dbname);
+    }
+
+    /** Accepts directories whose name does not start with a dot. */
+    static FileFilter directories = new FileFilter() {
+        @Override
+        public boolean accept(File pathname) {
+            return pathname.isDirectory() && !pathname.getName().startsWith(".");
+        }
+    };
+
+    // TODO test!
+    /**
+     * Rebuilds the list of job statistics from the sub-directories of the
+     * working directory. A job is recorded when its directory was modified
+     * strictly between the two bounds, its input file passes InputFilter and
+     * the job has completed.
+     *
+     * @param StartTime
+     *            exclusive lower bound of the directory modification time
+     * @param EndTime
+     *            exclusive upper bound of the directory modification time
+     */
+    private void collectStatistics(long StartTime, long EndTime) {
+        // clear stats array;
+        stats.clear();
+        File[] dirs = workingDirectory.listFiles(directories);
+        if (dirs == null) {
+            // listFiles returns null if the directory cannot be listed
+            log.warn("Cannot list the content of the directory: " + workingDirectory);
+            return;
+        }
+        for (File dir : dirs) {
+            log.debug("check directory: " + dir.getName() + "...");
+            // skip work directories out of the ordered time range; the
+            // timestamp is tested first as it needs no access to the input file
+            long modified = dir.lastModified();
+            if (modified <= StartTime || modified >= EndTime) {
+                log.trace("out of time range: " + dir.getName());
+                continue;
+            }
+            // skip work directories with test inputs
+            if (!InputFilter.accept(new File(dir.getPath() + File.separator + SkeletalExecutable.INPUT))) {
+                log.trace("training input: " + dir.getName() + File.separator + SkeletalExecutable.INPUT);
+                continue;
+            }
+            JobDirectory jd = new JobDirectory(dir);
+            JobStat jstat = jd.getJobStat();
+            // Do not record stats on the job that has not completed yet
+            if (hasCompleted(jd)) {
+                stats.add(jstat);
+            } else {
+                log.debug("Skipping the job: " + jstat + " as it has not completed yet");
+            }
+        }
+    }
+
+}
diff --git a/webservices/compbio/stat/collector/StatDB.java b/webservices/compbio/stat/collector/StatDB.java
index 32b7fde..45947a2 100644
--- a/webservices/compbio/stat/collector/StatDB.java
+++ b/webservices/compbio/stat/collector/StatDB.java
@@ -53,6 +53,11 @@ public class StatDB {
     static Connection conn;
     private synchronized static Connection getDBConnection() throws SQLException {
+        return getDBConnection(statDBName);
+    }
+
+    // NOTE(review): an already open cached connection is returned whatever dbname is - confirm this is intended
+    private synchronized static Connection getDBConnection(String dbname) throws SQLException {
         if (conn != null && !conn.isClosed()) {
             return conn;
@@ -64,7 +69,7 @@ public class StatDB {
             // Apparently under Tomcat webapp you cannot rely on Java
             // auto discovery and have to register the driver explicitly
             Class.forName(driver);
-            conn = DriverManager.getConnection(protocol + statDBName + ";create=false");
+            conn = DriverManager.getConnection(protocol + dbname + ";create=false");
             conn.setAutoCommit(true);
         } catch (ClassNotFoundException e) {
@@ -74,6 +79,11 @@ public class StatDB {
         return conn;
     }
+
+    public StatDB(String dbname) throws SQLException {
+        this.conn = getDBConnection(dbname);
+    }
+
     public StatDB() throws SQLException {
         this.conn = getDBConnection();
     }
diff --git a/webservices/compbio/stat/servlet/StatisticCollector.java b/webservices/compbio/stat/servlet/StatisticCollector.java
index e305fa5..5d7c251 100644
--- a/webservices/compbio/stat/servlet/StatisticCollector.java
+++ b/webservices/compbio/stat/servlet/StatisticCollector.java
@@ -108,6 +108,11 @@ public class StatisticCollector implements ServletContextListener {
         return getStringProperty(ph.getProperty("cluster.tmp.directory"));
     }
+    // NOTE(review): reads the same "cluster.tmp.directory" key as the method above - looks like a copy-paste, confirm the property that holds the database name
+    static String getStatDBname() {
+        return getStringProperty(ph.getProperty("cluster.tmp.directory"));
+    }
+
     static int getClusterJobTimeOut() {
         int maxRunTime = 24 * 7;
         String clusterMaxRuntime = ph.getProperty("cluster.stat.maxruntime");
diff --git a/webservices/compbio/ws/server/MainManager.java b/webservices/compbio/ws/server/MainManager.java
index 63267c7..8d57254 100644
--- a/webservices/compbio/ws/server/MainManager.java
+++ b/webservices/compbio/ws/server/MainManager.java
@@ -28,7 +28,6 @@ import javax.servlet.ServletContextListener;
 import org.apache.log4j.Logger;
 import compbio.stat.collector.DirCleaner;
-import compbio.stat.collector.StatDB;
 import compbio.engine.conf.PropertyHelperManager;
 import compbio.engine.local.ExecutableWrapper;
 import compbio.engine.local.LocalExecutorService;
@@ -75,7 +74,6 @@ public class MainManager implements ServletContextListener {
         log.info("Local engine is shutdown OK");
         ExecutableWrapper.shutdownService();
         log.info("Individual executables stream engine is shutdown OK");
-        StatDB.shutdownDBServer();
     }
     @Override