JWS-96 - first working version of the crawler JWS-96
author Sasha Sherstnev <a.sherstnev@dundee.ac.uk>
Wed, 18 Jun 2014 07:14:53 +0000 (08:14 +0100)
committer Sasha Sherstnev <a.sherstnev@dundee.ac.uk>
Wed, 18 Jun 2014 07:14:53 +0000 (08:14 +0100)
build.xml
log/log4j.properties.updater
webservices/compbio/stat/collector/ExecutionStatUpdater.java
webservices/compbio/stat/collector/StatDB.java

index fe7b271..5acf11a 100644 (file)
--- a/build.xml
+++ b/build.xml
                        </fileset>
                        <fileset refid="statupdater"/>
                        <zipgroupfileset excludes="META-INF/*" dir="" includes="WEB-INF/lib/log4j-1.2.15.jar" />
+                       <zipgroupfileset excludes="META-INF/*" dir="" includes="WEB-INF/lib/derby-10.8.2.2.jar" />
                        <zipgroupfileset excludes="META-INF/*" dir="" includes="lib/jcommander-1.30.jar" />
                        <zipgroupfileset excludes="META-INF/*.SF" dir="${web.lib.path}" >
                                <include name="${compbio-util}"/>
index e6f4a06..16c4d69 100644 (file)
@@ -1,6 +1,6 @@
 \r
 ## CHANGE THIS (The root directory where to store all the log files)  \r
-#logDir = .\r
+logDir = .\r
 \r
 ## Uncomment to enable JWS2 activity logging to standard out (to the console if available)\r
 ## for possible log levels please refer to Log4j documentation http://logging.apache.org/log4j/1.2/manual.html \r
@@ -13,7 +13,7 @@
 ## FATAL - log fatal events only\r
 \r
 ##################################################################################################################################\r
-log4j.rootLogger=DEBUG\r
+log4j.rootLogger=TRACE,R\r
 log4j.appender.R=org.apache.log4j.FileAppender\r
 log4j.appender.R.File=StatDBupdater.log\r
 log4j.appender.R.layout=org.apache.log4j.PatternLayout\r
index a5d89d1..4f5d6de 100644 (file)
@@ -1,7 +1,6 @@
-/* Copyright (c) 2013 Alexander Sherstnev\r
- * Copyright (c) 2011 Peter Troshin\r
+/* Copyright (c) 2014 Alexander Sherstnev\r
  *  \r
- *  JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0     \r
+ *  JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.1\r
  * \r
  *  This library is free software; you can redistribute it and/or modify it under the terms of the\r
  *  Apache License version 2 as published by the Apache Software Foundation\r
@@ -39,26 +38,11 @@ import compbio.engine.client.PathValidator;
 import compbio.engine.client.SkeletalExecutable;\r
 \r
 /**\r
- * Class assumptions: 1. Number of runs of each WS = number of folders with name\r
- * 2. Number of successful runs = all runs with no result file 3. Per period of\r
- * time = limit per file creating time 4. Runtime (avg/max) = finish time -\r
- * start time 5. Task & result size = result.size\r
- * \r
- * Abandoned runs - not collected runs\r
- * \r
- * Cancelled runs - cancelled\r
- * \r
- * Cluster vs local runs\r
- * \r
- * Reasons for failure = look in the err out?\r
- * \r
- * \r
  * Metadata required:\r
  * \r
  * work directory for local and cluster tasks = from Helper or cmd parameter. WS\r
  * names - enumeration. Status file names and content.\r
  * \r
- * @author Peter Troshin\r
  * @author Alexander Sherstnev\r
  * \r
  */\r
@@ -67,8 +51,8 @@ class mainJCommander {
        @Parameter\r
        private List<String> parameters = new ArrayList<String>();\r
 \r
-       @Parameter(names = { "-log", "-verbose" }, description = "Level of verbosity")\r
-       public Integer verbose = 1;\r
+       @Parameter(names = { "-h", "-help", "--help" }, help = true, description = "Print help")\r
+       public boolean help;\r
 \r
        @Parameter(names = "-start", description = "Start time")\r
        public String starttime;\r
@@ -84,108 +68,57 @@ class mainJCommander {
 }\r
 \r
 public class ExecutionStatUpdater {\r
-       static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");\r
-       static SimpleDateFormat shortDF = new SimpleDateFormat("dd/MM/yyyy");\r
+       static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy");\r
        private static final Logger log = Logger.getLogger(ExecutionStatUpdater.class);\r
-\r
        final private File workingDirectory;\r
        final private List<JobStat> stats;\r
-       /**\r
-        * Consider the job that has been working for longer than timeOutInHours\r
-        * completed, whatever the outcome\r
-        */\r
-       final private int timeOutInHours;\r
 \r
-       /**\r
-        * List subdirectories in the job directory\r
-        * \r
-        * @param workingDirectory\r
-        * @param timeOutInHours\r
-        */\r
-       public ExecutionStatUpdater(String workingDirectory, int timeOutInHours) {\r
-               log.info("Starting stat collector for directory: " + workingDirectory);\r
-               log.info("Maximum allowed runtime(h): " + timeOutInHours);\r
+       public ExecutionStatUpdater(String workingDirectory) {\r
+               log.info("Starting stat updater for directory: " + workingDirectory);\r
                if (!PathValidator.isValidDirectory(workingDirectory)) {\r
                        throw new IllegalArgumentException("workingDirectory '" + workingDirectory + "' does not exist!");\r
                }\r
                this.workingDirectory = new File(workingDirectory);\r
                stats = new ArrayList<JobStat>();\r
-               if (timeOutInHours <= 0) {\r
-                       throw new IllegalArgumentException("Timeout value must be greater than 0! Given value: " + timeOutInHours);\r
-               }\r
-               this.timeOutInHours = timeOutInHours;\r
        }\r
 \r
        boolean hasCompleted(JobDirectory jd) {\r
                JobStat jstat = jd.getJobStat();\r
-               if (jstat.hasResult() || jstat.getIsCancelled() || jstat.getIsFinished() || hasTimedOut(jd)) {\r
+               if (jstat.hasResult() || jstat.getIsCancelled() || jstat.getIsFinished()) {\r
                        return true;\r
                }\r
                return false;\r
        }\r
 \r
-       boolean hasTimedOut(JobDirectory jd) {\r
-               return ((System.currentTimeMillis() - jd.jobdir.lastModified()) / (1000 * 60 * 60)) > timeOutInHours;\r
-       }\r
-\r
-       /*\r
-        * Make sure that collectStatistics methods was called prior to calling\r
-        * this! TODO consider running collectStatistics from here on the first call\r
-        */\r
-       StatProcessor getStats() {\r
-               if (stats.isEmpty()) {\r
-                       log.info("Please make sure collectStatistics method was called prior to calling getStats()!");\r
-               }\r
-               return new StatProcessor(stats);\r
-       }\r
-\r
        void writeStatToDB(String dbname) throws SQLException {\r
                Set<JobStat> rjobs = new HashSet<JobStat>(stats);\r
                StatDB statdb = new StatDB(dbname);\r
-               log.debug("Removing records that has already been recorded");\r
+               log.debug("Filtering out records that has already been recorded. init Njobs = " + rjobs.size());\r
                statdb.removeRecordedJobs(rjobs);\r
-               log.debug("New records left: " + rjobs.size());\r
+               log.debug("Njobs left: " + rjobs.size());\r
                statdb.insertData(rjobs);\r
        }\r
 \r
-       /**\r
-        * main function\r
-        * @throws ParseException \r
-        */\r
-       public static void main(String[] args) throws IOException, SQLException, ParseException {\r
-               mainJCommander jct = new mainJCommander();\r
-               new JCommander(jct, args);\r
-               String WorkingDir = "jobsout";\r
-               String DBname = "ExecutionStatistic";\r
+       public static void printHelp() {\r
+               System.out.println("\nUsage: <Class or Jar file name> -dir <JOBDIR> -db <StatDB> -start <DATE1> -end <DATE2> -h");\r
+               System.out.println("\n[OPTIONS]");\r
 \r
-               long StartTime = 0L;\r
-               Date currDate = new Date();\r
-               long EndTime = currDate.getTime();\r
-               if (null != jct.starttime) {\r
-                       Date ST = shortDF.parse(jct.starttime);\r
-                       if (null != ST) {\r
-                               StartTime = ST.getTime();\r
-                       }\r
-               }\r
-               if (null != jct.endtime) {\r
-                       Date ET = shortDF.parse(jct.endtime);\r
-                       if (null != ET) {\r
-                               EndTime = ET.getTime();\r
-                       }\r
-               }\r
-               if (null != jct.dbname) {\r
-                       DBname = jct.dbname;\r
-               }\r
-               if (null != jct.workingdir) {\r
-                       WorkingDir = jct.workingdir;\r
-               }\r
+               System.out.println("\n-dir <JOBDIR> - a directory with jabaws jobs");\r
 \r
-               System.out.println("Start time: " + jct.starttime + " = " + StartTime);\r
-               System.out.println("End time: " + jct.endtime + " = " + EndTime);\r
+               System.out.println("\n-db <StatDB> - a JABAWS statistics database. If information on");\r
+               System.out.println("jobs from the job directory have been stored in the database,");\r
+               System.out.println("the information is not going to the database [Default:");\r
+               System.out.println("ExecutionStatistic]");\r
 \r
-               ExecutionStatUpdater esu = new ExecutionStatUpdater(WorkingDir, 1);\r
-               esu.collectStatistics(StartTime, EndTime);\r
-               esu.writeStatToDB(DBname);\r
+               System.out.println("\n-start <DATE1> - start date for processing jobs. Timestamp of");\r
+               System.out.println("last modification of a job should be later than DATE1. The");\r
+               System.out.println("date format is dd/mm/yy. If the input date format is different");\r
+               System.out.println("the default value is used [Default: 00/00/00]");\r
+\r
+               System.out.println("\n-end <DATE2> - end date for processing jobs. Timestamp of");\r
+               System.out.println("last modification of a job should be earlier than DATE2. The");\r
+               System.out.println("date format is dd/mm/yy. If the input date format is different");\r
+               System.out.println("the default value is used [Default: current date]\n");\r
        }\r
 \r
        static FileFilter directories = new FileFilter() {\r
@@ -221,7 +154,76 @@ public class ExecutionStatUpdater {
                                log.trace("training input: " + dir.getName() + File.separator + SkeletalExecutable.INPUT);\r
                        }\r
                }\r
-               log.debug("Statistics collected!");\r
+               log.debug("Statistics prepared...");\r
        }\r
 \r
+       /**\r
+        * Starts stat DB update program, the only mandatory parameter is a job\r
+        * directory.\r
+        * \r
+        * @param args\r
+        *            Usage: java -jar <Jar file name> -dir <JOBDIR> -db <StatDB>\r
+        *            -start <DATE1> -end <DATE2> -h\r
+        * \r
+        *            ACTION [OPTIONS]\r
+        * \r
+        *            -dir <JOBDIR> - a directory with jabaws jobs\r
+        * \r
+        *            -db <StatDB> - a JABAWS statistics database. Jobs from the job\r
+        *            directory that have already been recorded in the database\r
+        *            are filtered out and not inserted again [Default:\r
+        *            ExecutionStatistic]\r
+        * \r
+        *            -start <DATE1> - start date for processing jobs. Timestamp of\r
+        *            last modification of a job should be later than DATE1. The\r
+        *            date format is dd/MM/yyyy; a value that cannot be parsed\r
+        *            raises a ParseException [Default: 0 ms, i.e. the epoch]\r
+        * \r
+        *            -end <DATE2> - end date for processing jobs. Timestamp of last\r
+        *            modification of a job should be earlier than DATE2. The date\r
+        *            format is dd/MM/yyyy; a value that cannot be parsed raises a\r
+        *            ParseException [Default: current date and time]\r
+        * \r
+        * @throws ParseException\r
+        * \r
+        */\r
+       public static void main(String[] args) throws IOException, SQLException, ParseException {\r
+               mainJCommander jct = new mainJCommander();\r
+               new JCommander(jct, args);\r
+               String WorkingDir = "";\r
+               String DBname = "ExecutionStatistic";\r
+\r
+               long StartTime = 0L;\r
+               Date currDate = new Date();\r
+               long EndTime = currDate.getTime();\r
+               if (null != jct.starttime) {\r
+                       Date ST = DF.parse(jct.starttime);\r
+                       if (null != ST) {\r
+                               StartTime = ST.getTime();\r
+                       }\r
+               }\r
+               if (null != jct.endtime) {\r
+                       Date ET = DF.parse(jct.endtime);\r
+                       if (null != ET) {\r
+                               EndTime = ET.getTime();\r
+                       }\r
+               }\r
+               if (null != jct.dbname) {\r
+                       DBname = jct.dbname;\r
+               }\r
+               if (null == jct.workingdir || jct.help) {\r
+                       printHelp();\r
+                       return;\r
+               }\r
+\r
+               WorkingDir = jct.workingdir;\r
+               log.trace("Collect statistics from jobs at " + WorkingDir);\r
+               log.trace("Start time: " + jct.starttime + " = " + StartTime + " ms");\r
+               log.trace("End time: " + jct.endtime + " = " + EndTime + " ms");\r
+\r
+               ExecutionStatUpdater esu = new ExecutionStatUpdater(WorkingDir);\r
+               esu.collectStatistics(StartTime, EndTime);\r
+               esu.writeStatToDB(DBname);\r
+               return;\r
+       }\r
 }\r
index 45947a2..9bfeb5d 100644 (file)
@@ -154,6 +154,7 @@ public class StatDB {
                                + "inputsize, resultsize, isCancelled, isCollected, isClusterJob) "\r
                                + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";\r
                PreparedStatement pstm = conn.prepareStatement(insert);\r
+               int i = 0;\r
                for (JobStat js : jobstatus) {\r
                        // Has to present\r
                        pstm.setString(1, js.webService.toString());\r
@@ -185,10 +186,12 @@ public class StatDB {
                        pstm.setBoolean(9, js.isCollected);\r
                        pstm.setBoolean(10, js.isClusterJob());\r
                        pstm.executeUpdate();\r
+                       ++i;\r
                }\r
                conn.commit();\r
                conn.setAutoCommit(true);\r
                pstm.close();\r
+               log.debug(i + " jobs have been recorded...");\r
        }\r
 \r
        public Date getEarliestRecord() throws SQLException {\r
@@ -319,6 +322,10 @@ public class StatDB {
 \r
                String query = "select job_id from exec_stat";\r
 \r
+               if (null == conn) {\r
+                       System.out.println ("Something wrong with the DB...");\r
+                       return;\r
+               }\r
                Statement st = conn.createStatement();\r
                ResultSet result = st.executeQuery(query);\r
 \r