-/* Copyright (c) 2011 Peter Troshin\r
+/* Copyright (c) 2013 Alexander Sherstnev\r
+ * Copyright (c) 2011 Peter Troshin\r
* \r
* JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0 \r
* \r
import java.text.SimpleDateFormat;\r
import java.util.ArrayList;\r
import java.util.Date;\r
-import java.util.HashMap;\r
import java.util.HashSet;\r
import java.util.List;\r
-import java.util.Map;\r
import java.util.Set;\r
\r
import org.apache.log4j.Logger;\r
\r
-import compbio.engine.client.Executable;\r
import compbio.engine.client.PathValidator;\r
import compbio.engine.client.SkeletalExecutable;\r
-import compbio.metadata.JobStatus;\r
import compbio.util.FileUtil;\r
-import compbio.ws.client.Services;\r
-import compbio.ws.client.ServicesUtil;\r
\r
/**\r
- * Number of runs of each WS = number of folders with name\r
- * \r
- * Number of successful runs = all runs with no result file\r
- * \r
- * Per period of time = limit per file creating time Runtime (avg/max) =\r
- * \r
- * started time - finished time\r
- * \r
- * Task & result size = result.size\r
+ * Class assumptions:\r
+ * 1. Number of runs of each WS = number of folders with name\r
+ * 2. Number of successful runs = all runs with no result file\r
+ * 3. Per period of time = limit by file creation time
+ * 4. Runtime (avg/max) = finish time - start time\r
+ * 5. Task & result size = result.size\r
* \r
* Abandoned runs - not collected runs\r
* \r
* work directory for local and cluster tasks = from Helper or cmd parameter. WS\r
* names - enumeration. Status file names and content.\r
* \r
- * @author pvtroshin\r
+ * @author Peter Troshin\r
+ * @author Alexander Sherstnev\r
* \r
*/\r
public class ExecutionStatCollector implements Runnable {\r
-\r
static final int UNDEFINED = -1;\r
-\r
- private static final Logger log = Logger\r
- .getLogger(ExecutionStatCollector.class);\r
-\r
static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");\r
+ private static final Logger log = Logger.getLogger(ExecutionStatCollector.class);\r
\r
- final private File workDirectory;\r
+ final private File workingDirectory;\r
final private List<JobStat> stats;\r
/**\r
* Consider the job that has been working for longer than timeOutInHours\r
/**\r
* List subdirectories in the job directory\r
* \r
- * @param workDirectory\r
+ * @param workingDirectory\r
* @param timeOutInHours\r
*/\r
- public ExecutionStatCollector(String workDirectory, int timeOutInHours) {\r
- log.info("Starting stat collector for directory: " + workDirectory);\r
+ public ExecutionStatCollector(String workingDirectory, int timeOutInHours) {\r
+ log.info("Starting stat collector for directory: " + workingDirectory);\r
log.info("Maximum allowed runtime(h): " + timeOutInHours);\r
- if (!PathValidator.isValidDirectory(workDirectory)) {\r
- throw new IllegalArgumentException("workDirectory '"\r
- + workDirectory + "' does not exist!");\r
+ if (!PathValidator.isValidDirectory(workingDirectory)) {\r
+ throw new IllegalArgumentException("workingDirectory '" + workingDirectory + "' does not exist!");\r
}\r
- this.workDirectory = new File(workDirectory);\r
+ this.workingDirectory = new File(workingDirectory);\r
stats = new ArrayList<JobStat>();\r
if (timeOutInHours <= 0) {\r
throw new IllegalArgumentException(\r
- "Timeout value must be greater than 0! Given value: "\r
- + timeOutInHours);\r
+ "Timeout value must be greater than 0! Given value: " + timeOutInHours);\r
}\r
this.timeOutInHours = timeOutInHours;\r
}\r
statdb.insertData(rjobs);\r
}\r
\r
- /*\r
- * static void updateTime(File statFile) throws IOException { long lastMod =\r
- * statFile.lastModified(); FileWriter fw = new FileWriter(statFile);\r
- * fw.write(new Long(lastMod).toString()); fw.close(); }\r
- */\r
-\r
/**\r
* Not in use\r
*/\r
static FileFilter directories = new FileFilter() {\r
@Override\r
public boolean accept(File pathname) {\r
- return pathname.isDirectory()\r
- && !pathname.getName().startsWith(".");\r
+ return pathname.isDirectory() && !pathname.getName().startsWith(".");\r
}\r
};\r
\r
- static class JobDirectory {\r
-\r
- File jobdir;\r
- Map<String, File> files = new HashMap<String, File>();\r
-\r
- JobDirectory(File directory) {\r
- this.jobdir = directory;\r
- for (File f : jobdir.listFiles()) {\r
- files.put(f.getName(), f);\r
- }\r
- }\r
-\r
- boolean hasStatus(JobStatus status) {\r
- return files.containsKey(status.toString());\r
- }\r
-\r
- boolean isCollected() {\r
- return hasStatus(JobStatus.COLLECTED);\r
- }\r
-\r
- boolean isCancelled() {\r
- return hasStatus(JobStatus.CANCELLED);\r
- }\r
-\r
- long getStartTime() {\r
- long starttime = UNDEFINED;\r
- File startfile = files.get(JobStatus.STARTED.toString());\r
- if (startfile == null) {\r
- startfile = files.get(JobStatus.SUBMITTED.toString());\r
- }\r
- try {\r
- if (startfile != null) {\r
- String start = FileUtil.readFileToString(startfile);\r
- starttime = Long.parseLong(start.trim());\r
- }\r
- } catch (IOException ignore) {\r
- log.warn(\r
- "IOException while reading STARTED status file! Ignoring...",\r
- ignore);\r
- // fall back\r
- starttime = startfile.lastModified();\r
- } catch (NumberFormatException ignore) {\r
- log.warn(\r
- "NumberFormatException while reading STARTED status file! Ignoring...",\r
- ignore);\r
- // fall back\r
- starttime = startfile.lastModified();\r
- }\r
-\r
- return starttime;\r
- }\r
-\r
- String getClusterJobID() {\r
- String clustjobId = "";\r
- File jobid = files.get("JOBID");\r
- try {\r
- if (jobid != null) {\r
- clustjobId = FileUtil.readFileToString(jobid);\r
- }\r
- } catch (IOException ioe) {\r
- log.error(\r
- "IO Exception while reading the content of JOBID file for job "\r
- + jobid, ioe);\r
- }\r
- return clustjobId.trim();\r
- }\r
-\r
- long getFinishedTime() {\r
- long ftime = UNDEFINED;\r
- File finished = files.get(JobStatus.FINISHED.toString());\r
- if (finished != null) {\r
- try {\r
- if (finished != null) {\r
- String start = FileUtil.readFileToString(finished);\r
- ftime = Long.parseLong(start.trim());\r
- }\r
- } catch (IOException ignore) {\r
- log.warn(\r
- "IOException while reading FINISHED status file! Ignoring...",\r
- ignore);\r
- // fall back\r
- ftime = finished.lastModified();\r
- } catch (NumberFormatException ignore) {\r
- log.warn(\r
- "NumberFormatException while reading FINISHED status file! Ignoring...",\r
- ignore);\r
- // fall back\r
- ftime = finished.lastModified();\r
- }\r
- }\r
- return ftime;\r
- }\r
-\r
- private Services getService() {\r
- return ServicesUtil.getServiceByJobDirectory(jobdir);\r
- }\r
-\r
- long getResultSize() {\r
- Class<? extends Executable<?>> name = ServicesUtil\r
- .getRunnerByJobDirectory(jobdir);\r
-\r
- File f = null;\r
- if (name.getSimpleName().equalsIgnoreCase("IUPred")) {\r
- f = files.get("out.glob");\r
- if (f == null)\r
- f = files.get("out.short");\r
- if (f == null)\r
- f = files.get("out.long");\r
- } else {\r
- f = files.get(SkeletalExecutable.OUTPUT);\r
- }\r
- if (f != null) {\r
- return f.length();\r
- }\r
- return UNDEFINED;\r
- }\r
-\r
- long getInputSize() {\r
- Class<? extends Executable<?>> name = ServicesUtil\r
- .getRunnerByJobDirectory(jobdir);\r
-\r
- File input = files.get(SkeletalExecutable.INPUT);\r
- if (input != null) {\r
- return input.length();\r
- }\r
- return UNDEFINED;\r
- }\r
-\r
- JobStat getJobStat() {\r
- return JobStat.newInstance(getService(), getClusterJobID(),\r
- jobdir.getName(), getStartTime(), getFinishedTime(),\r
- getInputSize(), getResultSize(), isCancelled(),\r
- isCollected());\r
- }\r
-\r
- @Override\r
- public int hashCode() {\r
- final int prime = 31;\r
- int result = 1;\r
- result = prime * result\r
- + ((jobdir == null) ? 0 : jobdir.hashCode());\r
- return result;\r
- }\r
-\r
- @Override\r
- public boolean equals(Object obj) {\r
- if (this == obj)\r
- return true;\r
- if (obj == null)\r
- return false;\r
- if (getClass() != obj.getClass())\r
- return false;\r
- JobDirectory other = (JobDirectory) obj;\r
- if (jobdir == null) {\r
- if (other.jobdir != null)\r
- return false;\r
- } else if (!jobdir.equals(other.jobdir))\r
- return false;\r
- return true;\r
- }\r
- }\r
-\r
+ // TODO test!\r
void collectStatistics() {\r
- File[] files = workDirectory.listFiles(directories);\r
- for (File file : files) {\r
- JobDirectory jd = new JobDirectory(file);\r
- JobStat jstat = jd.getJobStat();\r
- // Do not record stats on the job that has not completed yet\r
- if (hasCompleted(jd)) {\r
- stats.add(jstat);\r
+ // clear the stats list before recollecting
+ stats.clear();\r
+ File[] dirs = workingDirectory.listFiles(directories);\r
+ for (File dir : dirs) {\r
+ // skip work directories with test inputs
+ log.debug("check directory: " + dir.getName() + "...");\r
+ if (InputFilter.accept(new File(dir.getPath() + File.separator + SkeletalExecutable.INPUT))) {\r
+ JobDirectory jd = new JobDirectory(dir);\r
+ JobStat jstat = jd.getJobStat();\r
+ // Do not record stats on the job that has not completed yet\r
+ if (hasCompleted(jd)) {\r
+ stats.add(jstat);\r
+ } else {\r
+ log.debug("Skipping the job: " + jstat + " as it has not completed yet");\r
+ }\r
} else {\r
- log.debug("Skipping the job: " + jstat);\r
- log.debug("As it has not completed yet");\r
+ log.trace("training input: " + dir.getName() + File.separator + SkeletalExecutable.INPUT);\r
}\r
- // System.out.println(jd.getJobStat().getJobReportTabulated());\r
}\r
}\r
\r
@Override\r
public void run() {\r
log.info("Started updating statistics at " + new Date());\r
- log.info("For directory: " + workDirectory.getAbsolutePath());\r
+ log.info("For directory: " + workingDirectory.getAbsolutePath());\r
\r
collectStatistics();\r
\r