1 /* Copyright (c) 2011 Peter Troshin
\r
3 * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0
\r
5 * This library is free software; you can redistribute it and/or modify it under the terms of the
\r
6 * Apache License version 2 as published by the Apache Software Foundation
\r
8 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
\r
9 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache
\r
10 * License for more details.
\r
12 * A copy of the license is in apache_license.txt. It is also available here:
\r
13 * @see: http://www.apache.org/licenses/LICENSE-2.0.txt
\r
15 * Any republication or derived work distributed in source code form
\r
 * must include this copyright and license notice.
 */
\r
18 package compbio.stat.collector;
\r
20 import java.io.File;
\r
21 import java.io.FileFilter;
\r
22 import java.io.IOException;
\r
23 import java.sql.SQLException;
\r
24 import java.text.SimpleDateFormat;
\r
25 import java.util.ArrayList;
\r
26 import java.util.Date;
\r
27 import java.util.HashMap;
\r
28 import java.util.HashSet;
\r
29 import java.util.List;
\r
30 import java.util.Map;
\r
31 import java.util.Set;
\r
33 import org.apache.log4j.Logger;
\r
35 import compbio.engine.client.Executable;
\r
36 import compbio.engine.client.PathValidator;
\r
37 import compbio.engine.client.SkeletalExecutable;
\r
38 import compbio.metadata.JobStatus;
\r
39 import compbio.util.FileUtil;
\r
40 import compbio.ws.client.Services;
\r
/**
 * Collects execution statistics from the job output directories.
 * 
 * Number of runs of each WS = number of folders with name
 * 
 * Number of successful runs = all runs with no result file
 * 
 * Per period of time = limit per file creating time. Runtime (avg/max) =
 * started time - finished time
 * 
 * Task and result size = result.size
 * 
 * Abandoned runs - not collected runs
 * 
 * Cancelled runs - cancelled
 * 
 * Cluster vs local runs
 * 
 * Reasons for failure = look in the err out?
 * 
 * Metadata required:
 * 
 * work directory for local and cluster tasks = from Helper or cmd parameter. WS
 * names - enumeration. Status file names and content.
 */
\r
70 public class ExecutionStatCollector implements Runnable {
\r
72 static final int UNDEFINED = -1;
\r
74 private static final Logger log = Logger
\r
75 .getLogger(ExecutionStatCollector.class);
\r
77 static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");
\r
79 final private File workDirectory;
\r
80 final private List<JobStat> stats;
\r
82 * Consider the job that has been working for longer than timeOutInHours
\r
83 * completed, whatever the outcome
\r
85 final private int timeOutInHours;
\r
88 * List subdirectories in the job directory
\r
90 * @param workDirectory
\r
91 * @param timeOutInHours
\r
93 public ExecutionStatCollector(String workDirectory, int timeOutInHours) {
\r
94 log.info("Starting stat collector for directory: " + workDirectory);
\r
95 log.info("Maximum allowed runtime(h): " + timeOutInHours);
\r
96 if (!PathValidator.isValidDirectory(workDirectory)) {
\r
97 throw new IllegalArgumentException("workDirectory '"
\r
98 + workDirectory + "' does not exist!");
\r
100 this.workDirectory = new File(workDirectory);
\r
101 stats = new ArrayList<JobStat>();
\r
102 if (timeOutInHours <= 0) {
\r
103 throw new IllegalArgumentException(
\r
104 "Timeout value must be greater than 0! Given value: "
\r
107 this.timeOutInHours = timeOutInHours;
\r
110 boolean hasCompleted(JobDirectory jd) {
\r
111 JobStat jstat = jd.getJobStat();
\r
112 if (jstat.hasResult() || jstat.getIsCancelled()
\r
113 || jstat.getIsFinished() || hasTimedOut(jd)) {
\r
119 boolean hasTimedOut(JobDirectory jd) {
\r
120 return ((System.currentTimeMillis() - jd.jobdir.lastModified()) / (1000 * 60 * 60)) > timeOutInHours;
\r
124 * Make sure that collectStatistics methods was called prior to calling
\r
125 * this! TODO consider running collectStatistics from here on the first call
\r
127 StatProcessor getStats() {
\r
128 if (stats.isEmpty()) {
\r
129 log.info("Please make sure collectStatistics method was called prior to calling getStats()!");
\r
131 return new StatProcessor(stats);
\r
134 void writeStatToDB() throws SQLException {
\r
135 Set<JobStat> rjobs = new HashSet<JobStat>(stats);
\r
136 StatDB statdb = new StatDB();
\r
137 log.debug("Removing records that has already been recorded");
\r
139 statdb.removeRecordedJobs(rjobs);
\r
140 log.debug("New records left: " + rjobs.size());
\r
141 statdb.insertData(rjobs);
\r
	/*
	 * static void updateTime(File statFile) throws IOException { long lastMod =
	 * statFile.lastModified(); FileWriter fw = new FileWriter(statFile);
	 * fw.write(new Long(lastMod).toString()); fw.close(); }
	 */
\r
153 public static void main(String[] args) throws IOException, SQLException {
\r
155 // updateTime(new File(
\r
156 // "D:\\workspace\\JABA2\\jobsout\\AACon#170462904473672\\STARTED"));
\r
158 File[] files = FileUtil.getFiles("Y:\\fc\\www-jws2\\jaba\\jobsout",
\r
160 List<JobStat> stats = new ArrayList<JobStat>();
\r
161 for (File file : files) {
\r
162 JobDirectory jd = new JobDirectory(file);
\r
163 stats.add(jd.getJobStat());
\r
164 // System.out.println(jd.getJobStat().getJobReportTabulated());
\r
166 StatProcessor sp = new StatProcessor(stats);
\r
167 System.out.println(sp.reportStat());
\r
168 System.out.println();
\r
169 System.out.println("!!!!!!!!!!!!!!!!!!");
\r
170 System.out.println();
\r
172 Set<JobStat> rjobs = new HashSet<JobStat>(sp.stats);
\r
173 StatDB statdb = new StatDB();
\r
174 statdb.removeRecordedJobs(rjobs);
\r
175 statdb.insertData(rjobs);
\r
178 static FileFilter directories = new FileFilter() {
\r
180 public boolean accept(File pathname) {
\r
181 return pathname.isDirectory()
\r
182 && !pathname.getName().startsWith(".");
\r
186 static class JobDirectory {
\r
189 Map<String, File> files = new HashMap<String, File>();
\r
191 JobDirectory(File directory) {
\r
192 this.jobdir = directory;
\r
193 for (File f : jobdir.listFiles()) {
\r
194 files.put(f.getName(), f);
\r
198 boolean hasStatus(JobStatus status) {
\r
199 return files.containsKey(status.toString());
\r
202 boolean isCollected() {
\r
203 return hasStatus(JobStatus.COLLECTED);
\r
206 boolean isCancelled() {
\r
207 return hasStatus(JobStatus.CANCELLED);
\r
210 long getStartTime() {
\r
211 long starttime = UNDEFINED;
\r
212 File startfile = files.get(JobStatus.STARTED.toString());
\r
213 if (startfile == null) {
\r
214 startfile = files.get(JobStatus.SUBMITTED.toString());
\r
217 if (startfile != null) {
\r
218 String start = FileUtil.readFileToString(startfile);
\r
219 starttime = Long.parseLong(start.trim());
\r
221 } catch (IOException ignore) {
\r
223 "IOException while reading STARTED status file! Ignoring...",
\r
226 starttime = startfile.lastModified();
\r
227 } catch (NumberFormatException ignore) {
\r
229 "NumberFormatException while reading STARTED status file! Ignoring...",
\r
232 starttime = startfile.lastModified();
\r
238 String getClusterJobID() {
\r
239 String clustjobId = "";
\r
240 File jobid = files.get("JOBID");
\r
242 if (jobid != null) {
\r
243 clustjobId = FileUtil.readFileToString(jobid);
\r
245 } catch (IOException ioe) {
\r
247 "IO Exception while reading the content of JOBID file for job "
\r
250 return clustjobId.trim();
\r
253 long getFinishedTime() {
\r
254 long ftime = UNDEFINED;
\r
255 File finished = files.get(JobStatus.FINISHED.toString());
\r
256 if (finished != null) {
\r
258 if (finished != null) {
\r
259 String start = FileUtil.readFileToString(finished);
\r
260 ftime = Long.parseLong(start.trim());
\r
262 } catch (IOException ignore) {
\r
264 "IOException while reading FINISHED status file! Ignoring...",
\r
267 ftime = finished.lastModified();
\r
268 } catch (NumberFormatException ignore) {
\r
270 "NumberFormatException while reading FINISHED status file! Ignoring...",
\r
273 ftime = finished.lastModified();
\r
279 private Services getService() {
\r
280 return Services.getServiceByJobDirectory(jobdir);
\r
283 long getResultSize() {
\r
284 Class<? extends Executable<?>> name = Services
\r
285 .getRunnerByJobDirectory(jobdir);
\r
288 if (name.getSimpleName().equalsIgnoreCase("IUPred")) {
\r
289 f = files.get("out.glob");
\r
291 f = files.get("out.short");
\r
293 f = files.get("out.long");
\r
295 f = files.get(SkeletalExecutable.OUTPUT);
\r
303 long getInputSize() {
\r
304 Class<? extends Executable<?>> name = Services
\r
305 .getRunnerByJobDirectory(jobdir);
\r
307 File input = files.get(SkeletalExecutable.INPUT);
\r
308 if (input != null) {
\r
309 return input.length();
\r
314 JobStat getJobStat() {
\r
315 return JobStat.newInstance(getService(), getClusterJobID(),
\r
316 jobdir.getName(), getStartTime(), getFinishedTime(),
\r
317 getInputSize(), getResultSize(), isCancelled(),
\r
322 public int hashCode() {
\r
323 final int prime = 31;
\r
325 result = prime * result
\r
326 + ((jobdir == null) ? 0 : jobdir.hashCode());
\r
331 public boolean equals(Object obj) {
\r
336 if (getClass() != obj.getClass())
\r
338 JobDirectory other = (JobDirectory) obj;
\r
339 if (jobdir == null) {
\r
340 if (other.jobdir != null)
\r
342 } else if (!jobdir.equals(other.jobdir))
\r
348 void collectStatistics() {
\r
349 File[] files = workDirectory.listFiles(directories);
\r
350 for (File file : files) {
\r
351 JobDirectory jd = new JobDirectory(file);
\r
352 JobStat jstat = jd.getJobStat();
\r
353 // Do not record stats on the job that has not completed yet
\r
354 if (hasCompleted(jd)) {
\r
357 log.debug("Skipping the job: " + jstat);
\r
358 log.debug("As it has not completed yet");
\r
360 // System.out.println(jd.getJobStat().getJobReportTabulated());
\r
365 public void run() {
\r
366 log.info("Started updating statistics at " + new Date());
\r
367 log.info("For directory: " + workDirectory.getAbsolutePath());
\r
369 collectStatistics();
\r
371 StatProcessor local_stats = getStats();
\r
372 log.info("Found " + local_stats.getJobNumber() + " jobs!");
\r
375 } catch (SQLException e) {
\r
376 log.error("Fails to update jobs statistics database!");
\r
377 log.error(e.getLocalizedMessage(), e);
\r
379 log.info("Finished updating statistics at " + new Date());
\r