package compbio.stat.collector;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.log4j.Logger;

import compbio.engine.client.Executable;
import compbio.engine.client.PathValidator;
import compbio.metadata.JobStatus;
import compbio.util.FileUtil;
import compbio.ws.client.Services;
\r
/**
 * Collects execution statistics from job output directories.
 * 
 * Number of runs of each WS = number of folders with name
 * 
 * Number of successful runs = all runs with no result file
 * 
 * Per period of time = limit per file creating time Runtime (avg/max) =
 * started time - finished time
 * 
 * Task &amp; result size = result.size
 * 
 * Abandoned runs - not collected runs
 * 
 * Cancelled runs - cancelled
 * 
 * Cluster vs local runs
 * 
 * Reasons for failure = look in the err out?
 * 
 * Metadata required:
 * 
 * work directory for local and cluster tasks = from Helper or cmd parameter. WS
 * names - enumeration. Status file names and content.
 */
\r
52 public class ExecutionStatCollector implements Runnable {
\r
54 static final int UNDEFINED = -1;
\r
56 private static final Logger log = Logger
\r
57 .getLogger(ExecutionStatCollector.class);
\r
59 static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");
\r
61 final private File workDirectory;
\r
62 final private List<JobStat> stats;
\r
64 * Consider the job that has been working for longer than timeOutInHours
\r
65 * completed, whatever the outcome
\r
67 final private int timeOutInHours;
\r
70 * List subdirectories in the job directory
\r
72 * @param workDirectory
\r
73 * @param timeOutInHours
\r
75 public ExecutionStatCollector(String workDirectory, int timeOutInHours) {
\r
76 log.info("Starting stat collector for directory: " + workDirectory);
\r
77 log.info("Maximum allowed runtime(h): " + timeOutInHours);
\r
78 if (!PathValidator.isValidDirectory(workDirectory)) {
\r
79 throw new IllegalArgumentException("workDirectory '"
\r
80 + workDirectory + "' does not exist!");
\r
82 this.workDirectory = new File(workDirectory);
\r
83 stats = new ArrayList<JobStat>();
\r
84 if (timeOutInHours <= 0) {
\r
85 throw new IllegalArgumentException(
\r
86 "Timeout value must be greater than 0! Given value: "
\r
89 this.timeOutInHours = timeOutInHours;
\r
92 boolean hasCompleted(JobDirectory jd) {
\r
93 JobStat jstat = jd.getJobStat();
\r
94 if (jstat.hasResult() || jstat.getIsCancelled()
\r
95 || jstat.getIsFinished() || hasTimedOut(jd)) {
\r
101 boolean hasTimedOut(JobDirectory jd) {
\r
102 return ((System.currentTimeMillis() - jd.jobdir.lastModified()) / (1000 * 60 * 60)) > timeOutInHours;
\r
105 /* Make sure that collectStatistics methods was called prior to calling this!
\r
106 * TODO consider running collectStatistics from here on the first call
\r
108 StatProcessor getStats() {
\r
109 if(stats.isEmpty()) {
\r
110 log.info("Please make sure collectStatistics method was called prior to calling getStats()!");
\r
112 return new StatProcessor(stats);
\r
115 void writeStatToDB() throws SQLException {
\r
116 Set<JobStat> rjobs = new HashSet<JobStat>(stats);
\r
117 StatDB statdb = new StatDB();
\r
118 log.debug("Removing records that has already been recorded");
\r
120 statdb.removeRecordedJobs(rjobs);
\r
121 log.debug("New records left: " + rjobs.size());
\r
122 statdb.insertData(rjobs);
\r
    /*
     * static void updateTime(File statFile) throws IOException { long lastMod =
     * statFile.lastModified(); FileWriter fw = new FileWriter(statFile);
     * fw.write(new Long(lastMod).toString()); fw.close(); }
     */
\r
134 public static void main(String[] args) throws IOException, SQLException {
\r
136 // updateTime(new File(
\r
137 // "D:\\workspace\\JABA2\\jobsout\\AACon#170462904473672\\STARTED"));
\r
139 File[] files = FileUtil.getFiles("Y:\\fc\\www-jws2\\jaba\\jobsout",
\r
141 List<JobStat> stats = new ArrayList<JobStat>();
\r
142 for (File file : files) {
\r
143 JobDirectory jd = new JobDirectory(file);
\r
144 stats.add(jd.getJobStat());
\r
145 // System.out.println(jd.getJobStat().getJobReportTabulated());
\r
147 StatProcessor sp = new StatProcessor(stats);
\r
148 System.out.println(sp.reportStat());
\r
149 System.out.println();
\r
150 System.out.println("!!!!!!!!!!!!!!!!!!");
\r
151 System.out.println();
\r
153 Set<JobStat> rjobs = new HashSet<JobStat>(sp.stats);
\r
154 StatDB statdb = new StatDB();
\r
155 statdb.removeRecordedJobs(rjobs);
\r
156 statdb.insertData(rjobs);
\r
159 static FileFilter directories = new FileFilter() {
\r
161 public boolean accept(File pathname) {
\r
162 return pathname.isDirectory()
\r
163 && !pathname.getName().startsWith(".");
\r
167 static class JobDirectory {
\r
170 Map<String, File> files = new HashMap<String, File>();
\r
172 JobDirectory(File directory) {
\r
173 this.jobdir = directory;
\r
174 for (File f : jobdir.listFiles()) {
\r
175 files.put(f.getName(), f);
\r
179 boolean hasStatus(JobStatus status) {
\r
180 return files.containsKey(status.toString());
\r
183 boolean isCollected() {
\r
184 return hasStatus(JobStatus.COLLECTED);
\r
187 boolean isCancelled() {
\r
188 return hasStatus(JobStatus.CANCELLED);
\r
191 long getStartTime() {
\r
192 long starttime = UNDEFINED;
\r
193 File startfile = files.get(JobStatus.STARTED.toString());
\r
194 if (startfile == null) {
\r
195 startfile = files.get(JobStatus.SUBMITTED.toString());
\r
197 if (startfile != null) {
\r
198 starttime = startfile.lastModified();
\r
200 * String start = FileUtil.readFileToString(startfile);
\r
201 * starttime = Long.parseLong(start.trim());
\r
207 String getClusterJobID() {
\r
208 String clustjobId = "";
\r
209 File jobid = files.get("JOBID");
\r
211 if (jobid != null) {
\r
212 clustjobId = FileUtil.readFileToString(jobid);
\r
214 } catch (IOException ioe) {
\r
216 "IO Exception while reading the content of JOBID file for job "
\r
219 return clustjobId.trim();
\r
222 long getFinishedTime() {
\r
223 long ftime = UNDEFINED;
\r
224 File finished = files.get(JobStatus.FINISHED.toString());
\r
225 if (finished != null) {
\r
226 ftime = finished.lastModified();
\r
228 * String start = FileUtil.readFileToString(finished); ftime =
\r
229 * Long.parseLong(start.trim());
\r
231 // System.out.println("f " + ftime);
\r
234 * } catch (IOException e) { log.log(Level.WARN,
\r
235 * "Cannot parse finished time: " + e.getMessage(), e); } catch
\r
236 * (NumberFormatException e) { log.log(Level.WARN,
\r
237 * "Cannot parse finished time: " + e.getMessage(), e); }
\r
242 private Services getService() {
\r
243 return Services.getServiceByJobDirectory(jobdir);
\r
246 // Mafft, Muscle, Tcoffee, Clustal task:fasta.in result:fasta.out
\r
247 // Probcons task:fasta.in result:alignment.out
\r
249 * TODO replace with Universal names for WS!
\r
251 long getResultSize() {
\r
252 Class<? extends Executable<?>> name = Services
\r
253 .getRunnerByJobDirectory(jobdir);
\r
256 if (name.getSimpleName().equalsIgnoreCase("Probcons")) {
\r
257 f = files.get("alignment.out");
\r
258 } else if (name.getSimpleName().equalsIgnoreCase("ClustalW")) {
\r
259 f = files.get("output.txt");
\r
261 f = files.get("fasta.out");
\r
270 * TODO unify input!
\r
272 long getInputSize() {
\r
273 Class<? extends Executable<?>> name = Services
\r
274 .getRunnerByJobDirectory(jobdir);
\r
277 if (name.getSimpleName().equalsIgnoreCase("ClustalW")) {
\r
278 input = files.get("input.txt");
\r
280 input = files.get("fasta.in");
\r
283 if (input != null) {
\r
284 return input.length();
\r
289 JobStat getJobStat() {
\r
290 return JobStat.newInstance(getService(), getClusterJobID(),
\r
291 jobdir.getName(), getStartTime(), getFinishedTime(),
\r
292 getInputSize(), getResultSize(), isCancelled(),
\r
297 public int hashCode() {
\r
298 final int prime = 31;
\r
300 result = prime * result
\r
301 + ((jobdir == null) ? 0 : jobdir.hashCode());
\r
306 public boolean equals(Object obj) {
\r
311 if (getClass() != obj.getClass())
\r
313 JobDirectory other = (JobDirectory) obj;
\r
314 if (jobdir == null) {
\r
315 if (other.jobdir != null)
\r
317 } else if (!jobdir.equals(other.jobdir))
\r
323 void collectStatistics() {
\r
324 File[] files = workDirectory.listFiles(directories);
\r
325 for (File file : files) {
\r
326 JobDirectory jd = new JobDirectory(file);
\r
327 JobStat jstat = jd.getJobStat();
\r
328 // Do not record stats on the job that has not completed yet
\r
329 if (hasCompleted(jd)) {
\r
332 log.debug("Skipping the job: " + jstat);
\r
333 log.debug("As it has not completed yet");
\r
335 // System.out.println(jd.getJobStat().getJobReportTabulated());
\r
340 public void run() {
\r
341 log.info("Started updating statistics at " + new Date());
\r
342 log.info("For directory: " + workDirectory.getAbsolutePath());
\r
344 collectStatistics();
\r
346 StatProcessor local_stats = getStats();
\r
347 log.info("Found " + local_stats.getJobNumber() + " jobs!");
\r
350 } catch (SQLException e) {
\r
351 log.error("Fails to update jobs statistics database!");
\r
352 log.error(e.getLocalizedMessage(), e);
\r
354 log.info("Finished updating statistics at " + new Date());
\r