webservices/compbio/stat/collector/ExecutionStatUpdater.java

   1 /* Copyright (c) 2013 Alexander Sherstnev\r
   2  * Copyright (c) 2011 Peter Troshin\r
   3  *  \r
   4  *  JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0     \r
   5  * \r
   6  *  This library is free software; you can redistribute it and/or modify it under the terms of the\r
   7  *  Apache License version 2 as published by the Apache Software Foundation\r
   8  * \r
   9  *  This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without\r
  10  *  even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache \r
  11  *  License for more details.\r
  12  * \r
  13  *  A copy of the license is in apache_license.txt. It is also available here:\r
  14  * @see: http://www.apache.org/licenses/LICENSE-2.0.txt\r
  15  * \r
  16  * Any republication or derived work distributed in source code form\r
  17  * must include this copyright and license notice.\r
  18  */\r
  19 package compbio.stat.collector;\r
  20 \r
  21 import java.io.File;\r
  22 import java.io.FileFilter;\r
  23 import java.io.IOException;\r
  24 import java.sql.SQLException;\r
  25 import java.text.ParseException;\r
  26 import java.text.SimpleDateFormat;\r
  27 import java.util.ArrayList;\r
  28 import java.util.Date;\r
  29 import java.util.HashSet;\r
  30 import java.util.List;\r
  31 import java.util.Set;\r
  32 \r
  33 import com.beust.jcommander.JCommander;\r
  34 import com.beust.jcommander.Parameter;\r
  35 \r
  36 import org.apache.log4j.Logger;\r
  37 \r
  38 import compbio.engine.client.PathValidator;\r
  39 import compbio.engine.client.SkeletalExecutable;\r
  40 \r
  41 /**\r
  42  * Class assumptions: 1. Number of runs of each WS = number of folders with name\r
  43  * 2. Number of successful runs = all runs with no result file 3. Per period of\r
   44  * time = limit per file creation time 4. Runtime (avg/max) = finish time -\r
  45  * start time 5. Task & result size = result.size\r
  46  * \r
  47  * Abandoned runs - not collected runs\r
  48  * \r
  49  * Cancelled runs - cancelled\r
  50  * \r
  51  * Cluster vs local runs\r
  52  * \r
  53  * Reasons for failure = look in the err out?\r
  54  * \r
  55  * \r
  56  * Metadata required:\r
  57  * \r
  58  * work directory for local and cluster tasks = from Helper or cmd parameter. WS\r
  59  * names - enumeration. Status file names and content.\r
  60  * \r
  61  * @author Peter Troshin\r
  62  * @author Alexander Sherstnev\r
  63  * \r
  64  */\r
  65 \r
  66 class mainJCommander {\r
  67         @Parameter\r
  68         private List<String> parameters = new ArrayList<String>();\r
  69 \r
  70         @Parameter(names = { "-log", "-verbose" }, description = "Level of verbosity")\r
  71         public Integer verbose = 1;\r
  72 \r
  73         @Parameter(names = "-start", description = "Start time")\r
  74         public String starttime;\r
  75 \r
  76         @Parameter(names = "-end", description = "Start time")\r
  77         public String endtime;\r
  78 \r
  79         @Parameter(names = "-db", description = "Path to database")\r
  80         public String dbname;\r
  81 \r
  82         @Parameter(names = "-dir", description = "Path to job directory")\r
  83         public String workingdir;\r
  84 }\r
  85 \r
  86 public class ExecutionStatUpdater {\r
  87         static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");\r
  88         static SimpleDateFormat shortDF = new SimpleDateFormat("dd/MM/yyyy");\r
  89         private static final Logger log = Logger.getLogger(ExecutionStatUpdater.class);\r
  90 \r
  91         final private File workingDirectory;\r
  92         final private List<JobStat> stats;\r
  93         /**\r
  94          * Consider the job that has been working for longer than timeOutInHours\r
  95          * completed, whatever the outcome\r
  96          */\r
  97         final private int timeOutInHours;\r
  98 \r
  99         /**\r
 100          * List subdirectories in the job directory\r
 101          * \r
 102          * @param workingDirectory\r
 103          * @param timeOutInHours\r
 104          */\r
 105         public ExecutionStatUpdater(String workingDirectory, int timeOutInHours) {\r
 106                 log.info("Starting stat collector for directory: " + workingDirectory);\r
 107                 log.info("Maximum allowed runtime(h): " + timeOutInHours);\r
 108                 if (!PathValidator.isValidDirectory(workingDirectory)) {\r
 109                         throw new IllegalArgumentException("workingDirectory '" + workingDirectory + "' does not exist!");\r
 110                 }\r
 111                 this.workingDirectory = new File(workingDirectory);\r
 112                 stats = new ArrayList<JobStat>();\r
 113                 if (timeOutInHours <= 0) {\r
 114                         throw new IllegalArgumentException("Timeout value must be greater than 0! Given value: " + timeOutInHours);\r
 115                 }\r
 116                 this.timeOutInHours = timeOutInHours;\r
 117         }\r
 118 \r
 119         boolean hasCompleted(JobDirectory jd) {\r
 120                 JobStat jstat = jd.getJobStat();\r
 121                 if (jstat.hasResult() || jstat.getIsCancelled() || jstat.getIsFinished() || hasTimedOut(jd)) {\r
 122                         return true;\r
 123                 }\r
 124                 return false;\r
 125         }\r
 126 \r
 127         boolean hasTimedOut(JobDirectory jd) {\r
 128                 return ((System.currentTimeMillis() - jd.jobdir.lastModified()) / (1000 * 60 * 60)) > timeOutInHours;\r
 129         }\r
 130 \r
 131         /*\r
 132          * Make sure that collectStatistics methods was called prior to calling\r
 133          * this! TODO consider running collectStatistics from here on the first call\r
 134          */\r
 135         StatProcessor getStats() {\r
 136                 if (stats.isEmpty()) {\r
 137                         log.info("Please make sure collectStatistics method was called prior to calling getStats()!");\r
 138                 }\r
 139                 return new StatProcessor(stats);\r
 140         }\r
 141 \r
 142         void writeStatToDB() throws SQLException {\r
 143                 Set<JobStat> rjobs = new HashSet<JobStat>(stats);\r
 144                 StatDB statdb = new StatDB();\r
 145                 log.debug("Removing records that has already been recorded");\r
 146                 statdb.removeRecordedJobs(rjobs);\r
 147                 log.debug("New records left: " + rjobs.size());\r
 148                 statdb.insertData(rjobs);\r
 149         }\r
 150 \r
 151         /**\r
 152          * main function\r
 153          * @throws ParseException \r
 154          */\r
 155         public static void main(String[] args) throws IOException, SQLException, ParseException {\r
 156                 mainJCommander jct = new mainJCommander();\r
 157                 new JCommander(jct, args);\r
 158                 String WorkingDir = jct.workingdir;\r
 159                 String dbname = jct.dbname;\r
 160                 Date ST = shortDF.parse(jct.starttime);\r
 161                 Date ET = shortDF.parse(jct.endtime);\r
 162                 long StartTime = 0;\r
 163                 if (null != ST) {\r
 164                         StartTime = ST.getTime();\r
 165                 }\r
 166                 Date currDate = new Date();\r
 167                 long EndTime = currDate.getTime();\r
 168                 if (null != ET) {\r
 169                         EndTime = ET.getTime();\r
 170                 }\r
 171 \r
 172                 System.out.println("Start time: " + jct.starttime + " = " + StartTime);\r
 173                 System.out.println("End time: " + jct.endtime + " = " + EndTime);\r
 174 \r
 175                 ExecutionStatUpdater esu = new ExecutionStatUpdater(WorkingDir, 1);\r
 176                 esu.collectStatistics(StartTime, EndTime);\r
 177                 esu.writeStatToDB();\r
 178         }\r
 179 \r
 180         static FileFilter directories = new FileFilter() {\r
 181                 @Override\r
 182                 public boolean accept(File pathname) {\r
 183                         return pathname.isDirectory() && !pathname.getName().startsWith(".");\r
 184                 }\r
 185         };\r
 186 \r
 187         // TODO test!\r
 188         private void collectStatistics(long StartTime, long EndTime) {\r
 189                 // clear stats array;\r
 190                 stats.clear();\r
 191                 File[] dirs = workingDirectory.listFiles(directories);\r
 192                 for (File dir : dirs) {\r
 193                         // skip work directory with test inputs and out of ordered time\r
 194                         // range\r
 195                         log.debug("check directory: " + dir.getName() + "...");\r
 196                         if (InputFilter.accept(new File(dir.getPath() + File.separator + SkeletalExecutable.INPUT)) && StartTime < dir.lastModified()\r
 197                                         && dir.lastModified() < EndTime) {\r
 198                                 JobDirectory jd = new JobDirectory(dir);\r
 199                                 JobStat jstat = jd.getJobStat();\r
 200                                 // Do not record stats on the job that has not completed yet\r
 201                                 if (hasCompleted(jd)) {\r
 202                                         stats.add(jstat);\r
 203                                 } else {\r
 204                                         log.debug("Skipping the job: " + jstat + " as it has not completed yet");\r
 205                                 }\r
 206                         } else {\r
 207                                 log.trace("training input: " + dir.getName() + File.separator + SkeletalExecutable.INPUT);\r
 208                         }\r
 209                 }\r
 210         }\r
 211 \r
 212 }\r