Change header template for a new version
[jabaws.git] / webservices / compbio / stat / collector / ExecutionStatCollector.java
1 /* Copyright (c) 2011 Peter Troshin\r
2  *  \r
3  *  JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0     \r
4  * \r
5  *  This library is free software; you can redistribute it and/or modify it under the terms of the\r
6  *  Apache License version 2 as published by the Apache Software Foundation\r
7  * \r
8  *  This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without\r
9  *  even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache \r
10  *  License for more details.\r
11  * \r
12  *  A copy of the license is in apache_license.txt. It is also available here:\r
13  * @see: http://www.apache.org/licenses/LICENSE-2.0.txt\r
14  * \r
15  * Any republication or derived work distributed in source code form\r
16  * must include this copyright and license notice.\r
17  */\r
18 package compbio.stat.collector;\r
19 \r
20 import java.io.File;\r
21 import java.io.FileFilter;\r
22 import java.io.IOException;\r
23 import java.sql.SQLException;\r
24 import java.text.SimpleDateFormat;\r
25 import java.util.ArrayList;\r
26 import java.util.Date;\r
27 import java.util.HashMap;\r
28 import java.util.HashSet;\r
29 import java.util.List;\r
30 import java.util.Map;\r
31 import java.util.Set;\r
32 \r
33 import org.apache.log4j.Logger;\r
34 \r
35 import compbio.engine.client.Executable;\r
36 import compbio.engine.client.PathValidator;\r
37 import compbio.engine.client.SkeletalExecutable;\r
38 import compbio.metadata.JobStatus;\r
39 import compbio.util.FileUtil;\r
40 import compbio.ws.client.Services;\r
41 \r
42 /**\r
43  * Number of runs of each WS = number of folders with name\r
44  * \r
45  * Number of successful runs = all runs with no result file\r
46  * \r
 * Per period of time = limit per file creation time
 * 
 * Runtime (avg/max) = finished time - started time
50  * \r
51  * Task & result size = result.size\r
52  * \r
53  * Abandoned runs - not collected runs\r
54  * \r
55  * Cancelled runs - cancelled\r
56  * \r
57  * Cluster vs local runs\r
58  * \r
59  * Reasons for failure = look in the err out?\r
60  * \r
61  * \r
62  * Metadata required:\r
63  * \r
64  * work directory for local and cluster tasks = from Helper or cmd parameter. WS\r
65  * names - enumeration. Status file names and content.\r
66  * \r
67  * @author pvtroshin\r
68  * \r
69  */\r
70 public class ExecutionStatCollector implements Runnable {\r
71 \r
        /**
         * Sentinel returned by the time and size getters of {@link JobDirectory}
         * when the corresponding value cannot be determined.
         */
        static final int UNDEFINED = -1;

        /** Logger shared by the collector and its nested {@link JobDirectory}. */
        private static final Logger log = Logger
                        .getLogger(ExecutionStatCollector.class);

        /*
         * Date format for day/month/year and time of day. Not referenced in the
         * visible part of this class.
         * 
         * NOTE(review): SimpleDateFormat is not thread-safe and this instance is
         * a mutable, non-final static - confirm it is never used concurrently.
         * NOTE(review): "hh" is the 12-hour clock and the pattern has no am/pm
         * marker - confirm whether "HH" was intended before changing it.
         */
        static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");

        /** Directory whose sub-directories are scanned by collectStatistics(). */
        final private File workDirectory;
        /** Statistics accumulated by collectStatistics(); appended to, never cleared here. */
        final private List<JobStat> stats;
        /**
         * Consider the job that has been working for longer than timeOutInHours
         * completed, whatever the outcome
         */
        final private int timeOutInHours;
86 \r
87         /**\r
88          * List subdirectories in the job directory\r
89          * \r
90          * @param workDirectory\r
91          * @param timeOutInHours\r
92          */\r
93         public ExecutionStatCollector(String workDirectory, int timeOutInHours) {\r
94                 log.info("Starting stat collector for directory: " + workDirectory);\r
95                 log.info("Maximum allowed runtime(h): " + timeOutInHours);\r
96                 if (!PathValidator.isValidDirectory(workDirectory)) {\r
97                         throw new IllegalArgumentException("workDirectory '"\r
98                                         + workDirectory + "' does not exist!");\r
99                 }\r
100                 this.workDirectory = new File(workDirectory);\r
101                 stats = new ArrayList<JobStat>();\r
102                 if (timeOutInHours <= 0) {\r
103                         throw new IllegalArgumentException(\r
104                                         "Timeout value must be greater than 0! Given value: "\r
105                                                         + timeOutInHours);\r
106                 }\r
107                 this.timeOutInHours = timeOutInHours;\r
108         }\r
109 \r
110         boolean hasCompleted(JobDirectory jd) {\r
111                 JobStat jstat = jd.getJobStat();\r
112                 if (jstat.hasResult() || jstat.getIsCancelled()\r
113                                 || jstat.getIsFinished() || hasTimedOut(jd)) {\r
114                         return true;\r
115                 }\r
116                 return false;\r
117         }\r
118 \r
119         boolean hasTimedOut(JobDirectory jd) {\r
120                 return ((System.currentTimeMillis() - jd.jobdir.lastModified()) / (1000 * 60 * 60)) > timeOutInHours;\r
121         }\r
122 \r
123         /*\r
124          * Make sure that collectStatistics methods was called prior to calling\r
125          * this! TODO consider running collectStatistics from here on the first call\r
126          */\r
127         StatProcessor getStats() {\r
128                 if (stats.isEmpty()) {\r
129                         log.info("Please make sure collectStatistics method was called prior to calling getStats()!");\r
130                 }\r
131                 return new StatProcessor(stats);\r
132         }\r
133 \r
134         void writeStatToDB() throws SQLException {\r
135                 Set<JobStat> rjobs = new HashSet<JobStat>(stats);\r
136                 StatDB statdb = new StatDB();\r
137                 log.debug("Removing records that has already been recorded");\r
138 \r
139                 statdb.removeRecordedJobs(rjobs);\r
140                 log.debug("New records left: " + rjobs.size());\r
141                 statdb.insertData(rjobs);\r
142         }\r
143 \r
144         /*\r
145          * static void updateTime(File statFile) throws IOException { long lastMod =\r
146          * statFile.lastModified(); FileWriter fw = new FileWriter(statFile);\r
147          * fw.write(new Long(lastMod).toString()); fw.close(); }\r
148          */\r
149 \r
150         /**\r
151          * Not in use\r
152          */\r
153         public static void main(String[] args) throws IOException, SQLException {\r
154 \r
155                 // updateTime(new File(\r
156                 // "D:\\workspace\\JABA2\\jobsout\\AACon#170462904473672\\STARTED"));\r
157 \r
158                 File[] files = FileUtil.getFiles("Y:\\fc\\www-jws2\\jaba\\jobsout",\r
159                                 directories);\r
160                 List<JobStat> stats = new ArrayList<JobStat>();\r
161                 for (File file : files) {\r
162                         JobDirectory jd = new JobDirectory(file);\r
163                         stats.add(jd.getJobStat());\r
164                         // System.out.println(jd.getJobStat().getJobReportTabulated());\r
165                 }\r
166                 StatProcessor sp = new StatProcessor(stats);\r
167                 System.out.println(sp.reportStat());\r
168                 System.out.println();\r
169                 System.out.println("!!!!!!!!!!!!!!!!!!");\r
170                 System.out.println();\r
171 \r
172                 Set<JobStat> rjobs = new HashSet<JobStat>(sp.stats);\r
173                 StatDB statdb = new StatDB();\r
174                 statdb.removeRecordedJobs(rjobs);\r
175                 statdb.insertData(rjobs);\r
176         }\r
177 \r
178         static FileFilter directories = new FileFilter() {\r
179                 @Override\r
180                 public boolean accept(File pathname) {\r
181                         return pathname.isDirectory()\r
182                                         && !pathname.getName().startsWith(".");\r
183                 }\r
184         };\r
185 \r
186         static class JobDirectory {\r
187 \r
188                 File jobdir;\r
189                 Map<String, File> files = new HashMap<String, File>();\r
190 \r
191                 JobDirectory(File directory) {\r
192                         this.jobdir = directory;\r
193                         for (File f : jobdir.listFiles()) {\r
194                                 files.put(f.getName(), f);\r
195                         }\r
196                 }\r
197 \r
198                 boolean hasStatus(JobStatus status) {\r
199                         return files.containsKey(status.toString());\r
200                 }\r
201 \r
202                 boolean isCollected() {\r
203                         return hasStatus(JobStatus.COLLECTED);\r
204                 }\r
205 \r
206                 boolean isCancelled() {\r
207                         return hasStatus(JobStatus.CANCELLED);\r
208                 }\r
209 \r
210                 long getStartTime() {\r
211                         long starttime = UNDEFINED;\r
212                         File startfile = files.get(JobStatus.STARTED.toString());\r
213                         if (startfile == null) {\r
214                                 startfile = files.get(JobStatus.SUBMITTED.toString());\r
215                         }\r
216                         try {\r
217                                 if (startfile != null) {\r
218                                         String start = FileUtil.readFileToString(startfile);\r
219                                         starttime = Long.parseLong(start.trim());\r
220                                 }\r
221                         } catch (IOException ignore) {\r
222                                 log.warn(\r
223                                                 "IOException while reading STARTED status file! Ignoring...",\r
224                                                 ignore);\r
225                                 // fall back\r
226                                 starttime = startfile.lastModified();\r
227                         } catch (NumberFormatException ignore) {\r
228                                 log.warn(\r
229                                                 "NumberFormatException while reading STARTED status file! Ignoring...",\r
230                                                 ignore);\r
231                                 // fall back\r
232                                 starttime = startfile.lastModified();\r
233                         }\r
234 \r
235                         return starttime;\r
236                 }\r
237 \r
238                 String getClusterJobID() {\r
239                         String clustjobId = "";\r
240                         File jobid = files.get("JOBID");\r
241                         try {\r
242                                 if (jobid != null) {\r
243                                         clustjobId = FileUtil.readFileToString(jobid);\r
244                                 }\r
245                         } catch (IOException ioe) {\r
246                                 log.error(\r
247                                                 "IO Exception while reading the content of JOBID file for job "\r
248                                                                 + jobid, ioe);\r
249                         }\r
250                         return clustjobId.trim();\r
251                 }\r
252 \r
253                 long getFinishedTime() {\r
254                         long ftime = UNDEFINED;\r
255                         File finished = files.get(JobStatus.FINISHED.toString());\r
256                         if (finished != null) {\r
257                                 try {\r
258                                         if (finished != null) {\r
259                                                 String start = FileUtil.readFileToString(finished);\r
260                                                 ftime = Long.parseLong(start.trim());\r
261                                         }\r
262                                 } catch (IOException ignore) {\r
263                                         log.warn(\r
264                                                         "IOException while reading FINISHED status file! Ignoring...",\r
265                                                         ignore);\r
266                                         // fall back\r
267                                         ftime = finished.lastModified();\r
268                                 } catch (NumberFormatException ignore) {\r
269                                         log.warn(\r
270                                                         "NumberFormatException while reading FINISHED status file! Ignoring...",\r
271                                                         ignore);\r
272                                         // fall back\r
273                                         ftime = finished.lastModified();\r
274                                 }\r
275                         }\r
276                         return ftime;\r
277                 }\r
278 \r
279                 private Services getService() {\r
280                         return Services.getServiceByJobDirectory(jobdir);\r
281                 }\r
282 \r
283                 long getResultSize() {\r
284                         Class<? extends Executable<?>> name = Services\r
285                                         .getRunnerByJobDirectory(jobdir);\r
286 \r
287                         File f = null;\r
288                         if (name.getSimpleName().equalsIgnoreCase("IUPred")) {\r
289                                 f = files.get("out.glob");\r
290                                 if (f == null)\r
291                                         f = files.get("out.short");\r
292                                 if (f == null)\r
293                                         f = files.get("out.long");\r
294                         } else {\r
295                                 f = files.get(SkeletalExecutable.OUTPUT);\r
296                         }\r
297                         if (f != null) {\r
298                                 return f.length();\r
299                         }\r
300                         return UNDEFINED;\r
301                 }\r
302 \r
303                 long getInputSize() {\r
304                         Class<? extends Executable<?>> name = Services\r
305                                         .getRunnerByJobDirectory(jobdir);\r
306 \r
307                         File input = files.get(SkeletalExecutable.INPUT);\r
308                         if (input != null) {\r
309                                 return input.length();\r
310                         }\r
311                         return UNDEFINED;\r
312                 }\r
313 \r
314                 JobStat getJobStat() {\r
315                         return JobStat.newInstance(getService(), getClusterJobID(),\r
316                                         jobdir.getName(), getStartTime(), getFinishedTime(),\r
317                                         getInputSize(), getResultSize(), isCancelled(),\r
318                                         isCollected());\r
319                 }\r
320 \r
321                 @Override\r
322                 public int hashCode() {\r
323                         final int prime = 31;\r
324                         int result = 1;\r
325                         result = prime * result\r
326                                         + ((jobdir == null) ? 0 : jobdir.hashCode());\r
327                         return result;\r
328                 }\r
329 \r
330                 @Override\r
331                 public boolean equals(Object obj) {\r
332                         if (this == obj)\r
333                                 return true;\r
334                         if (obj == null)\r
335                                 return false;\r
336                         if (getClass() != obj.getClass())\r
337                                 return false;\r
338                         JobDirectory other = (JobDirectory) obj;\r
339                         if (jobdir == null) {\r
340                                 if (other.jobdir != null)\r
341                                         return false;\r
342                         } else if (!jobdir.equals(other.jobdir))\r
343                                 return false;\r
344                         return true;\r
345                 }\r
346         }\r
347 \r
348         void collectStatistics() {\r
349                 File[] files = workDirectory.listFiles(directories);\r
350                 for (File file : files) {\r
351                         JobDirectory jd = new JobDirectory(file);\r
352                         JobStat jstat = jd.getJobStat();\r
353                         // Do not record stats on the job that has not completed yet\r
354                         if (hasCompleted(jd)) {\r
355                                 stats.add(jstat);\r
356                         } else {\r
357                                 log.debug("Skipping the job: " + jstat);\r
358                                 log.debug("As it has not completed yet");\r
359                         }\r
360                         // System.out.println(jd.getJobStat().getJobReportTabulated());\r
361                 }\r
362         }\r
363 \r
364         @Override\r
365         public void run() {\r
366                 log.info("Started updating statistics at " + new Date());\r
367                 log.info("For directory: " + workDirectory.getAbsolutePath());\r
368 \r
369                 collectStatistics();\r
370 \r
371                 StatProcessor local_stats = getStats();\r
372                 log.info("Found " + local_stats.getJobNumber() + " jobs!");\r
373                 try {\r
374                         writeStatToDB();\r
375                 } catch (SQLException e) {\r
376                         log.error("Fails to update jobs statistics database!");\r
377                         log.error(e.getLocalizedMessage(), e);\r
378                 }\r
379                 log.info("Finished updating statistics at " + new Date());\r
380         }\r
381 }\r