Remove test jobs from statistics
[jabaws.git] / webservices / compbio / stat / collector / ExecutionStatCollector.java
1 /* Copyright (c) 2011 Peter Troshin\r
2  *  \r
3  *  JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0     \r
4  * \r
5  *  This library is free software; you can redistribute it and/or modify it under the terms of the\r
6  *  Apache License version 2 as published by the Apache Software Foundation\r
7  * \r
8  *  This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without\r
9  *  even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache \r
10  *  License for more details.\r
11  * \r
12  *  A copy of the license is in apache_license.txt. It is also available here:\r
13  * @see: http://www.apache.org/licenses/LICENSE-2.0.txt\r
14  * \r
15  * Any republication or derived work distributed in source code form\r
16  * must include this copyright and license notice.\r
17  */\r
18 package compbio.stat.collector;\r
19 \r
20 import java.io.File;\r
21 import java.io.FileFilter;\r
22 import java.io.IOException;\r
23 import java.sql.SQLException;\r
24 import java.text.SimpleDateFormat;\r
25 import java.util.ArrayList;\r
26 import java.util.Date;\r
27 import java.util.HashMap;\r
28 import java.util.HashSet;\r
29 import java.util.List;\r
30 import java.util.Map;\r
31 import java.util.Set;\r
32 \r
33 import org.apache.log4j.Logger;\r
34 \r
35 import compbio.engine.client.Executable;\r
36 import compbio.engine.client.PathValidator;\r
37 import compbio.engine.client.SkeletalExecutable;\r
38 import compbio.metadata.JobStatus;\r
39 import compbio.util.FileUtil;\r
40 import compbio.ws.client.Services;\r
41 import compbio.ws.client.ServicesUtil;\r
42 \r
43 /**\r
44  * Number of runs of each WS = number of folders with name\r
45  * \r
46  * Number of successful runs = all runs with no result file\r
47  * \r
48  * Per period of time = limit per file creating time Runtime (avg/max) =\r
49  * \r
50  * started time - finished time\r
51  * \r
52  * Task & result size = result.size\r
53  * \r
54  * Abandoned runs - not collected runs\r
55  * \r
56  * Cancelled runs - cancelled\r
57  * \r
58  * Cluster vs local runs\r
59  * \r
60  * Reasons for failure = look in the err out?\r
61  * \r
62  * \r
63  * Metadata required:\r
64  * \r
65  * work directory for local and cluster tasks = from Helper or cmd parameter. WS\r
66  * names - enumeration. Status file names and content.\r
67  * \r
68  * @author pvtroshin\r
69  * \r
70  */\r
71 public class ExecutionStatCollector implements Runnable {\r
72 \r
73         static final int UNDEFINED = -1;\r
74 \r
75         private static final Logger log = Logger\r
76                         .getLogger(ExecutionStatCollector.class);\r
77 \r
78         static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");\r
79 \r
80         final private File workDirectory;\r
81         final private List<JobStat> stats;\r
82         /**\r
83          * Consider the job that has been working for longer than timeOutInHours\r
84          * completed, whatever the outcome\r
85          */\r
86         final private int timeOutInHours;\r
87 \r
88         /**\r
89          * List subdirectories in the job directory\r
90          * \r
91          * @param workDirectory\r
92          * @param timeOutInHours\r
93          */\r
94         public ExecutionStatCollector(String workDirectory, int timeOutInHours) {\r
95                 log.info("Starting stat collector for directory: " + workDirectory);\r
96                 log.info("Maximum allowed runtime(h): " + timeOutInHours);\r
97                 if (!PathValidator.isValidDirectory(workDirectory)) {\r
98                         throw new IllegalArgumentException("workDirectory '"\r
99                                         + workDirectory + "' does not exist!");\r
100                 }\r
101                 this.workDirectory = new File(workDirectory);\r
102                 stats = new ArrayList<JobStat>();\r
103                 if (timeOutInHours <= 0) {\r
104                         throw new IllegalArgumentException(\r
105                                         "Timeout value must be greater than 0! Given value: "\r
106                                                         + timeOutInHours);\r
107                 }\r
108                 this.timeOutInHours = timeOutInHours;\r
109         }\r
110 \r
111         boolean hasCompleted(JobDirectory jd) {\r
112                 JobStat jstat = jd.getJobStat();\r
113                 if (jstat.hasResult() || jstat.getIsCancelled()\r
114                                 || jstat.getIsFinished() || hasTimedOut(jd)) {\r
115                         return true;\r
116                 }\r
117                 return false;\r
118         }\r
119 \r
120         boolean hasTimedOut(JobDirectory jd) {\r
121                 return ((System.currentTimeMillis() - jd.jobdir.lastModified()) / (1000 * 60 * 60)) > timeOutInHours;\r
122         }\r
123 \r
124         /*\r
125          * Make sure that collectStatistics methods was called prior to calling\r
126          * this! TODO consider running collectStatistics from here on the first call\r
127          */\r
128         StatProcessor getStats() {\r
129                 if (stats.isEmpty()) {\r
130                         log.info("Please make sure collectStatistics method was called prior to calling getStats()!");\r
131                 }\r
132                 return new StatProcessor(stats);\r
133         }\r
134 \r
135         void writeStatToDB() throws SQLException {\r
136                 Set<JobStat> rjobs = new HashSet<JobStat>(stats);\r
137                 StatDB statdb = new StatDB();\r
138                 log.debug("Removing records that has already been recorded");\r
139 \r
140                 statdb.removeRecordedJobs(rjobs);\r
141                 log.debug("New records left: " + rjobs.size());\r
142                 statdb.insertData(rjobs);\r
143         }\r
144 \r
145         /*\r
146          * static void updateTime(File statFile) throws IOException { long lastMod =\r
147          * statFile.lastModified(); FileWriter fw = new FileWriter(statFile);\r
148          * fw.write(new Long(lastMod).toString()); fw.close(); }\r
149          */\r
150 \r
151         /**\r
152          * Not in use\r
153          */\r
154         public static void main(String[] args) throws IOException, SQLException {\r
155 \r
156                 // updateTime(new File(\r
157                 // "D:\\workspace\\JABA2\\jobsout\\AACon#170462904473672\\STARTED"));\r
158 \r
159                 File[] files = FileUtil.getFiles("Y:\\fc\\www-jws2\\jaba\\jobsout",\r
160                                 directories);\r
161                 List<JobStat> stats = new ArrayList<JobStat>();\r
162                 for (File file : files) {\r
163                         JobDirectory jd = new JobDirectory(file);\r
164                         stats.add(jd.getJobStat());\r
165                         // System.out.println(jd.getJobStat().getJobReportTabulated());\r
166                 }\r
167                 StatProcessor sp = new StatProcessor(stats);\r
168                 System.out.println(sp.reportStat());\r
169                 System.out.println();\r
170                 System.out.println("!!!!!!!!!!!!!!!!!!");\r
171                 System.out.println();\r
172 \r
173                 Set<JobStat> rjobs = new HashSet<JobStat>(sp.stats);\r
174                 StatDB statdb = new StatDB();\r
175                 statdb.removeRecordedJobs(rjobs);\r
176                 statdb.insertData(rjobs);\r
177         }\r
178 \r
179         static FileFilter directories = new FileFilter() {\r
180                 @Override\r
181                 public boolean accept(File pathname) {\r
182                         return pathname.isDirectory()\r
183                                         && !pathname.getName().startsWith(".");\r
184                 }\r
185         };\r
186 \r
187         static class JobDirectory {\r
188 \r
189                 File jobdir;\r
190                 Map<String, File> files = new HashMap<String, File>();\r
191 \r
192                 JobDirectory(File directory) {\r
193                         this.jobdir = directory;\r
194                         for (File f : jobdir.listFiles()) {\r
195                                 files.put(f.getName(), f);\r
196                         }\r
197                 }\r
198 \r
199                 boolean hasStatus(JobStatus status) {\r
200                         return files.containsKey(status.toString());\r
201                 }\r
202 \r
203                 boolean isCollected() {\r
204                         return hasStatus(JobStatus.COLLECTED);\r
205                 }\r
206 \r
207                 boolean isCancelled() {\r
208                         return hasStatus(JobStatus.CANCELLED);\r
209                 }\r
210 \r
211                 long getStartTime() {\r
212                         long starttime = UNDEFINED;\r
213                         File startfile = files.get(JobStatus.STARTED.toString());\r
214                         if (startfile == null) {\r
215                                 startfile = files.get(JobStatus.SUBMITTED.toString());\r
216                         }\r
217                         try {\r
218                                 if (startfile != null) {\r
219                                         String start = FileUtil.readFileToString(startfile);\r
220                                         starttime = Long.parseLong(start.trim());\r
221                                 }\r
222                         } catch (IOException ignore) {\r
223                                 log.warn(\r
224                                                 "IOException while reading STARTED status file! Ignoring...",\r
225                                                 ignore);\r
226                                 // fall back\r
227                                 starttime = startfile.lastModified();\r
228                         } catch (NumberFormatException ignore) {\r
229                                 log.warn(\r
230                                                 "NumberFormatException while reading STARTED status file! Ignoring...",\r
231                                                 ignore);\r
232                                 // fall back\r
233                                 starttime = startfile.lastModified();\r
234                         }\r
235 \r
236                         return starttime;\r
237                 }\r
238 \r
239                 String getClusterJobID() {\r
240                         String clustjobId = "";\r
241                         File jobid = files.get("JOBID");\r
242                         try {\r
243                                 if (jobid != null) {\r
244                                         clustjobId = FileUtil.readFileToString(jobid);\r
245                                 }\r
246                         } catch (IOException ioe) {\r
247                                 log.error(\r
248                                                 "IO Exception while reading the content of JOBID file for job "\r
249                                                                 + jobid, ioe);\r
250                         }\r
251                         return clustjobId.trim();\r
252                 }\r
253 \r
254                 long getFinishedTime() {\r
255                         long ftime = UNDEFINED;\r
256                         File finished = files.get(JobStatus.FINISHED.toString());\r
257                         if (finished != null) {\r
258                                 try {\r
259                                         if (finished != null) {\r
260                                                 String start = FileUtil.readFileToString(finished);\r
261                                                 ftime = Long.parseLong(start.trim());\r
262                                         }\r
263                                 } catch (IOException ignore) {\r
264                                         log.warn(\r
265                                                         "IOException while reading FINISHED status file! Ignoring...",\r
266                                                         ignore);\r
267                                         // fall back\r
268                                         ftime = finished.lastModified();\r
269                                 } catch (NumberFormatException ignore) {\r
270                                         log.warn(\r
271                                                         "NumberFormatException while reading FINISHED status file! Ignoring...",\r
272                                                         ignore);\r
273                                         // fall back\r
274                                         ftime = finished.lastModified();\r
275                                 }\r
276                         }\r
277                         return ftime;\r
278                 }\r
279 \r
280                 private Services getService() {\r
281                         return ServicesUtil.getServiceByJobDirectory(jobdir);\r
282                 }\r
283 \r
284                 long getResultSize() {\r
285                         Class<? extends Executable<?>> name = ServicesUtil\r
286                                         .getRunnerByJobDirectory(jobdir);\r
287 \r
288                         File f = null;\r
289                         if (name.getSimpleName().equalsIgnoreCase("IUPred")) {\r
290                                 f = files.get("out.glob");\r
291                                 if (f == null)\r
292                                         f = files.get("out.short");\r
293                                 if (f == null)\r
294                                         f = files.get("out.long");\r
295                         } else {\r
296                                 f = files.get(SkeletalExecutable.OUTPUT);\r
297                         }\r
298                         if (f != null) {\r
299                                 return f.length();\r
300                         }\r
301                         return UNDEFINED;\r
302                 }\r
303 \r
304                 long getInputSize() {\r
305                         Class<? extends Executable<?>> name = ServicesUtil\r
306                                         .getRunnerByJobDirectory(jobdir);\r
307 \r
308                         File input = files.get(SkeletalExecutable.INPUT);\r
309                         if (input != null) {\r
310                                 return input.length();\r
311                         }\r
312                         return UNDEFINED;\r
313                 }\r
314 \r
315                 JobStat getJobStat() {\r
316                         return JobStat.newInstance(getService(), getClusterJobID(),\r
317                                         jobdir.getName(), getStartTime(), getFinishedTime(),\r
318                                         getInputSize(), getResultSize(), isCancelled(),\r
319                                         isCollected());\r
320                 }\r
321 \r
322                 @Override\r
323                 public int hashCode() {\r
324                         final int prime = 31;\r
325                         int result = 1;\r
326                         result = prime * result\r
327                                         + ((jobdir == null) ? 0 : jobdir.hashCode());\r
328                         return result;\r
329                 }\r
330 \r
331                 @Override\r
332                 public boolean equals(Object obj) {\r
333                         if (this == obj)\r
334                                 return true;\r
335                         if (obj == null)\r
336                                 return false;\r
337                         if (getClass() != obj.getClass())\r
338                                 return false;\r
339                         JobDirectory other = (JobDirectory) obj;\r
340                         if (jobdir == null) {\r
341                                 if (other.jobdir != null)\r
342                                         return false;\r
343                         } else if (!jobdir.equals(other.jobdir))\r
344                                 return false;\r
345                         return true;\r
346                 }\r
347         }\r
348 \r
349         // TODO test!\r
350         void collectStatistics() {\r
351                 File[] files = workDirectory.listFiles(directories);\r
352                 for (File file : files) {\r
353                         if (!InputFilter.accept(new File(file.getPath() + File.separator\r
354                                         + SkeletalExecutable.INPUT))) {\r
355                                 // skip work directory with test input\r
356                                 continue;\r
357                         }\r
358                         JobDirectory jd = new JobDirectory(file);\r
359                         JobStat jstat = jd.getJobStat();\r
360                         // Do not record stats on the job that has not completed yet\r
361                         if (hasCompleted(jd)) {\r
362                                 stats.add(jstat);\r
363                         } else {\r
364                                 log.debug("Skipping the job: " + jstat);\r
365                                 log.debug("As it has not completed yet");\r
366                         }\r
367                         // System.out.println(jd.getJobStat().getJobReportTabulated());\r
368                 }\r
369         }\r
370         @Override\r
371         public void run() {\r
372                 log.info("Started updating statistics at " + new Date());\r
373                 log.info("For directory: " + workDirectory.getAbsolutePath());\r
374 \r
375                 collectStatistics();\r
376 \r
377                 StatProcessor local_stats = getStats();\r
378                 log.info("Found " + local_stats.getJobNumber() + " jobs!");\r
379                 try {\r
380                         writeStatToDB();\r
381                 } catch (SQLException e) {\r
382                         log.error("Fails to update jobs statistics database!");\r
383                         log.error(e.getLocalizedMessage(), e);\r
384                 }\r
385                 log.info("Finished updating statistics at " + new Date());\r
386         }\r
387 }\r