clear stats array and simplify internal structure
[jabaws.git] / webservices / compbio / stat / collector / ExecutionStatCollector.java
1 /* Copyright (c) 2011 Peter Troshin\r
2  *  \r
3  *  JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0     \r
4  * \r
5  *  This library is free software; you can redistribute it and/or modify it under the terms of the\r
6  *  Apache License version 2 as published by the Apache Software Foundation\r
7  * \r
8  *  This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without\r
9  *  even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache \r
10  *  License for more details.\r
11  * \r
12  *  A copy of the license is in apache_license.txt. It is also available here:\r
13  * @see: http://www.apache.org/licenses/LICENSE-2.0.txt\r
14  * \r
15  * Any republication or derived work distributed in source code form\r
16  * must include this copyright and license notice.\r
17  */\r
18 package compbio.stat.collector;\r
19 \r
20 import java.io.File;\r
21 import java.io.FileFilter;\r
22 import java.io.IOException;\r
23 import java.sql.SQLException;\r
24 import java.text.SimpleDateFormat;\r
25 import java.util.ArrayList;\r
26 import java.util.Date;\r
27 import java.util.HashMap;\r
28 import java.util.HashSet;\r
29 import java.util.List;\r
30 import java.util.Map;\r
31 import java.util.Set;\r
32 \r
33 import org.apache.log4j.Logger;\r
34 \r
35 import compbio.engine.client.Executable;\r
36 import compbio.engine.client.PathValidator;\r
37 import compbio.engine.client.SkeletalExecutable;\r
38 import compbio.metadata.JobStatus;\r
39 import compbio.util.FileUtil;\r
40 import compbio.ws.client.Services;\r
41 import compbio.ws.client.ServicesUtil;\r
42 \r
43 /**\r
44  * Number of runs of each WS = number of folders with name\r
45  * \r
46  * Number of successful runs = all runs with no result file\r
47  * \r
48  * Per period of time = limit per file creating time Runtime (avg/max) =\r
49  * \r
50  * started time - finished time\r
51  * \r
52  * Task & result size = result.size\r
53  * \r
54  * Abandoned runs - not collected runs\r
55  * \r
56  * Cancelled runs - cancelled\r
57  * \r
58  * Cluster vs local runs\r
59  * \r
60  * Reasons for failure = look in the err out?\r
61  * \r
62  * \r
63  * Metadata required:\r
64  * \r
65  * work directory for local and cluster tasks = from Helper or cmd parameter. WS\r
66  * names - enumeration. Status file names and content.\r
67  * \r
68  * @author pvtroshin\r
69  * \r
70  */\r
71 public class ExecutionStatCollector implements Runnable {\r
72 \r
73         static final int UNDEFINED = -1;\r
74 \r
75         private static final Logger log = Logger.getLogger(ExecutionStatCollector.class);\r
76 \r
77         static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");\r
78 \r
79         final private File workDirectory;\r
80         final private List<JobStat> stats;\r
81         /**\r
82          * Consider the job that has been working for longer than timeOutInHours\r
83          * completed, whatever the outcome\r
84          */\r
85         final private int timeOutInHours;\r
86 \r
87         /**\r
88          * List subdirectories in the job directory\r
89          * \r
90          * @param workDirectory\r
91          * @param timeOutInHours\r
92          */\r
93         public ExecutionStatCollector(String workDirectory, int timeOutInHours) {\r
94                 log.info("Starting stat collector for directory: " + workDirectory);\r
95                 log.info("Maximum allowed runtime(h): " + timeOutInHours);\r
96                 if (!PathValidator.isValidDirectory(workDirectory)) {\r
97                         throw new IllegalArgumentException("workDirectory '" + workDirectory + "' does not exist!");\r
98                 }\r
99                 this.workDirectory = new File(workDirectory);\r
100                 stats = new ArrayList<JobStat>();\r
101                 if (timeOutInHours <= 0) {\r
102                         throw new IllegalArgumentException(\r
103                                         "Timeout value must be greater than 0! Given value: " + timeOutInHours);\r
104                 }\r
105                 this.timeOutInHours = timeOutInHours;\r
106         }\r
107 \r
108         boolean hasCompleted(JobDirectory jd) {\r
109                 JobStat jstat = jd.getJobStat();\r
110                 if (jstat.hasResult() || jstat.getIsCancelled()\r
111                                 || jstat.getIsFinished() || hasTimedOut(jd)) {\r
112                         return true;\r
113                 }\r
114                 return false;\r
115         }\r
116 \r
117         boolean hasTimedOut(JobDirectory jd) {\r
118                 return ((System.currentTimeMillis() - jd.jobdir.lastModified()) / (1000 * 60 * 60)) > timeOutInHours;\r
119         }\r
120 \r
121         /*\r
122          * Make sure that collectStatistics methods was called prior to calling\r
123          * this! TODO consider running collectStatistics from here on the first call\r
124          */\r
125         StatProcessor getStats() {\r
126                 if (stats.isEmpty()) {\r
127                         log.info("Please make sure collectStatistics method was called prior to calling getStats()!");\r
128                 }\r
129                 return new StatProcessor(stats);\r
130         }\r
131 \r
132         void writeStatToDB() throws SQLException {\r
133                 Set<JobStat> rjobs = new HashSet<JobStat>(stats);\r
134                 StatDB statdb = new StatDB();\r
135                 log.debug("Removing records that has already been recorded");\r
136 \r
137                 statdb.removeRecordedJobs(rjobs);\r
138                 log.debug("New records left: " + rjobs.size());\r
139                 statdb.insertData(rjobs);\r
140         }\r
141 \r
142         /*\r
143          * static void updateTime(File statFile) throws IOException { long lastMod =\r
144          * statFile.lastModified(); FileWriter fw = new FileWriter(statFile);\r
145          * fw.write(new Long(lastMod).toString()); fw.close(); }\r
146          */\r
147 \r
148         /**\r
149          * Not in use\r
150          */\r
151         public static void main(String[] args) throws IOException, SQLException {\r
152 \r
153                 // updateTime(new File(\r
154                 // "D:\\workspace\\JABA2\\jobsout\\AACon#170462904473672\\STARTED"));\r
155 \r
156                 File[] files = FileUtil.getFiles("Y:\\fc\\www-jws2\\jaba\\jobsout",\r
157                                 directories);\r
158                 List<JobStat> stats = new ArrayList<JobStat>();\r
159                 for (File file : files) {\r
160                         JobDirectory jd = new JobDirectory(file);\r
161                         stats.add(jd.getJobStat());\r
162                         // System.out.println(jd.getJobStat().getJobReportTabulated());\r
163                 }\r
164                 StatProcessor sp = new StatProcessor(stats);\r
165                 System.out.println(sp.reportStat());\r
166                 System.out.println();\r
167                 System.out.println("!!!!!!!!!!!!!!!!!!");\r
168                 System.out.println();\r
169 \r
170                 Set<JobStat> rjobs = new HashSet<JobStat>(sp.stats);\r
171                 StatDB statdb = new StatDB();\r
172                 statdb.removeRecordedJobs(rjobs);\r
173                 statdb.insertData(rjobs);\r
174         }\r
175 \r
176         static FileFilter directories = new FileFilter() {\r
177                 @Override\r
178                 public boolean accept(File pathname) {\r
179                         return pathname.isDirectory()\r
180                                         && !pathname.getName().startsWith(".");\r
181                 }\r
182         };\r
183 \r
184         static class JobDirectory {\r
185 \r
186                 File jobdir;\r
187                 Map<String, File> files = new HashMap<String, File>();\r
188 \r
189                 JobDirectory(File directory) {\r
190                         this.jobdir = directory;\r
191                         for (File f : jobdir.listFiles()) {\r
192                                 files.put(f.getName(), f);\r
193                         }\r
194                 }\r
195 \r
196                 boolean hasStatus(JobStatus status) {\r
197                         return files.containsKey(status.toString());\r
198                 }\r
199 \r
200                 boolean isCollected() {\r
201                         return hasStatus(JobStatus.COLLECTED);\r
202                 }\r
203 \r
204                 boolean isCancelled() {\r
205                         return hasStatus(JobStatus.CANCELLED);\r
206                 }\r
207 \r
208                 long getStartTime() {\r
209                         long starttime = UNDEFINED;\r
210                         File startfile = files.get(JobStatus.STARTED.toString());\r
211                         if (startfile == null) {\r
212                                 startfile = files.get(JobStatus.SUBMITTED.toString());\r
213                         }\r
214                         try {\r
215                                 if (startfile != null) {\r
216                                         String start = FileUtil.readFileToString(startfile);\r
217                                         starttime = Long.parseLong(start.trim());\r
218                                 }\r
219                         } catch (IOException ignore) {\r
220                                 log.warn(\r
221                                                 "IOException while reading STARTED status file! Ignoring...",\r
222                                                 ignore);\r
223                                 // fall back\r
224                                 starttime = startfile.lastModified();\r
225                         } catch (NumberFormatException ignore) {\r
226                                 log.warn(\r
227                                                 "NumberFormatException while reading STARTED status file! Ignoring...",\r
228                                                 ignore);\r
229                                 // fall back\r
230                                 starttime = startfile.lastModified();\r
231                         }\r
232 \r
233                         return starttime;\r
234                 }\r
235 \r
236                 String getClusterJobID() {\r
237                         String clustjobId = "";\r
238                         File jobid = files.get("JOBID");\r
239                         try {\r
240                                 if (jobid != null) {\r
241                                         clustjobId = FileUtil.readFileToString(jobid);\r
242                                 }\r
243                         } catch (IOException ioe) {\r
244                                 log.error(\r
245                                                 "IO Exception while reading the content of JOBID file for job "\r
246                                                                 + jobid, ioe);\r
247                         }\r
248                         return clustjobId.trim();\r
249                 }\r
250 \r
251                 long getFinishedTime() {\r
252                         long ftime = UNDEFINED;\r
253                         File finished = files.get(JobStatus.FINISHED.toString());\r
254                         if (finished != null) {\r
255                                 try {\r
256                                         if (finished != null) {\r
257                                                 String start = FileUtil.readFileToString(finished);\r
258                                                 ftime = Long.parseLong(start.trim());\r
259                                         }\r
260                                 } catch (IOException ignore) {\r
261                                         log.warn(\r
262                                                         "IOException while reading FINISHED status file! Ignoring...",\r
263                                                         ignore);\r
264                                         // fall back\r
265                                         ftime = finished.lastModified();\r
266                                 } catch (NumberFormatException ignore) {\r
267                                         log.warn(\r
268                                                         "NumberFormatException while reading FINISHED status file! Ignoring...",\r
269                                                         ignore);\r
270                                         // fall back\r
271                                         ftime = finished.lastModified();\r
272                                 }\r
273                         }\r
274                         return ftime;\r
275                 }\r
276 \r
277                 private Services getService() {\r
278                         return ServicesUtil.getServiceByJobDirectory(jobdir);\r
279                 }\r
280 \r
281                 long getResultSize() {\r
282                         Class<? extends Executable<?>> name = ServicesUtil\r
283                                         .getRunnerByJobDirectory(jobdir);\r
284 \r
285                         File f = null;\r
286                         if (name.getSimpleName().equalsIgnoreCase("IUPred")) {\r
287                                 f = files.get("out.glob");\r
288                                 if (f == null)\r
289                                         f = files.get("out.short");\r
290                                 if (f == null)\r
291                                         f = files.get("out.long");\r
292                         } else {\r
293                                 f = files.get(SkeletalExecutable.OUTPUT);\r
294                         }\r
295                         if (f != null) {\r
296                                 return f.length();\r
297                         }\r
298                         return UNDEFINED;\r
299                 }\r
300 \r
301                 long getInputSize() {\r
302                         Class<? extends Executable<?>> name = ServicesUtil\r
303                                         .getRunnerByJobDirectory(jobdir);\r
304 \r
305                         File input = files.get(SkeletalExecutable.INPUT);\r
306                         if (input != null) {\r
307                                 return input.length();\r
308                         }\r
309                         return UNDEFINED;\r
310                 }\r
311 \r
312                 JobStat getJobStat() {\r
313                         return JobStat.newInstance(getService(), getClusterJobID(),\r
314                                         jobdir.getName(), getStartTime(), getFinishedTime(),\r
315                                         getInputSize(), getResultSize(), isCancelled(),\r
316                                         isCollected());\r
317                 }\r
318 \r
319                 @Override\r
320                 public int hashCode() {\r
321                         final int prime = 31;\r
322                         int result = 1;\r
323                         result = prime * result\r
324                                         + ((jobdir == null) ? 0 : jobdir.hashCode());\r
325                         return result;\r
326                 }\r
327 \r
328                 @Override\r
329                 public boolean equals(Object obj) {\r
330                         if (this == obj)\r
331                                 return true;\r
332                         if (obj == null)\r
333                                 return false;\r
334                         if (getClass() != obj.getClass())\r
335                                 return false;\r
336                         JobDirectory other = (JobDirectory) obj;\r
337                         if (jobdir == null) {\r
338                                 if (other.jobdir != null)\r
339                                         return false;\r
340                         } else if (!jobdir.equals(other.jobdir))\r
341                                 return false;\r
342                         return true;\r
343                 }\r
344         }\r
345 \r
346         // TODO test!\r
347         void collectStatistics() {\r
348                 // clear stats array;\r
349                 stats.clear();\r
350                 File[] files = workDirectory.listFiles(directories);\r
351                 for (File file : files) {\r
352                         // skip work directory with test input\r
353                         if (InputFilter.accept(new File(file.getPath() + File.separator + SkeletalExecutable.INPUT))) {\r
354                                 JobDirectory jd = new JobDirectory(file);\r
355                                 JobStat jstat = jd.getJobStat();\r
356                                 // Do not record stats on the job that has not completed yet\r
357                                 if (hasCompleted(jd)) {\r
358                                         stats.add(jstat);\r
359                                         System.out.println("added: id = " + jd);\r
360                                 } else {\r
361                                         log.debug("Skipping the job: " + jstat);\r
362                                         log.debug("As it has not completed yet");\r
363                                 }\r
364                                 // System.out.println(jd.getJobStat().getJobReportTabulated());\r
365                         } else {\r
366                                 log.trace("training input: " + file.getPath() + File.separator + SkeletalExecutable.INPUT);\r
367                         }\r
368                 }\r
369         }\r
370         @Override\r
371         public void run() {\r
372                 log.info("Started updating statistics at " + new Date());\r
373                 log.info("For directory: " + workDirectory.getAbsolutePath());\r
374 \r
375                 collectStatistics();\r
376 \r
377                 StatProcessor local_stats = getStats();\r
378                 log.info("Found " + local_stats.getJobNumber() + " jobs!");\r
379                 try {\r
380                         writeStatToDB();\r
381                 } catch (SQLException e) {\r
382                         log.error("Fails to update jobs statistics database!");\r
383                         log.error(e.getLocalizedMessage(), e);\r
384                 }\r
385                 log.info("Finished updating statistics at " + new Date());\r
386         }\r
387 }\r