Fix for testcases
[jabaws.git] / webservices / compbio / stat / collector / ExecutionStatCollector.java
1 package compbio.stat.collector;\r
2 \r
3 import java.io.File;\r
4 import java.io.FileFilter;\r
5 import java.io.IOException;\r
6 import java.sql.SQLException;\r
7 import java.text.SimpleDateFormat;\r
8 import java.util.ArrayList;\r
9 import java.util.Date;\r
10 import java.util.HashMap;\r
11 import java.util.HashSet;\r
12 import java.util.List;\r
13 import java.util.Map;\r
14 import java.util.Set;\r
15 \r
16 import org.apache.log4j.Logger;\r
17 \r
18 import compbio.engine.client.Executable;\r
19 import compbio.engine.client.PathValidator;\r
20 import compbio.metadata.JobStatus;\r
21 import compbio.util.FileUtil;\r
22 import compbio.ws.client.Services;\r
23 \r
24 /**\r
25  * Number of runs of each WS = number of folders with name\r
26  * \r
27  * Number of successful runs = all runs with no result file\r
28  * \r
29  * Per period of time = limit per file creating time Runtime (avg/max) =\r
30  * \r
31  * started time - finished time\r
32  * \r
33  * Task & result size = result.size\r
34  * \r
35  * Abandoned runs - not collected runs\r
36  * \r
37  * Cancelled runs - cancelled\r
38  * \r
39  * Cluster vs local runs\r
40  * \r
41  * Reasons for failure = look in the err out?\r
42  * \r
43  * \r
44  * Metadata required:\r
45  * \r
46  * work directory for local and cluster tasks = from Helper or cmd parameter. WS\r
47  * names - enumeration. Status file names and content.\r
48  * \r
49  * @author pvtroshin\r
50  * \r
51  */\r
52 public class ExecutionStatCollector implements Runnable {\r
53 \r
54         static final int UNDEFINED = -1;\r
55 \r
56         private static final Logger log = Logger\r
57                         .getLogger(ExecutionStatCollector.class);\r
58 \r
59         static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");\r
60 \r
61         final private File workDirectory;\r
62         final private List<JobStat> stats;\r
63         /**\r
64          * Consider the job that has been working for longer than timeOutInHours\r
65          * completed, whatever the outcome\r
66          */\r
67         final private int timeOutInHours;\r
68 \r
69         /**\r
70          * List subdirectories in the job directory\r
71          * \r
72          * @param workDirectory\r
73          * @param timeOutInHours\r
74          */\r
75         public ExecutionStatCollector(String workDirectory, int timeOutInHours) {\r
76                 log.info("Starting stat collector for directory: " + workDirectory);\r
77                 log.info("Maximum allowed runtime(h): " + timeOutInHours);\r
78                 if (!PathValidator.isValidDirectory(workDirectory)) {\r
79                         throw new IllegalArgumentException("workDirectory '"\r
80                                         + workDirectory + "' does not exist!");\r
81                 }\r
82                 this.workDirectory = new File(workDirectory);\r
83                 stats = new ArrayList<JobStat>();\r
84                 if (timeOutInHours <= 0) {\r
85                         throw new IllegalArgumentException(\r
86                                         "Timeout value must be greater than 0! Given value: "\r
87                                                         + timeOutInHours);\r
88                 }\r
89                 this.timeOutInHours = timeOutInHours;\r
90         }\r
91 \r
92         boolean hasCompleted(JobDirectory jd) {\r
93                 JobStat jstat = jd.getJobStat();\r
94                 if (jstat.hasResult() || jstat.getIsCancelled()\r
95                                 || jstat.getIsFinished() || hasTimedOut(jd)) {\r
96                         return true;\r
97                 }\r
98                 return false;\r
99         }\r
100 \r
101         boolean hasTimedOut(JobDirectory jd) {\r
102                 return ((System.currentTimeMillis() - jd.jobdir.lastModified()) / (1000 * 60 * 60)) > timeOutInHours;\r
103         }\r
104 \r
105    /* Make sure that collectStatistics methods was called prior to calling this! \r
106         * TODO consider running collectStatistics from here on the first call \r
107         */\r
108         StatProcessor getStats() {\r
109                 if(stats.isEmpty()) {\r
110                         log.info("Please make sure collectStatistics method was called prior to calling getStats()!");\r
111                 }\r
112                 return new StatProcessor(stats);\r
113         }\r
114 \r
115         void writeStatToDB() throws SQLException {\r
116                 Set<JobStat> rjobs = new HashSet<JobStat>(stats);\r
117                 StatDB statdb = new StatDB();\r
118                 log.debug("Removing records that has already been recorded");\r
119 \r
120                 statdb.removeRecordedJobs(rjobs);\r
121                 log.debug("New records left: " + rjobs.size());\r
122                 statdb.insertData(rjobs);\r
123         }\r
124 \r
125         /*\r
126          * static void updateTime(File statFile) throws IOException { long lastMod =\r
127          * statFile.lastModified(); FileWriter fw = new FileWriter(statFile);\r
128          * fw.write(new Long(lastMod).toString()); fw.close(); }\r
129          */\r
130 \r
131         /**\r
132          * Not in use\r
133          */\r
134         public static void main(String[] args) throws IOException, SQLException {\r
135 \r
136                 // updateTime(new File(\r
137                 // "D:\\workspace\\JABA2\\jobsout\\AACon#170462904473672\\STARTED"));\r
138 \r
139                 File[] files = FileUtil.getFiles("Y:\\fc\\www-jws2\\jaba\\jobsout",\r
140                                 directories);\r
141                 List<JobStat> stats = new ArrayList<JobStat>();\r
142                 for (File file : files) {\r
143                         JobDirectory jd = new JobDirectory(file);\r
144                         stats.add(jd.getJobStat());\r
145                         // System.out.println(jd.getJobStat().getJobReportTabulated());\r
146                 }\r
147                 StatProcessor sp = new StatProcessor(stats);\r
148                 System.out.println(sp.reportStat());\r
149                 System.out.println();\r
150                 System.out.println("!!!!!!!!!!!!!!!!!!");\r
151                 System.out.println();\r
152 \r
153                 Set<JobStat> rjobs = new HashSet<JobStat>(sp.stats);\r
154                 StatDB statdb = new StatDB();\r
155                 statdb.removeRecordedJobs(rjobs);\r
156                 statdb.insertData(rjobs);\r
157         }\r
158 \r
159         static FileFilter directories = new FileFilter() {\r
160                 @Override\r
161                 public boolean accept(File pathname) {\r
162                         return pathname.isDirectory()\r
163                                         && !pathname.getName().startsWith(".");\r
164                 }\r
165         };\r
166 \r
167         static class JobDirectory {\r
168 \r
169                 File jobdir;\r
170                 Map<String, File> files = new HashMap<String, File>();\r
171 \r
172                 JobDirectory(File directory) {\r
173                         this.jobdir = directory;\r
174                         for (File f : jobdir.listFiles()) {\r
175                                 files.put(f.getName(), f);\r
176                         }\r
177                 }\r
178 \r
179                 boolean hasStatus(JobStatus status) {\r
180                         return files.containsKey(status.toString());\r
181                 }\r
182 \r
183                 boolean isCollected() {\r
184                         return hasStatus(JobStatus.COLLECTED);\r
185                 }\r
186 \r
187                 boolean isCancelled() {\r
188                         return hasStatus(JobStatus.CANCELLED);\r
189                 }\r
190 \r
191                 long getStartTime() {\r
192                         long starttime = UNDEFINED;\r
193                         File startfile = files.get(JobStatus.STARTED.toString());\r
194                         if (startfile == null) {\r
195                                 startfile = files.get(JobStatus.SUBMITTED.toString());\r
196                         }\r
197                         if (startfile != null) {\r
198                                 starttime = startfile.lastModified();\r
199                                 /*\r
200                                  * String start = FileUtil.readFileToString(startfile);\r
201                                  * starttime = Long.parseLong(start.trim());\r
202                                  */\r
203                         }\r
204                         return starttime;\r
205                 }\r
206 \r
207                 String getClusterJobID() {\r
208                         String clustjobId = "";\r
209                         File jobid = files.get("JOBID");\r
210                         try {\r
211                                 if (jobid != null) {\r
212                                         clustjobId = FileUtil.readFileToString(jobid);\r
213                                 }\r
214                         } catch (IOException ioe) {\r
215                                 log.error(\r
216                                                 "IO Exception while reading the content of JOBID file for job "\r
217                                                                 + jobid, ioe);\r
218                         }\r
219                         return clustjobId.trim();\r
220                 }\r
221 \r
222                 long getFinishedTime() {\r
223                         long ftime = UNDEFINED;\r
224                         File finished = files.get(JobStatus.FINISHED.toString());\r
225                         if (finished != null) {\r
226                                 ftime = finished.lastModified();\r
227                                 /*\r
228                                  * String start = FileUtil.readFileToString(finished); ftime =\r
229                                  * Long.parseLong(start.trim());\r
230                                  */\r
231                                 // System.out.println("f " + ftime);\r
232                         }\r
233                         /*\r
234                          * } catch (IOException e) { log.log(Level.WARN,\r
235                          * "Cannot parse finished time: " + e.getMessage(), e); } catch\r
236                          * (NumberFormatException e) { log.log(Level.WARN,\r
237                          * "Cannot parse finished time: " + e.getMessage(), e); }\r
238                          */\r
239                         return ftime;\r
240                 }\r
241 \r
242                 private Services getService() {\r
243                         return Services.getServiceByJobDirectory(jobdir);\r
244                 }\r
245 \r
246                 // Mafft, Muscle, Tcoffee, Clustal task:fasta.in result:fasta.out\r
247                 // Probcons task:fasta.in result:alignment.out\r
248                 /*\r
249                  * TODO replace with Universal names for WS!\r
250                  */\r
251                 long getResultSize() {\r
252                         Class<? extends Executable<?>> name = Services\r
253                                         .getRunnerByJobDirectory(jobdir);\r
254 \r
255                         File f = null;\r
256                         if (name.getSimpleName().equalsIgnoreCase("Probcons")) {\r
257                                 f = files.get("alignment.out");\r
258                         } else if (name.getSimpleName().equalsIgnoreCase("ClustalW")) {\r
259                                 f = files.get("output.txt");\r
260                         } else {\r
261                                 f = files.get("fasta.out");\r
262                         }\r
263                         if (f != null) {\r
264                                 return f.length();\r
265                         }\r
266                         return UNDEFINED;\r
267                 }\r
268 \r
269                 /*\r
270                  * TODO unify input!\r
271                  */\r
272                 long getInputSize() {\r
273                         Class<? extends Executable<?>> name = Services\r
274                                         .getRunnerByJobDirectory(jobdir);\r
275 \r
276                         File input = null;\r
277                         if (name.getSimpleName().equalsIgnoreCase("ClustalW")) {\r
278                                 input = files.get("input.txt");\r
279                         } else {\r
280                                 input = files.get("fasta.in");\r
281                         }\r
282 \r
283                         if (input != null) {\r
284                                 return input.length();\r
285                         }\r
286                         return UNDEFINED;\r
287                 }\r
288 \r
289                 JobStat getJobStat() {\r
290                         return JobStat.newInstance(getService(), getClusterJobID(),\r
291                                         jobdir.getName(), getStartTime(), getFinishedTime(),\r
292                                         getInputSize(), getResultSize(), isCancelled(),\r
293                                         isCollected());\r
294                 }\r
295 \r
296                 @Override\r
297                 public int hashCode() {\r
298                         final int prime = 31;\r
299                         int result = 1;\r
300                         result = prime * result\r
301                                         + ((jobdir == null) ? 0 : jobdir.hashCode());\r
302                         return result;\r
303                 }\r
304 \r
305                 @Override\r
306                 public boolean equals(Object obj) {\r
307                         if (this == obj)\r
308                                 return true;\r
309                         if (obj == null)\r
310                                 return false;\r
311                         if (getClass() != obj.getClass())\r
312                                 return false;\r
313                         JobDirectory other = (JobDirectory) obj;\r
314                         if (jobdir == null) {\r
315                                 if (other.jobdir != null)\r
316                                         return false;\r
317                         } else if (!jobdir.equals(other.jobdir))\r
318                                 return false;\r
319                         return true;\r
320                 }\r
321         }\r
322 \r
323         void collectStatistics() {\r
324                 File[] files = workDirectory.listFiles(directories);\r
325                 for (File file : files) {\r
326                         JobDirectory jd = new JobDirectory(file);\r
327                         JobStat jstat = jd.getJobStat();\r
328                         // Do not record stats on the job that has not completed yet\r
329                         if (hasCompleted(jd)) {\r
330                                 stats.add(jstat);\r
331                         } else {\r
332                                 log.debug("Skipping the job: " + jstat);\r
333                                 log.debug("As it has not completed yet");\r
334                         }\r
335                         // System.out.println(jd.getJobStat().getJobReportTabulated());\r
336                 }\r
337         }\r
338 \r
339         @Override\r
340         public void run() {\r
341                 log.info("Started updating statistics at " + new Date());\r
342                 log.info("For directory: " + workDirectory.getAbsolutePath());\r
343 \r
344                 collectStatistics();\r
345 \r
346                 StatProcessor local_stats = getStats();\r
347                 log.info("Found " + local_stats.getJobNumber() + " jobs!");\r
348                 try {\r
349                         writeStatToDB();\r
350                 } catch (SQLException e) {\r
351                         log.error("Fails to update jobs statistics database!");\r
352                         log.error(e.getLocalizedMessage(), e);\r
353                 }\r
354                 log.info("Finished updating statistics at " + new Date());\r
355         }\r
356 }\r