Further work to enable stat collection and display
[jabaws.git] / webservices / compbio / stat / collector / ExecutionStatCollector.java
1 package compbio.stat.collector;\r
2 \r
3 import java.io.File;\r
4 import java.io.FileFilter;\r
5 import java.io.IOException;\r
6 import java.sql.SQLException;\r
7 import java.text.SimpleDateFormat;\r
8 import java.util.ArrayList;\r
9 import java.util.Date;\r
10 import java.util.HashMap;\r
11 import java.util.HashSet;\r
12 import java.util.List;\r
13 import java.util.Map;\r
14 import java.util.Set;\r
15 \r
16 import org.apache.log4j.Logger;\r
17 \r
18 import compbio.engine.client.Executable;\r
19 import compbio.metadata.JobStatus;\r
20 import compbio.util.FileUtil;\r
21 import compbio.ws.client.Services;\r
22 \r
23 /**\r
24  * Number of runs of each WS = number of folders with name\r
25  * \r
26  * Number of successful runs = all runs with no result file\r
27  * \r
28  * Per period of time = limit per file creating time Runtime (avg/max) =\r
29  * \r
30  * started time - finished time\r
31  * \r
32  * Task & result size = result.size\r
33  * \r
34  * Abandoned runs - not collected runs\r
35  * \r
36  * Cancelled runs - cancelled\r
37  * \r
38  * Cluster vs local runs\r
39  * \r
40  * Reasons for failure = look in the err out?\r
41  * \r
42  * \r
43  * Metadata required:\r
44  * \r
45  * work directory for local and cluster tasks = from Helper or cmd parameter. WS\r
46  * names - enumeration. Status file names and content.\r
47  * \r
48  * @author pvtroshin\r
49  * \r
50  */\r
51 public class ExecutionStatCollector implements Runnable {\r
52 \r
53         static final int UNDEFINED = -1;\r
54 \r
55         private static final Logger log = Logger\r
56                         .getLogger(ExecutionStatCollector.class);\r
57 \r
58         static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");\r
59 \r
60         final private List<JobStat> stats;\r
61         /**\r
62          * Consider the job that has been working for longer than timeOutInHours\r
63          * completed, whatever the outcome\r
64          */\r
65         final private int timeOutInHours;\r
66 \r
67         /**\r
68          * List subdirectories in the job directory\r
69          * \r
70          * @param workDirectory\r
71          * @param timeOutInHours\r
72          */\r
73         public ExecutionStatCollector(String workDirectory, int timeOutInHours) {\r
74                 log.info("Starting stat collector for directory: " + workDirectory);\r
75                 log.info("Maximum allowed runtime(h): " + timeOutInHours);\r
76                 File[] files = FileUtil.getFiles(workDirectory, directories);\r
77                 stats = new ArrayList<JobStat>();\r
78                 assert timeOutInHours > 0;\r
79                 this.timeOutInHours = timeOutInHours;\r
80                 for (File file : files) {\r
81                         JobDirectory jd = new JobDirectory(file);\r
82                         JobStat jstat = jd.getJobStat();\r
83                         // Do not record stats on the job that has not completed yet\r
84                         if (hasCompleted(jd)) {\r
85                                 stats.add(jstat);\r
86                         } else {\r
87                                 log.debug("Skipping the job: " + jstat);\r
88                                 log.debug("As it has not completed yet");\r
89                         }\r
90                         // System.out.println(jd.getJobStat().getJobReportTabulated());\r
91                 }\r
92         }\r
93 \r
94         boolean hasCompleted(JobDirectory jd) {\r
95                 JobStat jstat = jd.getJobStat();\r
96                 if (jstat.hasResult() || jstat.getIsCancelled()\r
97                                 || jstat.getIsFinished() || hasTimedOut(jd)) {\r
98                         return true;\r
99                 }\r
100                 return false;\r
101         }\r
102 \r
103         boolean hasTimedOut(JobDirectory jd) {\r
104                 return ((System.currentTimeMillis() - jd.jobdir.lastModified()) / (1000 * 60 * 60)) > timeOutInHours;\r
105         }\r
106 \r
107         public StatProcessor getStats() {\r
108                 return new StatProcessor(stats);\r
109         }\r
110 \r
111         public void writeStatToDB() throws SQLException {\r
112                 Set<JobStat> rjobs = new HashSet<JobStat>(stats);\r
113                 StatDB statdb = new StatDB();\r
114                 log.debug("Removing records that has already been recorded");\r
115 \r
116                 statdb.removeRecordedJobs(rjobs);\r
117                 log.debug("New records left: " + rjobs.size());\r
118                 statdb.insertData(rjobs);\r
119         }\r
120 \r
121         /*\r
122          * static void updateTime(File statFile) throws IOException { long lastMod =\r
123          * statFile.lastModified(); FileWriter fw = new FileWriter(statFile);\r
124          * fw.write(new Long(lastMod).toString()); fw.close(); }\r
125          */\r
126 \r
127         /**\r
128          * \r
129          * @param args\r
130          * @throws IOException\r
131          * @throws SQLException\r
132          */\r
133         public static void main(String[] args) throws IOException, SQLException {\r
134 \r
135                 // updateTime(new File(\r
136                 // "D:\\workspace\\JABA2\\jobsout\\AACon#170462904473672\\STARTED"));\r
137 \r
138                 File[] files = FileUtil.getFiles("Y:\\fc\\www-jws2\\jaba\\jobsout",\r
139                                 directories);\r
140                 List<JobStat> stats = new ArrayList<JobStat>();\r
141                 for (File file : files) {\r
142                         JobDirectory jd = new JobDirectory(file);\r
143                         stats.add(jd.getJobStat());\r
144                         // System.out.println(jd.getJobStat().getJobReportTabulated());\r
145                 }\r
146                 StatProcessor sp = new StatProcessor(stats);\r
147                 System.out.println(sp.reportStat());\r
148                 System.out.println();\r
149                 System.out.println("!!!!!!!!!!!!!!!!!!");\r
150                 System.out.println();\r
151 \r
152                 Set<JobStat> rjobs = new HashSet<JobStat>(sp.stats);\r
153                 StatDB statdb = new StatDB();\r
154                 statdb.removeRecordedJobs(rjobs);\r
155                 statdb.insertData(rjobs);\r
156         }\r
157 \r
158         static FileFilter directories = new FileFilter() {\r
159                 @Override\r
160                 public boolean accept(File pathname) {\r
161                         return pathname.isDirectory()\r
162                                         && !pathname.getName().startsWith(".");\r
163                 }\r
164         };\r
165 \r
166         static class JobDirectory {\r
167 \r
168                 File jobdir;\r
169                 Map<String, File> files = new HashMap<String, File>();\r
170 \r
171                 public JobDirectory(File directory) {\r
172                         this.jobdir = directory;\r
173                         for (File f : jobdir.listFiles()) {\r
174                                 files.put(f.getName(), f);\r
175                         }\r
176                 }\r
177 \r
178                 public boolean hasStatus(JobStatus status) {\r
179                         return files.containsKey(status.toString());\r
180                 }\r
181 \r
182                 boolean isCollected() {\r
183                         return hasStatus(JobStatus.COLLECTED);\r
184                 }\r
185 \r
186                 boolean isCancelled() {\r
187                         return hasStatus(JobStatus.CANCELLED);\r
188                 }\r
189 \r
190                 long getStartTime() {\r
191                         long starttime = UNDEFINED;\r
192                         File startfile = files.get(JobStatus.STARTED.toString());\r
193                         if (startfile == null) {\r
194                                 startfile = files.get(JobStatus.SUBMITTED.toString());\r
195                         }\r
196                         if (startfile != null) {\r
197                                 starttime = startfile.lastModified();\r
198                                 /*\r
199                                  * String start = FileUtil.readFileToString(startfile);\r
200                                  * starttime = Long.parseLong(start.trim());\r
201                                  */\r
202                         }\r
203                         return starttime;\r
204                 }\r
205 \r
206                 String getClusterJobID() {\r
207                         String clustjobId = "";\r
208                         File jobid = files.get("JOBID");\r
209                         try {\r
210                                 if (jobid != null) {\r
211                                         clustjobId = FileUtil.readFileToString(jobid);\r
212                                 }\r
213                         } catch (IOException ioe) {\r
214                                 ioe.printStackTrace();\r
215                                 // TODO LOG\r
216                         }\r
217                         return clustjobId.trim();\r
218                 }\r
219 \r
220                 long getFinishedTime() {\r
221                         long ftime = UNDEFINED;\r
222                         File finished = files.get(JobStatus.FINISHED.toString());\r
223                         if (finished != null) {\r
224                                 ftime = finished.lastModified();\r
225                                 /*\r
226                                  * String start = FileUtil.readFileToString(finished); ftime =\r
227                                  * Long.parseLong(start.trim());\r
228                                  */\r
229                                 // System.out.println("f " + ftime);\r
230                         }\r
231                         /*\r
232                          * } catch (IOException e) { log.log(Level.WARN,\r
233                          * "Cannot parse finished time: " + e.getMessage(), e); } catch\r
234                          * (NumberFormatException e) { log.log(Level.WARN,\r
235                          * "Cannot parse finished time: " + e.getMessage(), e); }\r
236                          */\r
237                         return ftime;\r
238                 }\r
239 \r
240                 private Services getService() {\r
241                         return Services.getServiceByJobDirectory(jobdir);\r
242                 }\r
243 \r
244                 // Mafft, Muscle, Tcoffee, Clustal task:fasta.in result:fasta.out\r
245                 // Probcons task:fasta.in result:alignment.out\r
246                 /*\r
247                  * TODO replace with Universal names for WS!\r
248                  */\r
249                 long getResultSize() {\r
250                         Class<? extends Executable<?>> name = Services\r
251                                         .getRunnerByJobDirectory(jobdir);\r
252 \r
253                         File f = null;\r
254                         if (name.getSimpleName().equalsIgnoreCase("Probcons")) {\r
255                                 f = files.get("alignment.out");\r
256                         } else if (name.getSimpleName().equalsIgnoreCase("ClustalW")) {\r
257                                 f = files.get("output.txt");\r
258                         } else {\r
259                                 f = files.get("fasta.out");\r
260                         }\r
261                         if (f != null) {\r
262                                 return f.length();\r
263                         }\r
264                         return UNDEFINED;\r
265                 }\r
266 \r
267                 long getInputSize() {\r
268                         File input = files.get("fasta.in");\r
269                         if (input != null) {\r
270                                 return input.length();\r
271                         }\r
272                         return UNDEFINED;\r
273                 }\r
274 \r
275                 JobStat getJobStat() {\r
276                         return JobStat.newInstance(getService(), getClusterJobID(),\r
277                                         jobdir.getName(), getStartTime(), getFinishedTime(),\r
278                                         getInputSize(), getResultSize(), isCancelled(),\r
279                                         isCollected());\r
280                 }\r
281 \r
282                 @Override\r
283                 public int hashCode() {\r
284                         final int prime = 31;\r
285                         int result = 1;\r
286                         result = prime * result\r
287                                         + ((jobdir == null) ? 0 : jobdir.hashCode());\r
288                         return result;\r
289                 }\r
290 \r
291                 @Override\r
292                 public boolean equals(Object obj) {\r
293                         if (this == obj)\r
294                                 return true;\r
295                         if (obj == null)\r
296                                 return false;\r
297                         if (getClass() != obj.getClass())\r
298                                 return false;\r
299                         JobDirectory other = (JobDirectory) obj;\r
300                         if (jobdir == null) {\r
301                                 if (other.jobdir != null)\r
302                                         return false;\r
303                         } else if (!jobdir.equals(other.jobdir))\r
304                                 return false;\r
305                         return true;\r
306                 }\r
307 \r
308         }\r
309 \r
310         @Override\r
311         public void run() {\r
312                 log.info("Started updating statistics at " + new Date());\r
313 \r
314                 StatProcessor local_stats = getStats();\r
315                 log.info("Found " + local_stats.getJobNumber() + " jobs!");\r
316                 try {\r
317                         writeStatToDB();\r
318                 } catch (SQLException e) {\r
319                         log.error("Fails to update jobs statistics database!");\r
320                         log.error(e.getLocalizedMessage(), e);\r
321                 }\r
322                 log.info("Finished updating statistics at " + new Date());\r
323         }\r
324 \r
325 }\r