package compbio.stat.collector;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.log4j.Logger;

import compbio.engine.client.Executable;
import compbio.metadata.JobStatus;
import compbio.util.FileUtil;
import compbio.ws.client.Services;
\r
/**
 * Number of runs of each WS = number of folders with name
 *
 * Number of successful runs = all runs with no result file
 *
 * Per period of time = limit per file creating time Runtime (avg/max) =
 * started time - finished time
 *
 * Task and result size = result.size
 *
 * Abandoned runs - not collected runs
 *
 * Cancelled runs - cancelled
 *
 * Cluster vs local runs
 *
 * Reasons for failure = look in the err out?
 *
 * Metadata required:
 *
 * work directory for local and cluster tasks = from Helper or cmd parameter. WS
 * names - enumeration. Status file names and content.
 */
\r
51 public class ExecutionStatCollector implements Runnable {
\r
53 static final int UNDEFINED = -1;
\r
55 private static final Logger log = Logger
\r
56 .getLogger(ExecutionStatCollector.class);
\r
58 static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");
\r
60 final private List<JobStat> stats;
\r
62 * Consider the job that has been working for longer than timeOutInHours
\r
63 * completed, whatever the outcome
\r
65 final private int timeOutInHours;
\r
68 * List subdirectories in the job directory
\r
70 * @param workDirectory
\r
71 * @param timeOutInHours
\r
73 public ExecutionStatCollector(String workDirectory, int timeOutInHours) {
\r
74 log.info("Starting stat collector for directory: " + workDirectory);
\r
75 log.info("Maximum allowed runtime(h): " + timeOutInHours);
\r
76 File[] files = FileUtil.getFiles(workDirectory, directories);
\r
77 stats = new ArrayList<JobStat>();
\r
78 assert timeOutInHours > 0;
\r
79 this.timeOutInHours = timeOutInHours;
\r
80 for (File file : files) {
\r
81 JobDirectory jd = new JobDirectory(file);
\r
82 JobStat jstat = jd.getJobStat();
\r
83 // Do not record stats on the job that has not completed yet
\r
84 if (hasCompleted(jd)) {
\r
87 log.debug("Skipping the job: " + jstat);
\r
88 log.debug("As it has not completed yet");
\r
90 // System.out.println(jd.getJobStat().getJobReportTabulated());
\r
94 boolean hasCompleted(JobDirectory jd) {
\r
95 JobStat jstat = jd.getJobStat();
\r
96 if (jstat.hasResult() || jstat.getIsCancelled()
\r
97 || jstat.getIsFinished() || hasTimedOut(jd)) {
\r
103 boolean hasTimedOut(JobDirectory jd) {
\r
104 return ((System.currentTimeMillis() - jd.jobdir.lastModified()) / (1000 * 60 * 60)) > timeOutInHours;
\r
107 public StatProcessor getStats() {
\r
108 return new StatProcessor(stats);
\r
111 public void writeStatToDB() throws SQLException {
\r
112 Set<JobStat> rjobs = new HashSet<JobStat>(stats);
\r
113 StatDB statdb = new StatDB();
\r
114 log.debug("Removing records that has already been recorded");
\r
116 statdb.removeRecordedJobs(rjobs);
\r
117 log.debug("New records left: " + rjobs.size());
\r
118 statdb.insertData(rjobs);
\r
122 * static void updateTime(File statFile) throws IOException { long lastMod =
\r
123 * statFile.lastModified(); FileWriter fw = new FileWriter(statFile);
\r
124 * fw.write(new Long(lastMod).toString()); fw.close(); }
\r
130 * @throws IOException
\r
131 * @throws SQLException
\r
133 public static void main(String[] args) throws IOException, SQLException {
\r
135 // updateTime(new File(
\r
136 // "D:\\workspace\\JABA2\\jobsout\\AACon#170462904473672\\STARTED"));
\r
138 File[] files = FileUtil.getFiles("Y:\\fc\\www-jws2\\jaba\\jobsout",
\r
140 List<JobStat> stats = new ArrayList<JobStat>();
\r
141 for (File file : files) {
\r
142 JobDirectory jd = new JobDirectory(file);
\r
143 stats.add(jd.getJobStat());
\r
144 // System.out.println(jd.getJobStat().getJobReportTabulated());
\r
146 StatProcessor sp = new StatProcessor(stats);
\r
147 System.out.println(sp.reportStat());
\r
148 System.out.println();
\r
149 System.out.println("!!!!!!!!!!!!!!!!!!");
\r
150 System.out.println();
\r
152 Set<JobStat> rjobs = new HashSet<JobStat>(sp.stats);
\r
153 StatDB statdb = new StatDB();
\r
154 statdb.removeRecordedJobs(rjobs);
\r
155 statdb.insertData(rjobs);
\r
158 static FileFilter directories = new FileFilter() {
\r
160 public boolean accept(File pathname) {
\r
161 return pathname.isDirectory()
\r
162 && !pathname.getName().startsWith(".");
\r
166 static class JobDirectory {
\r
169 Map<String, File> files = new HashMap<String, File>();
\r
171 public JobDirectory(File directory) {
\r
172 this.jobdir = directory;
\r
173 for (File f : jobdir.listFiles()) {
\r
174 files.put(f.getName(), f);
\r
178 public boolean hasStatus(JobStatus status) {
\r
179 return files.containsKey(status.toString());
\r
182 boolean isCollected() {
\r
183 return hasStatus(JobStatus.COLLECTED);
\r
186 boolean isCancelled() {
\r
187 return hasStatus(JobStatus.CANCELLED);
\r
190 long getStartTime() {
\r
191 long starttime = UNDEFINED;
\r
192 File startfile = files.get(JobStatus.STARTED.toString());
\r
193 if (startfile == null) {
\r
194 startfile = files.get(JobStatus.SUBMITTED.toString());
\r
196 if (startfile != null) {
\r
197 starttime = startfile.lastModified();
\r
199 * String start = FileUtil.readFileToString(startfile);
\r
200 * starttime = Long.parseLong(start.trim());
\r
206 String getClusterJobID() {
\r
207 String clustjobId = "";
\r
208 File jobid = files.get("JOBID");
\r
210 if (jobid != null) {
\r
211 clustjobId = FileUtil.readFileToString(jobid);
\r
213 } catch (IOException ioe) {
\r
214 ioe.printStackTrace();
\r
217 return clustjobId.trim();
\r
220 long getFinishedTime() {
\r
221 long ftime = UNDEFINED;
\r
222 File finished = files.get(JobStatus.FINISHED.toString());
\r
223 if (finished != null) {
\r
224 ftime = finished.lastModified();
\r
226 * String start = FileUtil.readFileToString(finished); ftime =
\r
227 * Long.parseLong(start.trim());
\r
229 // System.out.println("f " + ftime);
\r
232 * } catch (IOException e) { log.log(Level.WARN,
\r
233 * "Cannot parse finished time: " + e.getMessage(), e); } catch
\r
234 * (NumberFormatException e) { log.log(Level.WARN,
\r
235 * "Cannot parse finished time: " + e.getMessage(), e); }
\r
240 private Services getService() {
\r
241 return Services.getServiceByJobDirectory(jobdir);
\r
244 // Mafft, Muscle, Tcoffee, Clustal task:fasta.in result:fasta.out
\r
245 // Probcons task:fasta.in result:alignment.out
\r
247 * TODO replace with Universal names for WS!
\r
249 long getResultSize() {
\r
250 Class<? extends Executable<?>> name = Services
\r
251 .getRunnerByJobDirectory(jobdir);
\r
254 if (name.getSimpleName().equalsIgnoreCase("Probcons")) {
\r
255 f = files.get("alignment.out");
\r
256 } else if (name.getSimpleName().equalsIgnoreCase("ClustalW")) {
\r
257 f = files.get("output.txt");
\r
259 f = files.get("fasta.out");
\r
267 long getInputSize() {
\r
268 File input = files.get("fasta.in");
\r
269 if (input != null) {
\r
270 return input.length();
\r
275 JobStat getJobStat() {
\r
276 return JobStat.newInstance(getService(), getClusterJobID(),
\r
277 jobdir.getName(), getStartTime(), getFinishedTime(),
\r
278 getInputSize(), getResultSize(), isCancelled(),
\r
283 public int hashCode() {
\r
284 final int prime = 31;
\r
286 result = prime * result
\r
287 + ((jobdir == null) ? 0 : jobdir.hashCode());
\r
292 public boolean equals(Object obj) {
\r
297 if (getClass() != obj.getClass())
\r
299 JobDirectory other = (JobDirectory) obj;
\r
300 if (jobdir == null) {
\r
301 if (other.jobdir != null)
\r
303 } else if (!jobdir.equals(other.jobdir))
\r
311 public void run() {
\r
312 log.info("Started updating statistics at " + new Date());
\r
314 StatProcessor local_stats = getStats();
\r
315 log.info("Found " + local_stats.getJobNumber() + " jobs!");
\r
318 } catch (SQLException e) {
\r
319 log.error("Fails to update jobs statistics database!");
\r
320 log.error(e.getLocalizedMessage(), e);
\r
322 log.info("Finished updating statistics at " + new Date());
\r