package compbio.stat.collector;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.log4j.Logger;

import compbio.engine.client.Executable;
import compbio.engine.client.PathValidator;
import compbio.metadata.JobStatus;
import compbio.util.FileUtil;
import compbio.ws.client.Services;

/**
 * Collects statistics on job executions from the job output directories.
 *
 * Number of runs of each WS = number of folders with the WS name
 *
 * Number of successful runs = all runs with a result file
 *
 * Per period of time = limit by file creation time
 *
 * Runtime (avg/max) = finished time - started time
 *
 * Task and result size = result.size
 *
 * Abandoned runs = runs that were not collected
 *
 * Cancelled runs = cancelled
 *
 * Cluster vs local runs
 *
 * Reasons for failure = look in the err out?
 *
 * Metadata required:
 *
 * work directory for local and cluster tasks = from Helper or cmd parameter.
 * WS names - enumeration. Status file names and content.
 *
 * @author pvtroshin
 */
public class ExecutionStatCollector implements Runnable {

    static final int UNDEFINED = -1;

    private static final Logger log = Logger
            .getLogger(ExecutionStatCollector.class);

    static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");

    final private File workDirectory;
    final private List<JobStat> stats;

    /**
     * Consider a job that has been running for longer than timeOutInHours
     * completed, whatever the outcome.
     */
    final private int timeOutInHours;

    /**
     * Scans and processes the subdirectories of the job output directory.
     *
     * @param workDirectory
     *            the directory containing the job subdirectories
     * @param timeOutInHours
     *            maximum allowed job runtime in hours
     */
    public ExecutionStatCollector(String workDirectory, int timeOutInHours) {
        log.info("Starting stat collector for directory: " + workDirectory);
        log.info("Maximum allowed runtime(h): " + timeOutInHours);
        if (!PathValidator.isValidDirectory(workDirectory)) {
            throw new IllegalArgumentException("workDirectory '"
                    + workDirectory + "' does not exist!");
        }
        this.workDirectory = new File(workDirectory);
        stats = new ArrayList<JobStat>();
        if (timeOutInHours <= 0) {
            throw new IllegalArgumentException(
                    "Timeout value must be greater than 0! Given value: "
                            + timeOutInHours);
        }
        this.timeOutInHours = timeOutInHours;
    }
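
    /*
     * A minimal usage sketch, not part of the original class: since
     * ExecutionStatCollector implements Runnable, it can be driven by a
     * standard scheduler. The directory path and the one-hour period below
     * are hypothetical.
     *
     * ScheduledExecutorService scheduler = Executors
     *         .newSingleThreadScheduledExecutor();
     * scheduler.scheduleAtFixedRate(new ExecutionStatCollector(
     *         "/path/to/jobsout", 24), 0, 1, TimeUnit.HOURS);
     */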

    boolean hasCompleted(JobDirectory jd) {
        JobStat jstat = jd.getJobStat();
        if (jstat.hasResult() || jstat.getIsCancelled()
                || jstat.getIsFinished() || hasTimedOut(jd)) {
            return true;
        }
        return false;
    }

    boolean hasTimedOut(JobDirectory jd) {
        return ((System.currentTimeMillis() - jd.jobdir.lastModified()) / (1000 * 60 * 60)) > timeOutInHours;
    }

    StatProcessor getStats() {
        return new StatProcessor(stats);
    }

    void writeStatToDB() throws SQLException {
        Set<JobStat> rjobs = new HashSet<JobStat>(stats);
        StatDB statdb = new StatDB();
        log.debug("Removing records that have already been recorded");
        statdb.removeRecordedJobs(rjobs);
        log.debug("New records left: " + rjobs.size());
        statdb.insertData(rjobs);
    }

    /*
     * static void updateTime(File statFile) throws IOException { long lastMod
     * = statFile.lastModified(); FileWriter fw = new FileWriter(statFile);
     * fw.write(new Long(lastMod).toString()); fw.close(); }
     */

    /**
     * Not in use
     */
    public static void main(String[] args) throws IOException, SQLException {
        // updateTime(new File(
        // "D:\\workspace\\JABA2\\jobsout\\AACon#170462904473672\\STARTED"));

        File[] files = FileUtil.getFiles("Y:\\fc\\www-jws2\\jaba\\jobsout",
                directories);
        List<JobStat> stats = new ArrayList<JobStat>();
        for (File file : files) {
            JobDirectory jd = new JobDirectory(file);
            stats.add(jd.getJobStat());
            // System.out.println(jd.getJobStat().getJobReportTabulated());
        }
        StatProcessor sp = new StatProcessor(stats);
        System.out.println(sp.reportStat());
        System.out.println();
        System.out.println("!!!!!!!!!!!!!!!!!!");
        System.out.println();

        Set<JobStat> rjobs = new HashSet<JobStat>(sp.stats);
        StatDB statdb = new StatDB();
        statdb.removeRecordedJobs(rjobs);
        statdb.insertData(rjobs);
    }

    static FileFilter directories = new FileFilter() {
        @Override
        public boolean accept(File pathname) {
            return pathname.isDirectory()
                    && !pathname.getName().startsWith(".");
        }
    };

    static class JobDirectory {

        File jobdir;
        Map<String, File> files = new HashMap<String, File>();

        JobDirectory(File directory) {
            this.jobdir = directory;
            for (File f : jobdir.listFiles()) {
                files.put(f.getName(), f);
            }
        }

        boolean hasStatus(JobStatus status) {
            return files.containsKey(status.toString());
        }

        boolean isCollected() {
            return hasStatus(JobStatus.COLLECTED);
        }

        boolean isCancelled() {
            return hasStatus(JobStatus.CANCELLED);
        }

        long getStartTime() {
            long starttime = UNDEFINED;
            File startfile = files.get(JobStatus.STARTED.toString());
            if (startfile == null) {
                startfile = files.get(JobStatus.SUBMITTED.toString());
            }
            if (startfile != null) {
                starttime = startfile.lastModified();
                /*
                 * String start = FileUtil.readFileToString(startfile);
                 * starttime = Long.parseLong(start.trim());
                 */
            }
            return starttime;
        }

        String getClusterJobID() {
            String clustjobId = "";
            File jobid = files.get("JOBID");
            try {
                if (jobid != null) {
                    clustjobId = FileUtil.readFileToString(jobid);
                }
            } catch (IOException ioe) {
                log.error(
                        "IO Exception while reading the content of JOBID file for job "
                                + jobid, ioe);
            }
            return clustjobId.trim();
        }

        long getFinishedTime() {
            long ftime = UNDEFINED;
            File finished = files.get(JobStatus.FINISHED.toString());
            if (finished != null) {
                ftime = finished.lastModified();
                /*
                 * String start = FileUtil.readFileToString(finished); ftime =
                 * Long.parseLong(start.trim());
                 */
            }
            return ftime;
        }
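
        /*
         * Note (an observation on the code above, for illustration): start
         * and finish times are taken from the lastModified() timestamps of
         * the STARTED/SUBMITTED and FINISHED marker files, so the runtime of
         * a completed job can be derived as:
         *
         * long runtimeMs = getFinishedTime() - getStartTime();
         * // meaningful only when neither value is UNDEFINED
         */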

        private Services getService() {
            return Services.getServiceByJobDirectory(jobdir);
        }

        // Mafft, Muscle, Tcoffee, Clustal task:fasta.in result:fasta.out
        // Probcons task:fasta.in result:alignment.out
        /*
         * TODO replace with universal names for WS!
         */
        long getResultSize() {
            Class<? extends Executable<?>> name = Services
                    .getRunnerByJobDirectory(jobdir);

            File f = null;
            if (name.getSimpleName().equalsIgnoreCase("Probcons")) {
                f = files.get("alignment.out");
            } else if (name.getSimpleName().equalsIgnoreCase("ClustalW")) {
                f = files.get("output.txt");
            } else {
                f = files.get("fasta.out");
            }
            if (f != null) {
                return f.length();
            }
            return UNDEFINED;
        }

        long getInputSize() {
            File input = files.get("fasta.in");
            if (input != null) {
                return input.length();
            }
            return UNDEFINED;
        }

        JobStat getJobStat() {
            return JobStat.newInstance(getService(), getClusterJobID(),
                    jobdir.getName(), getStartTime(), getFinishedTime(),
                    getInputSize(), getResultSize(), isCancelled(),
                    isCollected());
        }

        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result
                    + ((jobdir == null) ? 0 : jobdir.hashCode());
            return result;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj)
                return true;
            if (obj == null)
                return false;
            if (getClass() != obj.getClass())
                return false;
            JobDirectory other = (JobDirectory) obj;
            if (jobdir == null) {
                if (other.jobdir != null)
                    return false;
            } else if (!jobdir.equals(other.jobdir))
                return false;
            return true;
        }
    }

    private void collectStatistics() {
        File[] files = workDirectory.listFiles(directories);
        for (File file : files) {
            JobDirectory jd = new JobDirectory(file);
            JobStat jstat = jd.getJobStat();
            // Do not record stats for a job that has not completed yet
            if (hasCompleted(jd)) {
                stats.add(jstat);
            } else {
                log.debug("Skipping the job: " + jstat
                        + " as it has not completed yet");
            }
            // System.out.println(jd.getJobStat().getJobReportTabulated());
        }
    }

    @Override
    public void run() {
        log.info("Started updating statistics at " + new Date());
        collectStatistics();
        StatProcessor localStats = getStats();
        log.info("Found " + localStats.getJobNumber() + " jobs!");
        try {
            writeStatToDB();
        } catch (SQLException e) {
            log.error("Failed to update the job statistics database!");
            log.error(e.getLocalizedMessage(), e);
        }
        log.info("Finished updating statistics at " + new Date());
    }
}
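
/*
 * End-to-end sketch (hypothetical wiring, not part of the original file): a
 * single synchronous pass over a local job directory. The path and timeout
 * are placeholders.
 *
 * ExecutionStatCollector collector = new ExecutionStatCollector(
 *         "/path/to/jobsout", 24);
 * collector.run(); // collects stats and writes new records to the stat DB
 * System.out.println(collector.getStats().reportStat());
 */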