/* Copyright (c) 2013 Alexander Sherstnev
 * Copyright (c) 2011 Peter Troshin
 *
 * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0
 *
 * This library is free software; you can redistribute it and/or modify it under the terms of the
 * Apache License version 2 as published by the Apache Software Foundation
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache
 * License for more details.
 *
 * A copy of the license is in apache_license.txt. It is also available here:
 * @see: http://www.apache.org/licenses/LICENSE-2.0.txt
 *
 * Any republication or derived work distributed in source code form
 * must include this copyright and license notice.
 */
package compbio.stat.collector;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.log4j.Logger;

import compbio.engine.client.PathValidator;
import compbio.engine.client.SkeletalExecutable;
import compbio.util.FileUtil;

/**
 * Scans a job working directory, builds a {@link JobStat} for every completed
 * job found there and writes the new records to the statistics database.
 *
 * Class assumptions:
 * 1. Number of runs of each WS = number of folders with name
 * 2. Number of successful runs = all runs with no result file
 * 3. Per period of time = limit per file creating time
 * 4. Runtime (avg/max) = finish time - start time
 * 5. Task & result size = result.size
 *
 * Abandoned runs - not collected runs
 *
 * Cancelled runs - cancelled
 *
 * Cluster vs local runs
 *
 * Reasons for failure = look in the err out?
 *
 *
 * Metadata required:
 *
 * work directory for local and cluster tasks = from Helper or cmd parameter. WS
 * names - enumeration. Status file names and content.
 *
 * @author Peter Troshin
 * @author Alexander Sherstnev
 *
 */
public class ExecutionStatCollector implements Runnable {

    static final int UNDEFINED = -1;

    /** Number of milliseconds in one hour; used for the timeout comparison. */
    private static final long MILLIS_PER_HOUR = 60L * 60L * 1000L;

    // NOTE(review): SimpleDateFormat is not synchronized, and the "hh" pattern
    // letter is the 12-hour clock (there is no am/pm marker in this pattern).
    // Both are left untouched because other classes may format or parse with
    // this shared instance -- confirm usages before changing.
    static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");

    private static final Logger log = Logger.getLogger(ExecutionStatCollector.class);

    /** Directory whose sub-directories are the individual job directories. */
    private final File workingDirectory;

    /** Statistics of completed jobs gathered by the last collectStatistics() call. */
    private final List<JobStat> stats;

    /**
     * Consider the job that has been working for longer than timeOutInHours
     * completed, whatever the outcome
     */
    private final int timeOutInHours;

    /**
     * Creates a collector for the given job directory.
     *
     * @param workingDirectory
     *            path to the directory that contains the job sub-directories
     * @param timeOutInHours
     *            number of hours after which a job is considered completed
     *            regardless of its state; must be greater than 0
     * @throws IllegalArgumentException
     *             if the directory is not valid or the timeout is not positive
     */
    public ExecutionStatCollector(String workingDirectory, int timeOutInHours) {
        log.info("Starting stat collector for directory: " + workingDirectory);
        log.info("Maximum allowed runtime(h): " + timeOutInHours);
        if (!PathValidator.isValidDirectory(workingDirectory)) {
            throw new IllegalArgumentException("workingDirectory '" + workingDirectory
                    + "' does not exist!");
        }
        this.workingDirectory = new File(workingDirectory);
        stats = new ArrayList<JobStat>();
        if (timeOutInHours <= 0) {
            throw new IllegalArgumentException(
                    "Timeout value must be greater than 0! Given value: " + timeOutInHours);
        }
        this.timeOutInHours = timeOutInHours;
    }

    /**
     * Tells whether the job should be treated as completed.
     *
     * @param jd
     *            the job directory to examine
     * @return true if the job has a result, was cancelled, has finished, or
     *         has exceeded the configured timeout
     */
    boolean hasCompleted(JobDirectory jd) {
        JobStat jstat = jd.getJobStat();
        return jstat.hasResult() || jstat.getIsCancelled() || jstat.getIsFinished()
                || hasTimedOut(jd);
    }

    /**
     * Tells whether the job directory was last modified longer than
     * timeOutInHours ago.
     *
     * The comparison is made in milliseconds. Truncating the elapsed time to
     * whole hours before comparing would postpone the timeout by up to one
     * extra hour.
     *
     * @param jd
     *            the job directory to examine
     * @return true if the time since the last modification of the job
     *         directory exceeds the timeout
     */
    boolean hasTimedOut(JobDirectory jd) {
        long elapsedMillis = System.currentTimeMillis() - jd.jobdir.lastModified();
        return elapsedMillis > timeOutInHours * MILLIS_PER_HOUR;
    }

    /*
     * Make sure that collectStatistics methods was called prior to calling
     * this! TODO consider running collectStatistics from here on the first call
     */
    /**
     * Wraps the currently collected statistics into a StatProcessor.
     *
     * @return a new StatProcessor over the collected job statistics
     */
    StatProcessor getStats() {
        if (stats.isEmpty()) {
            log.info("Please make sure collectStatistics method was called prior to calling getStats()!");
        }
        return new StatProcessor(stats);
    }

    /**
     * Writes the collected statistics to the statistics database, skipping
     * the jobs that StatDB reports as already recorded.
     *
     * @throws SQLException
     *             if the database operation fails
     */
    void writeStatToDB() throws SQLException {
        // Work on a copy so that the collected list itself is not modified
        Set<JobStat> rjobs = new HashSet<JobStat>(stats);
        StatDB statdb = new StatDB();
        log.debug("Removing records that has already been recorded");
        statdb.removeRecordedJobs(rjobs);
        log.debug("New records left: " + rjobs.size());
        statdb.insertData(rjobs);
    }

    /**
     * Not in use
     */
    public static void main(String[] args) throws IOException, SQLException {
        File[] files = FileUtil.getFiles("Y:\\fc\\www-jws2\\jaba\\jobsout", directories);
        List<JobStat> stats = new ArrayList<JobStat>();
        for (File file : files) {
            JobDirectory jd = new JobDirectory(file);
            stats.add(jd.getJobStat());
        }
        StatProcessor sp = new StatProcessor(stats);
        System.out.println(sp.reportStat());
        System.out.println();
        System.out.println("!!!!!!!!!!!!!!!!!!");
        System.out.println();
        Set<JobStat> rjobs = new HashSet<JobStat>(sp.stats);
        StatDB statdb = new StatDB();
        statdb.removeRecordedJobs(rjobs);
        statdb.insertData(rjobs);
    }

    /** Accepts directories whose name does not start with a dot. */
    // NOTE(review): kept non-final for compatibility; nothing in this file reassigns it.
    static FileFilter directories = new FileFilter() {
        @Override
        public boolean accept(File pathname) {
            return pathname.isDirectory() && !pathname.getName().startsWith(".");
        }
    };

    // TODO test!
    /**
     * Rebuilds the list of job statistics from the working directory.
     *
     * The previous content of the list is discarded. Only the jobs whose input
     * file is accepted by InputFilter and which have completed are recorded.
     */
    void collectStatistics() {
        // clear stats array;
        stats.clear();
        File[] dirs = workingDirectory.listFiles(directories);
        if (dirs == null) {
            // listFiles returns null if the path is not a directory or an I/O error occurs
            log.warn("Cannot list job directories in: " + workingDirectory.getAbsolutePath());
            return;
        }
        for (File dir : dirs) {
            // skip work directory with test inputs
            log.debug("check directory: " + dir.getName() + "...");
            File input = new File(dir.getPath() + File.separator + SkeletalExecutable.INPUT);
            if (!InputFilter.accept(input)) {
                log.trace("training input: " + dir.getName() + File.separator
                        + SkeletalExecutable.INPUT);
                continue;
            }
            JobDirectory jd = new JobDirectory(dir);
            JobStat jstat = jd.getJobStat();
            // Do not record stats on the job that has not completed yet
            if (hasCompleted(jd)) {
                stats.add(jstat);
            } else {
                log.debug("Skipping the job: " + jstat + " as it has not completed yet");
            }
        }
    }

    /**
     * Collects the statistics and writes them to the database. A database
     * failure is logged and not propagated.
     */
    @Override
    public void run() {
        log.info("Started updating statistics at " + new Date());
        log.info("For directory: " + workingDirectory.getAbsolutePath());

        collectStatistics();

        StatProcessor local_stats = getStats();
        log.info("Found " + local_stats.getJobNumber() + " jobs!");
        try {
            writeStatToDB();
        } catch (SQLException e) {
            log.error("Fails to update jobs statistics database!");
            log.error(e.getLocalizedMessage(), e);
        }
        log.info("Finished updating statistics at " + new Date());
    }
}