/* Copyright (c) 2013 Alexander Sherstnev * Copyright (c) 2011 Peter Troshin * * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0 * * This library is free software; you can redistribute it and/or modify it under the terms of the * Apache License version 2 as published by the Apache Software Foundation * * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache * License for more details. * * A copy of the license is in apache_license.txt. It is also available here: * @see: http://www.apache.org/licenses/LICENSE-2.0.txt * * Any republication or derived work distributed in source code form * must include this copyright and license notice. */ package compbio.stat.collector; import java.io.File; import java.io.FileFilter; import java.io.IOException; import java.sql.SQLException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.HashSet; import java.util.List; import java.util.Set; import com.beust.jcommander.JCommander; import com.beust.jcommander.Parameter; import org.apache.log4j.Logger; import compbio.engine.client.PathValidator; import compbio.engine.client.SkeletalExecutable; /** * Class assumptions: 1. Number of runs of each WS = number of folders with name * 2. Number of successful runs = all runs with no result file 3. Per period of * time = limit per file creating time 4. Runtime (avg/max) = finish time - * start time 5. Task & result size = result.size * * Abandoned runs - not collected runs * * Cancelled runs - cancelled * * Cluster vs local runs * * Reasons for failure = look in the err out? * * * Metadata required: * * work directory for local and cluster tasks = from Helper or cmd parameter. WS * names - enumeration. Status file names and content. * * @author Peter Troshin * @author Alexander Sherstnev * */ class mainJCommander { @Parameter private List parameters = new ArrayList(); @Parameter(names = { "-log", "-verbose" }, description = "Level of verbosity") Integer verbose = 1; @Parameter(names = "-start", description = "Start time") String starttime; @Parameter(names = "-end", description = "Start time") String endtime; @Parameter(names = "-db", description = "Path to database") String dbname; } public class ExecutionStatUpdater { static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss"); private static final Logger log = Logger.getLogger(ExecutionStatUpdater.class); final private File workingDirectory; final private List stats; /** * Consider the job that has been working for longer than timeOutInHours * completed, whatever the outcome */ final private int timeOutInHours; /** * List subdirectories in the job directory * * @param workingDirectory * @param timeOutInHours */ public ExecutionStatUpdater(String workingDirectory, int timeOutInHours) { log.info("Starting stat collector for directory: " + workingDirectory); log.info("Maximum allowed runtime(h): " + timeOutInHours); if (!PathValidator.isValidDirectory(workingDirectory)) { throw new IllegalArgumentException("workingDirectory '" + workingDirectory + "' does not exist!"); } this.workingDirectory = new File(workingDirectory); stats = new ArrayList(); if (timeOutInHours <= 0) { throw new IllegalArgumentException("Timeout value must be greater than 0! Given value: " + timeOutInHours); } this.timeOutInHours = timeOutInHours; } boolean hasCompleted(JobDirectory jd) { JobStat jstat = jd.getJobStat(); if (jstat.hasResult() || jstat.getIsCancelled() || jstat.getIsFinished() || hasTimedOut(jd)) { return true; } return false; } boolean hasTimedOut(JobDirectory jd) { return ((System.currentTimeMillis() - jd.jobdir.lastModified()) / (1000 * 60 * 60)) > timeOutInHours; } /* * Make sure that collectStatistics methods was called prior to calling * this! TODO consider running collectStatistics from here on the first call */ StatProcessor getStats() { if (stats.isEmpty()) { log.info("Please make sure collectStatistics method was called prior to calling getStats()!"); } return new StatProcessor(stats); } void writeStatToDB() throws SQLException { Set rjobs = new HashSet(stats); StatDB statdb = new StatDB(); log.debug("Removing records that has already been recorded"); statdb.removeRecordedJobs(rjobs); log.debug("New records left: " + rjobs.size()); statdb.insertData(rjobs); } /** * main function */ public static void main(String[] args) throws IOException, SQLException { mainJCommander jct = new mainJCommander(); new JCommander(jct, args); String WorkingDir = "jobout"; String StartTime = jct.starttime; String EndTime = jct.endtime; String dbname = jct.dbname; long StartTimeLong = 0; long EndTimeLong = 0; ExecutionStatUpdater esu = new ExecutionStatUpdater(WorkingDir, 1); esu.collectStatistics(StartTimeLong, EndTimeLong); esu.writeStatToDB(); } static FileFilter directories = new FileFilter() { @Override public boolean accept(File pathname) { return pathname.isDirectory() && !pathname.getName().startsWith("."); } }; // TODO test! private void collectStatistics(long StartTime, long EndTime) { // clear stats array; stats.clear(); File[] dirs = workingDirectory.listFiles(directories); for (File dir : dirs) { // skip work directory with test inputs and out of ordered time // range log.debug("check directory: " + dir.getName() + "..."); if (InputFilter.accept(new File(dir.getPath() + File.separator + SkeletalExecutable.INPUT)) && StartTime < dir.lastModified() && dir.lastModified() < EndTime) { JobDirectory jd = new JobDirectory(dir); JobStat jstat = jd.getJobStat(); // Do not record stats on the job that has not completed yet if (hasCompleted(jd)) { stats.add(jstat); } else { log.debug("Skipping the job: " + jstat + " as it has not completed yet"); } } else { log.trace("training input: " + dir.getName() + File.separator + SkeletalExecutable.INPUT); } } } }