X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=webservices%2Fcompbio%2Fstat%2Fcollector%2FExecutionStatCollector.java;h=c50ab70b38bf3f1c9eee429f421176ebf9d9f164;hb=4825c5456a465ac3e13d0131f8e87f584b9871fa;hp=785f659fb15c3c594f67209571f965e67c7aee2b;hpb=d866a29d849b599983e1102d88bfe0cd40188f5b;p=jabaws.git diff --git a/webservices/compbio/stat/collector/ExecutionStatCollector.java b/webservices/compbio/stat/collector/ExecutionStatCollector.java index 785f659..c50ab70 100644 --- a/webservices/compbio/stat/collector/ExecutionStatCollector.java +++ b/webservices/compbio/stat/collector/ExecutionStatCollector.java @@ -1,3 +1,21 @@ +/* Copyright (c) 2013 Alexander Sherstnev + * Copyright (c) 2011 Peter Troshin + * + * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0 + * + * This library is free software; you can redistribute it and/or modify it under the terms of the + * Apache License version 2 as published by the Apache Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without + * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache + * License for more details. + * + * A copy of the license is in apache_license.txt. It is also available here: + * @see: http://www.apache.org/licenses/LICENSE-2.0.txt + * + * Any republication or derived work distributed in source code form + * must include this copyright and license notice. 
+ */ package compbio.stat.collector; import java.io.File; @@ -7,30 +25,23 @@ import java.sql.SQLException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; -import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Set; import org.apache.log4j.Logger; -import compbio.engine.client.Executable; import compbio.engine.client.PathValidator; -import compbio.metadata.JobStatus; +import compbio.engine.client.SkeletalExecutable; import compbio.util.FileUtil; -import compbio.ws.client.Services; /** - * Number of runs of each WS = number of folders with name - * - * Number of successful runs = all runs with no result file - * - * Per period of time = limit per file creating time Runtime (avg/max) = - * - * started time - finished time - * - * Task & result size = result.size + * Class assumptions: + * 1. Number of runs of each WS = number of folders with name + * 2. Number of successful runs = all runs with no result file + * 3. Per period of time = limit per file creating time + * 4. Runtime (avg/max) = finish time - start time + * 5. Task & result size = result.size * * Abandoned runs - not collected runs * @@ -46,19 +57,16 @@ import compbio.ws.client.Services; * work directory for local and cluster tasks = from Helper or cmd parameter. WS * names - enumeration. Status file names and content. 
* - * @author pvtroshin + * @author Peter Troshin + * @author Alexander Sherstnev * */ public class ExecutionStatCollector implements Runnable { - static final int UNDEFINED = -1; - - private static final Logger log = Logger - .getLogger(ExecutionStatCollector.class); - static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss"); + private static final Logger log = Logger.getLogger(ExecutionStatCollector.class); - final private File workDirectory; + final private File workingDirectory; final private List stats; /** * Consider the job that has been working for longer than timeOutInHours @@ -69,22 +77,20 @@ public class ExecutionStatCollector implements Runnable { /** * List subdirectories in the job directory * - * @param workDirectory + * @param workingDirectory * @param timeOutInHours */ - public ExecutionStatCollector(String workDirectory, int timeOutInHours) { - log.info("Starting stat collector for directory: " + workDirectory); + public ExecutionStatCollector(String workingDirectory, int timeOutInHours) { + log.info("Starting stat collector for directory: " + workingDirectory); log.info("Maximum allowed runtime(h): " + timeOutInHours); - if (!PathValidator.isValidDirectory(workDirectory)) { - throw new IllegalArgumentException("workDirectory '" - + workDirectory + "' does not exist!"); + if (!PathValidator.isValidDirectory(workingDirectory)) { + throw new IllegalArgumentException("workingDirectory '" + workingDirectory + "' does not exist!"); } - this.workDirectory = new File(workDirectory); + this.workingDirectory = new File(workingDirectory); stats = new ArrayList(); if (timeOutInHours <= 0) { throw new IllegalArgumentException( - "Timeout value must be greater than 0! Given value: " - + timeOutInHours); + "Timeout value must be greater than 0! 
Given value: " + timeOutInHours); } this.timeOutInHours = timeOutInHours; } @@ -102,7 +108,14 @@ public class ExecutionStatCollector implements Runnable { return ((System.currentTimeMillis() - jd.jobdir.lastModified()) / (1000 * 60 * 60)) > timeOutInHours; } + /* + * Make sure that collectStatistics method was called prior to calling + * this! TODO consider running collectStatistics from here on the first call + */ StatProcessor getStats() { + if (stats.isEmpty()) { + log.info("Please make sure collectStatistics method was called prior to calling getStats()!"); + } return new StatProcessor(stats); } @@ -116,12 +129,6 @@ public class ExecutionStatCollector implements Runnable { statdb.insertData(rjobs); } - /* - * static void updateTime(File statFile) throws IOException { long lastMod = - * statFile.lastModified(); FileWriter fw = new FileWriter(statFile); - * fw.write(new Long(lastMod).toString()); fw.close(); } - */ - /** * Not in use */ @@ -153,187 +160,37 @@ public class ExecutionStatCollector implements Runnable { static FileFilter directories = new FileFilter() { @Override public boolean accept(File pathname) { - return pathname.isDirectory() - && !pathname.getName().startsWith("."); + return pathname.isDirectory() && !pathname.getName().startsWith("."); } }; - static class JobDirectory { - - File jobdir; - Map files = new HashMap(); - - JobDirectory(File directory) { - this.jobdir = directory; - for (File f : jobdir.listFiles()) { - files.put(f.getName(), f); - } - } - - boolean hasStatus(JobStatus status) { - return files.containsKey(status.toString()); - } - - boolean isCollected() { - return hasStatus(JobStatus.COLLECTED); - } - - boolean isCancelled() { - return hasStatus(JobStatus.CANCELLED); - } - - long getStartTime() { - long starttime = UNDEFINED; - File startfile = files.get(JobStatus.STARTED.toString()); - if (startfile == null) { - startfile = files.get(JobStatus.SUBMITTED.toString()); - } - if (startfile != null) { - starttime = 
startfile.lastModified(); - /* - * String start = FileUtil.readFileToString(startfile); - * starttime = Long.parseLong(start.trim()); - */ - } - return starttime; - } - - String getClusterJobID() { - String clustjobId = ""; - File jobid = files.get("JOBID"); - try { - if (jobid != null) { - clustjobId = FileUtil.readFileToString(jobid); + // TODO test! + void collectStatistics() { + // clear stats array; + stats.clear(); + File[] dirs = workingDirectory.listFiles(directories); + for (File dir : dirs) { + // skip work directory with test inputs + log.debug("check directory: " + dir.getName() + "..."); + if (InputFilter.accept(new File(dir.getPath() + File.separator + SkeletalExecutable.INPUT))) { + JobDirectory jd = new JobDirectory(dir); + JobStat jstat = jd.getJobStat(); + // Do not record stats on the job that has not completed yet + if (hasCompleted(jd)) { + stats.add(jstat); + } else { + log.debug("Skipping the job: " + jstat + " as it has not completed yet"); } - } catch (IOException ioe) { - log.error( - "IO Exception while reading the content of JOBID file for job " - + jobid, ioe); - } - return clustjobId.trim(); - } - - long getFinishedTime() { - long ftime = UNDEFINED; - File finished = files.get(JobStatus.FINISHED.toString()); - if (finished != null) { - ftime = finished.lastModified(); - /* - * String start = FileUtil.readFileToString(finished); ftime = - * Long.parseLong(start.trim()); - */ - // System.out.println("f " + ftime); - } - /* - * } catch (IOException e) { log.log(Level.WARN, - * "Cannot parse finished time: " + e.getMessage(), e); } catch - * (NumberFormatException e) { log.log(Level.WARN, - * "Cannot parse finished time: " + e.getMessage(), e); } - */ - return ftime; - } - - private Services getService() { - return Services.getServiceByJobDirectory(jobdir); - } - - // Mafft, Muscle, Tcoffee, Clustal task:fasta.in result:fasta.out - // Probcons task:fasta.in result:alignment.out - /* - * TODO replace with Universal names for WS! 
- */ - long getResultSize() { - Class> name = Services - .getRunnerByJobDirectory(jobdir); - - File f = null; - if (name.getSimpleName().equalsIgnoreCase("Probcons")) { - f = files.get("alignment.out"); - } else if (name.getSimpleName().equalsIgnoreCase("ClustalW")) { - f = files.get("output.txt"); } else { - f = files.get("fasta.out"); - } - if (f != null) { - return f.length(); + log.trace("training input: " + dir.getName() + File.separator + SkeletalExecutable.INPUT); } - return UNDEFINED; - } - - /* - * TODO unify input! - */ - long getInputSize() { - Class> name = Services - .getRunnerByJobDirectory(jobdir); - - File input = null; - if (name.getSimpleName().equalsIgnoreCase("ClustalW")) { - input = files.get("input.txt"); - } else { - input = files.get("fasta.in"); - } - - if (input != null) { - return input.length(); - } - return UNDEFINED; - } - - JobStat getJobStat() { - return JobStat.newInstance(getService(), getClusterJobID(), - jobdir.getName(), getStartTime(), getFinishedTime(), - getInputSize(), getResultSize(), isCancelled(), - isCollected()); - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result - + ((jobdir == null) ? 
0 : jobdir.hashCode()); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - JobDirectory other = (JobDirectory) obj; - if (jobdir == null) { - if (other.jobdir != null) - return false; - } else if (!jobdir.equals(other.jobdir)) - return false; - return true; - } - } - - private void collectStatistics() { - File[] files = workDirectory.listFiles(directories); - for (File file : files) { - JobDirectory jd = new JobDirectory(file); - JobStat jstat = jd.getJobStat(); - // Do not record stats on the job that has not completed yet - if (hasCompleted(jd)) { - stats.add(jstat); - } else { - log.debug("Skipping the job: " + jstat); - log.debug("As it has not completed yet"); - } - // System.out.println(jd.getJobStat().getJobReportTabulated()); } } @Override public void run() { log.info("Started updating statistics at " + new Date()); - log.info("For directory: " + workDirectory.getAbsolutePath()); + log.info("For directory: " + workingDirectory.getAbsolutePath()); collectStatistics();