X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=webservices%2Fcompbio%2Fstat%2Fcollector%2FExecutionStatCollector.java;h=c50ab70b38bf3f1c9eee429f421176ebf9d9f164;hb=4825c5456a465ac3e13d0131f8e87f584b9871fa;hp=bf3130aa2ff1c4599866d460f2452943f59487d6;hpb=5289bedee673d95739fdf622dba7be64f8c2df13;p=jabaws.git diff --git a/webservices/compbio/stat/collector/ExecutionStatCollector.java b/webservices/compbio/stat/collector/ExecutionStatCollector.java index bf3130a..c50ab70 100644 --- a/webservices/compbio/stat/collector/ExecutionStatCollector.java +++ b/webservices/compbio/stat/collector/ExecutionStatCollector.java @@ -1,3 +1,21 @@ +/* Copyright (c) 2013 Alexander Sherstnev + * Copyright (c) 2011 Peter Troshin + * + * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0 + * + * This library is free software; you can redistribute it and/or modify it under the terms of the + * Apache License version 2 as published by the Apache Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without + * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache + * License for more details. + * + * A copy of the license is in apache_license.txt. It is also available here: + * @see: http://www.apache.org/licenses/LICENSE-2.0.txt + * + * Any republication or derived work distributed in source code form + * must include this copyright and license notice. + */ package compbio.stat.collector; import java.io.File; @@ -7,29 +25,23 @@ import java.sql.SQLException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; -import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Set; import org.apache.log4j.Logger; -import compbio.engine.client.Executable; -import compbio.metadata.JobStatus; +import compbio.engine.client.PathValidator; +import compbio.engine.client.SkeletalExecutable; import compbio.util.FileUtil; -import compbio.ws.client.Services; /** - * Number of runs of each WS = number of folders with name - * - * Number of successful runs = all runs with no result file - * - * Per period of time = limit per file creating time Runtime (avg/max) = - * - * started time - finished time - * - * Task & result size = result.size + * Class assumptions: + * 1. Number of runs of each WS = number of folders with name + * 2. Number of successful runs = all runs with no result file + * 3. Per period of time = limit per file creating time + * 4. Runtime (avg/max) = finish time - start time + * 5. Task & result size = result.size * * Abandoned runs - not collected runs * @@ -45,18 +57,16 @@ import compbio.ws.client.Services; * work directory for local and cluster tasks = from Helper or cmd parameter. WS * names - enumeration. Status file names and content. 
  *
- * @author pvtroshin
+ * @author Peter Troshin
+ * @author Alexander Sherstnev
  * 
  */
 public class ExecutionStatCollector implements Runnable {
 
-	static final int UNDEFINED = -1;
-
-	private static final Logger log = Logger
-			.getLogger(ExecutionStatCollector.class);
-
 	static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");
+	private static final Logger log = Logger.getLogger(ExecutionStatCollector.class);
 
+	final private File workingDirectory;
 	final private List stats;
 
 	/**
 	 * Consider the job that has been working for longer than timeOutInHours
@@ -67,28 +77,22 @@ public class ExecutionStatCollector implements Runnable {
 	/**
 	 * List subdirectories in the job directory
 	 * 
-	 * @param workDirectory
+	 * @param workingDirectory
 	 * @param timeOutInHours
 	 */
-	public ExecutionStatCollector(String workDirectory, int timeOutInHours) {
-		log.info("Starting stat collector for directory: " + workDirectory);
+	public ExecutionStatCollector(String workingDirectory, int timeOutInHours) {
+		log.info("Starting stat collector for directory: " + workingDirectory);
 		log.info("Maximum allowed runtime(h): " + timeOutInHours);
 
-		File[] files = FileUtil.getFiles(workDirectory, directories);
+		if (!PathValidator.isValidDirectory(workingDirectory)) {
+			throw new IllegalArgumentException("workingDirectory '" + workingDirectory + "' does not exist!");
+		}
+		this.workingDirectory = new File(workingDirectory);
 		stats = new ArrayList();
-		assert timeOutInHours > 0;
-		this.timeOutInHours = timeOutInHours;
-		for (File file : files) {
-			JobDirectory jd = new JobDirectory(file);
-			JobStat jstat = jd.getJobStat();
-			// Do not record stats on the job that has not completed yet
-			if (hasCompleted(jd)) {
-				stats.add(jstat);
-			} else {
-				log.debug("Skipping the job: " + jstat);
-				log.debug("As it has not completed yet");
-			}
-			// System.out.println(jd.getJobStat().getJobReportTabulated());
+		if (timeOutInHours <= 0) {
+			throw new IllegalArgumentException(
+					"Timeout value must be greater than 0! Given value: " + timeOutInHours);
 		}
+		this.timeOutInHours = timeOutInHours;
 	}
 
 	boolean hasCompleted(JobDirectory jd) {
@@ -104,11 +108,18 @@ public class ExecutionStatCollector implements Runnable {
 		return ((System.currentTimeMillis() - jd.jobdir.lastModified()) / (1000 * 60 * 60)) > timeOutInHours;
 	}
 
-	public StatProcessor getStats() {
+	/*
+	 * Make sure that the collectStatistics method was called prior to calling
+	 * this!
+	 * TODO consider running collectStatistics from here on the first call
+	 */
+	StatProcessor getStats() {
+		if (stats.isEmpty()) {
+			log.info("Please make sure collectStatistics method was called prior to calling getStats()!");
+		}
 		return new StatProcessor(stats);
 	}
 
-	public void writeStatToDB() throws SQLException {
+	void writeStatToDB() throws SQLException {
 		Set rjobs = new HashSet(stats);
 		StatDB statdb = new StatDB();
 		log.debug("Removing records that has already been recorded");
@@ -118,17 +129,8 @@
 		statdb.insertData(rjobs);
 	}
 
-	/*
-	 * static void updateTime(File statFile) throws IOException { long lastMod =
-	 * statFile.lastModified(); FileWriter fw = new FileWriter(statFile);
-	 * fw.write(new Long(lastMod).toString()); fw.close(); }
-	 */
-
 	/**
-	 * 
-	 * @param args
-	 * @throws IOException
-	 * @throws SQLException
+	 * Not in use
 	 */
 	public static void main(String[] args) throws IOException, SQLException {
 
@@ -158,158 +160,39 @@
 	static FileFilter directories = new FileFilter() {
 		@Override
 		public boolean accept(File pathname) {
-			return pathname.isDirectory()
-					&& !pathname.getName().startsWith(".");
+			return pathname.isDirectory() && !pathname.getName().startsWith(".");
 		}
 	};
 
-	static class JobDirectory {
-
-		File jobdir;
-		Map files = new HashMap();
-
-		public JobDirectory(File directory) {
-			this.jobdir = directory;
-			for (File f : jobdir.listFiles()) {
-				files.put(f.getName(), f);
-			}
-		}
-
-		public boolean hasStatus(JobStatus status) {
-			return files.containsKey(status.toString());
-		}
-
-		boolean isCollected() {
-			return hasStatus(JobStatus.COLLECTED);
-		}
-
-		boolean isCancelled() {
-			return hasStatus(JobStatus.CANCELLED);
-		}
-
-		long getStartTime() {
-			long starttime = UNDEFINED;
-			File startfile = files.get(JobStatus.STARTED.toString());
-			if (startfile == null) {
-				startfile = files.get(JobStatus.SUBMITTED.toString());
-			}
-			if (startfile != null) {
-				starttime = startfile.lastModified();
-				/*
-				 * String start = FileUtil.readFileToString(startfile);
-				 * starttime = Long.parseLong(start.trim());
-				 */
-			}
-			return starttime;
-		}
-
-		String getClusterJobID() {
-			String clustjobId = "";
-			File jobid = files.get("JOBID");
-			try {
-				if (jobid != null) {
-					clustjobId = FileUtil.readFileToString(jobid);
+	// TODO test!
+	void collectStatistics() {
+		// clear the stats list
+		stats.clear();
+		File[] dirs = workingDirectory.listFiles(directories);
+		for (File dir : dirs) {
+			// skip work directories with test input
+			log.debug("check directory: " + dir.getName() + "...");
+			if (InputFilter.accept(new File(dir.getPath() + File.separator + SkeletalExecutable.INPUT))) {
+				JobDirectory jd = new JobDirectory(dir);
+				JobStat jstat = jd.getJobStat();
+				// Do not record stats on the job that has not completed yet
+				if (hasCompleted(jd)) {
+					stats.add(jstat);
+				} else {
+					log.debug("Skipping the job: " + jstat + " as it has not completed yet");
 				}
-			} catch (IOException ioe) {
-				ioe.printStackTrace();
-				// TODO LOG
-			}
-			return clustjobId.trim();
-		}
-
-		long getFinishedTime() {
-			long ftime = UNDEFINED;
-			File finished = files.get(JobStatus.FINISHED.toString());
-			if (finished != null) {
-				ftime = finished.lastModified();
-				/*
-				 * String start = FileUtil.readFileToString(finished); ftime =
-				 * Long.parseLong(start.trim());
-				 */
-				// System.out.println("f " + ftime);
-			}
-			/*
-			 * } catch (IOException e) { log.log(Level.WARN,
-			 * "Cannot parse finished time: " + e.getMessage(), e); } catch
-			 * (NumberFormatException e) { log.log(Level.WARN,
-			 * "Cannot parse finished time: " + e.getMessage(), e); }
-			 */
-			return ftime;
-		}
-
-		private Services getService() {
-			return Services.getServiceByJobDirectory(jobdir);
-		}
-
-		// Mafft, Muscle, Tcoffee, Clustal task:fasta.in result:fasta.out
-		// Probcons task:fasta.in result:alignment.out
-		/*
-		 * TODO replace with Universal names for WS!
-		 */
-		long getResultSize() {
-			Class> name = Services
-					.getRunnerByJobDirectory(jobdir);
-
-			File f = null;
-			if (name.getSimpleName().equalsIgnoreCase("Probcons")) {
-				f = files.get("alignment.out");
-			} else if (name.getSimpleName().equalsIgnoreCase("ClustalW")) {
-				f = files.get("output.txt");
 			} else {
-				f = files.get("fasta.out");
+				log.trace("training input: " + dir.getName() + File.separator + SkeletalExecutable.INPUT);
 			}
-			if (f != null) {
-				return f.length();
-			}
-			return UNDEFINED;
-		}
-
-		long getInputSize() {
-			File input = files.get("fasta.in");
-			if (input != null) {
-				return input.length();
-			}
-			return UNDEFINED;
-		}
-
-		JobStat getJobStat() {
-			return JobStat.newInstance(getService(), getClusterJobID(),
-					jobdir.getName(), getStartTime(), getFinishedTime(),
-					getInputSize(), getResultSize(), isCancelled(),
-					isCollected());
-		}
-
-		@Override
-		public int hashCode() {
-			final int prime = 31;
-			int result = 1;
-			result = prime * result
-					+ ((jobdir == null) ? 0 : jobdir.hashCode());
-			return result;
-		}
-
-		@Override
-		public boolean equals(Object obj) {
-			if (this == obj)
-				return true;
-			if (obj == null)
-				return false;
-			if (getClass() != obj.getClass())
-				return false;
-			JobDirectory other = (JobDirectory) obj;
-			if (jobdir == null) {
-				if (other.jobdir != null)
-					return false;
-			} else if (!jobdir.equals(other.jobdir))
-				return false;
-			return true;
-		}
-	}
 
 	@Override
 	public void run() {
 		log.info("Started updating statistics at " + new Date());
+		log.info("For directory: " + workingDirectory.getAbsolutePath());
+
+		collectStatistics();
 
 		StatProcessor local_stats = getStats();
 		log.info("Found " + local_stats.getJobNumber() + " jobs!");
@@ -321,5 +204,4 @@
 		}
 		log.info("Finished updating statistics at " + new Date());
 	}
-
 }
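Below is a minimal usage sketch for the reworked collector, for illustration only: the job output path, the timeout and the daily schedule are hypothetical values, and how JABAWS itself schedules the collector is not shown in this diff. The sketch relies only on what the diff exposes: the public ExecutionStatCollector(String, int) constructor and run(), which now calls collectStatistics() before getStats() and writeStatToDB().

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

import compbio.stat.collector.ExecutionStatCollector;

public class StatCollectorExample {
	public static void main(String[] args) {
		// Hypothetical local/cluster job output directory and timeout;
		// substitute the values configured for the actual installation.
		String jobsOutDir = "/data/jabaws/jobsout";
		int timeOutInHours = 24;

		// run() calls collectStatistics() first, then getStats() and
		// writeStatToDB(), so the ordering caveat in the getStats() comment
		// is respected.
		Runnable collector = new ExecutionStatCollector(jobsOutDir, timeOutInHours);

		// Refresh usage statistics once a day in the background.
		ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
		scheduler.scheduleAtFixedRate(collector, 0, 24, TimeUnit.HOURS);
	}
}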