-/* Copyright (c) 2011 Peter Troshin\r
+/* Copyright (c) 2013 Alexander Sherstnev\r
+ * Copyright (c) 2011 Peter Troshin\r
* \r
* JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0 \r
* \r
import java.text.SimpleDateFormat;\r
import java.util.ArrayList;\r
import java.util.Date;\r
-import java.util.HashMap;\r
import java.util.HashSet;\r
import java.util.List;\r
-import java.util.Map;\r
import java.util.Set;\r
\r
import org.apache.log4j.Logger;\r
\r
-import compbio.engine.client.Executable;\r
import compbio.engine.client.PathValidator;\r
import compbio.engine.client.SkeletalExecutable;\r
-import compbio.metadata.JobStatus;\r
import compbio.util.FileUtil;\r
-import compbio.ws.client.Services;\r
-import compbio.ws.client.ServicesUtil;\r
\r
/**\r
- * Number of runs of each WS = number of folders with name\r
- * \r
- * Number of successful runs = all runs with no result file\r
- * \r
- * Per period of time = limit per file creating time Runtime (avg/max) =\r
- * \r
- * started time - finished time\r
- * \r
- * Task & result size = result.size\r
+ * Class assumptions:\r
+ * 1. Number of runs of each WS = number of folders with name\r
+ * 2. Number of successful runs = all runs with no result file\r
+ * 3. Per period of time = limit by file creation time
+ * 4. Runtime (avg/max) = finish time - start time\r
+ * 5. Task & result size = result.size\r
* \r
* Abandoned runs - not collected runs\r
* \r
* work directory for local and cluster tasks = from Helper or cmd parameter. WS\r
* names - enumeration. Status file names and content.\r
* \r
- * @author pvtroshin\r
+ * @author Peter Troshin\r
+ * @author Alexander Sherstnev\r
* \r
*/\r
public class ExecutionStatCollector implements Runnable {\r
-\r
static final int UNDEFINED = -1;\r
-\r
- private static final Logger log = Logger\r
- .getLogger(ExecutionStatCollector.class);\r
-\r
static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");\r
+ private static final Logger log = Logger.getLogger(ExecutionStatCollector.class);\r
\r
- final private File workDirectory;\r
+ final private File workingDirectory;\r
final private List<JobStat> stats;\r
/**\r
* Consider the job that has been working for longer than timeOutInHours\r
/**\r
* List subdirectories in the job directory\r
* \r
- * @param workDirectory\r
+ * @param workingDirectory\r
* @param timeOutInHours\r
*/\r
- public ExecutionStatCollector(String workDirectory, int timeOutInHours) {\r
- log.info("Starting stat collector for directory: " + workDirectory);\r
+ public ExecutionStatCollector(String workingDirectory, int timeOutInHours) {\r
+ log.info("Starting stat collector for directory: " + workingDirectory);\r
log.info("Maximum allowed runtime(h): " + timeOutInHours);\r
- if (!PathValidator.isValidDirectory(workDirectory)) {\r
- throw new IllegalArgumentException("workDirectory '"\r
- + workDirectory + "' does not exist!");\r
+ if (!PathValidator.isValidDirectory(workingDirectory)) {\r
+ throw new IllegalArgumentException("workingDirectory '" + workingDirectory + "' does not exist!");\r
}\r
- this.workDirectory = new File(workDirectory);\r
+ this.workingDirectory = new File(workingDirectory);\r
stats = new ArrayList<JobStat>();\r
if (timeOutInHours <= 0) {\r
throw new IllegalArgumentException(\r
- "Timeout value must be greater than 0! Given value: "\r
- + timeOutInHours);\r
+ "Timeout value must be greater than 0! Given value: " + timeOutInHours);\r
}\r
this.timeOutInHours = timeOutInHours;\r
}\r
statdb.insertData(rjobs);\r
}\r
\r
- /*\r
- * static void updateTime(File statFile) throws IOException { long lastMod =\r
- * statFile.lastModified(); FileWriter fw = new FileWriter(statFile);\r
- * fw.write(new Long(lastMod).toString()); fw.close(); }\r
- */\r
-\r
/**\r
* Not in use\r
*/\r
static FileFilter directories = new FileFilter() {\r
@Override\r
public boolean accept(File pathname) {\r
- return pathname.isDirectory()\r
- && !pathname.getName().startsWith(".");\r
+ return pathname.isDirectory() && !pathname.getName().startsWith(".");\r
}\r
};\r
\r
- static class JobDirectory {\r
-\r
- File jobdir;\r
- Map<String, File> files = new HashMap<String, File>();\r
-\r
- JobDirectory(File directory) {\r
- this.jobdir = directory;\r
- for (File f : jobdir.listFiles()) {\r
- files.put(f.getName(), f);\r
- }\r
- }\r
-\r
- boolean hasStatus(JobStatus status) {\r
- return files.containsKey(status.toString());\r
- }\r
-\r
- boolean isCollected() {\r
- return hasStatus(JobStatus.COLLECTED);\r
- }\r
-\r
- boolean isCancelled() {\r
- return hasStatus(JobStatus.CANCELLED);\r
- }\r
-\r
- long getStartTime() {\r
- long starttime = UNDEFINED;\r
- File startfile = files.get(JobStatus.STARTED.toString());\r
- if (startfile == null) {\r
- startfile = files.get(JobStatus.SUBMITTED.toString());\r
- }\r
- try {\r
- if (startfile != null) {\r
- String start = FileUtil.readFileToString(startfile);\r
- starttime = Long.parseLong(start.trim());\r
- }\r
- } catch (IOException ignore) {\r
- log.warn(\r
- "IOException while reading STARTED status file! Ignoring...",\r
- ignore);\r
- // fall back\r
- starttime = startfile.lastModified();\r
- } catch (NumberFormatException ignore) {\r
- log.warn(\r
- "NumberFormatException while reading STARTED status file! Ignoring...",\r
- ignore);\r
- // fall back\r
- starttime = startfile.lastModified();\r
- }\r
-\r
- return starttime;\r
- }\r
-\r
- String getClusterJobID() {\r
- String clustjobId = "";\r
- File jobid = files.get("JOBID");\r
- try {\r
- if (jobid != null) {\r
- clustjobId = FileUtil.readFileToString(jobid);\r
- }\r
- } catch (IOException ioe) {\r
- log.error(\r
- "IO Exception while reading the content of JOBID file for job "\r
- + jobid, ioe);\r
- }\r
- return clustjobId.trim();\r
- }\r
-\r
- long getFinishedTime() {\r
- long ftime = UNDEFINED;\r
- File finished = files.get(JobStatus.FINISHED.toString());\r
- if (finished != null) {\r
- try {\r
- if (finished != null) {\r
- String start = FileUtil.readFileToString(finished);\r
- ftime = Long.parseLong(start.trim());\r
- }\r
- } catch (IOException ignore) {\r
- log.warn(\r
- "IOException while reading FINISHED status file! Ignoring...",\r
- ignore);\r
- // fall back\r
- ftime = finished.lastModified();\r
- } catch (NumberFormatException ignore) {\r
- log.warn(\r
- "NumberFormatException while reading FINISHED status file! Ignoring...",\r
- ignore);\r
- // fall back\r
- ftime = finished.lastModified();\r
- }\r
- }\r
- return ftime;\r
- }\r
-\r
- private Services getService() {\r
- return ServicesUtil.getServiceByJobDirectory(jobdir);\r
- }\r
-\r
- long getResultSize() {\r
- Class<? extends Executable<?>> name = ServicesUtil\r
- .getRunnerByJobDirectory(jobdir);\r
-\r
- File f = null;\r
- if (name.getSimpleName().equalsIgnoreCase("IUPred")) {\r
- f = files.get("out.glob");\r
- if (f == null)\r
- f = files.get("out.short");\r
- if (f == null)\r
- f = files.get("out.long");\r
- } else {\r
- f = files.get(SkeletalExecutable.OUTPUT);\r
- }\r
- if (f != null) {\r
- return f.length();\r
- }\r
- return UNDEFINED;\r
- }\r
-\r
- long getInputSize() {\r
- Class<? extends Executable<?>> name = ServicesUtil\r
- .getRunnerByJobDirectory(jobdir);\r
-\r
- File input = files.get(SkeletalExecutable.INPUT);\r
- if (input != null) {\r
- return input.length();\r
- }\r
- return UNDEFINED;\r
- }\r
-\r
- JobStat getJobStat() {\r
- return JobStat.newInstance(getService(), getClusterJobID(),\r
- jobdir.getName(), getStartTime(), getFinishedTime(),\r
- getInputSize(), getResultSize(), isCancelled(),\r
- isCollected());\r
- }\r
-\r
- @Override\r
- public int hashCode() {\r
- final int prime = 31;\r
- int result = 1;\r
- result = prime * result\r
- + ((jobdir == null) ? 0 : jobdir.hashCode());\r
- return result;\r
- }\r
-\r
- @Override\r
- public boolean equals(Object obj) {\r
- if (this == obj)\r
- return true;\r
- if (obj == null)\r
- return false;\r
- if (getClass() != obj.getClass())\r
- return false;\r
- JobDirectory other = (JobDirectory) obj;\r
- if (jobdir == null) {\r
- if (other.jobdir != null)\r
- return false;\r
- } else if (!jobdir.equals(other.jobdir))\r
- return false;\r
- return true;\r
- }\r
- }\r
-\r
+ // TODO test!\r
void collectStatistics() {\r
- File[] files = workDirectory.listFiles(directories);\r
- for (File file : files) {\r
- JobDirectory jd = new JobDirectory(file);\r
- JobStat jstat = jd.getJobStat();\r
- // Do not record stats on the job that has not completed yet\r
- if (hasCompleted(jd)) {\r
- stats.add(jstat);\r
+ // clear the stats list before recollecting
+ stats.clear();\r
+ File[] dirs = workingDirectory.listFiles(directories);\r
+ for (File dir : dirs) {\r
+ // skip work directories with test inputs
+ log.debug("check directory: " + dir.getName() + "...");\r
+ if (InputFilter.accept(new File(dir.getPath() + File.separator + SkeletalExecutable.INPUT))) {\r
+ JobDirectory jd = new JobDirectory(dir);\r
+ JobStat jstat = jd.getJobStat();\r
+ // Do not record stats on the job that has not completed yet\r
+ if (hasCompleted(jd)) {\r
+ stats.add(jstat);\r
+ } else {\r
+ log.debug("Skipping the job: " + jstat + " as it has not completed yet");\r
+ }\r
} else {\r
- log.debug("Skipping the job: " + jstat);\r
- log.debug("As it has not completed yet");\r
+ log.trace("training input: " + dir.getName() + File.separator + SkeletalExecutable.INPUT);\r
}\r
- // System.out.println(jd.getJobStat().getJobReportTabulated());\r
}\r
}\r
\r
@Override\r
public void run() {\r
log.info("Started updating statistics at " + new Date());\r
- log.info("For directory: " + workDirectory.getAbsolutePath());\r
+ log.info("For directory: " + workingDirectory.getAbsolutePath());\r
\r
collectStatistics();\r
\r