1 package compbio.stat.collector;
\r
import java.io.File;
import java.io.FileFilter;
import java.io.FileWriter;
import java.io.IOException;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.log4j.Logger;

import compbio.engine.client.ConfExecutable;
import compbio.engine.client.Executable;
import compbio.engine.conf.PropertyHelperManager;
import compbio.metadata.JobStatus;
import compbio.runner.msa.ClustalW;
import compbio.util.FileUtil;
import compbio.util.PropertyHelper;
import compbio.ws.client.Services;
\r
/**
 * Collects execution statistics from the job output directories.
 *
 * Number of runs of each WS = number of folders with the WS name.
 * Number of successful runs = all runs with no result file.
 * Per period of time = limit by file creation time.
 * Runtime (avg/max) = started time - finished time.
 * Task &amp; result size = result.size.
 * Abandoned runs = runs that were never collected.
 * Cancelled runs = runs marked cancelled.
 * Cluster vs local runs.
 * Reasons for failure = look in the err out?
 *
 * Metadata required:
 * work directory for local and cluster tasks = from Helper or cmd parameter.
 * WS names - enumeration. Status file names and content.
 */
55 public class ExecutionStatCollector {
\r
57 static final int UNDEFINED = -1;
\r
59 private static final Logger log = Logger
\r
60 .getLogger(ExecutionStatCollector.class);
\r
62 static SimpleDateFormat DF = new SimpleDateFormat("dd/MM/yyyy hh:mm:ss");
\r
64 static PropertyHelper ph = PropertyHelperManager.getPropertyHelper();
\r
66 final private List<JobStat> stats;
\r
68 public ExecutionStatCollector(String workDirectory) {
\r
69 File[] files = FileUtil.getFiles(workDirectory, directories);
\r
70 stats = new ArrayList<JobStat>();
\r
71 for (File file : files) {
\r
72 JobDirectory jd = new JobDirectory(file);
\r
73 stats.add(jd.getJobStat());
\r
74 // System.out.println(jd.getJobStat().getJobReportTabulated());
\r
78 public StatProcessor getStats() {
\r
79 return new StatProcessor(stats);
\r
82 public void writeStatToDB() throws SQLException {
\r
83 Set<JobStat> rjobs = new HashSet<JobStat>(stats);
\r
84 StatDB statdb = new StatDB();
\r
85 statdb.removeRecordedJobs(rjobs);
\r
86 statdb.insertData(rjobs);
\r
87 statdb.conn.close();
\r
90 static String getClusterJobDir() {
\r
91 String clusterdir = ph.getProperty("cluster.tmp.directory");
\r
92 if (clusterdir != null) {
\r
98 static void updateTime(File statFile) throws IOException {
\r
99 long lastMod = statFile.lastModified();
\r
100 FileWriter fw = new FileWriter(statFile);
\r
101 fw.write(new Long(lastMod).toString());
\r
105 static String getLocalJobDir() {
\r
106 String locdir = ph.getProperty("local.tmp.directory");
\r
107 if (locdir != null) {
\r
116 * @throws IOException
\r
117 * @throws SQLException
\r
119 public static void main(String[] args) throws IOException, SQLException {
\r
121 // updateTime(new File(
\r
122 // "D:\\workspace\\JABA2\\jobsout\\AACon#170462904473672\\STARTED"));
\r
124 String workDir = PropertyHelperManager.getLocalPath()
\r
125 + getLocalJobDir().trim();
\r
126 System.out.println(workDir);
\r
127 File[] files = FileUtil.getFiles("Y:\\fc\\www-jws2\\jaba\\jobsout",
\r
129 List<JobStat> stats = new ArrayList<JobStat>();
\r
130 for (File file : files) {
\r
131 JobDirectory jd = new JobDirectory(file);
\r
132 stats.add(jd.getJobStat());
\r
133 // System.out.println(jd.getJobStat().getJobReportTabulated());
\r
135 StatProcessor sp = new StatProcessor(stats);
\r
136 System.out.println(sp.reportStat());
\r
137 System.out.println();
\r
138 System.out.println("!!!!!!!!!!!!!!!!!!");
\r
139 System.out.println();
\r
141 Set<JobStat> rjobs = new HashSet<JobStat>(sp.stats);
\r
142 StatDB statdb = new StatDB();
\r
143 statdb.removeRecordedJobs(rjobs);
\r
144 statdb.insertData(rjobs);
\r
147 static FileFilter directories = new FileFilter() {
\r
149 public boolean accept(File pathname) {
\r
150 return pathname.isDirectory();
\r
154 static class JobDirectory {
\r
157 Map<String, File> files = new HashMap<String, File>();
\r
159 public JobDirectory(File directory) {
\r
160 this.jobdir = directory;
\r
161 for (File f : jobdir.listFiles()) {
\r
162 files.put(f.getName(), f);
\r
166 public boolean hasStatus(JobStatus status) {
\r
167 return files.containsKey(status.toString());
\r
170 boolean isCollected() {
\r
171 return hasStatus(JobStatus.COLLECTED);
\r
174 boolean isCancelled() {
\r
175 return hasStatus(JobStatus.CANCELLED);
\r
178 long getStartTime() {
\r
179 long starttime = UNDEFINED;
\r
180 File startfile = files.get(JobStatus.STARTED.toString());
\r
181 if (startfile == null) {
\r
182 startfile = files.get(JobStatus.SUBMITTED.toString());
\r
184 if (startfile != null) {
\r
185 starttime = startfile.lastModified();
\r
187 * String start = FileUtil.readFileToString(startfile);
\r
188 * starttime = Long.parseLong(start.trim());
\r
194 String getClusterJobID() {
\r
195 String clustjobId = "";
\r
196 File jobid = files.get("JOBID");
\r
198 if (jobid != null) {
\r
199 clustjobId = FileUtil.readFileToString(jobid);
\r
201 } catch (IOException ioe) {
\r
202 ioe.printStackTrace();
\r
205 return clustjobId.trim();
\r
208 long getFinishedTime() {
\r
209 long ftime = UNDEFINED;
\r
210 File finished = files.get(JobStatus.FINISHED.toString());
\r
211 if (finished != null) {
\r
212 ftime = finished.lastModified();
\r
214 * String start = FileUtil.readFileToString(finished); ftime =
\r
215 * Long.parseLong(start.trim());
\r
217 // System.out.println("f " + ftime);
\r
220 * } catch (IOException e) { log.log(Level.WARN,
\r
221 * "Cannot parse finished time: " + e.getMessage(), e); } catch
\r
222 * (NumberFormatException e) { log.log(Level.WARN,
\r
223 * "Cannot parse finished time: " + e.getMessage(), e); }
\r
228 @SuppressWarnings("unchecked")
\r
229 Class<Executable<?>> getWSRunnerName() {
\r
230 String name = jobdir.getName().split("#")[0];
\r
232 if (name.startsWith(ConfExecutable.CLUSTER_TASK_ID_PREFIX)) {
\r
233 assert ConfExecutable.CLUSTER_TASK_ID_PREFIX.length() == 1;
\r
234 name = name.substring(1);
\r
236 name = ClustalW.class.getPackage().getName() + "." + name;
\r
237 return (Class<Executable<?>>) Class.forName(name);
\r
238 } catch (ClassNotFoundException e) {
\r
239 e.printStackTrace();
\r
240 throw new RuntimeException(
\r
241 "Cannot match the directory name to the executable! Executable name is "
\r
246 private Services getService() {
\r
247 return Services.getService(getWSRunnerName());
\r
250 // Mafft, Muscle, Tcoffee, Clustal task:fasta.in result:fasta.out
\r
251 // Probcons task:fasta.in result:alignment.out
\r
253 * TODO replace with Universal names for WS!
\r
255 long getResultSize() {
\r
256 Class<Executable<?>> name = getWSRunnerName();
\r
258 if (name.getSimpleName().equalsIgnoreCase("Probcons")) {
\r
259 f = files.get("alignment.out");
\r
260 } else if (name.getSimpleName().equalsIgnoreCase("ClustalW")) {
\r
261 f = files.get("output.txt");
\r
263 f = files.get("fasta.out");
\r
271 long getInputSize() {
\r
272 File input = files.get("fasta.in");
\r
273 if (input != null) {
\r
274 return input.length();
\r
279 JobStat getJobStat() {
\r
280 return JobStat.newInstance(getService(), getClusterJobID(),
\r
281 jobdir.getName(), getStartTime(), getFinishedTime(),
\r
282 getInputSize(), getResultSize(), isCancelled(),
\r
287 public int hashCode() {
\r
288 final int prime = 31;
\r
290 result = prime * result
\r
291 + ((jobdir == null) ? 0 : jobdir.hashCode());
\r
296 public boolean equals(Object obj) {
\r
301 if (getClass() != obj.getClass())
\r
303 JobDirectory other = (JobDirectory) obj;
\r
304 if (jobdir == null) {
\r
305 if (other.jobdir != null)
\r
307 } else if (!jobdir.equals(other.jobdir))
\r