JWS-96 - first working version of the crawler
[jabaws.git] / webservices / compbio / stat / collector / StatDB.java
1 /* Copyright (c) 2011 Peter Troshin\r
2  *  \r
3  *  JAva Bioinformatics Analysis Web Services (JABAWS) @version: 2.0     \r
4  * \r
5  *  This library is free software; you can redistribute it and/or modify it under the terms of the\r
6  *  Apache License version 2 as published by the Apache Software Foundation\r
7  * \r
8  *  This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without\r
9  *  even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache \r
10  *  License for more details.\r
11  * \r
12  *  A copy of the license is in apache_license.txt. It is also available here:\r
13  * @see: http://www.apache.org/licenses/LICENSE-2.0.txt\r
14  * \r
15  * Any republication or derived work distributed in source code form\r
16  * must include this copyright and license notice.\r
17  */\r
18 package compbio.stat.collector;\r
19 \r
20 import java.sql.Connection;\r
21 import java.sql.DriverManager;\r
22 import java.sql.PreparedStatement;\r
23 import java.sql.ResultSet;\r
24 import java.sql.SQLException;\r
25 import java.sql.Statement;\r
26 import java.sql.Timestamp;\r
27 import java.util.ArrayList;\r
28 import java.util.Date;\r
29 import java.util.List;\r
30 import java.util.Set;\r
31 \r
32 import org.apache.log4j.Logger;\r
33 \r
34 import compbio.engine.conf.PropertyHelperManager;\r
35 import compbio.util.Util;\r
36 import compbio.ws.client.Services;\r
37 \r
38 /**\r
39  * The database must be stored in the application root directory and called\r
40  * "ExecutionStatistic"\r
41  * \r
42  * @author pvtroshin\r
43  * \r
44  */\r
45 public class StatDB {\r
46 \r
47         private static final String driver = "org.apache.derby.jdbc.EmbeddedDriver";\r
48         private static final String protocol = "jdbc:derby:";\r
49         private static final String statDBName = "ExecutionStatistic";\r
50 \r
51         private static final Logger log = Logger.getLogger(StatDB.class);\r
52 \r
53         static Connection conn;\r
54 \r
55         private synchronized static Connection getDBConnection() throws SQLException {\r
56                 return getDBConnection(statDBName);\r
57         }\r
58 \r
59         private synchronized static Connection getDBConnection(String dbname) throws SQLException {\r
60 \r
61                 if (conn != null && !conn.isClosed()) {\r
62                         return conn;\r
63                 } else {\r
64                         try {\r
65                                 String dbpath = PropertyHelperManager.getLocalPath();\r
66                                 log.info("Looking for JABAWS access statistics database at: " + dbpath);\r
67                                 System.setProperty("derby.system.home", dbpath);\r
68                                 // Apparently under Tomcat webapp you cannot rely on Java\r
69                                 // auto discovery and have to register the driver explicitly\r
70                                 Class.forName(driver);\r
71                                 conn = DriverManager.getConnection(protocol + dbname + ";create=false");\r
72 \r
73                                 conn.setAutoCommit(true);\r
74                         } catch (ClassNotFoundException e) {\r
75                                 log.error(e.getMessage(), e);\r
76                         }\r
77                 }\r
78                 return conn;\r
79         }\r
80 \r
81         \r
82         public StatDB(String dbname) throws SQLException {\r
83                 this.conn = getDBConnection(dbname);\r
84         }\r
85 \r
86         public StatDB() throws SQLException {\r
87                 this.conn = getDBConnection();\r
88         }\r
89 \r
90         /**\r
91          * Connect to test database\r
92          * \r
93          * @param ignored\r
94          * @throws SQLException\r
95          */\r
96         StatDB(boolean ignored) throws SQLException {\r
97                 this.conn = getTestDBConnection();\r
98         }\r
99 \r
100         private static Connection getTestDBConnection() throws SQLException {\r
101                 System.setProperty("derby.system.home", "testsrc/testdata");\r
102                 Connection conn = DriverManager.getConnection(protocol + statDBName\r
103                                 + ";create=false");\r
104                 conn.setAutoCommit(true);\r
105                 log.debug("Connecting to the TEST database!");\r
106                 return conn;\r
107         }\r
108 \r
109         // ServiceName,jobname,start,finish,inputSize,resultSize,isCancelled,isCollected\r
110         /**\r
111          * \r
112          * rs.getBoolean(i) will return true for any non-zero value and false for 0\r
113          * on SMALLINT data column.\r
114          * \r
115          * @throws SQLException\r
116          */\r
117         private void createStatTable() throws SQLException {\r
118                 // Creating a statement object that we can use for running various SQL\r
119                 // statements commands against the database.\r
120                 Statement s = conn.createStatement();\r
121                 String create = "create table exec_stat("\r
122                                 + "number INT GENERATED ALWAYS AS IDENTITY,"\r
123                                 + "service_name VARCHAR(15) NOT NULL, "\r
124                                 + "cluster_job_id VARCHAR(30), "\r
125                                 + "job_id VARCHAR(35) NOT NULL PRIMARY KEY, "\r
126                                 + "start TIMESTAMP," + "finish TIMESTAMP,"\r
127                                 + "inputsize BIGINT," + "resultsize BIGINT,"\r
128                                 + "isCancelled SMALLINT NOT NULL,"\r
129                                 + "isCollected SMALLINT NOT NULL, "\r
130                                 + "isClusterJob SMALLINT NOT NULL)";\r
131                 // We create a table...\r
132                 log.debug(create);\r
133                 s.execute(create);\r
134                 s.close();\r
135                 conn.close();\r
136         }\r
137 \r
138         static void clearStatTable() throws SQLException {\r
139                 Connection conn = getDBConnection();\r
140                 String query = "delete from exec_stat";\r
141                 Statement st = conn.createStatement();\r
142                 st.executeUpdate(query);\r
143                 st.close();\r
144                 conn.commit();\r
145                 conn.close();\r
146         }\r
147 \r
148         void insertData(Set<JobStat> jobstatus) throws SQLException {\r
149                 log.info("Inserting " + jobstatus.size()\r
150                                 + " new records into the statistics database");\r
151 \r
152                 conn.setAutoCommit(false);\r
153                 String insert = "insert into exec_stat (service_name, cluster_job_id, job_id, start, finish, "\r
154                                 + "inputsize, resultsize, isCancelled, isCollected, isClusterJob) "\r
155                                 + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";\r
156                 PreparedStatement pstm = conn.prepareStatement(insert);\r
157                 int i = 0;\r
158                 for (JobStat js : jobstatus) {\r
159                         // Has to present\r
160                         pstm.setString(1, js.webService.toString());\r
161 \r
162                         if (!Util.isEmpty(js.clusterJobId)) {\r
163                                 pstm.setString(2, js.clusterJobId);\r
164                         } else {\r
165                                 pstm.setString(2, null);\r
166                         }\r
167                         // Has to present\r
168                         pstm.setString(3, js.jobname);\r
169 \r
170                         if (js.start != ExecutionStatCollector.UNDEFINED) {\r
171                                 pstm.setTimestamp(4, new Timestamp(js.start));\r
172                         } else {\r
173                                 pstm.setTimestamp(4, null);\r
174                         }\r
175                         if (js.finish != ExecutionStatCollector.UNDEFINED) {\r
176                                 pstm.setTimestamp(5, new Timestamp(js.finish));\r
177                         } else {\r
178                                 pstm.setTimestamp(5, null);\r
179                         }\r
180                         // -1 if UNDEFINED\r
181                         pstm.setLong(6, js.inputSize);\r
182                         // -1 if UNDEFINED\r
183                         pstm.setLong(7, js.resultSize);\r
184 \r
185                         pstm.setBoolean(8, js.isCancelled);\r
186                         pstm.setBoolean(9, js.isCollected);\r
187                         pstm.setBoolean(10, js.isClusterJob());\r
188                         pstm.executeUpdate();\r
189                         ++i;\r
190                 }\r
191                 conn.commit();\r
192                 conn.setAutoCommit(true);\r
193                 pstm.close();\r
194                 log.debug(i + " jobs have been recorded...");\r
195         }\r
196 \r
197         public Date getEarliestRecord() throws SQLException {\r
198                 String query = "select min(start) from exec_stat";\r
199                 Statement st = conn.createStatement();\r
200                 ResultSet res = st.executeQuery(query);\r
201                 boolean exist = res.next();\r
202                 Date date = new Date();\r
203                 if (exist) {\r
204                         date = res.getDate(1);\r
205                 }\r
206                 res.close();\r
207                 st.close();\r
208                 return date;\r
209         }\r
210 \r
211         public int getTotalJobsCount(Timestamp from, Timestamp to)\r
212                         throws SQLException {\r
213                 String allQuery = "select count(*) from exec_stat where start BETWEEN ? and ? ";\r
214                 return getIntResult(from, to, allQuery);\r
215         }\r
216 \r
217         public int getCancelledCount(Timestamp from, Timestamp to)\r
218                         throws SQLException {\r
219                 // js.isCancelled\r
220                 String cancelledQuery = "select count(*) from exec_stat where start BETWEEN ? and ?  and  isCancelled=1 ";\r
221                 return getIntResult(from, to, cancelledQuery);\r
222         }\r
223 \r
224         public int getAbandonedCount(Timestamp from, Timestamp to)\r
225                         throws SQLException {\r
226                 // !js.isCollected && !js.isCancelled && js.hasResult()\r
227                 String abandonedQuery = "select count(*) from exec_stat where start BETWEEN ? and ? and isCollected=0 and isCancelled=0 and resultsize>0 ";\r
228                 return getIntResult(from, to, abandonedQuery);\r
229         }\r
230 \r
231         public int getIncompleteCount(Timestamp from, Timestamp to)\r
232                         throws SQLException {\r
233                 // !js.hasResult()\r
234                 String incompleteQuery = "select count(*) from exec_stat where start BETWEEN ? and ? and resultsize<=0 and isCancelled=0";\r
235                 return getIntResult(from, to, incompleteQuery);\r
236         }\r
237 \r
238         private int getIntResult(Timestamp from, Timestamp to, String query)\r
239                         throws SQLException {\r
240 \r
241                 log.debug("getIntRes: QUERY: " + query);\r
242                 log.debug("getIntRes: FROM: " + from);\r
243                 log.debug("getIntRes: TO: " + to);\r
244 \r
245                 PreparedStatement pstm = conn.prepareStatement(query);\r
246                 pstm.setTimestamp(1, from);\r
247                 pstm.setTimestamp(2, to);\r
248                 pstm.execute();\r
249                 ResultSet res = pstm.getResultSet();\r
250                 boolean exist = res.next();\r
251                 int count = 0;\r
252                 if (exist) {\r
253                         count = res.getInt(1);\r
254                 }\r
255                 log.debug("getIntRes: RES: " + count);\r
256                 res.close();\r
257                 pstm.close();\r
258                 return count;\r
259         }\r
260 \r
261         public List<JobStat> readData(Timestamp from, Timestamp to,\r
262                         Services wservice, Boolean clusterOnly) throws SQLException {\r
263 \r
264                 String query = "select service_name, cluster_job_id, job_id, start, finish, inputsize, "\r
265                                 + "resultsize, isCancelled, isCollected from exec_stat where start BETWEEN ? and ? ";\r
266 \r
267                 if (wservice != null) {\r
268                         query += " and service_name=? ";\r
269                 }\r
270 \r
271                 if (clusterOnly != null) {\r
272                         if (clusterOnly) {\r
273                                 query += " and isClusterJob!=0 ";\r
274                         } else {\r
275                                 query += " and isClusterJob=0 ";\r
276                         }\r
277                 }\r
278 \r
279                 log.debug("QUERY: " + query);\r
280                 log.debug("FROM: " + from);\r
281                 log.debug("TO: " + to);\r
282                 log.debug("WS: " + wservice);\r
283 \r
284                 PreparedStatement pstm = conn.prepareStatement(query);\r
285                 pstm.setTimestamp(1, from);\r
286                 pstm.setTimestamp(2, to);\r
287                 if (wservice != null) {\r
288                         pstm.setString(3, wservice.toString());\r
289                 }\r
290                 pstm.execute();\r
291                 List<JobStat> stats = new ArrayList<JobStat>();\r
292                 ResultSet rs = pstm.getResultSet();\r
293                 int rcount = 0;\r
294 \r
295                 while (rs.next()) {\r
296                         rcount++;\r
297                         stats.add(JobStat.newInstance(Services.getService(rs.getString(1)),\r
298                                         rs.getString(2), rs.getString(3), rs.getTimestamp(4),\r
299                                         rs.getTimestamp(5), rs.getLong(6), rs.getLong(7),\r
300                                         rs.getBoolean(8), rs.getBoolean(9)));\r
301                 }\r
302 \r
303                 log.debug("QUERY result len: " + rcount);\r
304                 rs.close();\r
305                 pstm.close();\r
306 \r
307                 return stats;\r
308         }\r
309 \r
310         /**\r
311          * Removes the job if\r
312          * \r
313          * 1) It has already been recorded\r
314          * \r
315          * 2) It has not completed and did not timeout - this is to prevent\r
316          * recording the information on the incomplete jobs.\r
317          * \r
318          * @param fsJobs\r
319          * @throws SQLException\r
320          */\r
321         public void removeRecordedJobs(Set<JobStat> fsJobs) throws SQLException {\r
322 \r
323                 String query = "select job_id from exec_stat";\r
324 \r
325                 if (null == conn) {\r
326                         System.out.println ("Something wrong with the DB...");\r
327                         return;\r
328                 }\r
329                 Statement st = conn.createStatement();\r
330                 ResultSet result = st.executeQuery(query);\r
331 \r
332                 while (result.next()) {\r
333                         String recordedJob = result.getString(1);\r
334                         JobStat recStat = JobStat.newIncompleteStat(recordedJob);\r
335                         if (fsJobs.contains(recStat)) {\r
336                                 fsJobs.remove(recStat);\r
337                         }\r
338                 }\r
339                 result.close();\r
340                 st.close();\r
341         }\r
342 \r
343         public static synchronized final void shutdownDBServer() {\r
344                 // ## DATABASE SHUTDOWN SECTION ##\r
345                 /***\r
346                  * In embedded mode, an application should shut down Derby. Shutdown\r
347                  * throws the XJ015 exception to confirm success.\r
348                  ***/\r
349                 try {\r
350                         if (conn != null) {\r
351                                 conn.close();\r
352                         }\r
353                 } catch (SQLException e) {\r
354                         log.warn("Database commit failed with " + e.getLocalizedMessage());\r
355                 }\r
356                 boolean gotSQLExc = false;\r
357                 try {\r
358                         DriverManager.getConnection("jdbc:derby:;shutdown=true");\r
359                 } catch (SQLException se) {\r
360                         if (se.getSQLState().equals("XJ015")) {\r
361                                 gotSQLExc = true;\r
362                         }\r
363                 }\r
364                 if (!gotSQLExc) {\r
365                         log.warn("Database did not shut down normally");\r
366                 } else {\r
367                         log.info("Database shut down normally");\r
368                 }\r
369         }\r
370         public static void main(String[] args) {\r
371                 // This is called from Ant cleanStatTable task\r
372                 try {\r
373                         clearStatTable();\r
374                         shutdownDBServer();\r
375                 } catch (SQLException e) {\r
376                         System.err.println("Fails to clean up JABAWS stat database!");\r
377                         e.printStackTrace();\r
378                 }\r
379                 // new StatDB().createStatTable();\r
380                 // insertData(null);\r
381                 /*\r
382                  * StatDB statdb = new StatDB(); Date from = new Date();\r
383                  * from.setMonth(1); System.out.println(new\r
384                  * StatProcessor(statdb.readData( new Timestamp(from.getTime()), new\r
385                  * Timestamp(new Date().getTime()), null, null)).reportStat());\r
386                  */\r
387         }\r
388 }\r