From 98502eb1394f85b0e73577790381c210a3800a60 Mon Sep 17 00:00:00 2001 From: Sasha Sherstnev <a.sherstnev@dundee.ac.uk> Date: Tue, 5 Nov 2013 14:18:16 +0000 Subject: [PATCH] Restore one servlet --- .../cassandra/CassandraNativeConnector.java | 79 +++++++--- datadb/compbio/cassandra/Pair.java | 24 +++ server/compbio/listeners/ContextListener.java | 2 +- server/compbio/statistic/CassandraRequester.java | 163 +++++++------------- 4 files changed, 138 insertions(+), 130 deletions(-) create mode 100644 datadb/compbio/cassandra/Pair.java diff --git a/datadb/compbio/cassandra/CassandraNativeConnector.java b/datadb/compbio/cassandra/CassandraNativeConnector.java index 1fb01fc..be90b05 100644 --- a/datadb/compbio/cassandra/CassandraNativeConnector.java +++ b/datadb/compbio/cassandra/CassandraNativeConnector.java @@ -1,25 +1,21 @@ package compbio.cassandra; import java.io.IOException; +import java.util.Calendar; +import java.util.HashMap; import java.util.List; +import java.util.ArrayList; import com.datastax.driver.core.Cluster; import com.datastax.driver.core.Host; import com.datastax.driver.core.Metadata; +import com.datastax.driver.core.Row; import com.datastax.driver.core.Session; import com.datastax.driver.core.ResultSet; public class CassandraNativeConnector { private static Cluster cluster; private static Session session; - - /* - * private static Keyspace ksp; private static Mutator<Long> mutatorLong; - * private static Mutator<String> mutatorString; private static - * Mutator<String> mutatorLog; StringSerializer ss = StringSerializer.get(); - * LongSerializer ls = LongSerializer.get(); - */ - /* * connect to the cluster and look weather the dababase has any data inside */ @@ -41,8 +37,8 @@ public class CassandraNativeConnector { + "(JobID ascii, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, ExecutionStatus ascii, Protein ascii, PRIMARY KEY(JobID));"); session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinData (jobtime bigint, JobID ascii, Protein ascii, PRIMARY KEY(JobID));"); - session.execute("CREATE INDEX ProteinSeq ON ProteinKeyspace.ProteinRow (protein);"); - session.execute("CREATE INDEX JobDateStamp ON ProteinKeyspace.ProteinData (jobtime);"); + session.execute("CREATE INDEX IF NOT EXISTS ProteinSeq ON ProteinKeyspace.ProteinRow (protein);"); + session.execute("CREATE INDEX IF NOT EXISTS JobDateStamp ON ProteinKeyspace.ProteinData (jobtime);"); System.out.println("Cassandra connected"); } @@ -51,7 +47,7 @@ public class CassandraNativeConnector { * parsing data source and filling the database */ public void Parsing() throws IOException { - if (false) { + if (true) { // if (source.equals("http")) { // get data from real Jpred production server System.out.println("Parsing web data source......"); @@ -60,7 +56,7 @@ public class CassandraNativeConnector { JpredParserHTTP parser = new JpredParserHTTP(prefix); parser.Parsing(datasrc, 4); } - if (true) { + if (false) { // if (source.equals("file")) { // get irtifical data generated for the DB stress tests System.out.println("Parsing local file data source......"); @@ -72,29 +68,27 @@ public class CassandraNativeConnector { } public void Closing() { + session.shutdown(); cluster.shutdown(); System.out.println("Cassandra has been shut down"); } /* - * prepare data for insertion into the db + * inserting data into the db */ public void InsertData(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx, String statusFinal, String protein, List<FastaSequence> predictions) { String check1 = "SELECT * FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';"; - //System.out.println(check1); ResultSet results1 = session.execute(check1); if (results1.isExhausted()) { String com1 = "INSERT INTO ProteinKeyspace.ProteinLog " + "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)" + " VALUES ('" + jobid + "','" + ip + "','" + startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx + "','" + protein + "');"; - //System.out.println(com1); session.execute(com1); String com2 = "INSERT INTO ProteinKeyspace.ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid + "','" + protein + "');"; - //System.out.println(com2); session.execute(com2); String allpredictions = ""; @@ -109,15 +103,64 @@ public class CassandraNativeConnector { } String check2 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "';"; - //System.out.println(check2); ResultSet results2 = session.execute(check2); if (results2.isExhausted()) { String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('" + protein + "','" + jobid + "',{" + final_prediction + "});"; - //System.out.println(com3); session.execute(com3); } } } + /* + * getting data from the db + */ + public List<Pair<String, String>> ReadProteinDataTable() { + final long startTime = System.currentTimeMillis(); + String com = "SELECT DataBegin,DataEnd FROM ProteinKeyspace.ProteinLog;"; + System.out.println("Command: " + com); + ResultSet results = session.execute(com); + final long queryTime = System.currentTimeMillis(); + List<Row> rows = results.all(); + System.out.println ("Query time is " + (queryTime - startTime) + " msec"); + + List<Pair<String, String>> res = new ArrayList<Pair<String, String>>(); + int c = 0; + for (Row r : rows) { + Pair<String, String> pair = new Pair<String, String>(r.getString("DataBegin"),r.getString("DataEnd")); + res.add(pair); + ++c; + } + final long endTime = System.currentTimeMillis(); + System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); + return res; + } + + /* + * getting earlest date of jobs from the db + */ + public long getEarliestDateInDB() { + final long startTime = System.currentTimeMillis(); + String com = "SELECT jobtime FROM ProteinKeyspace.ProteinData;"; + System.out.println("Command: " + com); + ResultSet results = session.execute(com); + final long queryTime = System.currentTimeMillis(); + System.out.println ("Query time is " + (queryTime - startTime) + " msec"); + + Calendar cal = Calendar.getInstance(); + long res = cal.getTimeInMillis(); + int c = 0; + while (!results.isExhausted()) { + Row r = results.one(); + long d1 = r.getLong("jobtime"); + if (res > d1) { + res = d1; + } + ++c; + } + final long endTime = System.currentTimeMillis(); + System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); + return res; + } + } diff --git a/datadb/compbio/cassandra/Pair.java b/datadb/compbio/cassandra/Pair.java new file mode 100644 index 0000000..d942fea --- /dev/null +++ b/datadb/compbio/cassandra/Pair.java @@ -0,0 +1,24 @@ +package compbio.cassandra; + +public class Pair<K, V> { + + private final K element0; + private final V element1; + + public static <K, V> Pair<K, V> createPair(K element0, V element1) { + return new Pair<K, V>(element0, element1); + } + + public Pair(K element0, V element1) { + this.element0 = element0; + this.element1 = element1; + } + + public K getElement0() { + return element0; + } + + public V getElement1() { + return element1; + } +} \ No newline at end of file diff --git a/server/compbio/listeners/ContextListener.java b/server/compbio/listeners/ContextListener.java index 44cf66b..f8bcc75 100644 --- a/server/compbio/listeners/ContextListener.java +++ b/server/compbio/listeners/ContextListener.java @@ -38,7 +38,7 @@ public class ContextListener implements ServletContextListener { e.printStackTrace(); } } - }, 0, 60, TimeUnit.SECONDS); + }, 0, 600, TimeUnit.SECONDS); } diff --git a/server/compbio/statistic/CassandraRequester.java b/server/compbio/statistic/CassandraRequester.java index 1906c97..9885d64 100755 --- a/server/compbio/statistic/CassandraRequester.java +++ b/server/compbio/statistic/CassandraRequester.java @@ -4,13 +4,12 @@ import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; -import java.util.Collections; import java.util.Date; -import java.util.Iterator; import java.util.List; import compbio.cassandra.CassandraNativeConnector; import compbio.cassandra.DataBase; +import compbio.cassandra.Pair; public class CassandraRequester { private CassandraNativeConnector DBInstance = new CassandraNativeConnector(); @@ -18,11 +17,16 @@ public class CassandraRequester { private static long currentDate = 0; private static long earlestDate = 0; - /* * query: execution time for the period from date1 till date2 - * */ + */ public List<DataBase> extractExecutionTime(String date1, String date2) { + if (null == date1) { + date1 = "1970/1/1"; + } + if (null == date2) { + date1 = "2100/1/1"; + } if (!isThisDateValid(date1) || !isThisDateValid(date2)) { System.out.println("Wrong date: point 3"); return null; @@ -31,15 +35,13 @@ public class CassandraRequester { int nbins = 5; long dateStart = DateParsing(date1); long dateEnd = DateParsing(date2); - if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate) || dateStart > dateEnd) + if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd) return null; if (dateStart < earlestDate) dateStart = earlestDate; if (dateEnd > currentDate) dateStart = currentDate; - System.out.println("CassandraRequester.extractExecutionTime: earlestDate = " + earlestDate + ", currentDate = " + currentDate); - Calendar start = Calendar.getInstance(); start.setTime(new Date(dateStart)); Calendar end = Calendar.getInstance(); @@ -48,24 +50,34 @@ public class CassandraRequester { List<Integer> totalTime = new ArrayList<Integer>(); for (int i = 0; i < nbins; i++) totalTime.add(i, 0); - /* + List<Pair<String, String>> res = DBInstance.ReadProteinDataTable(); + List<Pair<Date, Long>> numres = new ArrayList<Pair<Date, Long>>(); + + for (Pair<String, String> entry : res) { + SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd"); + try { + Date jobstartdate = dateformatter.parse(entry.getElement0()); + long date = jobstartdate.getTime(); + if (dateStart <= date && date <= dateEnd) { + SimpleDateFormat datetimeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s"); + Date jobstarttime = datetimeformatter.parse(entry.getElement0()); + Date jobendtime = datetimeformatter.parse(entry.getElement1()); + long diff = (jobendtime.getTime() - jobstarttime.getTime()) / 1000; + Pair<Date, Long> pair = new Pair<Date, Long>(jobstartdate, Long.valueOf(diff)); + numres.add(pair); + } + } catch (ParseException e) { + e.printStackTrace(); + } + } + for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) { List<Integer> timeResult = new ArrayList<Integer>(); - SliceQuery<Long, String, String> result = HFactory.createSliceQuery(DBInstance.GetKeyspace(), LongSerializer.get(), - StringSerializer.get(), StringSerializer.get()); - result.setColumnFamily("ProteinData"); - result.setKey(date.getTime()); - result.setRange(null, null, false, Integer.MAX_VALUE); - QueryResult<ColumnSlice<String, String>> columnSlice = result.execute(); - List<HColumn<String, String>> col = columnSlice.get().getColumns(); - if (!col.isEmpty()) { - Iterator<HColumn<String, String>> itCol = col.iterator(); - for (int i = 0; i < nbins; i++) - timeResult.add(i, 0); - // split all jobs into nbins bins - while (itCol.hasNext()) { - String id = itCol.next().getName(); - long lenResult = CountID(id); + for (int i = 0; i < nbins; i++) + timeResult.add(i, 0); + for (Pair<Date, Long> p : numres) { + if (date.equals(p.getElement0())) { + long lenResult = p.getElement1().longValue(); if (lenResult <= 30) timeResult.set(0, timeResult.get(0) + 1); else if (lenResult > 30 && lenResult <= 60) @@ -78,15 +90,15 @@ public class CassandraRequester { timeResult.set(4, timeResult.get(4) + 1); } } - for (int i = 0; i < nbins; i++) - totalTime.set(i, totalTime.get(i) + timeResult.get(i)); - DataBase db = new DataBase(); - db.setTimeRez(timeResult); - db.setDate(DateFormat(date.getTime())); - query.add(db); } + for (int i = 0; i < nbins; i++) + totalTime.set(i, totalTime.get(i) + timeResult.get(i)); + DataBase db = new DataBase(); + db.setTimeRez(timeResult); + db.setDate(DateFormat(date.getTime())); + query.add(db); } - */ + DataBase db = new DataBase(); db.setTimeTotalExec(totalTime); query.add(db); @@ -94,7 +106,7 @@ public class CassandraRequester { return query; } - /* + /* * convert String date into long date (miliseconds since the epoch start) */ private static long DateParsing(String datInput) { @@ -111,71 +123,24 @@ public class CassandraRequester { return dateWorkSt; } - /* - * convert String date:time into long date:time (miliseconds since the epoch start) - */ - private static long TimeConvert(String datInput) { - long dateWorkSt = 0; - if (datInput == null) { - return dateWorkSt; - } - SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss"); - try { - dateWorkSt = formatter.parse(datInput).getTime(); - } catch (ParseException e) { - e.printStackTrace(); - } - return dateWorkSt; - } - // convert long to date in string format private static String DateFormat(long inDate) { SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy"); - String dateString = datformat.format(new Date(inDate)); - return dateString; - } - - /* - * convert ??? - */ - public static String DateFormatYYMMDD(long indate) { - SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd"); - String dateString = datformat.format(new Date(indate)); - return dateString; + return datformat.format(new Date(inDate)); } /* - * ??? - */ - public long CountID(String id) { - /* - SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(DBInstance.GetKeyspace(), StringSerializer.get(), - StringSerializer.get(), StringSerializer.get()); - sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100); - QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute(); - String datBegin = result.get().getColumnByName("DataBegin").getValue(); - String datEnd = result.get().getColumnByName("DataEnd").getValue(); - - long datBeginLong = TimeConvert(datBegin); - long datEndLong = TimeConvert(datEnd); - return (datEndLong - datBeginLong) / 1000; - */ - return 0; - } - - /* - * set earlest date and current dates. - * earlestDate is static and should be set at the 1st call - * currentDate should be re-calculated every time + * set earlest date and current dates. earlestDate is static and should be + * set at the 1st call currentDate should be re-calculated every time */ private static void SetDateRange() { + Calendar cal = Calendar.getInstance(); + currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH)); if (0 == earlestDate) { - StatisticsProt sp = new StatisticsProt(); - earlestDate = sp.earliestDate(); + CassandraRequester cr = new CassandraRequester(); + earlestDate = cr.earliestDate(); System.out.println("Set earlest Date = " + earlestDate); } - Calendar cal = Calendar.getInstance(); - currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH)); } public boolean isThisDateValid(String dateToValidate) { @@ -199,31 +164,7 @@ public class CassandraRequester { * find the earliest date in the database */ public long earliestDate() { - /* - ArrayList<Long> dateSort = new ArrayList<Long>(); - int row_count = 10000; - RangeSlicesQuery<Long, String, String> result = HFactory.createRangeSlicesQuery(DBInstance.GetKeyspace(), LongSerializer.get(), - StringSerializer.get(), StringSerializer.get()); - result.setColumnFamily("ProteinData"); - result.setRange(null, null, false, Integer.MAX_VALUE); - result.setRowCount(row_count); - Long last_key = null; - while (true) { - result.setKeys(last_key, null); - QueryResult<OrderedRows<Long, String, String>> columnSlice = result.execute(); - OrderedRows<Long, String, String> rows = columnSlice.get(); - Iterator<Row<Long, String, String>> rowsIterator = rows.iterator(); - while (rowsIterator.hasNext()) { - Row<Long, String, String> row = rowsIterator.next(); - last_key = row.getKey(); - dateSort.add(last_key); - } - if (rows.getCount() < row_count) - break; - } - Collections.sort(dateSort); - return dateSort.get(0); - */ - return 0; + earlestDate = DBInstance.getEarliestDateInDB(); + return earlestDate; } } -- 1.7.10.2