package compbio.cassandra;
import java.io.IOException;
+import java.util.Calendar;
+import java.util.HashMap;
import java.util.List;
+import java.util.ArrayList;
import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.Host;
import com.datastax.driver.core.Metadata;
+import com.datastax.driver.core.Row;
import com.datastax.driver.core.Session;
import com.datastax.driver.core.ResultSet;
public class CassandraNativeConnector {
private static Cluster cluster;
private static Session session;
-
- /*
- * private static Keyspace ksp; private static Mutator<Long> mutatorLong;
- * private static Mutator<String> mutatorString; private static
- * Mutator<String> mutatorLog; StringSerializer ss = StringSerializer.get();
- * LongSerializer ls = LongSerializer.get();
- */
-
/*
* connect to the cluster and check whether the database has any data inside
*/
+ "(JobID ascii, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, ExecutionStatus ascii, Protein ascii, PRIMARY KEY(JobID));");
session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinData (jobtime bigint, JobID ascii, Protein ascii, PRIMARY KEY(JobID));");
- session.execute("CREATE INDEX ProteinSeq ON ProteinKeyspace.ProteinRow (protein);");
- session.execute("CREATE INDEX JobDateStamp ON ProteinKeyspace.ProteinData (jobtime);");
+ session.execute("CREATE INDEX IF NOT EXISTS ProteinSeq ON ProteinKeyspace.ProteinRow (protein);");
+ session.execute("CREATE INDEX IF NOT EXISTS JobDateStamp ON ProteinKeyspace.ProteinData (jobtime);");
System.out.println("Cassandra connected");
}
* parsing data source and filling the database
*/
public void Parsing() throws IOException {
- if (false) {
+ if (true) {
// if (source.equals("http")) {
// get data from real Jpred production server
System.out.println("Parsing web data source......");
JpredParserHTTP parser = new JpredParserHTTP(prefix);
parser.Parsing(datasrc, 4);
}
- if (true) {
+ if (false) {
// if (source.equals("file")) {
// get artificial data generated for the DB stress tests
System.out.println("Parsing local file data source......");
}
/*
* shuts down the session and then the cluster, and reports it on stdout
*/
public void Closing() {
+ session.shutdown();
cluster.shutdown();
System.out.println("Cassandra has been shut down");
}
/*
- * prepare data for insertion into the db
+ * inserting data into the db
*/
public void InsertData(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx, String statusFinal,
String protein, List<FastaSequence> predictions) {
String check1 = "SELECT * FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';";
- //System.out.println(check1);
ResultSet results1 = session.execute(check1);
if (results1.isExhausted()) {
String com1 = "INSERT INTO ProteinKeyspace.ProteinLog "
+ "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)" + " VALUES ('" + jobid + "','" + ip + "','"
+ startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx + "','" + protein + "');";
- //System.out.println(com1);
session.execute(com1);
String com2 = "INSERT INTO ProteinKeyspace.ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid
+ "','" + protein + "');";
- //System.out.println(com2);
session.execute(com2);
String allpredictions = "";
}
String check2 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "';";
- //System.out.println(check2);
ResultSet results2 = session.execute(check2);
if (results2.isExhausted()) {
String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('"
+ protein + "','" + jobid + "',{" + final_prediction + "});";
- //System.out.println(com3);
session.execute(com3);
}
}
}
+ /*
+ * reads the DataBegin and DataEnd columns of every row of
+ * ProteinKeyspace.ProteinLog and returns them as a list of
+ * (DataBegin, DataEnd) pairs; the command and the timings go to stdout
+ */
+ public List<Pair<String, String>> ReadProteinDataTable() {
+ final long begin = System.currentTimeMillis();
+ final String query = "SELECT DataBegin,DataEnd FROM ProteinKeyspace.ProteinLog;";
+ System.out.println("Command: " + query);
+ final ResultSet answer = session.execute(query);
+ // the query time is taken before the rows are pulled out of the result set
+ final long afterQuery = System.currentTimeMillis();
+ final List<Row> fetched = answer.all();
+ System.out.println("Query time is " + (afterQuery - begin) + " msec");
+
+ final List<Pair<String, String>> intervals = new ArrayList<Pair<String, String>>();
+ for (final Row row : fetched) {
+ intervals.add(new Pair<String, String>(row.getString("DataBegin"), row.getString("DataEnd")));
+ }
+ final long finish = System.currentTimeMillis();
+ System.out.println(intervals.size() + " rows analysed, execution time is " + (finish - begin) + " msec");
+ return intervals;
+ }
+
+ /*
+ * getting the earliest job date from the db: walks over the jobtime column
+ * of ProteinKeyspace.ProteinData and returns the smallest value seen, or
+ * the current time when no row holds a smaller one
+ */
+ public long getEarliestDateInDB() {
+ final long begin = System.currentTimeMillis();
+ final String query = "SELECT jobtime FROM ProteinKeyspace.ProteinData;";
+ System.out.println("Command: " + query);
+ final ResultSet answer = session.execute(query);
+ final long afterQuery = System.currentTimeMillis();
+ System.out.println("Query time is " + (afterQuery - begin) + " msec");
+
+ // start from "now" so that any older job time replaces it
+ long earliest = Calendar.getInstance().getTimeInMillis();
+ int analysed = 0;
+ for (final Row row : answer) {
+ earliest = Math.min(earliest, row.getLong("jobtime"));
+ ++analysed;
+ }
+ final long finish = System.currentTimeMillis();
+ System.out.println(analysed + " rows analysed, execution time is " + (finish - begin) + " msec");
+ return earliest;
+ }
+
}
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
-import java.util.Collections;
import java.util.Date;
-import java.util.Iterator;
import java.util.List;
import compbio.cassandra.CassandraNativeConnector;
import compbio.cassandra.DataBase;
+import compbio.cassandra.Pair;
public class CassandraRequester {
private CassandraNativeConnector DBInstance = new CassandraNativeConnector();
private static long currentDate = 0;
private static long earlestDate = 0;
-
/*
* query: execution time for the period from date1 till date2
- * */
+ */
public List<DataBase> extractExecutionTime(String date1, String date2) {
+ // a missing bound falls back to a fixed default date: date1 for the
+ // start of the period, date2 for its end
+ if (null == date1) {
+ date1 = "1970/1/1";
+ }
+ if (null == date2) {
+ date2 = "2100/1/1";
+ }
if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
System.out.println("Wrong date: point 3");
return null;
int nbins = 5;
long dateStart = DateParsing(date1);
long dateEnd = DateParsing(date2);
- if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate) || dateStart > dateEnd)
+ if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd)
return null;
// clamp the requested period to [earlestDate, currentDate]: the start is
// raised to earlestDate and the end is lowered to currentDate
if (dateStart < earlestDate)
dateStart = earlestDate;
if (dateEnd > currentDate)
dateEnd = currentDate;
- System.out.println("CassandraRequester.extractExecutionTime: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
-
Calendar start = Calendar.getInstance();
start.setTime(new Date(dateStart));
Calendar end = Calendar.getInstance();
List<Integer> totalTime = new ArrayList<Integer>();
for (int i = 0; i < nbins; i++)
totalTime.add(i, 0);
- /*
+ List<Pair<String, String>> res = DBInstance.ReadProteinDataTable();
+ List<Pair<Date, Long>> numres = new ArrayList<Pair<Date, Long>>();
+
+ for (Pair<String, String> entry : res) {
+ SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
+ try {
+ Date jobstartdate = dateformatter.parse(entry.getElement0());
+ long date = jobstartdate.getTime();
+ if (dateStart <= date && date <= dateEnd) {
+ SimpleDateFormat datetimeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
+ Date jobstarttime = datetimeformatter.parse(entry.getElement0());
+ Date jobendtime = datetimeformatter.parse(entry.getElement1());
+ long diff = (jobendtime.getTime() - jobstarttime.getTime()) / 1000;
+ Pair<Date, Long> pair = new Pair<Date, Long>(jobstartdate, Long.valueOf(diff));
+ numres.add(pair);
+ }
+ } catch (ParseException e) {
+ e.printStackTrace();
+ }
+ }
+
for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
List<Integer> timeResult = new ArrayList<Integer>();
- SliceQuery<Long, String, String> result = HFactory.createSliceQuery(DBInstance.GetKeyspace(), LongSerializer.get(),
- StringSerializer.get(), StringSerializer.get());
- result.setColumnFamily("ProteinData");
- result.setKey(date.getTime());
- result.setRange(null, null, false, Integer.MAX_VALUE);
- QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
- List<HColumn<String, String>> col = columnSlice.get().getColumns();
- if (!col.isEmpty()) {
- Iterator<HColumn<String, String>> itCol = col.iterator();
- for (int i = 0; i < nbins; i++)
- timeResult.add(i, 0);
- // split all jobs into nbins bins
- while (itCol.hasNext()) {
- String id = itCol.next().getName();
- long lenResult = CountID(id);
+ for (int i = 0; i < nbins; i++)
+ timeResult.add(i, 0);
+ for (Pair<Date, Long> p : numres) {
+ if (date.equals(p.getElement0())) {
+ long lenResult = p.getElement1().longValue();
if (lenResult <= 30)
timeResult.set(0, timeResult.get(0) + 1);
else if (lenResult > 30 && lenResult <= 60)
timeResult.set(4, timeResult.get(4) + 1);
}
}
- for (int i = 0; i < nbins; i++)
- totalTime.set(i, totalTime.get(i) + timeResult.get(i));
- DataBase db = new DataBase();
- db.setTimeRez(timeResult);
- db.setDate(DateFormat(date.getTime()));
- query.add(db);
}
+ for (int i = 0; i < nbins; i++)
+ totalTime.set(i, totalTime.get(i) + timeResult.get(i));
+ DataBase db = new DataBase();
+ db.setTimeRez(timeResult);
+ db.setDate(DateFormat(date.getTime()));
+ query.add(db);
}
- */
+
DataBase db = new DataBase();
db.setTimeTotalExec(totalTime);
query.add(db);
return query;
}
- /*
+ /*
* convert String date into long date (milliseconds since the epoch start)
*/
private static long DateParsing(String datInput) {
return dateWorkSt;
}
- /*
- * convert String date:time into long date:time (miliseconds since the epoch start)
- */
- private static long TimeConvert(String datInput) {
- long dateWorkSt = 0;
- if (datInput == null) {
- return dateWorkSt;
- }
- SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss");
- try {
- dateWorkSt = formatter.parse(datInput).getTime();
- } catch (ParseException e) {
- e.printStackTrace();
- }
- return dateWorkSt;
- }
-
// convert a long timestamp (milliseconds since the epoch) into a date string
// formatted as dd/MM/yyyy
private static String DateFormat(long inDate) {
SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
- String dateString = datformat.format(new Date(inDate));
- return dateString;
- }
-
- /*
- * convert ???
- */
- public static String DateFormatYYMMDD(long indate) {
- SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd");
- String dateString = datformat.format(new Date(indate));
- return dateString;
+ return datformat.format(new Date(inDate));
}
/*
- * ???
- */
- public long CountID(String id) {
- /*
- SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(DBInstance.GetKeyspace(), StringSerializer.get(),
- StringSerializer.get(), StringSerializer.get());
- sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
- QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
- String datBegin = result.get().getColumnByName("DataBegin").getValue();
- String datEnd = result.get().getColumnByName("DataEnd").getValue();
-
- long datBeginLong = TimeConvert(datBegin);
- long datEndLong = TimeConvert(datEnd);
- return (datEndLong - datBeginLong) / 1000;
- */
- return 0;
- }
-
- /*
- * set earlest date and current dates.
- * earlestDate is static and should be set at the 1st call
- * currentDate should be re-calculated every time
+ * set the earliest and the current dates. earlestDate is static and is set
+ * only at the 1st call; currentDate is re-calculated on every call
*/
private static void SetDateRange() {
+ Calendar cal = Calendar.getInstance();
+ currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH));
// Calendar.MONTH is zero-based, hence the "+ 1" when the date string is built
// a zero earlestDate means it has not been set yet, so it is looked up only then
if (0 == earlestDate) {
- StatisticsProt sp = new StatisticsProt();
- earlestDate = sp.earliestDate();
+ CassandraRequester cr = new CassandraRequester();
+ earlestDate = cr.earliestDate();
System.out.println("Set earlest Date = " + earlestDate);
}
- Calendar cal = Calendar.getInstance();
- currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH));
}
public boolean isThisDateValid(String dateToValidate) {
* find the earliest date in the database
*/
public long earliestDate() {
- /*
- ArrayList<Long> dateSort = new ArrayList<Long>();
- int row_count = 10000;
- RangeSlicesQuery<Long, String, String> result = HFactory.createRangeSlicesQuery(DBInstance.GetKeyspace(), LongSerializer.get(),
- StringSerializer.get(), StringSerializer.get());
- result.setColumnFamily("ProteinData");
- result.setRange(null, null, false, Integer.MAX_VALUE);
- result.setRowCount(row_count);
- Long last_key = null;
- while (true) {
- result.setKeys(last_key, null);
- QueryResult<OrderedRows<Long, String, String>> columnSlice = result.execute();
- OrderedRows<Long, String, String> rows = columnSlice.get();
- Iterator<Row<Long, String, String>> rowsIterator = rows.iterator();
- while (rowsIterator.hasNext()) {
- Row<Long, String, String> row = rowsIterator.next();
- last_key = row.getKey();
- dateSort.add(last_key);
- }
- if (rows.getCount() < row_count)
- break;
- }
- Collections.sort(dateSort);
- return dateSort.get(0);
- */
- return 0;
// the value obtained from the DB connector is also stored in the static
// earlestDate field before it is returned
+ earlestDate = DBInstance.getEarliestDateInDB();
+ return earlestDate;
}
}