From 561c8c711c21527404b6bddb05d423aa3a1f6547 Mon Sep 17 00:00:00 2001 From: Sasha Sherstnev Date: Tue, 5 Nov 2013 14:51:38 +0000 Subject: [PATCH] Add Jpred archive table --- .../cassandra/CassandraNativeConnector.java | 60 ++++++++++++++++---- datadb/compbio/cassandra/JpredParserHTTP.java | 7 +-- datadb/compbio/cassandra/JpredParserLocalFile.java | 2 +- server/compbio/statistic/CassandraRequester.java | 2 +- 4 files changed, 53 insertions(+), 18 deletions(-) diff --git a/datadb/compbio/cassandra/CassandraNativeConnector.java b/datadb/compbio/cassandra/CassandraNativeConnector.java index be90b05..7109c78 100644 --- a/datadb/compbio/cassandra/CassandraNativeConnector.java +++ b/datadb/compbio/cassandra/CassandraNativeConnector.java @@ -2,7 +2,6 @@ package compbio.cassandra; import java.io.IOException; import java.util.Calendar; -import java.util.HashMap; import java.util.List; import java.util.ArrayList; @@ -16,6 +15,7 @@ import com.datastax.driver.core.ResultSet; public class CassandraNativeConnector { private static Cluster cluster; private static Session session; + /* * connect to the cluster and look weather the dababase has any data inside */ @@ -36,6 +36,8 @@ public class CassandraNativeConnector { session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinLog " + "(JobID ascii, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, ExecutionStatus ascii, Protein ascii, PRIMARY KEY(JobID));"); session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinData (jobtime bigint, JobID ascii, Protein ascii, PRIMARY KEY(JobID));"); + session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.JpredArchive " + + "(JobID ascii, Protein varchar, IP ascii, StartTime bigint, ExecTime int, alignment map, predictions map, archive blob, LOG varchar, PRIMARY KEY(JobID));"); session.execute("CREATE INDEX IF NOT EXISTS ProteinSeq ON ProteinKeyspace.ProteinRow (protein);"); session.execute("CREATE INDEX IF NOT EXISTS JobDateStamp ON ProteinKeyspace.ProteinData (jobtime);"); @@ -76,7 +78,7 @@ public class CassandraNativeConnector { /* * inserting data into the db */ - public void InsertData(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx, String statusFinal, + public void FormQueryTables(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx, String statusFinal, String protein, List predictions) { String check1 = "SELECT * FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';"; @@ -105,13 +107,51 @@ public class CassandraNativeConnector { String check2 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "';"; ResultSet results2 = session.execute(check2); if (results2.isExhausted()) { - String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('" - + protein + "','" + jobid + "',{" + final_prediction + "});"; + String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('" + protein + "','" + + jobid + "',{" + final_prediction + "});"; session.execute(com3); } } } + public void ArchiveData(long starttime, int exectime, String ip, String jobid, String statusEx, String statusFinal, + String protein, List predictions, List seqs, String LogFile) { + + String check1 = "SELECT * FROM ProteinKeyspace.JpredArchive WHERE JobID = '" + jobid + "';"; + ResultSet results1 = session.execute(check1); + if (results1.isExhausted()) { + String allpredictions = ""; + for (FastaSequence pred : predictions) { + String predictionname = pred.getId(); + String prediction = pred.getSequence().replaceAll("\n", ""); + allpredictions += "'" + predictionname + "':'" + prediction + "',"; + } + String final_allpredictions = ""; + if (null != allpredictions) { + final_allpredictions = allpredictions.substring(0, allpredictions.length() - 1); + } + String alignment = ""; + for (FastaSequence seq : seqs) { + String predictionname = seq.getId(); + String prediction = seq.getSequence().replaceAll("\n", ""); + alignment += "'" + predictionname + "':'" + prediction + "',"; + } + String final_alignment = ""; + if (null != allpredictions) { + final_alignment = alignment.substring(0, allpredictions.length() - 1); + } + + String com1 = "INSERT INTO ProteinKeyspace.JpredArchive " + + "(JobID, Protein, IP, StartTime, ExecTime, alignment, predictions, LOG))" + + " VALUES ('" + + jobid + "','" + protein + "','" + ip + "'," + starttime + "," + exectime + + "',[" + final_allpredictions + "],[" + final_alignment + "],'" + LogFile + "]);"; + session.execute(com1); + } + } + + + /* * getting data from the db */ @@ -122,17 +162,17 @@ public class CassandraNativeConnector { ResultSet results = session.execute(com); final long queryTime = System.currentTimeMillis(); List rows = results.all(); - System.out.println ("Query time is " + (queryTime - startTime) + " msec"); + System.out.println("Query time is " + (queryTime - startTime) + " msec"); List> res = new ArrayList>(); int c = 0; for (Row r : rows) { - Pair pair = new Pair(r.getString("DataBegin"),r.getString("DataEnd")); + Pair pair = new Pair(r.getString("DataBegin"), r.getString("DataEnd")); res.add(pair); ++c; } final long endTime = System.currentTimeMillis(); - System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); + System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); return res; } @@ -145,7 +185,7 @@ public class CassandraNativeConnector { System.out.println("Command: " + com); ResultSet results = session.execute(com); final long queryTime = System.currentTimeMillis(); - System.out.println ("Query time is " + (queryTime - startTime) + " msec"); + System.out.println("Query time is " + (queryTime - startTime) + " msec"); Calendar cal = Calendar.getInstance(); long res = cal.getTimeInMillis(); @@ -159,8 +199,8 @@ public class CassandraNativeConnector { ++c; } final long endTime = System.currentTimeMillis(); - System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); + System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); return res; } - + } diff --git a/datadb/compbio/cassandra/JpredParserHTTP.java b/datadb/compbio/cassandra/JpredParserHTTP.java index b780ddf..51fc8e3 100644 --- a/datadb/compbio/cassandra/JpredParserHTTP.java +++ b/datadb/compbio/cassandra/JpredParserHTTP.java @@ -100,14 +100,9 @@ public class JpredParserHTTP implements JpredParser { } catch (ParseException e) { e.printStackTrace(); } - cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs); + cc.FormQueryTables(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs); ++countinsertions; ++njobs; - // flush every 50 insertions - // if (0 == countinsertions % 50) { - // cc.flushData(); - // njobs -= 50; - // } } } catch (IOException e) { e.printStackTrace(); diff --git a/datadb/compbio/cassandra/JpredParserLocalFile.java b/datadb/compbio/cassandra/JpredParserLocalFile.java index 281a20b..54e50ab 100644 --- a/datadb/compbio/cassandra/JpredParserLocalFile.java +++ b/datadb/compbio/cassandra/JpredParserLocalFile.java @@ -104,7 +104,7 @@ public class JpredParserLocalFile implements JpredParser { } catch (ParseException e) { e.printStackTrace(); } - cc.InsertData(insertdate, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs); + cc.FormQueryTables(insertdate, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs); ++countinsertions; ++njobs; } diff --git a/server/compbio/statistic/CassandraRequester.java b/server/compbio/statistic/CassandraRequester.java index 9885d64..83c8a7a 100755 --- a/server/compbio/statistic/CassandraRequester.java +++ b/server/compbio/statistic/CassandraRequester.java @@ -164,7 +164,7 @@ public class CassandraRequester { * find the earliest date in the database */ public long earliestDate() { - earlestDate = DBInstance.getEarliestDateInDB(); + earlestDate = DBInstance.getEarliestDateInDB(); return earlestDate; } } -- 1.7.10.2