package compbio.cassandra;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-
import org.apache.log4j.Logger;
-import com.datastax.driver.core.Row;
-import com.datastax.driver.core.Session;
import com.datastax.driver.core.ResultSet;
-
-import compbio.engine.JobStatus;
+import com.datastax.driver.core.Session;
public class CassandraReader {
+ private static long earlestDate = 0;
private Session session;
private static Logger log = Logger.getLogger(CassandraNativeConnector.class);
assert s != null;
session = s;
}
-
- /*
- * getting data from the db
- */
- public List<Pair<String, String>> ReadProteinDataTable() {
- final long startTime = System.currentTimeMillis();
- String com = "SELECT DataBegin,DataEnd FROM ProteinLog;";
+
+	/**
+	 * Builds a CQL SELECT statement from the given pieces, prints it to standard
+	 * output, executes it on this reader's session and returns the result set
+	 * untouched.
+	 *
+	 * NOTE(review): all three arguments are concatenated into the statement text
+	 * verbatim, so callers must only pass trusted values.
+	 *
+	 * @param column
+	 *            column list placed after SELECT (for example "*" or "JobID, ip")
+	 * @param table
+	 *            table name placed after FROM
+	 * @param condition
+	 *            optional filter; null or blank means no WHERE clause. A leading
+	 *            "WHERE " keyword and a trailing ";" are tolerated.
+	 * @return the result set returned by session.execute for the built statement
+	 */
+	public ResultSet CassandraQuery(String column, String table, String condition) {
+		String com = "SELECT " + column + " FROM " + table;
+		// Previously the condition argument was silently dropped; honour it here.
+		String filter = (condition == null) ? "" : condition.trim();
+		if (filter.endsWith(";")) {
+			// The terminator is appended below, so drop one supplied by the caller.
+			filter = filter.substring(0, filter.length() - 1).trim();
+		}
+		if (!filter.isEmpty()) {
+			// Accept the clause with or without its own WHERE keyword.
+			boolean hasKeyword = filter.regionMatches(true, 0, "WHERE ", 0, 6);
+			com += hasKeyword ? " " + filter : " WHERE " + filter;
+		}
+		com += ";";
System.out.println("Command: " + com);
ResultSet results = session.execute(com);
- final long queryTime = System.currentTimeMillis();
- List<Row> rows = results.all();
- System.out.println("Query time is " + (queryTime - startTime) + " msec");
-
- List<Pair<String, String>> res = new ArrayList<Pair<String, String>>();
- int c = 0;
- for (Row r : rows) {
- Pair<String, String> pair = new Pair<String, String>(r.getString("DataBegin"), r.getString("DataEnd"));
- res.add(pair);
- ++c;
- }
- final long endTime = System.currentTimeMillis();
- System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
- return res;
+		return results;
}
-
+
/*
- * getting data from the db
- */
- public DateBean ReadProteinData(long day, String date) {
- String com = "SELECT JobID, Protein FROM ProteinData WHERE jobtime = " + day + ";";
- System.out.println("Command: " + com);
- ResultSet results = session.execute(com);
- if (results.isExhausted())
- return null;
- List<Row> rows = results.all();
- DateBean res = new DateBean(date);
- for (Row r : rows) {
- res.setJobidAndSeq(r.getString("JobID"), r.getString("Protein"));
- }
- return res;
- }
-
- /**
- * getting data from the db
+ * Fetch the earliest date in the database via CassandraNativeConnector, store it in the static field and return it
*/
- public DateBean ReadFailedJobs(long day, String date, JobStatus status) {
- // FailLog (jobtime, JobID, ExecTime, ip, FinalStatus)
- String com = "SELECT JobID FROM FailLog WHERE jobtime = " + day + " and FinalStatus = '" + status.name() + "';";
- ResultSet results = session.execute(com);
- if (results.isExhausted())
- return null;
- List<Row> rows = results.all();
- DateBean res = new DateBean(date);
- for (Row r : rows) {
- String jobid = r.getString("JobID");
- String com1 = "SELECT Protein FROM ProteinLog WHERE JobID = '" + jobid + "';";
- System.out.println("Command: " + com1);
- ResultSet results2 = session.execute(com1);
- List<Row> jrows = results2.all();
- if (1 == jrows.size()) {
- String protein = jrows.get(0).getString("Protein");
- res.setJobidAndSeq(jobid, protein);
- }
- }
- return res;
- }
-
- /*
- * getting data from the db JobDateInfo
- */
- public Total ReadDateTable(long queryDate) {
- ResultSet results = session.execute("SELECT * FROM JobDateInfo WHERE jobday = " + queryDate + ";");
- if (results.isExhausted())
- return null;
- Row therow = results.one();
- Total res = new Total(therow.getLong("Total"), therow.getLong("TotalOK"), therow.getLong("TotalStopped"),
- therow.getLong("TotalError"), therow.getLong("TotalTimeOut"));
- if (!results.isExhausted()) {
- Date date = new Date(queryDate);
- log.warn("CassandraReader.ReadDateTable: date row for " + date.toString() + " (" + queryDate + ") duplicated ");
- }
- return res;
- }
-
- /*
- * getting whole protein sequence from the db ProteinRow
- */
- public List<ProteinBean> ReadWholeSequence(String queryProtein) {
- final long startTime = System.currentTimeMillis();
- String com = "SELECT JobID, Predictions FROM ProteinRow WHERE Protein = '" + queryProtein + "';";
- System.out.println("Command: " + com);
- ResultSet results = session.execute(com);
- if (results.isExhausted())
- return null;
- final long queryTime = System.currentTimeMillis();
- List<Row> rows = results.all();
- System.out.println("first size : " + rows.size());
- System.out.println("Query time is " + (queryTime - startTime) + " msec");
- System.out.println(" rows analysed, " + rows.size());
- List<ProteinBean> res = new ArrayList<ProteinBean>();
- ProteinBean structure = new ProteinBean(queryProtein, rows.get(0).getMap("Predictions", String.class, String.class));
- System.out.println("second size : " + rows.size());
- int c = 0;
- for (Row r : rows) {
- structure.setJobid(r.getString("JobID"));
- ++c;
- }
- res.add(structure);
- final long endTime = System.currentTimeMillis();
- System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
- return res;
- }
-
- /*
- * getting jobs by ip
- */
- public Map<String, String[]> ReadIpWithJobs(String ip) {
- final long startTime = System.currentTimeMillis();
- String com = "SELECT JobID, Protein, FinalStatus, DataBegin FROM ProteinLog WHERE ip = '" + ip + "';";
- System.out.println("Command: " + com);
- ResultSet results = session.execute(com);
- if (results.isExhausted())
- return null;
- final long queryTime = System.currentTimeMillis();
- List<Row> rows = results.all();
- Map<String, String[]> res = new HashMap<String, String[]>();
- System.out.println("Query time is " + (queryTime - startTime) + " msec");
- System.out.println(" rows analysed, " + rows.size());
- int c = 0;
- for (Row r : rows) {
- if (r.getString("FinalStatus").equals("OK")) {
- String date = r.getString("DataBegin");
- res.put(r.getString("JobID"), new String[] { date.substring(0, date.indexOf(":")), r.getString("Protein") });
- ++c;
- }
- }
- final long endTime = System.currentTimeMillis();
- System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
- return res;
+	public static long earliestDate() {
+		// Ask the connector for the value, remember it in the class-level field,
+		// then hand the same value back to the caller.
+		final long fetched = CassandraNativeConnector.getEarliestDateInDB();
+		earlestDate = fetched;
+		return fetched;
}
- /*
- * getting part of protein sequence from the db ProteinRow
- */
- public List<ProteinBean> ReadPartOfSequence(String queryProtein) {
- final long startTime = System.currentTimeMillis();
- String com = "SELECT * FROM ProteinRow;";
- System.out.println("Command: " + com);
- ResultSet results = session.execute(com);
- if (results.isExhausted())
- return null;
- final long queryTime = System.currentTimeMillis();
- List<Row> rows = results.all();
- System.out.println("Query time is " + (queryTime - startTime) + " msec");
- System.out.println(" rows analysed, " + rows.size());
- List<ProteinBean> res = new ArrayList<ProteinBean>();
- int c = 0;
- for (Row r : rows) {
- String prot = r.getString("Protein");
- if (prot.matches("(.*)" + queryProtein + "(.*)")) {
- ProteinBean structure = new ProteinBean(prot, r.getMap("Predictions", String.class, String.class));
- structure.setJobid(r.getString("JobID"));
- res.add(structure);
- ++c;
- }
- }
- final long endTime = System.currentTimeMillis();
- System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
- return res;
- }
-
- /*
- * getting protein sequence from the db ProteinRow
- */
- public Map<String, String> ReadProtein() {
- final long startTime = System.currentTimeMillis();
- String com = "SELECT * FROM ProteinRow;";
- System.out.println("Command: " + com);
- ResultSet results = session.execute(com);
- if (results.isExhausted())
- return null;
- final long queryTime = System.currentTimeMillis();
- List<Row> rows = results.all();
- System.out.println("Query time is " + (queryTime - startTime) + " msec");
- System.out.println(" rows analysed, " + rows.size());
- Map<String, String> res = new HashMap<String, String>();
- int c = 0;
- for (Row r : rows) {
- String prot = r.getString("Protein");
- String prediction = findJnetpred(r.getMap("Predictions", String.class, String.class));
- if (prot != null || prediction != null)
- res.put(prot, prediction);
- }
- final long endTime = System.currentTimeMillis();
- System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
- return res;
- }
-
- private String findJnetpred (Map<String,String> pred) {
- if (pred != null) {
- if (pred.containsKey("jnetpred"))
- return pred.get("jnetpred");
- }
- return null;
- }
- /*
- * getting protein sequences by counter
- */
- public Map<String, Integer> ReadProteinSequenceByCounter() {
- final long startTime = System.currentTimeMillis();
- String com = "SELECT Protein, JobID FROM ProteinRow;";
- System.out.println("Command: " + com);
- ResultSet results = session.execute(com);
- if (results.isExhausted())
- return null;
- final long queryTime = System.currentTimeMillis();
- List<Row> rows = results.all();
- System.out.println("Query time is " + (queryTime - startTime) + " msec");
- System.out.println(" rows analysed, " + rows.size());
- Map<String, Integer> res = new HashMap<String, Integer>();
- int c = 0;
- for (Row r : rows) {
- String protein = r.getString("Protein");
- String id = r.getString("JobID");
- if (res.containsKey(protein))
- res.put(protein, res.get(protein) + 1);
- else
- res.put(protein, 1);
- }
- final long endTime = System.currentTimeMillis();
- System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
- return res;
- }
-
- /*
- * getting ip by counter
- */
- public Map<String, Integer> ReadIpByCounter() {
- final long startTime = System.currentTimeMillis();
- String com = "SELECT JobID, ip FROM ProteinLog;";
- System.out.println("Command: " + com);
- ResultSet results = session.execute(com);
- if (results.isExhausted())
- return null;
- final long queryTime = System.currentTimeMillis();
- List<Row> rows = results.all();
- System.out.println("Query time is " + (queryTime - startTime) + " msec");
- System.out.println(" rows analysed, " + rows.size());
- Map<String, Integer> res = new HashMap<String, Integer>();
- int c = 0;
- for (Row r : rows) {
- String ip = r.getString("ip");
- String id = r.getString("JobID");
- if (res.containsKey(ip))
- res.put(ip, res.get(ip) + 1);
- else
- res.put(ip, 1);
- }
- final long endTime = System.currentTimeMillis();
- System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
- return res;
- }
-
- /*
- * getting log info for jobid
- */
- public JobBean ReadJobLog(String jobid) {
- final long startTime = System.currentTimeMillis();
- String com = "SELECT * FROM ProteinLog WHERE JobID = '" + jobid + "';";
- System.out.println("Command: " + com);
- ResultSet results = session.execute(com);
- if (results.isExhausted())
- return null;
- final long queryTime = System.currentTimeMillis();
- Row row = results.one();
- String com1 = "SELECT * FROM ProteinRow WHERE JobID = '" + jobid + "' ALLOW FILTERING;";
- System.out.println("Command: " + com1);
- ResultSet results1 = session.execute(com1);
- if (results1.isExhausted())
- return null;
- Row row1 = results1.one();
- JobBean res = new JobBean(row.getString("Protein"), row.getString("JobID"), row.getString("DataBegin"), row.getString("DataEnd"),
- row.getString("ip"), row1.getMap("Predictions", String.class, String.class));
- System.out.println("Query time is " + (queryTime - startTime) + " msec");
- final long endTime = System.currentTimeMillis();
- System.out.println(" rows analysed, execution time is " + (endTime - startTime) + " msec");
- return res;
- }
+
}