package compbio.cassandra;

import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;

import org.apache.log4j.Logger;

import com.datastax.driver.core.Row;
import com.datastax.driver.core.Session;
import com.datastax.driver.core.ResultSet;

/**
 * Read-only queries against the ProteinLog, ProteinData, ProteinRow and
 * JobDateInfo tables.
 *
 * Every query prints its command text and timing information to standard
 * output. Most methods return {@code null} (not an empty collection) when the
 * query yields no rows; this is kept so that existing callers' null checks
 * keep working.
 *
 * NOTE(review): the generic type arguments were missing from the reviewed text
 * and have been reconstructed from how the values are used (all pair elements
 * come from {@code Row.getString}) - confirm against the declarations of
 * {@code Pair} and the bean classes.
 */
public class CassandraReader {
    private Session session;
    private static final Logger log = Logger.getLogger(CassandraReader.class);

    /**
     * Creates a reader bound to the session supplied by
     * {@link CassandraNativeConnector#getSession()}.
     */
    public CassandraReader() {
        Session inis = CassandraNativeConnector.getSession();
        setSession(inis);
    }

    /**
     * Replaces the session used by all subsequent queries.
     *
     * @param s the session to use; must not be null (checked with an assertion
     *          only, i.e. when the JVM runs with assertions enabled)
     */
    public void setSession(Session s) {
        assert s != null;
        session = s;
    }

    /**
     * Reads the DataBegin/DataEnd columns of every row in ProteinLog.
     *
     * @return one pair (DataBegin, DataEnd) per row; an empty list when the
     *         table has no rows (this method never returns null)
     */
    public List<Pair<String, String>> ReadProteinDataTable() {
        final long startTime = System.currentTimeMillis();
        String com = "SELECT DataBegin,DataEnd FROM ProteinLog;";
        System.out.println("Command: " + com);
        ResultSet results = session.execute(com);
        final long queryTime = System.currentTimeMillis();
        List<Row> rows = results.all();
        printQueryTime(startTime, queryTime);

        List<Pair<String, String>> res = new ArrayList<Pair<String, String>>(rows.size());
        for (Row r : rows) {
            res.add(new Pair<String, String>(r.getString("DataBegin"), r.getString("DataEnd")));
        }
        printSummary(res.size(), startTime);
        return res;
    }

    /**
     * Reads the (JobID, Protein) pairs of ProteinData rows whose jobtime
     * equals the given value.
     *
     * @param day value compared against the jobtime column
     * @return one pair (JobID, Protein) per matching row, or {@code null} when
     *         no row matches
     */
    public List<Pair<String, String>> ReadProteinData(long day) {
        final long startTime = System.currentTimeMillis();
        // 'day' is a primitive long, so concatenating it cannot inject CQL.
        String com = "SELECT JobID, Protein FROM ProteinData WHERE jobtime = " + day + ";";
        System.out.println("Command: " + com);
        ResultSet results = session.execute(com);
        if (results.isExhausted()) {
            return null;
        }
        final long queryTime = System.currentTimeMillis();
        List<Row> rows = results.all();
        printQueryTime(startTime, queryTime);

        List<Pair<String, String>> res = new ArrayList<Pair<String, String>>(rows.size());
        for (Row r : rows) {
            res.add(new Pair<String, String>(r.getString("JobID"), r.getString("Protein")));
        }
        printSummary(res.size(), startTime);
        return res;
    }

    /**
     * Reads the job counters stored in JobDateInfo for one jobday value.
     *
     * Only the first returned row is used; if the query returns more than one
     * row a warning is logged.
     *
     * @param queryDate value compared against the jobday column
     * @return the counters in the order Total, TotalOK, TotalStopped,
     *         TotalError, TotalTimeOut, or {@code null} when no row matches
     */
    public List<Long> ReadDateTable(long queryDate) {
        ResultSet results = session.execute("SELECT * FROM JobDateInfo WHERE jobday = " + queryDate + ";");
        if (results.isExhausted()) {
            return null;
        }
        Row therow = results.one();
        List<Long> res = new ArrayList<Long>(5);
        res.add(therow.getLong("Total"));
        res.add(therow.getLong("TotalOK"));
        res.add(therow.getLong("TotalStopped"));
        res.add(therow.getLong("TotalError"));
        res.add(therow.getLong("TotalTimeOut"));
        if (!results.isExhausted()) {
            Date date = new Date(queryDate);
            log.warn("CassandraReader.ReadDateTable: date row for " + date.toString() + " (" + queryDate
                    + ") duplicated ");
        }
        return res;
    }

    /**
     * Reads all ProteinRow entries whose Protein column equals the given
     * sequence.
     *
     * @param queryProtein the exact protein sequence to look up
     * @return one bean per matching row (sequence, JobID, Predictions map), or
     *         {@code null} when no row matches
     */
    public List<AnnotatedProteinSequenceBean> ReadWholeSequence(String queryProtein) {
        final long startTime = System.currentTimeMillis();
        String com = "SELECT JobID, Predictions FROM ProteinRow WHERE Protein = " + cqlString(queryProtein) + ";";
        System.out.println("Command: " + com);
        ResultSet results = session.execute(com);
        if (results.isExhausted()) {
            return null;
        }
        final long queryTime = System.currentTimeMillis();
        List<Row> rows = results.all();
        printQueryTime(startTime, queryTime);
        System.out.println(" rows analysed, " + rows.size());

        List<AnnotatedProteinSequenceBean> res = new ArrayList<AnnotatedProteinSequenceBean>(rows.size());
        for (Row r : rows) {
            res.add(new AnnotatedProteinSequenceBean(queryProtein, r.getString("JobID"),
                    r.getMap("Predictions", String.class, String.class)));
        }
        printSummary(res.size(), startTime);
        return res;
    }

    /**
     * Reads the jobs submitted from one ip that finished with status "OK".
     *
     * Each accepted pair is also echoed to standard output.
     *
     * @param ip value compared against the ip column of ProteinLog
     * @return one pair (JobID, Protein) per row whose FinalStatus is "OK"
     *         (possibly an empty list), or {@code null} when the ip has no
     *         rows at all
     */
    public List<Pair<String, String>> ReadIpWithJobs(String ip) {
        final long startTime = System.currentTimeMillis();
        String com = "SELECT JobID, Protein, FinalStatus FROM ProteinLog WHERE ip = " + cqlString(ip) + ";";
        System.out.println("Command: " + com);
        ResultSet results = session.execute(com);
        if (results.isExhausted()) {
            return null;
        }
        final long queryTime = System.currentTimeMillis();
        List<Row> rows = results.all();
        List<Pair<String, String>> res = new ArrayList<Pair<String, String>>();
        printQueryTime(startTime, queryTime);
        System.out.println(" rows analysed, " + rows.size());

        for (Row r : rows) {
            // Constant-first comparison: a null FinalStatus is simply "not OK".
            if ("OK".equals(r.getString("FinalStatus"))) {
                Pair<String, String> pair = new Pair<String, String>(r.getString("JobID"), r.getString("Protein"));
                System.out.println(pair.getElement0());
                System.out.println(pair.getElement1());
                res.add(pair);
            }
        }
        printSummary(res.size(), startTime);
        return res;
    }

    /**
     * Finds all ProteinRow entries whose Protein column contains the given
     * fragment.
     *
     * The whole table is fetched and filtered on the client. The fragment is
     * matched literally (it is not interpreted as a regular expression).
     *
     * @param queryProtein the sequence fragment to search for; must not be null
     * @return one bean per row containing the fragment (possibly an empty
     *         list), or {@code null} when the table has no rows
     */
    public List<AnnotatedProteinSequenceBean> ReadPartOfSequence(String queryProtein) {
        final long startTime = System.currentTimeMillis();
        String com = "SELECT * FROM ProteinRow;";
        System.out.println("Command: " + com);
        ResultSet results = session.execute(com);
        if (results.isExhausted()) {
            return null;
        }
        final long queryTime = System.currentTimeMillis();
        List<Row> rows = results.all();
        printQueryTime(startTime, queryTime);
        System.out.println(" rows analysed, " + rows.size());

        List<AnnotatedProteinSequenceBean> res = new ArrayList<AnnotatedProteinSequenceBean>();
        for (Row r : rows) {
            String prot = r.getString("Protein");
            if (prot != null && prot.contains(queryProtein)) {
                res.add(new AnnotatedProteinSequenceBean(prot, r.getString("JobID"),
                        r.getMap("Predictions", String.class, String.class)));
            }
        }
        printSummary(res.size(), startTime);
        return res;
    }

    /**
     * Counts how many ProteinRow rows exist for each protein sequence.
     *
     * @return map from Protein value to its number of rows, or {@code null}
     *         when the table has no rows
     */
    public Map<String, Integer> ReadProteinSequenceByCounter() {
        return countRowsByColumn("SELECT Protein, JobID FROM ProteinRow;", "Protein");
    }

    /**
     * Counts how many ProteinLog rows exist for each ip.
     *
     * @return map from ip value to its number of rows, or {@code null} when
     *         the table has no rows
     */
    public Map<String, Integer> ReadIpByCounter() {
        return countRowsByColumn("SELECT JobID, ip FROM ProteinLog;", "ip");
    }

    /**
     * Assembles the log record of one job from ProteinLog and ProteinRow.
     *
     * Only the first row of each query is used.
     *
     * @param jobid value compared against the JobID column of both tables
     * @return the combined record, or {@code null} when either table has no
     *         row for the job
     */
    public StructureJobLog ReadJobLog(String jobid) {
        final long startTime = System.currentTimeMillis();
        String jobLiteral = cqlString(jobid);
        String com = "SELECT * FROM ProteinLog WHERE JobID = " + jobLiteral + ";";
        System.out.println("Command: " + com);
        ResultSet results = session.execute(com);
        if (results.isExhausted()) {
            return null;
        }
        final long queryTime = System.currentTimeMillis();
        Row row = results.one();

        String com1 = "SELECT * FROM ProteinRow WHERE JobID = " + jobLiteral + " ALLOW FILTERING;";
        System.out.println("Command: " + com1);
        ResultSet results1 = session.execute(com1);
        if (results1.isExhausted()) {
            return null;
        }
        Row row1 = results1.one();

        StructureJobLog res = new StructureJobLog(row.getString("Protein"), row.getString("JobID"),
                row.getString("DataBegin"), row.getString("DataEnd"), row.getString("ip"),
                row1.getMap("Predictions", String.class, String.class));
        printQueryTime(startTime, queryTime);
        final long endTime = System.currentTimeMillis();
        System.out.println(" rows analysed, execution time is " + (endTime - startTime) + " msec");
        return res;
    }

    /**
     * Runs the query and counts how often each value of the given column
     * occurs in the result.
     */
    private Map<String, Integer> countRowsByColumn(String com, String column) {
        final long startTime = System.currentTimeMillis();
        System.out.println("Command: " + com);
        ResultSet results = session.execute(com);
        if (results.isExhausted()) {
            return null;
        }
        final long queryTime = System.currentTimeMillis();
        List<Row> rows = results.all();
        printQueryTime(startTime, queryTime);
        System.out.println(" rows analysed, " + rows.size());

        Map<String, Integer> res = new HashMap<String, Integer>();
        int c = 0;
        for (Row r : rows) {
            String key = r.getString(column);
            Integer seen = res.get(key);
            res.put(key, seen == null ? 1 : seen + 1);
            ++c;
        }
        printSummary(c, startTime);
        return res;
    }

    /**
     * Renders a value as a single-quoted CQL string literal, doubling any
     * embedded single quote so the value cannot terminate the literal.
     * A null value is rendered as the text 'null', as plain concatenation did.
     */
    private static String cqlString(String value) {
        return "'" + String.valueOf(value).replace("'", "''") + "'";
    }

    /** Prints the time elapsed between issuing the query and its return. */
    private static void printQueryTime(long startTime, long queryTime) {
        System.out.println("Query time is " + (queryTime - startTime) + " msec");
    }

    /** Prints the processed-row count and the total time since startTime. */
    private static void printSummary(int count, long startTime) {
        final long endTime = System.currentTimeMillis();
        System.out.println(count + " rows analysed, execution time is " + (endTime - startTime) + " msec");
    }
}