create DateBean, UserBean, TotalBean
[proteocache.git] / datadb / compbio / cassandra / CassandraReader.java
index d2d2e1b..69f7c08 100644 (file)
 package compbio.cassandra;
 
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Map;
+
+import org.apache.log4j.Logger;
+
+import com.datastax.driver.core.Row;
 import com.datastax.driver.core.Session;
+import com.datastax.driver.core.ResultSet;
+
+public class CassandraReader {
+       // Session used by every query method of this reader; assigned through setSession().
+       private Session session;
+       // The logger is registered under this class so that messages emitted here are
+       // attributed to CassandraReader rather than to CassandraNativeConnector.
+       private static Logger log = Logger.getLogger(CassandraReader.class);
+
+       /**
+        * Creates a reader bound to the session returned by
+        * CassandraNativeConnector.getSession().
+        */
+       public CassandraReader() {
+               setSession(CassandraNativeConnector.getSession());
+       }
+
+       public void setSession(Session s) {
+               assert s != null;
+               session = s;
+       }
 
-public interface CassandraReader {
+       /*
+        * getting data from the db
+        */
+       public List<Pair<String, String>> ReadProteinDataTable() {
+               final long startTime = System.currentTimeMillis();
+               String com = "SELECT DataBegin,DataEnd FROM ProteinLog;";
+               System.out.println("Command: " + com);
+               ResultSet results = session.execute(com);
+               final long queryTime = System.currentTimeMillis();
+               List<Row> rows = results.all();
+               System.out.println("Query time is " + (queryTime - startTime) + " msec");
+
+               List<Pair<String, String>> res = new ArrayList<Pair<String, String>>();
+               int c = 0;
+               for (Row r : rows) {
+                       Pair<String, String> pair = new Pair<String, String>(r.getString("DataBegin"), r.getString("DataEnd"));
+                       res.add(pair);
+                       ++c;
+               }
+               final long endTime = System.currentTimeMillis();
+               System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
+               return res;
+       }
+
+       /*
+        * getting data from the db
+        */
+       public DateBean ReadProteinData(long day, String date) {
+               final long startTime = System.currentTimeMillis();
+               String com = "SELECT JobID, Protein FROM ProteinData WHERE jobtime = " + day + ";";
+               System.out.println("Command: " + com);
+               ResultSet results = session.execute(com);
+               if (results.isExhausted())
+                       return null;
+               final long queryTime = System.currentTimeMillis();
+               List<Row> rows = results.all();
+               System.out.println("Query time is " + (queryTime - startTime) + " msec");
+               DateBean res = new DateBean(date);
+               int c = 0;
+               for (Row r : rows) {
+                       res.setJobidAndSeq(r.getString("JobID"), r.getString("Protein"));
+                       ++c;
+               }
+               final long endTime = System.currentTimeMillis();
+               System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
+               return res;
+       }
+       /*
+        * getting data from the db JobDateInfo
+        */
+       public Total ReadDateTable(long queryDate) {
+               ResultSet results = session.execute("SELECT * FROM JobDateInfo WHERE jobday = " + queryDate + ";");
+               if (results.isExhausted())
+                       return null;
+               Row therow = results.one();
+               Total res = new Total(therow.getLong("Total"), therow.getLong("TotalOK"), therow.getLong("TotalStopped"), 
+                               therow.getLong("TotalError"), therow.getLong("TotalTimeOut"));
+               if (!results.isExhausted()) {
+                       Date date = new Date (queryDate);
+                       log.warn("CassandraReader.ReadDateTable: date row for " + date.toString () + " ("+ queryDate + ") duplicated ");
+               }
+               return res;
+       }
 
        /*
-        * Defines a source file with metainformation of Jpred Jobs
-        **/
-       void setSession (Session s);
+        * getting whole protein sequence from the db ProteinRow
+        */
+       public List<ProteinBean> ReadWholeSequence(String queryProtein) {
+               final long startTime = System.currentTimeMillis();
+               String com = "SELECT JobID, Predictions FROM ProteinRow WHERE Protein = '" + queryProtein + "';";
+               System.out.println("Command: " + com);
+               ResultSet results = session.execute(com);
+               if (results.isExhausted())
+                       return null;
+               final long queryTime = System.currentTimeMillis();
+               List<Row> rows = results.all();
+               System.out.println("first size : " + rows.size());
+               System.out.println("Query time is " + (queryTime - startTime) + " msec");
+               System.out.println(" rows analysed,  " + rows.size());
+               List<ProteinBean> res = new ArrayList<ProteinBean>();
+               ProteinBean structure = new ProteinBean(queryProtein, rows.get(0).getMap(
+                               "Predictions", String.class, String.class));
+               System.out.println("second size : " + rows.size());
+               int c = 0;
+               for (Row r : rows) {
+                       structure.setJobid(r.getString("JobID"));                       
+                       ++c;
+               }
+               res.add(structure);
+               final long endTime = System.currentTimeMillis();
+               System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
+               return res;
+       }
+
        
        /*
-        * Makes real parsing of the source file
-        **/
-       /*void getResults();*/
+        * getting jobs by ip
+        */
+       public Map<String, String[]> ReadIpWithJobs(String ip) {
+               final long startTime = System.currentTimeMillis();
+               String com = "SELECT JobID, Protein, FinalStatus, DataBegin FROM ProteinLog WHERE ip = '" + ip + "';";
+               System.out.println("Command: " + com);
+               ResultSet results = session.execute(com);
+               if (results.isExhausted())
+                       return null;
+               final long queryTime = System.currentTimeMillis();
+               List<Row> rows = results.all();
+               Map<String, String[]> res = new HashMap<String, String[]>();
+               System.out.println("Query time is " + (queryTime - startTime) + " msec");
+               System.out.println(" rows analysed,  " + rows.size());
+               int c = 0;
+               for (Row r : rows) {
+                       if (r.getString("FinalStatus").equals("OK")) { 
+                               String date = r.getString("DataBegin");
+                               res.put(r.getString("JobID"), new String[] {date.substring(0, date.indexOf(":")), r.getString("Protein")});
+                               ++c;
+                       }
+               }
+               final long endTime = System.currentTimeMillis();
+               System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
+               return res;
+       }
+
+       /*
+        * getting part of protein sequence from the db ProteinRow
+        */
+       public List<ProteinBean> ReadPartOfSequence(String queryProtein) {
+               final long startTime = System.currentTimeMillis();
+               String com = "SELECT * FROM ProteinRow;";
+               System.out.println("Command: " + com);
+               ResultSet results = session.execute(com);
+               if (results.isExhausted())
+                       return null;
+               final long queryTime = System.currentTimeMillis();
+               List<Row> rows = results.all();
+               System.out.println("Query time is " + (queryTime - startTime) + " msec");
+               System.out.println(" rows analysed,  " + rows.size());
+               List<ProteinBean> res = new ArrayList<ProteinBean>();
+               int c = 0;
+               for (Row r : rows) {
+                       String prot = r.getString("Protein");
+                       if (prot.matches("(.*)" + queryProtein + "(.*)")) {
+                               ProteinBean structure = new ProteinBean(prot,  r.getMap("Predictions", String.class, String.class));
+                               structure.setJobid(r.getString("JobID"));
+                               res.add(structure);
+                               ++c;
+                       }
+               }
+               final long endTime = System.currentTimeMillis();
+               System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
+               return res;
+       }
+
+       /*
+        * getting protein sequences by counter
+        */
+       public Map<String, Integer> ReadProteinSequenceByCounter() {
+               final long startTime = System.currentTimeMillis();
+               String com = "SELECT Protein, JobID FROM ProteinRow;";
+               System.out.println("Command: " + com);
+               ResultSet results = session.execute(com);
+               if (results.isExhausted())
+                       return null;
+               final long queryTime = System.currentTimeMillis();
+               List<Row> rows = results.all();
+               System.out.println("Query time is " + (queryTime - startTime) + " msec");
+               System.out.println(" rows analysed,  " + rows.size());
+               Map<String, Integer> res = new HashMap<String, Integer>();
+               int c = 0;
+               for (Row r : rows) {
+                       String protein = r.getString("Protein");
+                       String id = r.getString("JobID");
+                       if (res.containsKey(protein))
+                               res.put(protein, res.get(protein) + 1);
+                       else
+                               res.put(protein, 1);
+               }
+               final long endTime = System.currentTimeMillis();
+               System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
+               return res;
+       }
+
+       /*
+        * getting ip by counter
+        */
+       public Map<String, Integer> ReadIpByCounter() {
+               final long startTime = System.currentTimeMillis();
+               String com = "SELECT JobID, ip FROM ProteinLog;";
+               System.out.println("Command: " + com);
+               ResultSet results = session.execute(com);
+               if (results.isExhausted())
+                       return null;
+               final long queryTime = System.currentTimeMillis();
+               List<Row> rows = results.all();
+               System.out.println("Query time is " + (queryTime - startTime) + " msec");
+               System.out.println(" rows analysed,  " + rows.size());
+               Map<String, Integer> res = new HashMap<String, Integer>();
+               int c = 0;
+               for (Row r : rows) {
+                       String ip = r.getString("ip");
+                       String id = r.getString("JobID");
+                       if (res.containsKey(ip))
+                               res.put(ip, res.get(ip) + 1);
+                       else
+                               res.put(ip, 1);
+               }
+               final long endTime = System.currentTimeMillis();
+               System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
+               return res;
+       }
+
+       /*
+        * getting log info for jobid
+        */
+       public JobBean ReadJobLog(String jobid) {
+               final long startTime = System.currentTimeMillis();
+               String com = "SELECT * FROM ProteinLog WHERE JobID = '" + jobid + "';";
+               System.out.println("Command: " + com);
+               ResultSet results = session.execute(com);
+               if (results.isExhausted())
+                       return null;
+               final long queryTime = System.currentTimeMillis();
+               Row row = results.one();
+               String com1 = "SELECT * FROM ProteinRow WHERE JobID = '" + jobid + "' ALLOW FILTERING;";
+               System.out.println("Command: " + com1);
+               ResultSet results1 = session.execute(com1);
+               if (results1.isExhausted())
+                       return null;
+               Row row1 = results1.one();
+               JobBean res = new JobBean(row.getString("Protein"), row.getString("JobID"), row.getString("DataBegin"),
+                               row.getString("DataEnd"), row.getString("ip"), row1.getMap("Predictions", String.class, String.class));
+               System.out.println("Query time is " + (queryTime - startTime) + " msec");
+               final long endTime = System.currentTimeMillis();
+               System.out.println(" rows analysed, execution time is " + (endTime - startTime) + " msec");
+               return res;
+       }
 }