Re-engineering of database I/O
[proteocache.git] / datadb / compbio / cassandra / CassandraReader.java
index d2d2e1b..c71243b 100644 (file)
 package compbio.cassandra;
 
+import java.util.Calendar;
+import java.util.HashMap;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Map;
+
+import org.apache.log4j.Logger;
+
+import com.datastax.driver.core.Row;
 import com.datastax.driver.core.Session;
+import com.datastax.driver.core.ResultSet;
+
+import compbio.engine.ProteoCachePropertyHelperManager;
+import compbio.util.PropertyHelper;
+
+public class CassandraReader {
+       private Session session;
+       private static Logger log = Logger.getLogger(CassandraNativeConnector.class);
+
+       public CassandraReader() {
+               Session inis = CassandraNativeConnector.getSession();
+               setSession (inis);
+       }
+
+       public void setSession(Session s) {
+               assert s != null;
+               session = s;
+       }
+
+       /*
+        * getting data from the db
+        */
+       public List<Pair<String, String>> ReadProteinDataTable() {
+               final long startTime = System.currentTimeMillis();
+               String com = "SELECT DataBegin,DataEnd FROM ProteinLog;";
+               System.out.println("Command: " + com);
+               ResultSet results = session.execute(com);
+               final long queryTime = System.currentTimeMillis();
+               List<Row> rows = results.all();
+               System.out.println("Query time is " + (queryTime - startTime) + " msec");
 
-public interface CassandraReader {
+               List<Pair<String, String>> res = new ArrayList<Pair<String, String>>();
+               int c = 0;
+               for (Row r : rows) {
+                       Pair<String, String> pair = new Pair<String, String>(r.getString("DataBegin"), r.getString("DataEnd"));
+                       res.add(pair);
+                       ++c;
+               }
+               final long endTime = System.currentTimeMillis();
+               System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
+               return res;
+       }
 
        /*
-        * Defines a source file with metainformation of Jpred Jobs
-        **/
-       void setSession (Session s);
-       
+        * getting data from the db ProteinData
+        */
+       public Integer ReadDateTable(long queryDate) {
+               final long startTime = System.currentTimeMillis();
+               String com = "SELECT jobtime, JobID FROM ProteinData WHERE jobtime = " + queryDate + ";";
+               System.out.println("Command: " + com);
+               ResultSet results = session.execute(com);
+               final long queryTime = System.currentTimeMillis();
+               System.out.println("Query time is " + (queryTime - startTime) + " msec");
+               if (results.isExhausted())
+                       return 0;
+               List<Row> rows = results.all();
+               final long endTime = System.currentTimeMillis();
+               System.out.println("Processing time is " + (endTime - queryTime) + " msec");
+               return rows.size();
+       }
+
+       /*
+        * getting whole protein sequence from the db ProteinRow
+        */
+       public List<StructureProteinPrediction> ReadWholeSequence(String queryProtein) {
+               final long startTime = System.currentTimeMillis();
+               String com = "SELECT JobID, Predictions FROM ProteinRow WHERE Protein = '" + queryProtein + "';";
+               System.out.println("Command: " + com);
+               ResultSet results = session.execute(com);
+               if (results.isExhausted())
+                       return null;
+               final long queryTime = System.currentTimeMillis();
+               List<Row> rows = results.all();
+               System.out.println("Query time is " + (queryTime - startTime) + " msec");
+               System.out.println(" rows analysed,  " + rows.size());
+               List<StructureProteinPrediction> res = new ArrayList<StructureProteinPrediction>();
+               int c = 0;
+               for (Row r : rows) {
+                       StructureProteinPrediction structure = new StructureProteinPrediction(queryProtein, r.getString("JobID"), r.getMap(
+                                       "Predictions", String.class, String.class));
+                       res.add(structure);
+                       ++c;
+               }
+               final long endTime = System.currentTimeMillis();
+               System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
+               return res;
+       }
+
+       /*
+        * getting part of protein sequence from the db ProteinRow
+        */
+       public List<StructureProteinPrediction> ReadPartOfSequence(String queryProtein) {
+               final long startTime = System.currentTimeMillis();
+               String com = "SELECT * FROM ProteinRow;";
+               System.out.println("Command: " + com);
+               ResultSet results = session.execute(com);
+               if (results.isExhausted())
+                       return null;
+               final long queryTime = System.currentTimeMillis();
+               List<Row> rows = results.all();
+               System.out.println("Query time is " + (queryTime - startTime) + " msec");
+               System.out.println(" rows analysed,  " + rows.size());
+               List<StructureProteinPrediction> res = new ArrayList<StructureProteinPrediction>();
+               int c = 0;
+               for (Row r : rows) {
+                       String prot = r.getString("Protein");
+                       if (prot.matches("(.*)" + queryProtein + "(.*)")) {
+                               StructureProteinPrediction structure = new StructureProteinPrediction(prot, r.getString("JobID"), r.getMap("Predictions",
+                                               String.class, String.class));
+                               res.add(structure);
+                               ++c;
+                       }
+               }
+               final long endTime = System.currentTimeMillis();
+               System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
+               return res;
+       }
+
+       /*
+        * getting protein sequences by counter
+        */
+       public Map<String, Integer> ReadProteinDataByCounter() {
+               final long startTime = System.currentTimeMillis();
+               String com = "SELECT Protein FROM ProteinRow;";
+               System.out.println("Command: " + com);
+               ResultSet results = session.execute(com);
+               if (results.isExhausted())
+                       return null;
+               final long queryTime = System.currentTimeMillis();
+               List<Row> rows = results.all();
+               System.out.println("Query time is " + (queryTime - startTime) + " msec");
+               System.out.println(" rows analysed,  " + rows.size());
+               Map<String, Integer> res = new HashMap<String, Integer>();
+               int c = 0;
+               for (Row r : rows) {
+                       String protein = r.getString("Protein");
+                       if (res.containsKey(protein))
+                               res.put(protein, res.get(protein) + 1);
+                       else
+                               res.put(protein, 1);
+               }
+               final long endTime = System.currentTimeMillis();
+               System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
+               return res;
+       }
+
        /*
-        * Makes real parsing of the source file
-        **/
-       /*void getResults();*/
+        * getting protein sequences by counter
+        */
+       public StructureJobLog ReadJobLog(String jobid) {
+               final long startTime = System.currentTimeMillis();
+               String com = "SELECT * FROM ProteinLog WHERE JobID = '" + jobid + "';";
+               System.out.println("Command: " + com);
+               ResultSet results = session.execute(com);
+               if (results.isExhausted())
+                       return null;
+               final long queryTime = System.currentTimeMillis();
+               Row row = results.one();
+               String com1 = "SELECT * FROM ProteinRow WHERE JobID = '" + jobid + "' ALLOW FILTERING;";
+               System.out.println("Command: " + com1);
+               ResultSet results1 = session.execute(com1);
+               if (results1.isExhausted())
+                       return null;
+               Row row1 = results1.one();
+               StructureJobLog res = new StructureJobLog(row.getString("Protein"), row.getString("JobID"), row.getString("DataBegin"),
+                               row.getString("DataEnd"), row.getString("ip"), row1.getMap("Predictions", String.class, String.class));
+               System.out.println("Query time is " + (queryTime - startTime) + " msec");
+               final long endTime = System.currentTimeMillis();
+               System.out.println(" rows analysed, execution time is " + (endTime - startTime) + " msec");
+               return res;
+       }
 }