Rebuild tables for queries
[proteocache.git] / datadb / compbio / cassandra / CassandraReader.java
1 package compbio.cassandra;
2
3 import java.util.Date;
4 import java.util.HashMap;
5 import java.util.List;
6 import java.util.ArrayList;
7 import java.util.Map;
8
9 import org.apache.log4j.Logger;
10
11 import com.datastax.driver.core.Row;
12 import com.datastax.driver.core.Session;
13 import com.datastax.driver.core.ResultSet;
14
15 import compbio.engine.ProteoCachePropertyHelperManager;
16 import compbio.util.PropertyHelper;
17
18 public class CassandraReader {
19         private Session session;
20         private static Logger log = Logger.getLogger(CassandraNativeConnector.class);
21
22         public CassandraReader() {
23                 Session inis = CassandraNativeConnector.getSession();
24                 setSession (inis);
25         }
26
27         public void setSession(Session s) {
28                 assert s != null;
29                 session = s;
30         }
31
32         /*
33          * getting data from the db
34          */
35         public List<Pair<String, String>> ReadProteinDataTable() {
36                 final long startTime = System.currentTimeMillis();
37                 String com = "SELECT DataBegin,DataEnd FROM ProteinLog;";
38                 System.out.println("Command: " + com);
39                 ResultSet results = session.execute(com);
40                 final long queryTime = System.currentTimeMillis();
41                 List<Row> rows = results.all();
42                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
43
44                 List<Pair<String, String>> res = new ArrayList<Pair<String, String>>();
45                 int c = 0;
46                 for (Row r : rows) {
47                         Pair<String, String> pair = new Pair<String, String>(r.getString("DataBegin"), r.getString("DataEnd"));
48                         res.add(pair);
49                         ++c;
50                 }
51                 final long endTime = System.currentTimeMillis();
52                 System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
53                 return res;
54         }
55
56         /*
57          * getting data from the db JobDateInfo
58          */
59         public long ReadDateTable(long queryDate) {
60                 ResultSet results = session.execute("SELECT Total FROM JobDateInfo WHERE jobday = " + queryDate + ";");
61                 if (results.isExhausted())
62                         return 0;
63                 Row therow = results.one();
64                 long res = therow.getLong("Total");
65                 if (!results.isExhausted()) {
66                         Date date = new Date (queryDate);
67                         log.warn("CassandraReader.ReadDateTable: date row for " + date.toString () + " ("+ queryDate + ") duplicated ");
68                 }
69                 return res;
70         }
71         /*
72          * getting whole protein sequence from the db ProteinRow
73          */
74         public List<StructureProteinPrediction> ReadWholeSequence(String queryProtein) {
75                 final long startTime = System.currentTimeMillis();
76                 String com = "SELECT JobID, Predictions FROM ProteinRow WHERE Protein = '" + queryProtein + "';";
77                 System.out.println("Command: " + com);
78                 ResultSet results = session.execute(com);
79                 if (results.isExhausted())
80                         return null;
81                 final long queryTime = System.currentTimeMillis();
82                 List<Row> rows = results.all();
83                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
84                 System.out.println(" rows analysed,  " + rows.size());
85                 List<StructureProteinPrediction> res = new ArrayList<StructureProteinPrediction>();
86                 int c = 0;
87                 for (Row r : rows) {
88                         StructureProteinPrediction structure = new StructureProteinPrediction(queryProtein, r.getString("JobID"), r.getMap(
89                                         "Predictions", String.class, String.class));
90                         res.add(structure);
91                         ++c;
92                 }
93                 final long endTime = System.currentTimeMillis();
94                 System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
95                 return res;
96         }
97
98         /*
99          * getting part of protein sequence from the db ProteinRow
100          */
101         public List<StructureProteinPrediction> ReadPartOfSequence(String queryProtein) {
102                 final long startTime = System.currentTimeMillis();
103                 String com = "SELECT * FROM ProteinRow;";
104                 System.out.println("Command: " + com);
105                 ResultSet results = session.execute(com);
106                 if (results.isExhausted())
107                         return null;
108                 final long queryTime = System.currentTimeMillis();
109                 List<Row> rows = results.all();
110                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
111                 System.out.println(" rows analysed,  " + rows.size());
112                 List<StructureProteinPrediction> res = new ArrayList<StructureProteinPrediction>();
113                 int c = 0;
114                 for (Row r : rows) {
115                         String prot = r.getString("Protein");
116                         if (prot.matches("(.*)" + queryProtein + "(.*)")) {
117                                 StructureProteinPrediction structure = new StructureProteinPrediction(prot, r.getString("JobID"), r.getMap("Predictions",
118                                                 String.class, String.class));
119                                 res.add(structure);
120                                 ++c;
121                         }
122                 }
123                 final long endTime = System.currentTimeMillis();
124                 System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
125                 return res;
126         }
127
128         /*
129          * getting protein sequences by counter
130          */
131         public Map<String, Integer> ReadProteinSequenceByCounter() {
132                 final long startTime = System.currentTimeMillis();
133                 String com = "SELECT Protein FROM ProteinRow;";
134                 System.out.println("Command: " + com);
135                 ResultSet results = session.execute(com);
136                 if (results.isExhausted())
137                         return null;
138                 final long queryTime = System.currentTimeMillis();
139                 List<Row> rows = results.all();
140                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
141                 System.out.println(" rows analysed,  " + rows.size());
142                 Map<String, Integer> res = new HashMap<String, Integer>();
143                 int c = 0;
144                 for (Row r : rows) {
145                         String protein = r.getString("Protein");
146                         if (res.containsKey(protein))
147                                 res.put(protein, res.get(protein) + 1);
148                         else
149                                 res.put(protein, 1);
150                 }
151                 final long endTime = System.currentTimeMillis();
152                 System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
153                 return res;
154         }
155
156         /*
157          * getting protein sequences by counter
158          */
159         public StructureJobLog ReadJobLog(String jobid) {
160                 final long startTime = System.currentTimeMillis();
161                 String com = "SELECT * FROM ProteinLog WHERE JobID = '" + jobid + "';";
162                 System.out.println("Command: " + com);
163                 ResultSet results = session.execute(com);
164                 if (results.isExhausted())
165                         return null;
166                 final long queryTime = System.currentTimeMillis();
167                 Row row = results.one();
168                 String com1 = "SELECT * FROM ProteinRow WHERE JobID = '" + jobid + "' ALLOW FILTERING;";
169                 System.out.println("Command: " + com1);
170                 ResultSet results1 = session.execute(com1);
171                 if (results1.isExhausted())
172                         return null;
173                 Row row1 = results1.one();
174                 StructureJobLog res = new StructureJobLog(row.getString("Protein"), row.getString("JobID"), row.getString("DataBegin"),
175                                 row.getString("DataEnd"), row.getString("ip"), row1.getMap("Predictions", String.class, String.class));
176                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
177                 final long endTime = System.currentTimeMillis();
178                 System.out.println(" rows analysed, execution time is " + (endTime - startTime) + " msec");
179                 return res;
180         }
181 }