00a9f27f265667b3eaea9de2331f345111cea3f2
[proteocache.git] / datadb / compbio / cassandra / CassandraReader.java
1 package compbio.cassandra;
2
3 import java.util.Date;
4 import java.util.HashMap;
5 import java.util.List;
6 import java.util.ArrayList;
7 import java.util.Map;
8
9 import org.apache.log4j.Logger;
10
11 import com.datastax.driver.core.Row;
12 import com.datastax.driver.core.Session;
13 import com.datastax.driver.core.ResultSet;
14 import compbio.engine.JobStatus;
15
16 public class CassandraReader {
17         private Session session;
18         private static Logger log = Logger.getLogger(CassandraNativeConnector.class);
19
20         public CassandraReader() {
21                 Session inis = CassandraNativeConnector.getSession();
22                 setSession(inis);
23         }
24
25         public void setSession(Session s) {
26                 assert s != null;
27                 session = s;
28         }
29
30         /*
31          * getting data from the db
32          */
33         public List<Pair<String, String>> ReadProteinDataTable() {
34                 final long startTime = System.currentTimeMillis();
35                 String com = "SELECT DataBegin,DataEnd FROM ProteinLog;";
36                 System.out.println("Command: " + com);
37                 ResultSet results = session.execute(com);
38                 final long queryTime = System.currentTimeMillis();
39                 List<Row> rows = results.all();
40                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
41
42                 List<Pair<String, String>> res = new ArrayList<Pair<String, String>>();
43                 int c = 0;
44                 for (Row r : rows) {
45                         Pair<String, String> pair = new Pair<String, String>(r.getString("DataBegin"), r.getString("DataEnd"));
46                         res.add(pair);
47                         ++c;
48                 }
49                 final long endTime = System.currentTimeMillis();
50                 System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
51                 return res;
52         }
53
54         /*
55          * getting data from the db
56          */
57         public DateBean ReadProteinData(long day, String date) {
58                 String com = "SELECT JobID, Protein FROM ProteinData WHERE jobtime = " + day + ";";
59                 System.out.println("Command: " + com);
60                 ResultSet results = session.execute(com);
61                 if (results.isExhausted())
62                         return null;
63                 List<Row> rows = results.all();
64                 DateBean res = new DateBean(date);
65                 for (Row r : rows) {
66                         res.setJobidAndSeq(r.getString("JobID"), r.getString("Protein"));
67                 }
68                 return res;
69         }
70
71         /**
72          * getting data from the db
73          */
74         public DateBean ReadFailedJobs(long day, String date, JobStatus status) {
75                 // FailLog (jobtime, JobID, ExecTime, ip, FinalStatus)
76                 String com = "SELECT JobID FROM FailLog WHERE jobtime = " + day + " and FinalStatus = '" + status.name() + "';";
77                 ResultSet results = session.execute(com);
78                 if (results.isExhausted())
79                         return null;
80                 List<Row> rows = results.all();
81                 DateBean res = new DateBean(date);
82                 for (Row r : rows) {
83                         String jobid = r.getString("JobID");
84                         String com1 = "SELECT Protein FROM ProteinLog WHERE JobID = '" + jobid + "';";
85                         System.out.println("Command: " + com1);
86                         ResultSet results2 = session.execute(com1);
87                         List<Row> jrows = results2.all();
88                         if (1 == jrows.size()) {
89                                 String protein = jrows.get(0).getString("Protein");
90                                 res.setJobidAndSeq(jobid, protein);
91                         }
92                 }
93                 return res;
94         }
95
96         /*
97          * getting data from the db JobDateInfo
98          */
99         public Total ReadDateTable(long queryDate) {
100                 ResultSet results = session.execute("SELECT * FROM JobDateInfo WHERE jobday = " + queryDate + ";");
101                 if (results.isExhausted())
102                         return null;
103                 Row therow = results.one();
104                 Total res = new Total(therow.getLong("Total"), therow.getLong("TotalOK"), therow.getLong("TotalStopped"),
105                                 therow.getLong("TotalError"), therow.getLong("TotalTimeOut"));
106                 if (!results.isExhausted()) {
107                         Date date = new Date(queryDate);
108                         log.warn("CassandraReader.ReadDateTable: date row for " + date.toString() + " (" + queryDate + ") duplicated ");
109                 }
110                 return res;
111         }
112
113         /*
114          * getting whole protein sequence from the db ProteinRow
115          */
116         public List<ProteinBean> ReadWholeSequence(String queryProtein) {
117                 final long startTime = System.currentTimeMillis();
118                 String com = "SELECT JobID, Predictions FROM ProteinRow WHERE Protein = '" + queryProtein + "';";
119                 System.out.println("Command: " + com);
120                 ResultSet results = session.execute(com);
121                 if (results.isExhausted())
122                         return null;
123                 final long queryTime = System.currentTimeMillis();
124                 List<Row> rows = results.all();
125                 System.out.println("first size : " + rows.size());
126                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
127                 System.out.println(" rows analysed,  " + rows.size());
128                 List<ProteinBean> res = new ArrayList<ProteinBean>();
129                 ProteinBean structure = new ProteinBean(queryProtein, rows.get(0).getMap("Predictions", String.class, String.class));
130                 System.out.println("second size : " + rows.size());
131                 int c = 0;
132                 for (Row r : rows) {
133                         structure.setJobid(r.getString("JobID"));
134                         ++c;
135                 }
136                 res.add(structure);
137                 final long endTime = System.currentTimeMillis();
138                 System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
139                 return res;
140         }
141
142         /*
143          * getting jobs by ip
144          */
145         public Map<String, String[]> ReadIpWithJobs(String ip) {
146                 final long startTime = System.currentTimeMillis();
147                 String com = "SELECT JobID, Protein, FinalStatus, DataBegin FROM ProteinLog WHERE ip = '" + ip + "';";
148                 System.out.println("Command: " + com);
149                 ResultSet results = session.execute(com);
150                 if (results.isExhausted())
151                         return null;
152                 final long queryTime = System.currentTimeMillis();
153                 List<Row> rows = results.all();
154                 Map<String, String[]> res = new HashMap<String, String[]>();
155                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
156                 System.out.println(" rows analysed,  " + rows.size());
157                 int c = 0;
158                 for (Row r : rows) {
159                         if (r.getString("FinalStatus").equals("OK")) {
160                                 String date = r.getString("DataBegin");
161                                 res.put(r.getString("JobID"), new String[] { date.substring(0, date.indexOf(":")), r.getString("Protein") });
162                                 ++c;
163                         }
164                 }
165                 final long endTime = System.currentTimeMillis();
166                 System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
167                 return res;
168         }
169
170         /*
171          * getting part of protein sequence from the db ProteinRow
172          */
173         public List<ProteinBean> ReadPartOfSequence(String queryProtein) {
174                 final long startTime = System.currentTimeMillis();
175                 String com = "SELECT * FROM ProteinRow;";
176                 System.out.println("Command: " + com);
177                 ResultSet results = session.execute(com);
178                 if (results.isExhausted())
179                         return null;
180                 final long queryTime = System.currentTimeMillis();
181                 List<Row> rows = results.all();
182                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
183                 System.out.println(" rows analysed,  " + rows.size());
184                 List<ProteinBean> res = new ArrayList<ProteinBean>();
185                 int c = 0;
186                 for (Row r : rows) {
187                         String prot = r.getString("Protein");
188                         if (prot.matches("(.*)" + queryProtein + "(.*)")) {
189                                 ProteinBean structure = new ProteinBean(prot, r.getMap("Predictions", String.class, String.class));
190                                 structure.setJobid(r.getString("JobID"));
191                                 res.add(structure);
192                                 ++c;
193                         }
194                 }
195                 final long endTime = System.currentTimeMillis();
196                 System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
197                 return res;
198         }
199
200         /*
201          * getting protein sequences by counter
202          */
203         public Map<String, Integer> ReadProteinSequenceByCounter() {
204                 final long startTime = System.currentTimeMillis();
205                 String com = "SELECT Protein, JobID FROM ProteinRow;";
206                 System.out.println("Command: " + com);
207                 ResultSet results = session.execute(com);
208                 if (results.isExhausted())
209                         return null;
210                 final long queryTime = System.currentTimeMillis();
211                 List<Row> rows = results.all();
212                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
213                 System.out.println(" rows analysed,  " + rows.size());
214                 Map<String, Integer> res = new HashMap<String, Integer>();
215                 int c = 0;
216                 for (Row r : rows) {
217                         String protein = r.getString("Protein");
218                         String id = r.getString("JobID");
219                         if (res.containsKey(protein))
220                                 res.put(protein, res.get(protein) + 1);
221                         else
222                                 res.put(protein, 1);
223                 }
224                 final long endTime = System.currentTimeMillis();
225                 System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
226                 return res;
227         }
228
229         /*
230          * getting ip by counter
231          */
232         public Map<String, Integer> ReadIpByCounter() {
233                 final long startTime = System.currentTimeMillis();
234                 String com = "SELECT JobID, ip FROM ProteinLog;";
235                 System.out.println("Command: " + com);
236                 ResultSet results = session.execute(com);
237                 if (results.isExhausted())
238                         return null;
239                 final long queryTime = System.currentTimeMillis();
240                 List<Row> rows = results.all();
241                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
242                 System.out.println(" rows analysed,  " + rows.size());
243                 Map<String, Integer> res = new HashMap<String, Integer>();
244                 int c = 0;
245                 for (Row r : rows) {
246                         String ip = r.getString("ip");
247                         String id = r.getString("JobID");
248                         if (res.containsKey(ip))
249                                 res.put(ip, res.get(ip) + 1);
250                         else
251                                 res.put(ip, 1);
252                 }
253                 final long endTime = System.currentTimeMillis();
254                 System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
255                 return res;
256         }
257
258         /*
259          * getting log info for jobid
260          */
261         public JobBean ReadJobLog(String jobid) {
262                 final long startTime = System.currentTimeMillis();
263                 String com = "SELECT * FROM ProteinLog WHERE JobID = '" + jobid + "';";
264                 System.out.println("Command: " + com);
265                 ResultSet results = session.execute(com);
266                 if (results.isExhausted())
267                         return null;
268                 final long queryTime = System.currentTimeMillis();
269                 Row row = results.one();
270                 String com1 = "SELECT * FROM ProteinRow WHERE JobID = '" + jobid + "' ALLOW FILTERING;";
271                 System.out.println("Command: " + com1);
272                 ResultSet results1 = session.execute(com1);
273                 if (results1.isExhausted())
274                         return null;
275                 Row row1 = results1.one();
276                 JobBean res = new JobBean(row.getString("Protein"), row.getString("JobID"), row.getString("DataBegin"), row.getString("DataEnd"),
277                                 row.getString("ip"), row1.getMap("Predictions", String.class, String.class));
278                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
279                 final long endTime = System.currentTimeMillis();
280                 System.out.println(" rows analysed, execution time is " + (endTime - startTime) + " msec");
281                 return res;
282         }
283 }