create ProteinBean
[proteocache.git] / datadb / compbio / cassandra / CassandraReader.java
1 package compbio.cassandra;
2
3 import java.util.Date;
4 import java.util.HashMap;
5 import java.util.List;
6 import java.util.ArrayList;
7 import java.util.Map;
8
9 import org.apache.log4j.Logger;
10
11 import com.datastax.driver.core.Row;
12 import com.datastax.driver.core.Session;
13 import com.datastax.driver.core.ResultSet;
14
15 public class CassandraReader {
16         private Session session;
17         private static Logger log = Logger.getLogger(CassandraNativeConnector.class);
18
19         public CassandraReader() {
20                 Session inis = CassandraNativeConnector.getSession();
21                 setSession (inis);
22         }
23
24         public void setSession(Session s) {
25                 assert s != null;
26                 session = s;
27         }
28
29         /*
30          * getting data from the db
31          */
32         public List<Pair<String, String>> ReadProteinDataTable() {
33                 final long startTime = System.currentTimeMillis();
34                 String com = "SELECT DataBegin,DataEnd FROM ProteinLog;";
35                 System.out.println("Command: " + com);
36                 ResultSet results = session.execute(com);
37                 final long queryTime = System.currentTimeMillis();
38                 List<Row> rows = results.all();
39                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
40
41                 List<Pair<String, String>> res = new ArrayList<Pair<String, String>>();
42                 int c = 0;
43                 for (Row r : rows) {
44                         Pair<String, String> pair = new Pair<String, String>(r.getString("DataBegin"), r.getString("DataEnd"));
45                         res.add(pair);
46                         ++c;
47                 }
48                 final long endTime = System.currentTimeMillis();
49                 System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
50                 return res;
51         }
52
53         /*
54          * getting data from the db
55          */
56         public List<Pair<String, String>> ReadProteinData(long day) {
57                 final long startTime = System.currentTimeMillis();
58                 String com = "SELECT JobID, Protein FROM ProteinData WHERE jobtime = " + day + ";";
59                 System.out.println("Command: " + com);
60                 ResultSet results = session.execute(com);
61                 if (results.isExhausted())
62                         return null;
63                 final long queryTime = System.currentTimeMillis();
64                 List<Row> rows = results.all();
65                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
66                 List<Pair<String, String>> res = new ArrayList<Pair<String, String>>();
67                 int c = 0;
68                 for (Row r : rows) {
69                         Pair<String, String> pair = new Pair<String, String>(r.getString("JobID"), r.getString("Protein"));
70                         res.add(pair);
71                         ++c;
72                 }
73                 final long endTime = System.currentTimeMillis();
74                 System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
75                 return res;
76         }
77         /*
78          * getting data from the db JobDateInfo
79          */
80         public List<Long> ReadDateTable(long queryDate) {
81                 ResultSet results = session.execute("SELECT * FROM JobDateInfo WHERE jobday = " + queryDate + ";");
82                 if (results.isExhausted())
83                         return null;
84                 Row therow = results.one();
85                 List<Long> res = new ArrayList<Long>();
86                 res.add(therow.getLong("Total"));
87                 res.add(therow.getLong("TotalOK"));
88                 res.add(therow.getLong("TotalStopped"));
89                 res.add(therow.getLong("TotalError"));
90                 res.add(therow.getLong("TotalTimeOut"));
91                 if (!results.isExhausted()) {
92                         Date date = new Date (queryDate);
93                         log.warn("CassandraReader.ReadDateTable: date row for " + date.toString () + " ("+ queryDate + ") duplicated ");
94                 }
95                 return res;
96         }
97
98         /*
99          * getting whole protein sequence from the db ProteinRow
100          */
101         public List<ProteinBean> ReadWholeSequence(String queryProtein) {
102                 final long startTime = System.currentTimeMillis();
103                 String com = "SELECT JobID, Predictions FROM ProteinRow WHERE Protein = '" + queryProtein + "';";
104                 System.out.println("Command: " + com);
105                 ResultSet results = session.execute(com);
106                 if (results.isExhausted())
107                         return null;
108                 final long queryTime = System.currentTimeMillis();
109                 List<Row> rows = results.all();
110                 System.out.println("first size : " + rows.size());
111                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
112                 System.out.println(" rows analysed,  " + rows.size());
113                 List<ProteinBean> res = new ArrayList<ProteinBean>();
114                 ProteinBean structure = new ProteinBean(queryProtein, rows.get(0).getMap(
115                                 "Predictions", String.class, String.class));
116                 System.out.println("second size : " + rows.size());
117                 int c = 0;
118                 for (Row r : rows) {
119                         structure.setJobid(r.getString("JobID"));                       
120                         ++c;
121                 }
122                 res.add(structure);
123                 final long endTime = System.currentTimeMillis();
124                 System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
125                 return res;
126         }
127
128         
129         /*
130          * getting jobs by ip
131          */
132         public List<Pair<String, String>> ReadIpWithJobs(String ip) {
133                 final long startTime = System.currentTimeMillis();
134                 String com = "SELECT JobID, Protein, FinalStatus FROM ProteinLog WHERE ip = '" + ip + "';";
135                 System.out.println("Command: " + com);
136                 ResultSet results = session.execute(com);
137                 if (results.isExhausted())
138                         return null;
139                 final long queryTime = System.currentTimeMillis();
140                 List<Row> rows = results.all();
141                 List<Pair<String, String>> res = new ArrayList<Pair<String, String>>();
142                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
143                 System.out.println(" rows analysed,  " + rows.size());
144                 int c = 0;
145                 for (Row r : rows) {
146                         if (r.getString("FinalStatus").equals("OK")) { 
147                                 Pair<String, String> pair = new Pair<String, String>(r.getString("JobID"), r.getString("Protein"));
148                                 System.out.println(pair.getElement0());
149                                 System.out.println(pair.getElement1());
150                                 res.add(pair);
151                                 ++c;
152                         }
153                 }
154                 final long endTime = System.currentTimeMillis();
155                 System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
156                 return res;
157         }
158
159         /*
160          * getting part of protein sequence from the db ProteinRow
161          */
162         public List<ProteinBean> ReadPartOfSequence(String queryProtein) {
163                 final long startTime = System.currentTimeMillis();
164                 String com = "SELECT * FROM ProteinRow;";
165                 System.out.println("Command: " + com);
166                 ResultSet results = session.execute(com);
167                 if (results.isExhausted())
168                         return null;
169                 final long queryTime = System.currentTimeMillis();
170                 List<Row> rows = results.all();
171                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
172                 System.out.println(" rows analysed,  " + rows.size());
173                 List<ProteinBean> res = new ArrayList<ProteinBean>();
174                 int c = 0;
175                 for (Row r : rows) {
176                         String prot = r.getString("Protein");
177                         if (prot.matches("(.*)" + queryProtein + "(.*)")) {
178                                 ProteinBean structure = new ProteinBean(prot,  r.getMap("Predictions", String.class, String.class));
179                                 structure.setJobid(r.getString("JobID"));
180                                 res.add(structure);
181                                 ++c;
182                         }
183                 }
184                 final long endTime = System.currentTimeMillis();
185                 System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
186                 return res;
187         }
188
189         /*
190          * getting protein sequences by counter
191          */
192         public Map<String, Integer> ReadProteinSequenceByCounter() {
193                 final long startTime = System.currentTimeMillis();
194                 String com = "SELECT Protein, JobID FROM ProteinRow;";
195                 System.out.println("Command: " + com);
196                 ResultSet results = session.execute(com);
197                 if (results.isExhausted())
198                         return null;
199                 final long queryTime = System.currentTimeMillis();
200                 List<Row> rows = results.all();
201                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
202                 System.out.println(" rows analysed,  " + rows.size());
203                 Map<String, Integer> res = new HashMap<String, Integer>();
204                 int c = 0;
205                 for (Row r : rows) {
206                         String protein = r.getString("Protein");
207                         String id = r.getString("JobID");
208                         if (res.containsKey(protein))
209                                 res.put(protein, res.get(protein) + 1);
210                         else
211                                 res.put(protein, 1);
212                 }
213                 final long endTime = System.currentTimeMillis();
214                 System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
215                 return res;
216         }
217
218         /*
219          * getting ip by counter
220          */
221         public Map<String, Integer> ReadIpByCounter() {
222                 final long startTime = System.currentTimeMillis();
223                 String com = "SELECT JobID, ip FROM ProteinLog;";
224                 System.out.println("Command: " + com);
225                 ResultSet results = session.execute(com);
226                 if (results.isExhausted())
227                         return null;
228                 final long queryTime = System.currentTimeMillis();
229                 List<Row> rows = results.all();
230                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
231                 System.out.println(" rows analysed,  " + rows.size());
232                 Map<String, Integer> res = new HashMap<String, Integer>();
233                 int c = 0;
234                 for (Row r : rows) {
235                         String protein = r.getString("ip");
236                         String id = r.getString("JobID");
237                         if (res.containsKey(protein))
238                                 res.put(protein, res.get(protein) + 1);
239                         else
240                                 res.put(protein, 1);
241                 }
242                 final long endTime = System.currentTimeMillis();
243                 System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
244                 return res;
245         }
246
247         /*
248          * getting log info for jobid
249          */
250         public StructureJobLog ReadJobLog(String jobid) {
251                 final long startTime = System.currentTimeMillis();
252                 String com = "SELECT * FROM ProteinLog WHERE JobID = '" + jobid + "';";
253                 System.out.println("Command: " + com);
254                 ResultSet results = session.execute(com);
255                 if (results.isExhausted())
256                         return null;
257                 final long queryTime = System.currentTimeMillis();
258                 Row row = results.one();
259                 String com1 = "SELECT * FROM ProteinRow WHERE JobID = '" + jobid + "' ALLOW FILTERING;";
260                 System.out.println("Command: " + com1);
261                 ResultSet results1 = session.execute(com1);
262                 if (results1.isExhausted())
263                         return null;
264                 Row row1 = results1.one();
265                 StructureJobLog res = new StructureJobLog(row.getString("Protein"), row.getString("JobID"), row.getString("DataBegin"),
266                                 row.getString("DataEnd"), row.getString("ip"), row1.getMap("Predictions", String.class, String.class));
267                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
268                 final long endTime = System.currentTimeMillis();
269                 System.out.println(" rows analysed, execution time is " + (endTime - startTime) + " msec");
270                 return res;
271         }
272 }