941ad1cd34fdb2f16641b2b3dd947326c214f45b
[proteocache.git] / datadb / compbio / cassandra / CassandraReader.java
1 package compbio.cassandra;
2
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;

import com.datastax.driver.core.ResultSet;
import com.datastax.driver.core.Row;
import com.datastax.driver.core.Session;
14
15 public class CassandraReader {
16         private Session session;
17         private static Logger log = Logger.getLogger(CassandraNativeConnector.class);
18
19         public CassandraReader() {
20                 Session inis = CassandraNativeConnector.getSession();
21                 setSession (inis);
22         }
23
24         public void setSession(Session s) {
25                 assert s != null;
26                 session = s;
27         }
28
29         /*
30          * getting data from the db
31          */
32         public List<Pair<String, String>> ReadProteinDataTable() {
33                 final long startTime = System.currentTimeMillis();
34                 String com = "SELECT DataBegin,DataEnd FROM ProteinLog;";
35                 System.out.println("Command: " + com);
36                 ResultSet results = session.execute(com);
37                 final long queryTime = System.currentTimeMillis();
38                 List<Row> rows = results.all();
39                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
40
41                 List<Pair<String, String>> res = new ArrayList<Pair<String, String>>();
42                 int c = 0;
43                 for (Row r : rows) {
44                         Pair<String, String> pair = new Pair<String, String>(r.getString("DataBegin"), r.getString("DataEnd"));
45                         res.add(pair);
46                         ++c;
47                 }
48                 final long endTime = System.currentTimeMillis();
49                 System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
50                 return res;
51         }
52
53         /*
54          * getting data from the db JobDateInfo
55          */
56         public long ReadDateTable(long queryDate) {
57                 ResultSet results = session.execute("SELECT Total FROM JobDateInfo WHERE jobday = " + queryDate + ";");
58                 if (results.isExhausted())
59                         return 0;
60                 Row therow = results.one();
61                 long res = therow.getLong("Total");
62                 if (!results.isExhausted()) {
63                         Date date = new Date (queryDate);
64                         log.warn("CassandraReader.ReadDateTable: date row for " + date.toString () + " ("+ queryDate + ") duplicated ");
65                 }
66                 return res;
67         }
68         /*
69          * getting whole protein sequence from the db ProteinRow
70          */
71         public List<StructureProteinPrediction> ReadWholeSequence(String queryProtein) {
72                 final long startTime = System.currentTimeMillis();
73                 String com = "SELECT JobID, Predictions FROM ProteinRow WHERE Protein = '" + queryProtein + "';";
74                 System.out.println("Command: " + com);
75                 ResultSet results = session.execute(com);
76                 if (results.isExhausted())
77                         return null;
78                 final long queryTime = System.currentTimeMillis();
79                 List<Row> rows = results.all();
80                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
81                 System.out.println(" rows analysed,  " + rows.size());
82                 List<StructureProteinPrediction> res = new ArrayList<StructureProteinPrediction>();
83                 int c = 0;
84                 for (Row r : rows) {
85                         StructureProteinPrediction structure = new StructureProteinPrediction(queryProtein, r.getString("JobID"), r.getMap(
86                                         "Predictions", String.class, String.class));
87                         res.add(structure);
88                         ++c;
89                 }
90                 final long endTime = System.currentTimeMillis();
91                 System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
92                 return res;
93         }
94
95         /*
96          * getting part of protein sequence from the db ProteinRow
97          */
98         public List<StructureProteinPrediction> ReadPartOfSequence(String queryProtein) {
99                 final long startTime = System.currentTimeMillis();
100                 String com = "SELECT * FROM ProteinRow;";
101                 System.out.println("Command: " + com);
102                 ResultSet results = session.execute(com);
103                 if (results.isExhausted())
104                         return null;
105                 final long queryTime = System.currentTimeMillis();
106                 List<Row> rows = results.all();
107                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
108                 System.out.println(" rows analysed,  " + rows.size());
109                 List<StructureProteinPrediction> res = new ArrayList<StructureProteinPrediction>();
110                 int c = 0;
111                 for (Row r : rows) {
112                         String prot = r.getString("Protein");
113                         if (prot.matches("(.*)" + queryProtein + "(.*)")) {
114                                 StructureProteinPrediction structure = new StructureProteinPrediction(prot, r.getString("JobID"), r.getMap("Predictions",
115                                                 String.class, String.class));
116                                 res.add(structure);
117                                 ++c;
118                         }
119                 }
120                 final long endTime = System.currentTimeMillis();
121                 System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
122                 return res;
123         }
124
125         /*
126          * getting protein sequences by counter
127          */
128         public Map<String, Integer> ReadProteinSequenceByCounter() {
129                 final long startTime = System.currentTimeMillis();
130                 String com = "SELECT Protein, JobID FROM ProteinRow;";
131                 System.out.println("Command: " + com);
132                 ResultSet results = session.execute(com);
133                 if (results.isExhausted())
134                         return null;
135                 final long queryTime = System.currentTimeMillis();
136                 List<Row> rows = results.all();
137                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
138                 System.out.println(" rows analysed,  " + rows.size());
139                 Map<String, Integer> res = new HashMap<String, Integer>();
140                 int c = 0;
141                 for (Row r : rows) {
142                         String protein = r.getString("Protein");
143                         String id = r.getString("JobID");
144                         System.out.println(id + ", " + protein);
145                         if (res.containsKey(protein))
146                                 res.put(protein, res.get(protein) + 1);
147                         else
148                                 res.put(protein, 1);
149                 }
150                 final long endTime = System.currentTimeMillis();
151                 System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
152                 return res;
153         }
154
155         /*
156          * getting protein sequences by counter
157          */
158         public StructureJobLog ReadJobLog(String jobid) {
159                 final long startTime = System.currentTimeMillis();
160                 String com = "SELECT * FROM ProteinLog WHERE JobID = '" + jobid + "';";
161                 System.out.println("Command: " + com);
162                 ResultSet results = session.execute(com);
163                 if (results.isExhausted())
164                         return null;
165                 final long queryTime = System.currentTimeMillis();
166                 Row row = results.one();
167                 String com1 = "SELECT * FROM ProteinRow WHERE JobID = '" + jobid + "' ALLOW FILTERING;";
168                 System.out.println("Command: " + com1);
169                 ResultSet results1 = session.execute(com1);
170                 if (results1.isExhausted())
171                         return null;
172                 Row row1 = results1.one();
173                 StructureJobLog res = new StructureJobLog(row.getString("Protein"), row.getString("JobID"), row.getString("DataBegin"),
174                                 row.getString("DataEnd"), row.getString("ip"), row1.getMap("Predictions", String.class, String.class));
175                 System.out.println("Query time is " + (queryTime - startTime) + " msec");
176                 final long endTime = System.currentTimeMillis();
177                 System.out.println(" rows analysed, execution time is " + (endTime - startTime) + " msec");
178                 return res;
179         }
180 }