fix data report, protein report
[proteocache.git] / datadb / compbio / cassandra / CassandraNativeConnector.java
1 package compbio.cassandra;
2
3 import java.io.IOException;
4 import java.util.Calendar;
5 import java.util.HashMap;
6 import java.util.List;
7 import java.util.ArrayList;
8 import java.util.Map;
9
10 import com.datastax.driver.core.Cluster;
11 import com.datastax.driver.core.Host;
12 import com.datastax.driver.core.Metadata;
13 import com.datastax.driver.core.Row;
14 import com.datastax.driver.core.Session;
15 import com.datastax.driver.core.ResultSet;
16
17 public class CassandraNativeConnector {
18         private static Cluster cluster;
19         private static Session session;
20         /*
21          * connect to the cluster and look weather the dababase has any data inside
22          */
23         public void Connect() {
24                 // local cassandra cluster
25                 cluster = Cluster.builder().addContactPoint("localhost").build();
26                 // distributed cassandra cluster
27                 /* cluster = Cluster.builder().addContactPoint("10.0.115.190").build(); */
28                 Metadata metadata = cluster.getMetadata();
29                 System.out.printf("Connected to cluster: %s\n", metadata.getClusterName());
30                 for (Host host : metadata.getAllHosts()) {
31                         System.out.printf("Datatacenter: %s; Host: %s; Rack: %s\n", host.getDatacenter(), host.getAddress(), host.getRack());
32                 }
33
34                 session = cluster.connect();
35                 session.execute("CREATE KEYSPACE IF NOT EXISTS ProteinKeyspace WITH replication = {'class':'SimpleStrategy', 'replication_factor':3};");
36                 session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinRow (Protein ascii, JobID ascii, Predictions map<ascii,ascii>, PRIMARY KEY(JobID));");
37                 session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinLog "
38                                 + "(JobID ascii, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, ExecutionStatus ascii, Protein ascii, PRIMARY KEY(JobID));");
39                 session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinData (jobtime bigint, JobID ascii, Protein ascii, PRIMARY KEY(JobID));");
40
41                 session.execute("CREATE INDEX IF NOT EXISTS ProteinSeq ON ProteinKeyspace.ProteinRow (protein);");
42                 session.execute("CREATE INDEX IF NOT EXISTS JobDateStamp ON ProteinKeyspace.ProteinData (jobtime);");
43
44                 System.out.println("Cassandra connected");
45         }
46
47         /*
48          * parsing data source and filling the database
49          */
50         public void Parsing() throws IOException {
51                 if (true) {
52                         // if (source.equals("http")) {
53                         // get data from real Jpred production server
54                         System.out.println("Parsing web data source......");
55                         String datasrc = "http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat";
56                         String prefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
57                         JpredParserHTTP parser = new JpredParserHTTP(prefix);
58                         parser.Parsing(datasrc, 4);
59                 }
60                 if (false) {
61                         // if (source.equals("file")) {
62                         // get irtifical data generated for the DB stress tests
63                         System.out.println("Parsing local file data source......");
64                         String datasrc = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/data.dat";
65                         String prefix = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/Jpreddata";
66                         JpredParserLocalFile parser = new JpredParserLocalFile(prefix);
67                         parser.Parsing(datasrc, 190);
68                 }
69         }
70
71         public void Closing() {
72                 session.shutdown();
73                 cluster.shutdown();
74                 System.out.println("Cassandra has been shut down");
75         }
76
77         /*
78          * inserting data into the db
79          */
80         public void InsertData(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx, String statusFinal,
81                         String protein, List<FastaSequence> predictions) {
82
83                 String check1 = "SELECT * FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';";
84                 ResultSet results1 = session.execute(check1);
85                 if (results1.isExhausted()) {
86                         String com1 = "INSERT INTO ProteinKeyspace.ProteinLog "
87                                         + "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)" + " VALUES ('" + jobid + "','" + ip + "','"
88                                         + startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx + "','" + protein + "');";
89                         session.execute(com1);
90
91                         String com2 = "INSERT INTO ProteinKeyspace.ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid
92                                         + "','" + protein + "');";
93                         session.execute(com2);
94
95                         String allpredictions = "";
96                         for (FastaSequence pred : predictions) {
97                                 String predictionname = pred.getId();
98                                 String prediction = pred.getSequence().replaceAll("\n", "");
99                                 allpredictions += "'" + predictionname + "':'" + prediction + "',";
100                         }
101                         String final_prediction = "";
102                         if (null != allpredictions) {
103                                 final_prediction = allpredictions.substring(0, allpredictions.length() - 1);
104                         }
105
106                         String check2 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "';";
107                         ResultSet results2 = session.execute(check2);
108                         if (results2.isExhausted()) {
109                                 String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('" 
110                         + protein + "','" + jobid + "',{" + final_prediction + "});";
111                                 session.execute(com3);
112                         }
113                 }
114         }
115
116         /*
117          * getting data from the db
118          */
119         public List<Pair<String, String>> ReadProteinDataTable() {
120                 final long startTime = System.currentTimeMillis();
121                 String com = "SELECT DataBegin,DataEnd FROM ProteinKeyspace.ProteinLog;";
122                 System.out.println("Command: " + com);
123                 ResultSet results = session.execute(com);
124                 final long queryTime = System.currentTimeMillis();
125                 List<Row> rows = results.all();
126                 System.out.println ("Query time is " + (queryTime - startTime) + " msec");
127
128                 List<Pair<String, String>> res = new ArrayList<Pair<String, String>>();
129                 int c = 0;
130                 for (Row r : rows) {
131                         Pair<String, String> pair = new Pair<String, String>(r.getString("DataBegin"),r.getString("DataEnd"));
132                         res.add(pair);
133                         ++c;
134                 }
135                 final long endTime = System.currentTimeMillis();
136                 System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
137                 return res;
138         }
139         
140         /*
141          * getting data from the db ProteinData
142          */
143         public Integer ReadDateTable(long queryDate) {
144                 final long startTime = System.currentTimeMillis();
145                 String com = "SELECT jobtime, JobID FROM ProteinKeyspace.ProteinData WHERE jobtime = " + queryDate + ";";
146                 System.out.println("Command: " + com);
147                 ResultSet results = session.execute(com);
148                 if (results.isExhausted())
149                         return null;
150                 final long queryTime = System.currentTimeMillis();
151                 List<Row> rows = results.all();
152                 System.out.println ("Query time is " + (queryTime - startTime) + " msec");        
153                 return rows.size();
154         }
155
156         /*
157          * getting whole protein sequence from the db ProteinRow
158          */
159         public List<StructureProteinPrediction> ReadWholeSequence(String queryProtein) {
160                 final long startTime = System.currentTimeMillis();
161                 String com = "SELECT JobID, Predictions FROM ProteinKeyspace.ProteinRow WHERE Protein = '" + queryProtein + "';";
162                 System.out.println("Command: " + com);
163                 ResultSet results = session.execute(com);
164                 if (results.isExhausted())
165                         return null;
166                 final long queryTime = System.currentTimeMillis();
167                 List<Row> rows = results.all();
168                 System.out.println ("Query time is " + (queryTime - startTime) + " msec");   
169                 System.out.println (" rows analysed,  " + rows.size());
170                 List<StructureProteinPrediction> res = new ArrayList<StructureProteinPrediction>();
171                 int c = 0;
172                 for (Row r : rows) {
173                         StructureProteinPrediction structure = new StructureProteinPrediction(queryProtein, r.getString("JobID"), r.getMap("Predictions", String.class, String.class));         
174                         res.add(structure);
175                         ++c;
176                 }
177                 final long endTime = System.currentTimeMillis();
178                 System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
179                 return res;
180         }
181         
182         /*
183          * getting part of protein sequence from the db ProteinRow
184          */
185         public List<StructureProteinPrediction>  ReadPartOfSequence(String queryProtein) {
186                 final long startTime = System.currentTimeMillis();
187                 String com = "SELECT * FROM ProteinKeyspace.ProteinRow;";
188                 System.out.println("Command: " + com);
189                 ResultSet results = session.execute(com);
190                 if (results.isExhausted())
191                         return null;
192                 final long queryTime = System.currentTimeMillis();
193                 List<Row> rows = results.all();
194                 System.out.println ("Query time is " + (queryTime - startTime) + " msec");   
195                 System.out.println (" rows analysed,  " + rows.size());
196                 List<StructureProteinPrediction>  res = new ArrayList<StructureProteinPrediction>();
197                 int c = 0;
198                 for (Row r : rows) {
199                         String prot = r.getString("Protein");
200                         if (prot.matches("(.*)" + queryProtein + "(.*)")) {
201                         //      System.out.println(prot);
202                                 StructureProteinPrediction structure = new StructureProteinPrediction(prot, r.getString("JobID"), r.getMap("Predictions", String.class, String.class));         
203                                 res.add(structure);
204                                 ++c;
205                         }
206                 }
207                 final long endTime = System.currentTimeMillis();
208                 System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
209                 return res;
210         }
211         
212         /*
213          * getting protein sequences by counter
214          */
215         public List<Pair<String, Integer>>  ReadProteinDataByCounter(int counter) {
216                 final long startTime = System.currentTimeMillis();
217                 String com = "SELECT DISTINCT Protein FROM ProteinKeyspace.ProteinRow;";
218                 System.out.println("Command: " + com);
219                 ResultSet results = session.execute(com);
220                 if (results.isExhausted())
221                         return null;
222                 final long queryTime = System.currentTimeMillis();
223                 List<Row> rows = results.all();
224                 System.out.println ("Query time is " + (queryTime - startTime) + " msec");   
225                 System.out.println (" rows analysed,  " + rows.size());
226                 List<Pair<String, Integer>>  res = new ArrayList<Pair<String, Integer>>();
227                 int c = 0;
228                 for (Row r : rows) {
229                         String prot = r.getString("Protein");
230                         
231                 }
232                 final long endTime = System.currentTimeMillis();
233                 System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
234                 return res;
235         }
236         
237         /*
238          * getting earlest date of jobs from the db
239          */
240         public long getEarliestDateInDB() {
241                 final long startTime = System.currentTimeMillis();
242                 String com = "SELECT jobtime,JobID FROM ProteinKeyspace.ProteinData;";
243                 System.out.println("Command: " + com);
244                 ResultSet results = session.execute(com);
245                 final long queryTime = System.currentTimeMillis();
246                 System.out.println ("Query time is  " + (queryTime - startTime) + " msec");
247
248                 Calendar cal = Calendar.getInstance();
249                 long res = cal.getTimeInMillis();
250                 int c = 0;
251                 while (!results.isExhausted()) {
252                         Row r = results.one();
253                         long d1 = r.getLong("jobtime");
254                         if (res > d1) {
255                                 res = d1;
256                         }
257                         ++c;
258                 }
259                 final long endTime = System.currentTimeMillis();
260                 System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
261                 return res;
262         }
263         
264 }