Restore one servlet
[proteocache.git] / datadb / compbio / cassandra / CassandraNativeConnector.java
1 package compbio.cassandra;
2
3 import java.io.IOException;
4 import java.util.Calendar;
5 import java.util.HashMap;
6 import java.util.List;
7 import java.util.ArrayList;
8
9 import com.datastax.driver.core.Cluster;
10 import com.datastax.driver.core.Host;
11 import com.datastax.driver.core.Metadata;
12 import com.datastax.driver.core.Row;
13 import com.datastax.driver.core.Session;
14 import com.datastax.driver.core.ResultSet;
15
16 public class CassandraNativeConnector {
17         private static Cluster cluster;
18         private static Session session;
19         /*
20          * connect to the cluster and look weather the dababase has any data inside
21          */
22         public void Connect() {
23                 // local cassandra cluster
24                 cluster = Cluster.builder().addContactPoint("localhost").build();
25                 // distributed cassandra cluster
26                 /* cluster = Cluster.builder().addContactPoint("10.0.115.190").build(); */
27                 Metadata metadata = cluster.getMetadata();
28                 System.out.printf("Connected to cluster: %s\n", metadata.getClusterName());
29                 for (Host host : metadata.getAllHosts()) {
30                         System.out.printf("Datatacenter: %s; Host: %s; Rack: %s\n", host.getDatacenter(), host.getAddress(), host.getRack());
31                 }
32
33                 session = cluster.connect();
34                 session.execute("CREATE KEYSPACE IF NOT EXISTS ProteinKeyspace WITH replication = {'class':'SimpleStrategy', 'replication_factor':3};");
35                 session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinRow (Protein ascii, JobID ascii, Predictions map<ascii,ascii>, PRIMARY KEY(JobID));");
36                 session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinLog "
37                                 + "(JobID ascii, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, ExecutionStatus ascii, Protein ascii, PRIMARY KEY(JobID));");
38                 session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinData (jobtime bigint, JobID ascii, Protein ascii, PRIMARY KEY(JobID));");
39
40                 session.execute("CREATE INDEX IF NOT EXISTS ProteinSeq ON ProteinKeyspace.ProteinRow (protein);");
41                 session.execute("CREATE INDEX IF NOT EXISTS JobDateStamp ON ProteinKeyspace.ProteinData (jobtime);");
42
43                 System.out.println("Cassandra connected");
44         }
45
46         /*
47          * parsing data source and filling the database
48          */
49         public void Parsing() throws IOException {
50                 if (true) {
51                         // if (source.equals("http")) {
52                         // get data from real Jpred production server
53                         System.out.println("Parsing web data source......");
54                         String datasrc = "http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat";
55                         String prefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
56                         JpredParserHTTP parser = new JpredParserHTTP(prefix);
57                         parser.Parsing(datasrc, 4);
58                 }
59                 if (false) {
60                         // if (source.equals("file")) {
61                         // get irtifical data generated for the DB stress tests
62                         System.out.println("Parsing local file data source......");
63                         String datasrc = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/data.dat";
64                         String prefix = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/Jpreddata";
65                         JpredParserLocalFile parser = new JpredParserLocalFile(prefix);
66                         parser.Parsing(datasrc, 190);
67                 }
68         }
69
70         public void Closing() {
71                 session.shutdown();
72                 cluster.shutdown();
73                 System.out.println("Cassandra has been shut down");
74         }
75
76         /*
77          * inserting data into the db
78          */
79         public void InsertData(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx, String statusFinal,
80                         String protein, List<FastaSequence> predictions) {
81
82                 String check1 = "SELECT * FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';";
83                 ResultSet results1 = session.execute(check1);
84                 if (results1.isExhausted()) {
85                         String com1 = "INSERT INTO ProteinKeyspace.ProteinLog "
86                                         + "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)" + " VALUES ('" + jobid + "','" + ip + "','"
87                                         + startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx + "','" + protein + "');";
88                         session.execute(com1);
89
90                         String com2 = "INSERT INTO ProteinKeyspace.ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid
91                                         + "','" + protein + "');";
92                         session.execute(com2);
93
94                         String allpredictions = "";
95                         for (FastaSequence pred : predictions) {
96                                 String predictionname = pred.getId();
97                                 String prediction = pred.getSequence().replaceAll("\n", "");
98                                 allpredictions += "'" + predictionname + "':'" + prediction + "',";
99                         }
100                         String final_prediction = "";
101                         if (null != allpredictions) {
102                                 final_prediction = allpredictions.substring(0, allpredictions.length() - 1);
103                         }
104
105                         String check2 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "';";
106                         ResultSet results2 = session.execute(check2);
107                         if (results2.isExhausted()) {
108                                 String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('" 
109                         + protein + "','" + jobid + "',{" + final_prediction + "});";
110                                 session.execute(com3);
111                         }
112                 }
113         }
114
115         /*
116          * getting data from the db
117          */
118         public List<Pair<String, String>> ReadProteinDataTable() {
119                 final long startTime = System.currentTimeMillis();
120                 String com = "SELECT DataBegin,DataEnd FROM ProteinKeyspace.ProteinLog;";
121                 System.out.println("Command: " + com);
122                 ResultSet results = session.execute(com);
123                 final long queryTime = System.currentTimeMillis();
124                 List<Row> rows = results.all();
125                 System.out.println ("Query time is " + (queryTime - startTime) + " msec");
126
127                 List<Pair<String, String>> res = new ArrayList<Pair<String, String>>();
128                 int c = 0;
129                 for (Row r : rows) {
130                         Pair<String, String> pair = new Pair<String, String>(r.getString("DataBegin"),r.getString("DataEnd"));
131                         res.add(pair);
132                         ++c;
133                 }
134                 final long endTime = System.currentTimeMillis();
135                 System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
136                 return res;
137         }
138
139         /*
140          * getting earlest date of jobs from the db
141          */
142         public long getEarliestDateInDB() {
143                 final long startTime = System.currentTimeMillis();
144                 String com = "SELECT jobtime FROM ProteinKeyspace.ProteinData;";
145                 System.out.println("Command: " + com);
146                 ResultSet results = session.execute(com);
147                 final long queryTime = System.currentTimeMillis();
148                 System.out.println ("Query time is " + (queryTime - startTime) + " msec");
149
150                 Calendar cal = Calendar.getInstance();
151                 long res = cal.getTimeInMillis();
152                 int c = 0;
153                 while (!results.isExhausted()) {
154                         Row r = results.one();
155                         long d1 = r.getLong("jobtime");
156                         if (res > d1) {
157                                 res = d1;
158                         }
159                         ++c;
160                 }
161                 final long endTime = System.currentTimeMillis();
162                 System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
163                 return res;
164         }
165         
166 }