Fix bug with wrong source file
[proteocache.git] / datadb / compbio / cassandra / CassandraCreate.java
1 package compbio.cassandra;
2
3 import java.util.Arrays;
4 import java.util.List;
5
6 import me.prettyprint.cassandra.serializers.LongSerializer;
7 import me.prettyprint.cassandra.serializers.StringSerializer;
8 import me.prettyprint.cassandra.service.ThriftKsDef;
9 import me.prettyprint.hector.api.Cluster;
10 import me.prettyprint.hector.api.Keyspace;
11 import me.prettyprint.hector.api.beans.ColumnSlice;
12 import me.prettyprint.hector.api.ddl.ColumnFamilyDefinition;
13 import me.prettyprint.hector.api.ddl.ComparatorType;
14 import me.prettyprint.hector.api.ddl.KeyspaceDefinition;
15 import me.prettyprint.hector.api.factory.HFactory;
16 import me.prettyprint.hector.api.mutation.Mutator;
17 import me.prettyprint.hector.api.query.QueryResult;
18 import me.prettyprint.hector.api.query.SliceQuery;
19
20 public class CassandraCreate {
21         private static Keyspace ksp;
22         private static Cluster cluster;
23         private static Mutator<Long> mutatorLong;
24         private static Mutator<String> mutatorString;
25         private static Mutator<String> mutatorLog;
26         StringSerializer ss = StringSerializer.get();
27         LongSerializer ls = LongSerializer.get();
28
29         /*
30          * connect to the cluster and look weather the dababase has any data inside
31          */
32         public void Connection() {
33                 cluster = HFactory.getOrCreateCluster("Protein Cluster", "127.0.0.1:9160");
34                 KeyspaceDefinition keyspaceDef = cluster.describeKeyspace("ProteinKeyspace");
35                 /*
36                  * If keyspace does not exist, the CFs don't exist either. => create
37                  * them.
38                  */
39                 if (keyspaceDef == null) { // create column family
40                         System.out.println("ProteinKeyspace has been null");
41                         ColumnFamilyDefinition cfProtein = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinRow",
42                                         ComparatorType.ASCIITYPE);
43                         ColumnFamilyDefinition cfLog = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinLog", ComparatorType.ASCIITYPE);
44                         ColumnFamilyDefinition cfData = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinData",
45                                         ComparatorType.ASCIITYPE);
46
47                         KeyspaceDefinition newKeyspace = HFactory.createKeyspaceDefinition("ProteinKeyspace", ThriftKsDef.DEF_STRATEGY_CLASS, 1,
48                                         Arrays.asList(cfProtein, cfLog, cfData));
49                         /*
50                          * Add the schema to the cluster. "true" as the second param means
51                          * that Hector will be blocked until all nodes see the change.
52                          */
53                         cluster.addKeyspace(newKeyspace, true);
54                         cluster.addColumnFamily(cfProtein, true);
55                         cluster.addColumnFamily(cfLog, true);
56                         cluster.addColumnFamily(cfData, true);
57                 } else {
58                         System.out.println("Data loaded");
59                 }
60                 ksp = HFactory.createKeyspace("ProteinKeyspace", cluster);
61                 System.out.println("Cassandra has been connected");
62         }
63
64         /*
65          * parsing data source and filling the database
66          */
67         public void Parsing(String source) {
68                 /*
69                  * CF ProteinRow store protein and prediction
70                  */
71                 mutatorString = HFactory.createMutator(ksp, ss);
72
73                 /*
74                  * ProteinLog stores logging info: IP, job id, start date and end date
75                  */
76                 mutatorLog = HFactory.createMutator(ksp, ss);
77
78                 /*
79                  * CF ProteinData store id and protein per data
80                  */
81                 mutatorLong = HFactory.createMutator(ksp, ls);
82
83                 if (source.equals("http")) {
84                         // get data from real Jpred production server
85                         System.out.println("Parsing web data source......");
86                         String datasrc = "http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat";
87                         String prefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
88                         JpredParserHTTP parser = new JpredParserHTTP(prefix);
89                         parser.Parsing(datasrc, 4);
90                         flushData();
91                 } else if (source.equals("file")) {
92                         // get irtifical data generated for the DB stress tests
93                         System.out.println("Parsing local file data source......");
94                         String datasrc = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/data.dat";
95                         String prefix = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/Jpreddata";
96                         JpredParserLocalFile parser = new JpredParserLocalFile(prefix);
97                         parser.Parsing(datasrc, 365);
98                         flushData();
99                 } else {
100                         System.out.println("Unknown data source......");
101                 }
102         }
103
104         public void flushData() {
105                 mutatorString.execute();
106                 mutatorLong.execute();
107                 mutatorLog.execute();
108                 // System.out.println("Flush new data...");
109         }
110
111         public void Closing() {
112                 cluster.getConnectionManager().shutdown();
113                 System.out.println("Cassandra has been shut down");
114         }
115
116         /*
117          * check whether the job id exists in the DB
118          */
119         public boolean CheckID(String jobid) {
120                 SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(ksp, ss, ss, ss);
121                 sliceQuery.setColumnFamily("ProteinLog").setKey(jobid).setRange("", "", false, 100);
122                 QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
123                 if (result.get().getColumns().size() > 0) {
124                         return true;
125                 }
126                 return false;
127         }
128
129         /*
130          * prepare data for insertion into the db
131          */
132         public void InsertData(long dataWork, String dataBegin, String dataEnd, String ip, String id, String statusEx, String statusFinal,
133                         String protein, List<FastaSequence> jnetpred) {
134                 mutatorLog.addInsertion(id, "ProteinLog", HFactory.createColumn("ip", ip, ss, ss))
135                                 .addInsertion(id, "ProteinLog", HFactory.createColumn("DataBegin", dataBegin, ss, ss))
136                                 .addInsertion(id, "ProteinLog", HFactory.createColumn("DataEnd", dataEnd, ss, ss))
137                                 .addInsertion(id, "ProteinLog", HFactory.createColumn("Status ex", statusEx, ss, ss))
138                                 .addInsertion(id, "ProteinLog", HFactory.createColumn("Status final", statusFinal, ss, ss))
139                                 .addInsertion(id, "ProteinLog", HFactory.createColumn("Protein", protein, ss, ss));
140                 for (int i = 0; i < jnetpred.size(); i++) {
141                         String namepred = jnetpred.get(i).getId();
142                         String pred = jnetpred.get(i).getSequence().replaceAll("\n", "");
143                         mutatorString.addInsertion(protein, "ProteinRow", HFactory.createColumn(id + ";" + namepred, pred, ss, ss));
144                 }
145                 mutatorLong.addInsertion(dataWork, "ProteinData", HFactory.createColumn(id, protein, ss, ss));
146         }
147
148         public Keyspace GetKeyspace() {
149                 return ksp;
150         }
151 }