First working code
[proteocache.git] / datadb / compbio / cassandra / CassandraCreate.java
1 package compbio.cassandra;
2
3 import java.util.Arrays;
4 import java.util.List;
5
6 import me.prettyprint.cassandra.serializers.LongSerializer;
7 import me.prettyprint.cassandra.serializers.StringSerializer;
8 import me.prettyprint.cassandra.service.ThriftKsDef;
9 import me.prettyprint.hector.api.Cluster;
10 import me.prettyprint.hector.api.Keyspace;
11 import me.prettyprint.hector.api.beans.ColumnSlice;
12 import me.prettyprint.hector.api.ddl.ColumnFamilyDefinition;
13 import me.prettyprint.hector.api.ddl.ComparatorType;
14 import me.prettyprint.hector.api.ddl.KeyspaceDefinition;
15 import me.prettyprint.hector.api.factory.HFactory;
16 import me.prettyprint.hector.api.mutation.Mutator;
17 import me.prettyprint.hector.api.query.QueryResult;
18 import me.prettyprint.hector.api.query.SliceQuery;
19
20 public class CassandraCreate {
21         private static Keyspace ksp;
22         private static Cluster cluster;
23         private static Mutator<Long> mutatorLong;
24         private static Mutator<String> mutatorString;
25         private static Mutator<String> mutatorLog;
26         StringSerializer ss = StringSerializer.get();
27         LongSerializer ls = LongSerializer.get();
28
29         // connect to the cluster
30         public void Connection() {
31                 cluster = HFactory.getOrCreateCluster("Protein Cluster", "127.0.0.1:9160");
32                 KeyspaceDefinition keyspaceDef = cluster.describeKeyspace("ProteinKeyspace");
33                 /*
34                  * If keyspace does not exist, the CFs don't exist either. => create
35                  * them.
36                  */
37                 if (keyspaceDef == null) { // create column family
38                         System.out.println("ProteinKeyspace has been null");
39                         ColumnFamilyDefinition cfProtein = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinRow",
40                                         ComparatorType.ASCIITYPE);
41                         ColumnFamilyDefinition cfLog = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinLog", ComparatorType.ASCIITYPE);
42                         ColumnFamilyDefinition cfData = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinData",
43                                         ComparatorType.ASCIITYPE);
44
45                         KeyspaceDefinition newKeyspace = HFactory.createKeyspaceDefinition("ProteinKeyspace", ThriftKsDef.DEF_STRATEGY_CLASS, 1,
46                                         Arrays.asList(cfProtein, cfLog, cfData));
47                         /*
48                          * Add the schema to the cluster. "true" as the second param means
49                          * that Hector will be blocked until all nodes see the change.
50                          */
51                         cluster.addKeyspace(newKeyspace, true);
52                         cluster.addColumnFamily(cfProtein, true);
53                         cluster.addColumnFamily(cfLog, true);
54                         cluster.addColumnFamily(cfData, true);
55                 } else {
56                         System.out.println("Data loaded");
57                 }
58                 ksp = HFactory.createKeyspace("ProteinKeyspace", cluster);
59                 System.out.println("Cassandra has been connected");
60         }
61
62         /*
63          * parsing data from
64          * http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat
65          */
66         public void Parsing() {
67                 /* CF ProteinRow store protein and prediction */
68                 mutatorString = HFactory.createMutator(ksp, ss);
69
70                 /*
71                  * ProteinLog stores logging info: IP, job id, start date and end date
72                  */
73                 mutatorLog = HFactory.createMutator(ksp, ss);
74
75                 /* CF ProteinData store id and protein per data */
76                 mutatorLong = HFactory.createMutator(ksp, ls);
77
78                 System.out.println("Parsing......");
79                 String in = "http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat";
80                 DataParsing datParsing = new DataParsing();
81                 datParsing.Parsing(in, 4);
82                 flushData();
83         }
84
85         public void flushData() {
86                 mutatorString.execute();
87                 mutatorLong.execute();
88                 mutatorLog.execute();
89                 //System.out.println("Flush new data...");
90         }
91
92         public void Closing() {
93                 cluster.getConnectionManager().shutdown();
94                 System.out.println("Cassandra has been shut down");
95         }
96
97         // check whether the job id exists in the DB
98         public boolean CheckID(String jobid) {
99                 SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(ksp, ss, ss, ss);
100                 sliceQuery.setColumnFamily("ProteinLog").setKey(jobid).setRange("", "", false, 100);
101                 QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
102                 if (result.get().getColumns().size() > 0) {
103                         return true;
104                 }
105                 return false;
106         }
107
108         public void InsertData(long dataWork, String dataBegin, String dataEnd, String ip, String id, String statusEx, String statusFinal,
109                         String protein, List<FastaSequence> jnetpred) {
110                 mutatorLog.addInsertion(id, "ProteinLog", HFactory.createColumn("ip", ip, ss, ss))
111                                 .addInsertion(id, "ProteinLog", HFactory.createColumn("DataBegin", dataBegin, ss, ss))
112                                 .addInsertion(id, "ProteinLog", HFactory.createColumn("DataEnd", dataEnd, ss, ss))
113                                 .addInsertion(id, "ProteinLog", HFactory.createColumn("Status ex", statusEx, ss, ss))
114                                 .addInsertion(id, "ProteinLog", HFactory.createColumn("Status final", statusFinal, ss, ss))
115                                 .addInsertion(id, "ProteinLog", HFactory.createColumn("Protein", protein, ss, ss));
116                 for (int i = 0; i < jnetpred.size(); i++) {
117                         String namepred = jnetpred.get(i).getId();
118                         String pred = jnetpred.get(i).getSequence().replaceAll("\n", "");
119                         mutatorString.addInsertion(protein, "ProteinRow", HFactory.createColumn(id + ";" + namepred, pred, ss, ss));
120                 }
121                 mutatorLong.addInsertion(dataWork, "ProteinData", HFactory.createColumn(id, protein, ss, ss));
122         }
123
124         public Keyspace GetKeyspace() {
125                 return ksp;
126         }
127 }