1 package compbio.cassandra;
3 import java.util.Arrays;
6 import me.prettyprint.cassandra.serializers.LongSerializer;
7 import me.prettyprint.cassandra.serializers.StringSerializer;
8 import me.prettyprint.cassandra.service.ThriftKsDef;
9 import me.prettyprint.hector.api.Cluster;
10 import me.prettyprint.hector.api.Keyspace;
11 import me.prettyprint.hector.api.beans.ColumnSlice;
12 import me.prettyprint.hector.api.ddl.ColumnFamilyDefinition;
13 import me.prettyprint.hector.api.ddl.ComparatorType;
14 import me.prettyprint.hector.api.ddl.KeyspaceDefinition;
15 import me.prettyprint.hector.api.factory.HFactory;
16 import me.prettyprint.hector.api.mutation.Mutator;
17 import me.prettyprint.hector.api.query.QueryResult;
18 import me.prettyprint.hector.api.query.SliceQuery;
20 public class CassandraCreate {
// Keyspace handle; assigned in Connection() and used to build the mutators and slice queries.
21 private static Keyspace ksp;
// Cluster handle; obtained in Connection() and shut down in Closing().
22 private static Cluster cluster;
// Long-keyed mutator; created in Parsing() and used for "ProteinData" insertions.
23 private static Mutator<Long> mutatorLong;
// String-keyed mutator; created in Parsing() and used for "ProteinRow" insertions.
24 private static Mutator<String> mutatorString;
// String-keyed mutator; created in Parsing() and used for "ProteinLog" insertions.
25 private static Mutator<String> mutatorLog;
// Serializer passed wherever String keys, column names or values are read or written.
26 StringSerializer ss = StringSerializer.get();
// Serializer for the Long row keys of mutatorLong.
27 LongSerializer ls = LongSerializer.get();
30 * connect to the cluster and look weather the dababase has any data inside
32 public void Connection() {
33 cluster = HFactory.getOrCreateCluster("Protein Cluster", "127.0.0.1:9160");
34 KeyspaceDefinition keyspaceDef = cluster.describeKeyspace("ProteinKeyspace");
36 * If keyspace does not exist, the CFs don't exist either. => create
39 if (keyspaceDef == null) { // create column family
40 System.out.println("ProteinKeyspace has been null");
41 ColumnFamilyDefinition cfProtein = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinRow",
42 ComparatorType.ASCIITYPE);
43 ColumnFamilyDefinition cfLog = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinLog", ComparatorType.ASCIITYPE);
44 ColumnFamilyDefinition cfData = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinData",
45 ComparatorType.ASCIITYPE);
47 KeyspaceDefinition newKeyspace = HFactory.createKeyspaceDefinition("ProteinKeyspace", ThriftKsDef.DEF_STRATEGY_CLASS, 1,
48 Arrays.asList(cfProtein, cfLog, cfData));
50 * Add the schema to the cluster. "true" as the second param means
51 * that Hector will be blocked until all nodes see the change.
53 cluster.addKeyspace(newKeyspace, true);
54 cluster.addColumnFamily(cfProtein, true);
55 cluster.addColumnFamily(cfLog, true);
56 cluster.addColumnFamily(cfData, true);
58 System.out.println("Data loaded");
60 ksp = HFactory.createKeyspace("ProteinKeyspace", cluster);
61 System.out.println("Cassandra has been connected");
65 * parsing data source and filling the database
67 public void Parsing(String source) {
69 * CF ProteinRow store protein and prediction
71 mutatorString = HFactory.createMutator(ksp, ss);
74 * ProteinLog stores logging info: IP, job id, start date and end date
76 mutatorLog = HFactory.createMutator(ksp, ss);
79 * CF ProteinData store id and protein per data
81 mutatorLong = HFactory.createMutator(ksp, ls);
83 if (source.equals("http")) {
84 // get data from real Jpred production server
85 System.out.println("Parsing web data source......");
86 String datasrc = "http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat";
87 String prefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
88 JpredParserHTTP parser = new JpredParserHTTP(prefix);
89 parser.Parsing(datasrc, 4);
91 } else if (source.equals("file")) {
92 // get irtifical data generated for the DB stress tests
93 System.out.println("Parsing local file data source......");
94 String datasrc = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/data.dat";
95 String prefix = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/Jpreddata";
96 JpredParserLocalFile parser = new JpredParserLocalFile(prefix);
97 parser.Parsing(datasrc, 365);
100 System.out.println("Unknown data source......");
104 public void flushData() {
105 mutatorString.execute();
106 mutatorLong.execute();
107 mutatorLog.execute();
108 // System.out.println("Flush new data...");
111 public void Closing() {
112 cluster.getConnectionManager().shutdown();
113 System.out.println("Cassandra has been shut down");
117 * check whether the job id exists in the DB
119 public boolean CheckID(String jobid) {
120 SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(ksp, ss, ss, ss);
121 sliceQuery.setColumnFamily("ProteinLog").setKey(jobid).setRange("", "", false, 100);
122 QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
123 if (result.get().getColumns().size() > 0) {
130 * prepare data for insertion into the db
132 public void InsertData(long dataWork, String dataBegin, String dataEnd, String ip, String id, String statusEx, String statusFinal,
133 String protein, List<FastaSequence> jnetpred) {
134 mutatorLog.addInsertion(id, "ProteinLog", HFactory.createColumn("ip", ip, ss, ss))
135 .addInsertion(id, "ProteinLog", HFactory.createColumn("DataBegin", dataBegin, ss, ss))
136 .addInsertion(id, "ProteinLog", HFactory.createColumn("DataEnd", dataEnd, ss, ss))
137 .addInsertion(id, "ProteinLog", HFactory.createColumn("Status ex", statusEx, ss, ss))
138 .addInsertion(id, "ProteinLog", HFactory.createColumn("Status final", statusFinal, ss, ss))
139 .addInsertion(id, "ProteinLog", HFactory.createColumn("Protein", protein, ss, ss));
140 for (int i = 0; i < jnetpred.size(); i++) {
141 String namepred = jnetpred.get(i).getId();
142 String pred = jnetpred.get(i).getSequence().replaceAll("\n", "");
143 mutatorString.addInsertion(protein, "ProteinRow", HFactory.createColumn(id + ";" + namepred, pred, ss, ss));
145 mutatorLong.addInsertion(dataWork, "ProteinData", HFactory.createColumn(id, protein, ss, ss));
148 public Keyspace GetKeyspace() {