1 package compbio.cassandra;
3 import java.util.Arrays;
6 import me.prettyprint.cassandra.serializers.LongSerializer;
7 import me.prettyprint.cassandra.serializers.StringSerializer;
8 import me.prettyprint.cassandra.service.ThriftKsDef;
9 import me.prettyprint.hector.api.Cluster;
10 import me.prettyprint.hector.api.Keyspace;
11 import me.prettyprint.hector.api.beans.ColumnSlice;
12 import me.prettyprint.hector.api.ddl.ColumnFamilyDefinition;
13 import me.prettyprint.hector.api.ddl.ComparatorType;
14 import me.prettyprint.hector.api.ddl.KeyspaceDefinition;
15 import me.prettyprint.hector.api.factory.HFactory;
16 import me.prettyprint.hector.api.mutation.Mutator;
17 import me.prettyprint.hector.api.query.QueryResult;
18 import me.prettyprint.hector.api.query.SliceQuery;
20 public class CassandraCreate {
21 private static Keyspace ksp;
22 private static Cluster cluster;
23 private static Mutator<Long> mutatorLong;
24 private static Mutator<String> mutatorString;
25 private static Mutator<String> mutatorLog;
26 StringSerializer ss = StringSerializer.get();
27 LongSerializer ls = LongSerializer.get();
30 * connect to the cluster and check whether the database has any data inside
32 public void Connection() {
33 cluster = HFactory.getOrCreateCluster("Protein Cluster", "127.0.0.1:9160");
34 KeyspaceDefinition keyspaceDef = cluster.describeKeyspace("ProteinKeyspace");
36 * If keyspace does not exist, the CFs don't exist either. => create
39 if (keyspaceDef == null) { // create column family
40 System.out.println("ProteinKeyspace has been null");
41 ColumnFamilyDefinition cfProtein = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinRow",
42 ComparatorType.ASCIITYPE);
43 ColumnFamilyDefinition cfLog = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinLog", ComparatorType.ASCIITYPE);
44 ColumnFamilyDefinition cfData = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinData",
45 ComparatorType.ASCIITYPE);
47 KeyspaceDefinition newKeyspace = HFactory.createKeyspaceDefinition("ProteinKeyspace", ThriftKsDef.DEF_STRATEGY_CLASS, 1,
48 Arrays.asList(cfProtein, cfLog, cfData));
50 * Add the schema to the cluster. "true" as the second param means
51 * that Hector will be blocked until all nodes see the change.
53 cluster.addKeyspace(newKeyspace, true);
54 cluster.addColumnFamily(cfProtein, true);
55 cluster.addColumnFamily(cfLog, true);
56 cluster.addColumnFamily(cfData, true);
58 System.out.println("Data loaded");
60 ksp = HFactory.createKeyspace("ProteinKeyspace", cluster);
61 System.out.println("Cassandra has been connected");
65 * parsing data source and filling the database
67 public void Parsing(String source) {
69 * CF ProteinRow store protein and prediction
71 mutatorString = HFactory.createMutator(ksp, ss);
74 * ProteinLog stores logging info: IP, job id, start date and end date
76 mutatorLog = HFactory.createMutator(ksp, ss);
79 * CF ProteinData store id and protein per data
81 mutatorLong = HFactory.createMutator(ksp, ls);
83 if (source.equals("http")) {
84 // get data from real Jpred production server
85 System.out.println("Parsing web data source......");
86 String datasrc = "http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat";
87 String prefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
88 JpredParserHTTP parser = new JpredParserHTTP(prefix);
90 parser.Parsing(datasrc, 4);
92 } else if (source.equals("file")) {
93 // get irtifical data generated for the DB stress tests
94 System.out.println("Parsing local file data source......");
95 String datasrc = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/data.dat";
96 String prefix = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/Jpreddata";
97 JpredParserLocalFile parser = new JpredParserLocalFile(prefix);
98 parser.Parsing(datasrc, 365);
101 System.out.println("Unknown data source......");
105 public void flushData() {
106 mutatorString.execute();
107 mutatorLong.execute();
108 mutatorLog.execute();
109 // System.out.println("Flush new data...");
112 public void Closing() {
113 cluster.getConnectionManager().shutdown();
114 System.out.println("Cassandra has been shut down");
118 * check whether the job id exists in the DB
120 public boolean CheckID(String jobid) {
121 SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(ksp, ss, ss, ss);
122 sliceQuery.setColumnFamily("ProteinLog").setKey(jobid).setRange("", "", false, 100);
123 QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
124 if (result.get().getColumns().size() > 0) {
131 * prepare data for insertion into the db
133 public void InsertData(long dataWork, String dataBegin, String dataEnd, String ip, String id, String statusEx, String statusFinal,
134 String protein, List<FastaSequence> jnetpred) {
135 mutatorLog.addInsertion(id, "ProteinLog", HFactory.createColumn("ip", ip, ss, ss))
136 .addInsertion(id, "ProteinLog", HFactory.createColumn("DataBegin", dataBegin, ss, ss))
137 .addInsertion(id, "ProteinLog", HFactory.createColumn("DataEnd", dataEnd, ss, ss))
138 .addInsertion(id, "ProteinLog", HFactory.createColumn("Status ex", statusEx, ss, ss))
139 .addInsertion(id, "ProteinLog", HFactory.createColumn("Status final", statusFinal, ss, ss))
140 .addInsertion(id, "ProteinLog", HFactory.createColumn("Protein", protein, ss, ss));
141 for (int i = 0; i < jnetpred.size(); i++) {
142 String namepred = jnetpred.get(i).getId();
143 String pred = jnetpred.get(i).getSequence().replaceAll("\n", "");
144 mutatorString.addInsertion(protein, "ProteinRow", HFactory.createColumn(id + ";" + namepred, pred, ss, ss));
146 mutatorLong.addInsertion(dataWork, "ProteinData", HFactory.createColumn(id, protein, ss, ss));
149 public Keyspace GetKeyspace() {