1 package compbio.cassandra;
3 import java.util.Arrays;
6 import me.prettyprint.cassandra.serializers.LongSerializer;
7 import me.prettyprint.cassandra.serializers.StringSerializer;
8 import me.prettyprint.cassandra.service.ThriftKsDef;
9 import me.prettyprint.hector.api.Cluster;
10 import me.prettyprint.hector.api.Keyspace;
11 import me.prettyprint.hector.api.beans.ColumnSlice;
12 import me.prettyprint.hector.api.ddl.ColumnFamilyDefinition;
13 import me.prettyprint.hector.api.ddl.ComparatorType;
14 import me.prettyprint.hector.api.ddl.KeyspaceDefinition;
15 import me.prettyprint.hector.api.factory.HFactory;
16 import me.prettyprint.hector.api.mutation.Mutator;
17 import me.prettyprint.hector.api.query.QueryResult;
18 import me.prettyprint.hector.api.query.SliceQuery;
20 public class CassandraCreate {
// Shared Hector handles. They are static, so every CassandraCreate instance reads and
// writes the same values: Connection() assigns cluster and ksp, Parsing() assigns the
// three mutators.
21 private static Keyspace ksp;
22 private static Cluster cluster;
// Long-keyed mutator; InsertData() uses it for the "ProteinData" column family.
23 private static Mutator<Long> mutatorLong;
// String-keyed mutator; InsertData() uses it for the "ProteinRow" column family.
24 private static Mutator<String> mutatorString;
// String-keyed mutator; InsertData() uses it for the "ProteinLog" column family.
25 private static Mutator<String> mutatorLog;
// Serializers handed to Hector when creating mutators, columns and queries.
// Unlike the fields above these are per-instance and package-visible.
26 StringSerializer ss = StringSerializer.get();
27 LongSerializer ls = LongSerializer.get();
29 // connect to the cluster
// Connects to a Cassandra node and, when "ProteinKeyspace" is not yet defined, creates it
// together with the column families ProteinRow, ProteinLog and ProteinData.
// Side effects: assigns the static fields cluster and ksp and prints progress to stdout.
30 public void Connection() {
// Cluster handle named "Protein Cluster" for the node at 127.0.0.1:9160.
31 cluster = HFactory.getOrCreateCluster("Protein Cluster", "127.0.0.1:9160");
// A null result is treated below as "the keyspace has not been created yet".
32 KeyspaceDefinition keyspaceDef = cluster.describeKeyspace("ProteinKeyspace");
34 * If keyspace does not exist, the CFs don't exist either. => create
37 if (keyspaceDef == null) { // create column family
38 System.out.println("ProteinKeyspace has been null");
// All three column families are declared with the ASCII comparator.
39 ColumnFamilyDefinition cfProtein = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinRow",
40 ComparatorType.ASCIITYPE);
41 ColumnFamilyDefinition cfLog = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinLog", ComparatorType.ASCIITYPE);
42 ColumnFamilyDefinition cfData = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinData",
43 ComparatorType.ASCIITYPE);
// Keyspace definition bundling the three column families; the third argument (1) is the
// replication factor in Hector's createKeyspaceDefinition signature.
45 KeyspaceDefinition newKeyspace = HFactory.createKeyspaceDefinition("ProteinKeyspace", ThriftKsDef.DEF_STRATEGY_CLASS, 1,
46 Arrays.asList(cfProtein, cfLog, cfData));
48 * Add the schema to the cluster. "true" as the second param means
49 * that Hector will be blocked until all nodes see the change.
51 cluster.addKeyspace(newKeyspace, true);
// NOTE(review): the same three definitions were already handed to addKeyspace() through
// newKeyspace above - confirm that these explicit additions are needed and that the
// server accepts them for column families that may already exist.
52 cluster.addColumnFamily(cfProtein, true);
53 cluster.addColumnFamily(cfLog, true);
54 cluster.addColumnFamily(cfData, true);
56 System.out.println("Data loaded");
// Keyspace handle that Parsing() and CheckID() pass to HFactory for mutators and queries.
58 ksp = HFactory.createKeyspace("ProteinKeyspace", cluster);
59 System.out.println("Cassandra has been connected");
64 * http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat
66 public void Parsing() {
67 /* CF ProteinRow store protein and prediction */
68 mutatorString = HFactory.createMutator(ksp, ss);
71 * ProteinLog stores logging info: IP, job id, start date and end date
73 mutatorLog = HFactory.createMutator(ksp, ss);
75 /* CF ProteinData store id and protein per data */
76 mutatorLong = HFactory.createMutator(ksp, ls);
78 System.out.println("Parsing......");
79 String in = "http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat";
80 DataParsing datParsing = new DataParsing();
81 datParsing.Parsing(in, 4);
85 public void flushData() {
86 mutatorString.execute();
87 mutatorLong.execute();
89 //System.out.println("Flush new data...");
92 public void Closing() {
93 cluster.getConnectionManager().shutdown();
94 System.out.println("Cassandra has been shut down");
97 // check whether the job id exists in the DB
// Queries the "ProteinLog" column family for the row whose key is jobid.
// Only the query and the opening of the size test are visible in this excerpt; the
// statements inside and after the `if` are not shown, so the exact return values are
// not documented here.
98 public boolean CheckID(String jobid) {
// String row key, string column names, string column values.
99 SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(ksp, ss, ss, ss);
// Empty start and end bounds leave the column range open; not reversed; at most 100 columns.
100 sliceQuery.setColumnFamily("ProteinLog").setKey(jobid).setRange("", "", false, 100);
101 QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
// Branch taken when the slice returned at least one column for this job id.
102 if (result.get().getColumns().size() > 0) {
108 public void InsertData(long dataWork, String dataBegin, String dataEnd, String ip, String id, String statusEx, String statusFinal,
109 String protein, List<FastaSequence> jnetpred) {
110 mutatorLog.addInsertion(id, "ProteinLog", HFactory.createColumn("ip", ip, ss, ss))
111 .addInsertion(id, "ProteinLog", HFactory.createColumn("DataBegin", dataBegin, ss, ss))
112 .addInsertion(id, "ProteinLog", HFactory.createColumn("DataEnd", dataEnd, ss, ss))
113 .addInsertion(id, "ProteinLog", HFactory.createColumn("Status ex", statusEx, ss, ss))
114 .addInsertion(id, "ProteinLog", HFactory.createColumn("Status final", statusFinal, ss, ss))
115 .addInsertion(id, "ProteinLog", HFactory.createColumn("Protein", protein, ss, ss));
116 for (int i = 0; i < jnetpred.size(); i++) {
117 String namepred = jnetpred.get(i).getId();
118 String pred = jnetpred.get(i).getSequence().replaceAll("\n", "");
119 mutatorString.addInsertion(protein, "ProteinRow", HFactory.createColumn(id + ";" + namepred, pred, ss, ss));
121 mutatorLong.addInsertion(dataWork, "ProteinData", HFactory.createColumn(id, protein, ss, ss));
124 public Keyspace GetKeyspace() {