First working "writing" code for Cassandra 2.0
author Sasha Sherstnev <a.sherstnev@dundee.ac.uk>
Mon, 4 Nov 2013 21:17:39 +0000 (21:17 +0000)
committer Sasha Sherstnev <a.sherstnev@dundee.ac.uk>
Mon, 4 Nov 2013 21:17:39 +0000 (21:17 +0000)
datadb/compbio/cassandra/CassandraNativeConnector.java
datadb/compbio/cassandra/JpredParserHTTP.java
datadb/compbio/cassandra/JpredParserLocalFile.java

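diff --git a/datadb/compbio/cassandra/CassandraNativeConnector.java b/datadb/compbio/cassandra/CassandraNativeConnector.java
index 49224db..1fb01fc 100644
--- a/datadb/compbio/cassandra/CassandraNativeConnector.java
+++ b/datadb/compbio/cassandra/CassandraNativeConnector.java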
@@ -36,10 +36,13 @@ public class CassandraNativeConnector {
 
                session = cluster.connect();
                session.execute("CREATE KEYSPACE IF NOT EXISTS ProteinKeyspace WITH replication = {'class':'SimpleStrategy', 'replication_factor':3};");
-               session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinRow (Protein ascii PRIMARY KEY, Predictions map<ascii,ascii>);");
+               session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinRow (Protein ascii, JobID ascii, Predictions map<ascii,ascii>, PRIMARY KEY(JobID));");
                session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinLog "
-                               + "(JobID ascii PRIMARY KEY, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, ExecutionStatus ascii, Protein ascii);");
-               session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinData (jobtime bigint PRIMARY KEY, JobID ascii, Protein ascii);");
+                               + "(JobID ascii, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, ExecutionStatus ascii, Protein ascii, PRIMARY KEY(JobID));");
+               session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinData (jobtime bigint, JobID ascii, Protein ascii, PRIMARY KEY(JobID));");
+
+               session.execute("CREATE INDEX ProteinSeq ON ProteinKeyspace.ProteinRow (protein);");
+               session.execute("CREATE INDEX JobDateStamp ON ProteinKeyspace.ProteinData (jobtime);");
 
                System.out.println("Cassandra connected");
        }
@@ -74,38 +77,25 @@ public class CassandraNativeConnector {
        }
 
        /*
-        * check whether the job id exists in the DB
-        */
-       public boolean CheckID(String jobid) {
-               String com = "SELECT * FROM ProteinKeyspace.ProteinData WHERE jobid = '" + jobid + "';";
-               System.out.println(com);
-               ResultSet results = session.execute(com);
-               if (null != results) {
-                       return true;
-               }
-               return false;
-       }
-
-       /*
         * prepare data for insertion into the db
         */
        public void InsertData(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx, String statusFinal,
                        String protein, List<FastaSequence> predictions) {
 
-               String check1 = "SELECT count(*) FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';";
+               String check1 = "SELECT * FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';";
                //System.out.println(check1);
                ResultSet results1 = session.execute(check1);
-               if (!results1.isExhausted()) {
+               if (results1.isExhausted()) {
                        String com1 = "INSERT INTO ProteinKeyspace.ProteinLog "
                                        + "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)" + " VALUES ('" + jobid + "','" + ip + "','"
                                        + startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx + "','" + protein + "');";
-                       // System.out.println(com1);
+                       //System.out.println(com1);
                        session.execute(com1);
 
                        String com2 = "INSERT INTO ProteinKeyspace.ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid
                                        + "','" + protein + "');";
-                       // System.out.println(com2);
-                       // session.execute(com2);
+                       //System.out.println(com2);
+                       session.execute(com2);
 
                        String allpredictions = "";
                        for (FastaSequence pred : predictions) {
@@ -118,22 +108,13 @@ public class CassandraNativeConnector {
                                final_prediction = allpredictions.substring(0, allpredictions.length() - 1);
                        }
 
-                       String check2 = "SELECT count(*) FROM ProteinKeyspace.ProteinRow WHERE Protein = '" + protein + "';";
-                       //System.out.println(check1);
+                       String check2 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "';";
+                       //System.out.println(check2);
                        ResultSet results2 = session.execute(check2);
-
-                       if (results1.isExhausted()) {
-                       String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(Protein, " + jobid + ")" + " VALUES ('" + protein + "'," + "{"
-                                       + final_prediction + "}" + ");";
-                       System.out.println(com3);
-                       session.execute(com3);
-                       } else {
-                               String com4 = "ALTER TABLE ProteinKeyspace.ProteinRow ADD " + jobid + ");";
-                               System.out.println(com4);
-                               session.execute(com4);
-                               String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(" + jobid + ")" + " VALUES ({" + final_prediction + "}" + ")"
-                               + " WHERE Protein = '" + protein + "';";
-                               System.out.println(com3);
+                       if (results2.isExhausted()) {
+                               String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('" 
+                       + protein + "','" + jobid + "',{" + final_prediction + "});";
+                               //System.out.println(com3);
                                session.execute(com3);
                        }
                }
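diff --git a/datadb/compbio/cassandra/JpredParserHTTP.java b/datadb/compbio/cassandra/JpredParserHTTP.java
index d03ac79..b780ddf 100644
--- a/datadb/compbio/cassandra/JpredParserHTTP.java
+++ b/datadb/compbio/cassandra/JpredParserHTTP.java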
@@ -71,53 +71,49 @@ public class JpredParserHTTP implements JpredParser {
                                        // 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172 unknown_email jp_J9HBCBT
                                        String id = table[table.length - 1];
                                        totalcount++;
-                                       if (!cc.CheckID(id)) {
-                                               String datalink = dirprefix + "/" + id + "/" + id + ".concise.fasta";
-                                               URL urltable = new URL(datalink);
-                                               HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection();
-                                               int responsecode = httpConnection.getResponseCode();
-                                               if (199 < responsecode && responsecode < 300) {
-                                                       try {
-                                                               final FastaReader fr = new FastaReader(urltable.openStream());
-                                                               final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
-                                                               String newprotein = "";
-                                                               while (fr.hasNext()) {
-                                                                       final FastaSequence fs = fr.next();
-                                                                       if (fs.getId().equals("QUERY") || fs.getId().equals(id))
-                                                                               newprotein = fs.getSequence().replaceAll("\n", "");
-                                                                       else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) {
-                                                                               seqs.add(fs);
-                                                                       }
+                                       String datalink = dirprefix + "/" + id + "/" + id + ".concise.fasta";
+                                       URL urltable = new URL(datalink);
+                                       HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection();
+                                       int responsecode = httpConnection.getResponseCode();
+                                       if (199 < responsecode && responsecode < 300) {
+                                               try {
+                                                       final FastaReader fr = new FastaReader(urltable.openStream());
+                                                       final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
+                                                       String newprotein = "";
+                                                       while (fr.hasNext()) {
+                                                               final FastaSequence fs = fr.next();
+                                                               if (fs.getId().equals("QUERY") || fs.getId().equals(id))
+                                                                       newprotein = fs.getSequence().replaceAll("\n", "");
+                                                               else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) {
+                                                                       seqs.add(fs);
                                                                }
-                                                               if (newprotein.equals("")) {
-                                                                       countUnclearFASTAid++;
-                                                               } else {
-                                                                       SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
-                                                                       String dateInString1 = table[0].substring(0, table[0].indexOf(":"));
-                                                                       long dateWork1 = 0;
-                                                                       try {
-                                                                               Date dat1 = formatter.parse(dateInString1);
-                                                                               dateWork1 = dat1.getTime();
-                                                                       } catch (ParseException e) {
-                                                                               e.printStackTrace();
-                                                                       }
-                                                                       cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs);
-                                                                       ++countinsertions;
-                                                                       ++njobs;
-                                                                       // flush every 50 insertions
-                                                                       //if (0 == countinsertions % 50) {
-                                                                       //      cc.flushData();
-                                                                       //      njobs -= 50;
-                                                                       //}
+                                                       }
+                                                       if (newprotein.equals("")) {
+                                                               countUnclearFASTAid++;
+                                                       } else {
+                                                               SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
+                                                               String dateInString1 = table[0].substring(0, table[0].indexOf(":"));
+                                                               long dateWork1 = 0;
+                                                               try {
+                                                                       Date dat1 = formatter.parse(dateInString1);
+                                                                       dateWork1 = dat1.getTime();
+                                                               } catch (ParseException e) {
+                                                                       e.printStackTrace();
                                                                }
-                                                       } catch (IOException e) {
-                                                               e.printStackTrace();
+                                                               cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs);
+                                                               ++countinsertions;
+                                                               ++njobs;
+                                                               // flush every 50 insertions
+                                                               // if (0 == countinsertions % 50) {
+                                                               // cc.flushData();
+                                                               // njobs -= 50;
+                                                               // }
                                                        }
-                                               } else {
-                                                       countNoData++;
+                                               } catch (IOException e) {
+                                                       e.printStackTrace();
                                                }
                                        } else {
-                                               ++countinserted;
+                                               countNoData++;
                                        }
                                } else {
                                        if (line.matches(date + "(.*)Sequence0/(.*)")) {
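diff --git a/datadb/compbio/cassandra/JpredParserLocalFile.java b/datadb/compbio/cassandra/JpredParserLocalFile.java
index 27d4252..281a20b 100644
--- a/datadb/compbio/cassandra/JpredParserLocalFile.java
+++ b/datadb/compbio/cassandra/JpredParserLocalFile.java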
@@ -77,53 +77,43 @@ public class JpredParserLocalFile implements JpredParser {
                                String ip = table[2];
                                String id = table[table.length - 1];
                                totalcount++;
-                               //if (!cc.CheckID(id)) {
-                                       if (true) {
-                                       String confilename = dirprefix + "/" + id + "/" + id + ".concise";
-                                       File confile = new File(confilename);
-                                       if (confile.exists()) {
-                                               try {
-                                                       final FastaReader fr = new FastaReader(confilename);
-                                                       final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
-                                                       String newprotein = "";
-                                                       while (fr.hasNext()) {
-                                                               final FastaSequence fs = fr.next();
-                                                               if (fs.getId().equals("QUERY") || fs.getId().equals(id))
-                                                                       newprotein = fs.getSequence().replaceAll("\n", "");
-                                                               else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) {
-                                                                       seqs.add(fs);
-                                                               }
+                               String confilename = dirprefix + "/" + id + "/" + id + ".concise";
+                               File confile = new File(confilename);
+                               if (confile.exists()) {
+                                       try {
+                                               final FastaReader fr = new FastaReader(confilename);
+                                               final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
+                                               String newprotein = "";
+                                               while (fr.hasNext()) {
+                                                       final FastaSequence fs = fr.next();
+                                                       if (fs.getId().equals("QUERY") || fs.getId().equals(id))
+                                                               newprotein = fs.getSequence().replaceAll("\n", "");
+                                                       else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) {
+                                                               seqs.add(fs);
                                                        }
-                                                       if (newprotein.equals("")) {
-                                                               countUnclearFASTAid++;
-                                                       } else {
-                                                               SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
-                                                               String dateInString1 = starttime.substring(0, starttime.indexOf(":"));
-                                                               long dateWork1 = 0;
-                                                               try {
-                                                                       Date dat = formatter.parse(dateInString1);
-                                                                       dateWork1 = dat.getTime();
-                                                               } catch (ParseException e) {
-                                                                       e.printStackTrace();
-                                                               }
-                                                               cc.InsertData(dateWork1, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs);
-                                                               ++countinsertions;
-                                                               ++njobs;
-                                                               // flush every 50 insertions
-                                                               //if (0 == countinsertions % 50) {
-                                                               //      cc.flushData();
-                                                               //      njobs -= 50;
-                                                               //}
+                                               }
+                                               if (newprotein.equals("")) {
+                                                       countUnclearFASTAid++;
+                                               } else {
+                                                       SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
+                                                       String dateInString1 = starttime.substring(0, starttime.indexOf(":"));
+                                                       long insertdate = 0;
+                                                       try {
+                                                               Date dat = formatter.parse(dateInString1);
+                                                               insertdate = dat.getTime();
+                                                       } catch (ParseException e) {
+                                                               e.printStackTrace();
                                                        }
-                                                       fr.close();
-                                               } catch (IOException e) {
-                                                       e.printStackTrace();
+                                                       cc.InsertData(insertdate, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs);
+                                                       ++countinsertions;
+                                                       ++njobs;
                                                }
-                                       } else {
-                                               countNoData++;
+                                               fr.close();
+                                       } catch (IOException e) {
+                                               e.printStackTrace();
                                        }
                                } else {
-                                       ++countinserted;
+                                       countNoData++;
                                }
                        } else {
                                if (in.matches(date + "(.*)Sequence0/(.*)")) {