From: Sasha Sherstnev Date: Mon, 4 Nov 2013 21:17:39 +0000 (+0000) Subject: First working "writing" code for Cassandra 2.0 X-Git-Url: http://source.jalview.org/gitweb/?p=proteocache.git;a=commitdiff_plain;h=26b5b869bd204707a825e457a14f61e1d8ab0750 First working "writing" code for Cassandra 2.0 --- diff --git a/datadb/compbio/cassandra/CassandraNativeConnector.java b/datadb/compbio/cassandra/CassandraNativeConnector.java index 49224db..1fb01fc 100644 --- a/datadb/compbio/cassandra/CassandraNativeConnector.java +++ b/datadb/compbio/cassandra/CassandraNativeConnector.java @@ -36,10 +36,13 @@ public class CassandraNativeConnector { session = cluster.connect(); session.execute("CREATE KEYSPACE IF NOT EXISTS ProteinKeyspace WITH replication = {'class':'SimpleStrategy', 'replication_factor':3};"); - session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinRow (Protein ascii PRIMARY KEY, Predictions map);"); + session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinRow (Protein ascii, JobID ascii, Predictions map, PRIMARY KEY(JobID));"); session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinLog " - + "(JobID ascii PRIMARY KEY, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, ExecutionStatus ascii, Protein ascii);"); - session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinData (jobtime bigint PRIMARY KEY, JobID ascii, Protein ascii);"); + + "(JobID ascii, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, ExecutionStatus ascii, Protein ascii, PRIMARY KEY(JobID));"); + session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinData (jobtime bigint, JobID ascii, Protein ascii, PRIMARY KEY(JobID));"); + + session.execute("CREATE INDEX ProteinSeq ON ProteinKeyspace.ProteinRow (protein);"); + session.execute("CREATE INDEX JobDateStamp ON ProteinKeyspace.ProteinData (jobtime);"); System.out.println("Cassandra connected"); } @@ -74,38 +77,25 @@ public class CassandraNativeConnector { } /* - * check whether the job id exists in the DB - */ - public boolean CheckID(String jobid) { - String com = "SELECT * FROM ProteinKeyspace.ProteinData WHERE jobid = '" + jobid + "';"; - System.out.println(com); - ResultSet results = session.execute(com); - if (null != results) { - return true; - } - return false; - } - - /* * prepare data for insertion into the db */ public void InsertData(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx, String statusFinal, String protein, List predictions) { - String check1 = "SELECT count(*) FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';"; + String check1 = "SELECT * FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';"; //System.out.println(check1); ResultSet results1 = session.execute(check1); - if (!results1.isExhausted()) { + if (results1.isExhausted()) { String com1 = "INSERT INTO ProteinKeyspace.ProteinLog " + "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)" + " VALUES ('" + jobid + "','" + ip + "','" + startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx + "','" + protein + "');"; - // System.out.println(com1); + //System.out.println(com1); session.execute(com1); String com2 = "INSERT INTO ProteinKeyspace.ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid + "','" + protein + "');"; - // System.out.println(com2); - // session.execute(com2); + //System.out.println(com2); + session.execute(com2); String allpredictions = ""; for (FastaSequence pred : predictions) { @@ -118,22 +108,13 @@ public class CassandraNativeConnector { final_prediction = allpredictions.substring(0, allpredictions.length() - 1); } - String check2 = "SELECT count(*) FROM ProteinKeyspace.ProteinRow WHERE Protein = '" + protein + "';"; - //System.out.println(check1); + String check2 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "';"; + //System.out.println(check2); ResultSet results2 = session.execute(check2); - - if (results1.isExhausted()) { - String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(Protein, " + jobid + ")" + " VALUES ('" + protein + "'," + "{" - + final_prediction + "}" + ");"; - System.out.println(com3); - session.execute(com3); - } else { - String com4 = "ALTER TABLE ProteinKeyspace.ProteinRow ADD " + jobid + ");"; - System.out.println(com4); - session.execute(com4); - String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(" + jobid + ")" + " VALUES ({" + final_prediction + "}" + ")" - + " WHERE Protein = '" + protein + "';"; - System.out.println(com3); + if (results2.isExhausted()) { + String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('" + + protein + "','" + jobid + "',{" + final_prediction + "});"; + //System.out.println(com3); session.execute(com3); } } diff --git a/datadb/compbio/cassandra/JpredParserHTTP.java b/datadb/compbio/cassandra/JpredParserHTTP.java index d03ac79..b780ddf 100644 --- a/datadb/compbio/cassandra/JpredParserHTTP.java +++ b/datadb/compbio/cassandra/JpredParserHTTP.java @@ -71,53 +71,49 @@ public class JpredParserHTTP implements JpredParser { // 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172 unknown_email jp_J9HBCBT String id = table[table.length - 1]; totalcount++; - if (!cc.CheckID(id)) { - String datalink = dirprefix + "/" + id + "/" + id + ".concise.fasta"; - URL urltable = new URL(datalink); - HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection(); - int responsecode = httpConnection.getResponseCode(); - if (199 < responsecode && responsecode < 300) { - try { - final FastaReader fr = new FastaReader(urltable.openStream()); - final List seqs = new ArrayList(); - String newprotein = ""; - while (fr.hasNext()) { - final FastaSequence fs = fr.next(); - if (fs.getId().equals("QUERY") || fs.getId().equals(id)) - newprotein = fs.getSequence().replaceAll("\n", ""); - else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) { - seqs.add(fs); - } + String datalink = dirprefix + "/" + id + "/" + id + ".concise.fasta"; + URL urltable = new URL(datalink); + HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection(); + int responsecode = httpConnection.getResponseCode(); + if (199 < responsecode && responsecode < 300) { + try { + final FastaReader fr = new FastaReader(urltable.openStream()); + final List seqs = new ArrayList(); + String newprotein = ""; + while (fr.hasNext()) { + final FastaSequence fs = fr.next(); + if (fs.getId().equals("QUERY") || fs.getId().equals(id)) + newprotein = fs.getSequence().replaceAll("\n", ""); + else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) { + seqs.add(fs); } - if (newprotein.equals("")) { - countUnclearFASTAid++; - } else { - SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); - String dateInString1 = table[0].substring(0, table[0].indexOf(":")); - long dateWork1 = 0; - try { - Date dat1 = formatter.parse(dateInString1); - dateWork1 = dat1.getTime(); - } catch (ParseException e) { - e.printStackTrace(); - } - cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs); - ++countinsertions; - ++njobs; - // flush every 50 insertions - //if (0 == countinsertions % 50) { - // cc.flushData(); - // njobs -= 50; - //} + } + if (newprotein.equals("")) { + countUnclearFASTAid++; + } else { + SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); + String dateInString1 = table[0].substring(0, table[0].indexOf(":")); + long dateWork1 = 0; + try { + Date dat1 = formatter.parse(dateInString1); + dateWork1 = dat1.getTime(); + } catch (ParseException e) { + e.printStackTrace(); } - } catch (IOException e) { - e.printStackTrace(); + cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs); + ++countinsertions; + ++njobs; + // flush every 50 insertions + // if (0 == countinsertions % 50) { + // cc.flushData(); + // njobs -= 50; + // } } - } else { - countNoData++; + } catch (IOException e) { + e.printStackTrace(); } } else { - ++countinserted; + countNoData++; } } else { if (line.matches(date + "(.*)Sequence0/(.*)")) { diff --git a/datadb/compbio/cassandra/JpredParserLocalFile.java b/datadb/compbio/cassandra/JpredParserLocalFile.java index 27d4252..281a20b 100644 --- a/datadb/compbio/cassandra/JpredParserLocalFile.java +++ b/datadb/compbio/cassandra/JpredParserLocalFile.java @@ -77,53 +77,43 @@ public class JpredParserLocalFile implements JpredParser { String ip = table[2]; String id = table[table.length - 1]; totalcount++; - //if (!cc.CheckID(id)) { - if (true) { - String confilename = dirprefix + "/" + id + "/" + id + ".concise"; - File confile = new File(confilename); - if (confile.exists()) { - try { - final FastaReader fr = new FastaReader(confilename); - final List seqs = new ArrayList(); - String newprotein = ""; - while (fr.hasNext()) { - final FastaSequence fs = fr.next(); - if (fs.getId().equals("QUERY") || fs.getId().equals(id)) - newprotein = fs.getSequence().replaceAll("\n", ""); - else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) { - seqs.add(fs); - } + String confilename = dirprefix + "/" + id + "/" + id + ".concise"; + File confile = new File(confilename); + if (confile.exists()) { + try { + final FastaReader fr = new FastaReader(confilename); + final List seqs = new ArrayList(); + String newprotein = ""; + while (fr.hasNext()) { + final FastaSequence fs = fr.next(); + if (fs.getId().equals("QUERY") || fs.getId().equals(id)) + newprotein = fs.getSequence().replaceAll("\n", ""); + else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) { + seqs.add(fs); } - if (newprotein.equals("")) { - countUnclearFASTAid++; - } else { - SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); - String dateInString1 = starttime.substring(0, starttime.indexOf(":")); - long dateWork1 = 0; - try { - Date dat = formatter.parse(dateInString1); - dateWork1 = dat.getTime(); - } catch (ParseException e) { - e.printStackTrace(); - } - cc.InsertData(dateWork1, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs); - ++countinsertions; - ++njobs; - // flush every 50 insertions - //if (0 == countinsertions % 50) { - // cc.flushData(); - // njobs -= 50; - //} + } + if (newprotein.equals("")) { + countUnclearFASTAid++; + } else { + SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); + String dateInString1 = starttime.substring(0, starttime.indexOf(":")); + long insertdate = 0; + try { + Date dat = formatter.parse(dateInString1); + insertdate = dat.getTime(); + } catch (ParseException e) { + e.printStackTrace(); } - fr.close(); - } catch (IOException e) { - e.printStackTrace(); + cc.InsertData(insertdate, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs); + ++countinsertions; + ++njobs; } - } else { - countNoData++; + fr.close(); + } catch (IOException e) { + e.printStackTrace(); } } else { - ++countinserted; + countNoData++; } } else { if (in.matches(date + "(.*)Sequence0/(.*)")) {