session = cluster.connect();
session.execute("CREATE KEYSPACE IF NOT EXISTS ProteinKeyspace WITH replication = {'class':'SimpleStrategy', 'replication_factor':3};");
- session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinRow (Protein ascii PRIMARY KEY, Predictions map<ascii,ascii>);");
+ session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinRow (Protein ascii, JobID ascii, Predictions map<ascii,ascii>, PRIMARY KEY(JobID));");
session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinLog "
- + "(JobID ascii PRIMARY KEY, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, ExecutionStatus ascii, Protein ascii);");
- session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinData (jobtime bigint PRIMARY KEY, JobID ascii, Protein ascii);");
+ + "(JobID ascii, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, ExecutionStatus ascii, Protein ascii, PRIMARY KEY(JobID));");
+ session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinData (jobtime bigint, JobID ascii, Protein ascii, PRIMARY KEY(JobID));");
+
+ // Secondary indexes on the non-key columns queried elsewhere (protein sequence, job date).
+ // IF NOT EXISTS keeps start-up idempotent, matching the CREATE KEYSPACE / CREATE COLUMNFAMILY
+ // statements above; a plain CREATE INDEX is rejected once the index already exists.
+ session.execute("CREATE INDEX IF NOT EXISTS ProteinSeq ON ProteinKeyspace.ProteinRow (protein);");
+ session.execute("CREATE INDEX IF NOT EXISTS JobDateStamp ON ProteinKeyspace.ProteinData (jobtime);");
System.out.println("Cassandra connected");
}
}
/*
- * check whether the job id exists in the DB
- */
- public boolean CheckID(String jobid) {
- String com = "SELECT * FROM ProteinKeyspace.ProteinData WHERE jobid = '" + jobid + "';";
- System.out.println(com);
- ResultSet results = session.execute(com);
- if (null != results) {
- return true;
- }
- return false;
- }
-
- /*
* prepare data for insertion into the db
*/
public void InsertData(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx, String statusFinal,
String protein, List<FastaSequence> predictions) {
- String check1 = "SELECT count(*) FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';";
+ String check1 = "SELECT * FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';";
//System.out.println(check1);
ResultSet results1 = session.execute(check1);
- if (!results1.isExhausted()) {
+ if (results1.isExhausted()) {
String com1 = "INSERT INTO ProteinKeyspace.ProteinLog "
+ "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)" + " VALUES ('" + jobid + "','" + ip + "','"
+ startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx + "','" + protein + "');";
- // System.out.println(com1);
+ //System.out.println(com1);
session.execute(com1);
String com2 = "INSERT INTO ProteinKeyspace.ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid
+ "','" + protein + "');";
- // System.out.println(com2);
- // session.execute(com2);
+ //System.out.println(com2);
+ session.execute(com2);
String allpredictions = "";
for (FastaSequence pred : predictions) {
final_prediction = allpredictions.substring(0, allpredictions.length() - 1);
}
- String check2 = "SELECT count(*) FROM ProteinKeyspace.ProteinRow WHERE Protein = '" + protein + "';";
- //System.out.println(check1);
+ String check2 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "';";
+ //System.out.println(check2);
ResultSet results2 = session.execute(check2);
-
- if (results1.isExhausted()) {
- String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(Protein, " + jobid + ")" + " VALUES ('" + protein + "'," + "{"
- + final_prediction + "}" + ");";
- System.out.println(com3);
- session.execute(com3);
- } else {
- String com4 = "ALTER TABLE ProteinKeyspace.ProteinRow ADD " + jobid + ");";
- System.out.println(com4);
- session.execute(com4);
- String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(" + jobid + ")" + " VALUES ({" + final_prediction + "}" + ")"
- + " WHERE Protein = '" + protein + "';";
- System.out.println(com3);
+ if (results2.isExhausted()) {
+ String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('"
+ + protein + "','" + jobid + "',{" + final_prediction + "});";
+ //System.out.println(com3);
session.execute(com3);
}
}
// 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172 unknown_email jp_J9HBCBT
String id = table[table.length - 1];
totalcount++;
- if (!cc.CheckID(id)) {
- String datalink = dirprefix + "/" + id + "/" + id + ".concise.fasta";
- URL urltable = new URL(datalink);
- HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection();
- int responsecode = httpConnection.getResponseCode();
- if (199 < responsecode && responsecode < 300) {
- try {
- final FastaReader fr = new FastaReader(urltable.openStream());
- final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
- String newprotein = "";
- while (fr.hasNext()) {
- final FastaSequence fs = fr.next();
- if (fs.getId().equals("QUERY") || fs.getId().equals(id))
- newprotein = fs.getSequence().replaceAll("\n", "");
- else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) {
- seqs.add(fs);
- }
+ String datalink = dirprefix + "/" + id + "/" + id + ".concise.fasta";
+ URL urltable = new URL(datalink);
+ HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection();
+ int responsecode = httpConnection.getResponseCode();
+ if (199 < responsecode && responsecode < 300) {
+ try {
+ final FastaReader fr = new FastaReader(urltable.openStream());
+ final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
+ String newprotein = "";
+ while (fr.hasNext()) {
+ final FastaSequence fs = fr.next();
+ if (fs.getId().equals("QUERY") || fs.getId().equals(id))
+ newprotein = fs.getSequence().replaceAll("\n", "");
+ else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) {
+ seqs.add(fs);
}
- if (newprotein.equals("")) {
- countUnclearFASTAid++;
- } else {
- SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
- String dateInString1 = table[0].substring(0, table[0].indexOf(":"));
- long dateWork1 = 0;
- try {
- Date dat1 = formatter.parse(dateInString1);
- dateWork1 = dat1.getTime();
- } catch (ParseException e) {
- e.printStackTrace();
- }
- cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs);
- ++countinsertions;
- ++njobs;
- // flush every 50 insertions
- //if (0 == countinsertions % 50) {
- // cc.flushData();
- // njobs -= 50;
- //}
+ }
+ if (newprotein.equals("")) {
+ countUnclearFASTAid++;
+ } else {
+ SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
+ String dateInString1 = table[0].substring(0, table[0].indexOf(":"));
+ long dateWork1 = 0;
+ try {
+ Date dat1 = formatter.parse(dateInString1);
+ dateWork1 = dat1.getTime();
+ } catch (ParseException e) {
+ e.printStackTrace();
}
- } catch (IOException e) {
- e.printStackTrace();
+ cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs);
+ ++countinsertions;
+ ++njobs;
+ // flush every 50 insertions
+ // if (0 == countinsertions % 50) {
+ // cc.flushData();
+ // njobs -= 50;
+ // }
}
- } else {
- countNoData++;
+ } catch (IOException e) {
+ e.printStackTrace();
}
} else {
- ++countinserted;
+ countNoData++;
}
} else {
if (line.matches(date + "(.*)Sequence0/(.*)")) {
String ip = table[2];
String id = table[table.length - 1];
totalcount++;
- //if (!cc.CheckID(id)) {
- if (true) {
- String confilename = dirprefix + "/" + id + "/" + id + ".concise";
- File confile = new File(confilename);
- if (confile.exists()) {
- try {
- final FastaReader fr = new FastaReader(confilename);
- final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
- String newprotein = "";
- while (fr.hasNext()) {
- final FastaSequence fs = fr.next();
- if (fs.getId().equals("QUERY") || fs.getId().equals(id))
- newprotein = fs.getSequence().replaceAll("\n", "");
- else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) {
- seqs.add(fs);
- }
+ String confilename = dirprefix + "/" + id + "/" + id + ".concise";
+ File confile = new File(confilename);
+ if (confile.exists()) {
+ try {
+ final FastaReader fr = new FastaReader(confilename);
+ final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
+ String newprotein = "";
+ while (fr.hasNext()) {
+ final FastaSequence fs = fr.next();
+ if (fs.getId().equals("QUERY") || fs.getId().equals(id))
+ newprotein = fs.getSequence().replaceAll("\n", "");
+ else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) {
+ seqs.add(fs);
}
- if (newprotein.equals("")) {
- countUnclearFASTAid++;
- } else {
- SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
- String dateInString1 = starttime.substring(0, starttime.indexOf(":"));
- long dateWork1 = 0;
- try {
- Date dat = formatter.parse(dateInString1);
- dateWork1 = dat.getTime();
- } catch (ParseException e) {
- e.printStackTrace();
- }
- cc.InsertData(dateWork1, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs);
- ++countinsertions;
- ++njobs;
- // flush every 50 insertions
- //if (0 == countinsertions % 50) {
- // cc.flushData();
- // njobs -= 50;
- //}
+ }
+ if (newprotein.equals("")) {
+ countUnclearFASTAid++;
+ } else {
+ SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
+ String dateInString1 = starttime.substring(0, starttime.indexOf(":"));
+ long insertdate = 0;
+ try {
+ Date dat = formatter.parse(dateInString1);
+ insertdate = dat.getTime();
+ } catch (ParseException e) {
+ e.printStackTrace();
}
- fr.close();
- } catch (IOException e) {
- e.printStackTrace();
+ cc.InsertData(insertdate, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs);
+ ++countinsertions;
+ ++njobs;
}
- } else {
- countNoData++;
+ fr.close();
+ } catch (IOException e) {
+ e.printStackTrace();
}
} else {
- ++countinserted;
+ countNoData++;
}
} else {
if (in.matches(date + "(.*)Sequence0/(.*)")) {