From: Sasha Sherstnev <a.sherstnev@dundee.ac.uk> Date: Sat, 2 Nov 2013 12:06:23 +0000 (+0000) Subject: Add new still not-working code for native binary protocol version X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=3887e427255a9ed5e9e43f4a52a377935331e5d3;p=proteocache.git Add new still not-working code for native binary protocol version --- diff --git a/.classpath b/.classpath index fddf6c3..f346902 100644 --- a/.classpath +++ b/.classpath @@ -3,29 +3,21 @@ <classpathentry kind="src" path="datadb"/> <classpathentry kind="src" path="server"/> <classpathentry excluding="testdata/" kind="src" path="testsrc"/> - <classpathentry kind="lib" path="WEB-INF/lib/drmaa.jar"/> - <classpathentry kind="lib" path="WEB-INF/lib/compbio-annotations-1.0.jar"/> - <classpathentry kind="lib" path="WEB-INF/lib/compbio-util-1.4.jar"/> - <classpathentry kind="lib" path="WEB-INF/lib/derby-10.8.2.2.jar"/> - <classpathentry kind="lib" path="WEB-INF/lib/compbio-ga-1.1.jar"/> <classpathentry kind="lib" path="testsrc/lib/testng-5.10-jdk15.jar"/> - <classpathentry kind="lib" path="WEB-INF/lib/jstl-1.1.2.jar"/> - <classpathentry kind="lib" path="WEB-INF/lib/standard-1.1.2.jar"/> <classpathentry kind="lib" path="WEB-INF/lib/log4j-1.2.16.jar"/> - <classpathentry kind="lib" path="WEB-INF/lib/cassandra-all-1.2.4.jar"/> - <classpathentry kind="lib" path="WEB-INF/lib/cassandra-clientutil-1.2.4.jar"/> - <classpathentry kind="lib" path="WEB-INF/lib/cassandra-thrift-1.2.4.jar"/> - <classpathentry kind="lib" path="WEB-INF/lib/hector-0.7.0-20-sources.jar"/> - <classpathentry kind="lib" path="WEB-INF/lib/hector-core-1.0-5.jar" sourcepath="/home/asherstnev/.m2/repository/me/prettyprint/hector-core/1.0-5/hector-core-1.0-5-sources.jar"/> + <classpathentry kind="lib" path="WEB-INF/lib/cassandra-driver-core-2.0.0-beta3-SNAPSHOT.jar" sourcepath="WEB-INF/lib/cassandra-driver-core-2.0.0-beta3-SNAPSHOT-sources.jar"/> + <classpathentry kind="lib" path="WEB-INF/lib/lz4-1.2.0.jar"/> + <classpathentry kind="lib" path="WEB-INF/lib/snappy-java-1.1.0.jar"/> + <classpathentry kind="lib" path="WEB-INF/lib/guava-15.0.jar" sourcepath="/home/asherstnev/.m2/repository/com/google/guava/guava/15.0/guava-15.0-sources.jar"/> + <classpathentry kind="lib" path="WEB-INF/lib/netty-3.6.6.Final.jar"/> + <classpathentry kind="lib" path="WEB-INF/lib/metrics-core-3.0.1.jar"/> + <classpathentry kind="lib" path="WEB-INF/lib/jackson-core-asl-1.9.13.jar"/> + <classpathentry kind="lib" path="WEB-INF/lib/jackson-mapper-asl-1.9.13.jar"/> <classpathentry kind="con" path="org.eclipse.jst.server.core.container/org.eclipse.jst.server.tomcat.runtimeTarget/Apache Tomcat v7.0"> <attributes> <attribute name="owner.project.facets" value="jst.web"/> </attributes> </classpathentry> - <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/jdk1.7.0_25"> - <attributes> - <attribute name="owner.project.facets" value="java"/> - </attributes> - </classpathentry> + <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/jdk1.7.0_45"/> <classpathentry kind="output" path="WEB-INF/classes"/> </classpath> diff --git a/datadb/compbio/cassandra/CassandraCreate.java b/datadb/compbio/cassandra/CassandraCreate.java deleted file mode 100644 index b147499..0000000 --- a/datadb/compbio/cassandra/CassandraCreate.java +++ /dev/null @@ -1,152 +0,0 @@ -package compbio.cassandra; - -import java.util.Arrays; -import java.util.List; - -import me.prettyprint.cassandra.serializers.LongSerializer; -import me.prettyprint.cassandra.serializers.StringSerializer; -import me.prettyprint.cassandra.service.ThriftKsDef; -import me.prettyprint.hector.api.Cluster; -import me.prettyprint.hector.api.Keyspace; -import me.prettyprint.hector.api.beans.ColumnSlice; -import me.prettyprint.hector.api.ddl.ColumnFamilyDefinition; -import me.prettyprint.hector.api.ddl.ComparatorType; -import me.prettyprint.hector.api.ddl.KeyspaceDefinition; -import me.prettyprint.hector.api.factory.HFactory; -import me.prettyprint.hector.api.mutation.Mutator; -import me.prettyprint.hector.api.query.QueryResult; -import me.prettyprint.hector.api.query.SliceQuery; - -public class CassandraCreate { - private static Keyspace ksp; - private static Cluster cluster; - private static Mutator<Long> mutatorLong; - private static Mutator<String> mutatorString; - private static Mutator<String> mutatorLog; - StringSerializer ss = StringSerializer.get(); - LongSerializer ls = LongSerializer.get(); - - /* - * connect to the cluster and look weather the dababase has any data inside - */ - public void Connection() { - cluster = HFactory.getOrCreateCluster("Protein Cluster", "127.0.0.1:9160"); - KeyspaceDefinition keyspaceDef = cluster.describeKeyspace("ProteinKeyspace"); - /* - * If keyspace does not exist, the CFs don't exist either. => create - * them. - */ - if (keyspaceDef == null) { // create column family - System.out.println("ProteinKeyspace has been null"); - ColumnFamilyDefinition cfProtein = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinRow", - ComparatorType.ASCIITYPE); - ColumnFamilyDefinition cfLog = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinLog", ComparatorType.ASCIITYPE); - ColumnFamilyDefinition cfData = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinData", - ComparatorType.ASCIITYPE); - - KeyspaceDefinition newKeyspace = HFactory.createKeyspaceDefinition("ProteinKeyspace", ThriftKsDef.DEF_STRATEGY_CLASS, 1, - Arrays.asList(cfProtein, cfLog, cfData)); - /* - * Add the schema to the cluster. "true" as the second param means - * that Hector will be blocked until all nodes see the change. - */ - cluster.addKeyspace(newKeyspace, true); - cluster.addColumnFamily(cfProtein, true); - cluster.addColumnFamily(cfLog, true); - cluster.addColumnFamily(cfData, true); - } else { - System.out.println("Data loaded"); - } - ksp = HFactory.createKeyspace("ProteinKeyspace", cluster); - System.out.println("Cassandra has been connected"); - } - - /* - * parsing data source and filling the database - */ - public void Parsing(String source) { - /* - * CF ProteinRow store protein and prediction - */ - mutatorString = HFactory.createMutator(ksp, ss); - - /* - * ProteinLog stores logging info: IP, job id, start date and end date - */ - mutatorLog = HFactory.createMutator(ksp, ss); - - /* - * CF ProteinData store id and protein per data - */ - mutatorLong = HFactory.createMutator(ksp, ls); - - if (true) { - //if (source.equals("http")) { - // get data from real Jpred production server - System.out.println("Parsing web data source......"); - String datasrc = "http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat"; - String prefix = "http://www.compbio.dundee.ac.uk/www-jpred/results"; - JpredParserHTTP parser = new JpredParserHTTP(prefix); - parser.Parsing(datasrc, 4); - flushData(); - } - if (true) { - //if (source.equals("file")) { - // get irtifical data generated for the DB stress tests - System.out.println("Parsing local file data source......"); - String datasrc = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/data.dat"; - String prefix = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/Jpreddata"; - JpredParserLocalFile parser = new JpredParserLocalFile(prefix); - parser.Parsing(datasrc, 190); - flushData(); - } - } - - public void flushData() { - mutatorString.execute(); - mutatorLong.execute(); - mutatorLog.execute(); - // System.out.println("Flush new data..."); - } - - public void Closing() { - cluster.getConnectionManager().shutdown(); - System.out.println("Cassandra has been shut down"); - } - - /* - * check whether the job id exists in the DB - */ - public boolean CheckID(String jobid) { - SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(ksp, ss, ss, ss); - sliceQuery.setColumnFamily("ProteinLog").setKey(jobid).setRange("", "", false, 100); - QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute(); - if (result.get().getColumns().size() > 0) { - return true; - } - return false; - } - - /* - * prepare data for insertion into the db - */ - public void InsertData(long dataWork, String dataBegin, String dataEnd, String ip, String id, String statusEx, String statusFinal, - String protein, List<FastaSequence> jnetpred) { - mutatorLog.addInsertion(id, "ProteinLog", HFactory.createColumn("ip", ip, ss, ss)) - .addInsertion(id, "ProteinLog", HFactory.createColumn("DataBegin", dataBegin, ss, ss)) - .addInsertion(id, "ProteinLog", HFactory.createColumn("DataEnd", dataEnd, ss, ss)) - .addInsertion(id, "ProteinLog", HFactory.createColumn("Status ex", statusEx, ss, ss)) - .addInsertion(id, "ProteinLog", HFactory.createColumn("Status final", statusFinal, ss, ss)) - .addInsertion(id, "ProteinLog", HFactory.createColumn("Protein", protein, ss, ss)); - for (int i = 0; i < jnetpred.size(); i++) { - String namepred = jnetpred.get(i).getId(); - String pred = jnetpred.get(i).getSequence().replaceAll("\n", ""); - mutatorString.addInsertion(protein, "ProteinRow", HFactory.createColumn(id + ";" + namepred, pred, ss, ss)); - } - mutatorLong.addInsertion(dataWork, "ProteinData", HFactory.createColumn(id, protein, ss, ss)); - } - - public Keyspace GetKeyspace() { - return ksp; - } -} diff --git a/datadb/compbio/cassandra/CassandraNativeConnector.java b/datadb/compbio/cassandra/CassandraNativeConnector.java new file mode 100644 index 0000000..49224db --- /dev/null +++ b/datadb/compbio/cassandra/CassandraNativeConnector.java @@ -0,0 +1,142 @@ +package compbio.cassandra; + +import java.io.IOException; +import java.util.List; + +import com.datastax.driver.core.Cluster; +import com.datastax.driver.core.Host; +import com.datastax.driver.core.Metadata; +import com.datastax.driver.core.Session; +import com.datastax.driver.core.ResultSet; + +public class CassandraNativeConnector { + private static Cluster cluster; + private static Session session; + + /* + * private static Keyspace ksp; private static Mutator<Long> mutatorLong; + * private static Mutator<String> mutatorString; private static + * Mutator<String> mutatorLog; StringSerializer ss = StringSerializer.get(); + * LongSerializer ls = LongSerializer.get(); + */ + + /* + * connect to the cluster and look weather the dababase has any data inside + */ + public void Connect() { + // local cassandra cluster + cluster = Cluster.builder().addContactPoint("localhost").build(); + // distributed cassandra cluster + /* cluster = Cluster.builder().addContactPoint("10.0.115.190").build(); */ + Metadata metadata = cluster.getMetadata(); + System.out.printf("Connected to cluster: %s\n", metadata.getClusterName()); + for (Host host : metadata.getAllHosts()) { + System.out.printf("Datatacenter: %s; Host: %s; Rack: %s\n", host.getDatacenter(), host.getAddress(), host.getRack()); + } + + session = cluster.connect(); + session.execute("CREATE KEYSPACE IF NOT EXISTS ProteinKeyspace WITH replication = {'class':'SimpleStrategy', 'replication_factor':3};"); + session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinRow (Protein ascii PRIMARY KEY, Predictions map<ascii,ascii>);"); + session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinLog " + + "(JobID ascii PRIMARY KEY, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, ExecutionStatus ascii, Protein ascii);"); + session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinData (jobtime bigint PRIMARY KEY, JobID ascii, Protein ascii);"); + + System.out.println("Cassandra connected"); + } + + /* + * parsing data source and filling the database + */ + public void Parsing() throws IOException { + if (false) { + // if (source.equals("http")) { + // get data from real Jpred production server + System.out.println("Parsing web data source......"); + String datasrc = "http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat"; + String prefix = "http://www.compbio.dundee.ac.uk/www-jpred/results"; + JpredParserHTTP parser = new JpredParserHTTP(prefix); + parser.Parsing(datasrc, 4); + } + if (true) { + // if (source.equals("file")) { + // get irtifical data generated for the DB stress tests + System.out.println("Parsing local file data source......"); + String datasrc = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/data.dat"; + String prefix = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/Jpreddata"; + JpredParserLocalFile parser = new JpredParserLocalFile(prefix); + parser.Parsing(datasrc, 190); + } + } + + public void Closing() { + cluster.shutdown(); + System.out.println("Cassandra has been shut down"); + } + + /* + * check whether the job id exists in the DB + */ + public boolean CheckID(String jobid) { + String com = "SELECT * FROM ProteinKeyspace.ProteinData WHERE jobid = '" + jobid + "';"; + System.out.println(com); + ResultSet results = session.execute(com); + if (null != results) { + return true; + } + return false; + } + + /* + * prepare data for insertion into the db + */ + public void InsertData(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx, String statusFinal, + String protein, List<FastaSequence> predictions) { + + String check1 = "SELECT count(*) FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';"; + //System.out.println(check1); + ResultSet results1 = session.execute(check1); + if (!results1.isExhausted()) { + String com1 = "INSERT INTO ProteinKeyspace.ProteinLog " + + "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)" + " VALUES ('" + jobid + "','" + ip + "','" + + startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx + "','" + protein + "');"; + // System.out.println(com1); + session.execute(com1); + + String com2 = "INSERT INTO ProteinKeyspace.ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid + + "','" + protein + "');"; + // System.out.println(com2); + // session.execute(com2); + + String allpredictions = ""; + for (FastaSequence pred : predictions) { + String predictionname = pred.getId(); + String prediction = pred.getSequence().replaceAll("\n", ""); + allpredictions += "'" + predictionname + "':'" + prediction + "',"; + } + String final_prediction = ""; + if (null != allpredictions) { + final_prediction = allpredictions.substring(0, allpredictions.length() - 1); + } + + String check2 = "SELECT count(*) FROM ProteinKeyspace.ProteinRow WHERE Protein = '" + protein + "';"; + //System.out.println(check1); + ResultSet results2 = session.execute(check2); + + if (results1.isExhausted()) { + String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(Protein, " + jobid + ")" + " VALUES ('" + protein + "'," + "{" + + final_prediction + "}" + ");"; + System.out.println(com3); + session.execute(com3); + } else { + String com4 = "ALTER TABLE ProteinKeyspace.ProteinRow ADD " + jobid + ");"; + System.out.println(com4); + session.execute(com4); + String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(" + jobid + ")" + " VALUES ({" + final_prediction + "}" + ")" + + " WHERE Protein = '" + protein + "';"; + System.out.println(com3); + session.execute(com3); + } + } + } + +} diff --git a/datadb/compbio/cassandra/JpredParser.java b/datadb/compbio/cassandra/JpredParser.java index 56f3770..27020ec 100644 --- a/datadb/compbio/cassandra/JpredParser.java +++ b/datadb/compbio/cassandra/JpredParser.java @@ -1,5 +1,7 @@ package compbio.cassandra; +import java.io.IOException; + public interface JpredParser { /* @@ -10,5 +12,5 @@ public interface JpredParser { /* * Makes real parsing of the source file **/ - void Parsing(String source, int nDays); + void Parsing(String source, int nDays) throws IOException; } diff --git a/datadb/compbio/cassandra/JpredParserHTTP.java b/datadb/compbio/cassandra/JpredParserHTTP.java index 052ff6a..d03ac79 100644 --- a/datadb/compbio/cassandra/JpredParserHTTP.java +++ b/datadb/compbio/cassandra/JpredParserHTTP.java @@ -1,6 +1,7 @@ package compbio.cassandra; import java.io.BufferedReader; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.net.HttpURLConnection; @@ -17,7 +18,7 @@ import java.util.List; import compbio.cassandra.JpredParser; public class JpredParserHTTP implements JpredParser { - private CassandraCreate cc = new CassandraCreate(); + private CassandraNativeConnector cc = new CassandraNativeConnector(); private String dirprefix; JpredParserHTTP() { @@ -32,7 +33,7 @@ public class JpredParserHTTP implements JpredParser { dirprefix = newsourceprefix; } - public void Parsing(String source, int nDays) { + public void Parsing(String source, int nDays) throws IOException { Calendar cal = Calendar.getInstance(); cal.add(Calendar.DATE, -nDays); for (int i = 0; i < nDays; ++i) { @@ -41,9 +42,7 @@ public class JpredParserHTTP implements JpredParser { int year = cal.get(Calendar.YEAR); int day = cal.get(Calendar.DATE); String date = year + "/" + month + "/" + day; - if (0 < ParsingForDate(source, date)) { - cc.flushData(); - } + ParsingForDate(source, date); } } @@ -86,8 +85,9 @@ public class JpredParserHTTP implements JpredParser { final FastaSequence fs = fr.next(); if (fs.getId().equals("QUERY") || fs.getId().equals(id)) newprotein = fs.getSequence().replaceAll("\n", ""); - else + else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) { seqs.add(fs); + } } if (newprotein.equals("")) { countUnclearFASTAid++; @@ -105,10 +105,10 @@ public class JpredParserHTTP implements JpredParser { ++countinsertions; ++njobs; // flush every 50 insertions - if (0 == countinsertions % 50) { - cc.flushData(); - njobs -= 50; - } + //if (0 == countinsertions % 50) { + // cc.flushData(); + // njobs -= 50; + //} } } catch (IOException e) { e.printStackTrace(); diff --git a/datadb/compbio/cassandra/JpredParserLocalFile.java b/datadb/compbio/cassandra/JpredParserLocalFile.java index a3e1520..27d4252 100644 --- a/datadb/compbio/cassandra/JpredParserLocalFile.java +++ b/datadb/compbio/cassandra/JpredParserLocalFile.java @@ -2,6 +2,7 @@ package compbio.cassandra; import java.io.BufferedReader; import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.net.HttpURLConnection; @@ -15,37 +16,49 @@ import java.util.Date; import java.util.List; public class JpredParserLocalFile implements JpredParser { - private CassandraCreate cc = new CassandraCreate(); + private CassandraNativeConnector cc = new CassandraNativeConnector(); private String dirprefix; - public void setSource (String newsourceprefix) { + public void setSource(String newsourceprefix) { this.dirprefix = newsourceprefix; } JpredParserLocalFile() { this.dirprefix = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/data.dat"; } - + JpredParserLocalFile(String sourceurl) { this.dirprefix = sourceurl; } - public void Parsing(String source, int nDays) { + public void Parsing(String source, int nDays) throws IOException { Calendar cal = Calendar.getInstance(); cal.add(Calendar.DATE, -nDays); + List<String> alljobs = new ArrayList<String>(); + File file = new File(source); + BufferedReader alljobsfile = new BufferedReader(new InputStreamReader(new FileInputStream(file))); + String line; + + while ((line = alljobsfile.readLine()) != null) { + alljobs.add(line); + } + alljobsfile.close(); + + System.out.println("Inserting jobs for " + nDays + " days, " + alljobs.size() + " jobs in total"); + final long startTime = System.currentTimeMillis(); for (int i = 0; i < nDays; ++i) { cal.add(Calendar.DATE, 1); int month = cal.get(Calendar.MONTH) + 1; int year = cal.get(Calendar.YEAR); int day = cal.get(Calendar.DATE); String date = year + "/" + month + "/" + day; - if (0 < ParsingForDate(source, date)) { - cc.flushData(); - } + ParsingForDate(alljobs, date); } + final long execTime = System.currentTimeMillis() - startTime; + System.out.println("Execution Time = " + execTime + " ms"); } - private int ParsingForDate(String input, String date) { + private int ParsingForDate(List<String> input, String date) { int totalcount = 0; int countNoData = 0; int countUnclearFASTAid = 0; @@ -56,71 +69,71 @@ public class JpredParserLocalFile implements JpredParser { int njobs = 0; System.out.println("Inserting jobs for " + date); - try { - File file = new File(input); - BufferedReader alljobs = new BufferedReader(new InputStreamReader(new FileInputStream(file))); - String line; - - while ((line = alljobs.readLine()) != null) { - if (line.matches(date + "(.*)jp_[^\\s]+")) { - String[] table = line.split("\\s+"); - String id = table[table.length - 1]; - totalcount++; - if (!cc.CheckID(id)) { - String confilename = dirprefix + "/" + id + "/" + id + ".concise"; - File confile = new File(confilename); - if (confile.exists()) { - try { - final FastaReader fr = new FastaReader(confilename); - final List<FastaSequence> seqs = new ArrayList<FastaSequence>(); - String newprotein = ""; - while (fr.hasNext()) { - final FastaSequence fs = fr.next(); - if (fs.getId().equals("QUERY") || fs.getId().equals(id)) - newprotein = fs.getSequence().replaceAll("\n", ""); - else - seqs.add(fs); + for (String in : input) { + if (in.matches(date + "(.*)jp_[^\\s]+")) { + String[] table = in.split("\\s+"); + String starttime = table[0]; + String finishtime = table[1]; + String ip = table[2]; + String id = table[table.length - 1]; + totalcount++; + //if (!cc.CheckID(id)) { + if (true) { + String confilename = dirprefix + "/" + id + "/" + id + ".concise"; + File confile = new File(confilename); + if (confile.exists()) { + try { + final FastaReader fr = new FastaReader(confilename); + final List<FastaSequence> seqs = new ArrayList<FastaSequence>(); + String newprotein = ""; + while (fr.hasNext()) { + final FastaSequence fs = fr.next(); + if (fs.getId().equals("QUERY") || fs.getId().equals(id)) + newprotein = fs.getSequence().replaceAll("\n", ""); + else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) { + seqs.add(fs); } - if (newprotein.equals("")) { - countUnclearFASTAid++; - } else { - SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); - String dateInString1 = table[0].substring(0, table[0].indexOf(":")); - long dateWork1 = 0; - try { - Date dat1 = formatter.parse(dateInString1); - dateWork1 = dat1.getTime(); - } catch (ParseException e) { - e.printStackTrace(); - } - cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs); - ++countinsertions; - ++njobs; - // flush every 50 insertions - if (0 == countinsertions % 50) { - cc.flushData(); - njobs -= 50; - } + } + if (newprotein.equals("")) { + countUnclearFASTAid++; + } else { + SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); + String dateInString1 = starttime.substring(0, starttime.indexOf(":")); + long dateWork1 = 0; + try { + Date dat = formatter.parse(dateInString1); + dateWork1 = dat.getTime(); + } catch (ParseException e) { + e.printStackTrace(); } - fr.close(); - } catch (IOException e) { - e.printStackTrace(); + cc.InsertData(dateWork1, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs); + ++countinsertions; + ++njobs; + // flush every 50 insertions + //if (0 == countinsertions % 50) { + // cc.flushData(); + // njobs -= 50; + //} } - } else { - countNoData++; + fr.close(); + } catch (IOException e) { + e.printStackTrace(); } } else { - ++countinserted; + countNoData++; } } else { - if (line.matches(date + "(.*)Sequence0/(.*)")) { - ++counAlignments; - } else { - ++countStrange; - } + ++countinserted; + } + } else { + if (in.matches(date + "(.*)Sequence0/(.*)")) { + ++counAlignments; + } else { + ++countStrange; } } - alljobs.close(); + } + if (true) { System.out.println("Total number of jobs = " + totalcount); System.out.println(" " + countinserted + " jobs inserted already"); System.out.println(" " + counAlignments + " jalview jobs"); @@ -128,11 +141,8 @@ public class JpredParserLocalFile implements JpredParser { System.out.println(" " + countNoData + " jobs without *.concise.fasta file"); System.out.println(" " + countUnclearFASTAid + " jobs with unclear FASTA protein id in *.concise.fasta"); System.out.println(" " + countinsertions + " new job insertions\n"); - } catch (MalformedURLException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); } return njobs; } + } diff --git a/server/compbio/listeners/ContextListener.java b/server/compbio/listeners/ContextListener.java index ec72af9..44cf66b 100644 --- a/server/compbio/listeners/ContextListener.java +++ b/server/compbio/listeners/ContextListener.java @@ -1,5 +1,6 @@ package compbio.listeners; +import java.io.IOException; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; @@ -8,7 +9,7 @@ import javax.servlet.ServletContextEvent; import javax.servlet.ServletContextListener; import javax.servlet.annotation.WebListener; -import compbio.cassandra.CassandraCreate; +import compbio.cassandra.CassandraNativeConnector; /** * Application Lifecycle Listener implementation class ContextListener @@ -17,20 +18,25 @@ import compbio.cassandra.CassandraCreate; @WebListener public class ContextListener implements ServletContextListener { private ScheduledExecutorService webjob_scheduler; - CassandraCreate cc = new CassandraCreate(); + CassandraNativeConnector db = new CassandraNativeConnector(); /** * @see ServletContextListener#contextInitialized(ServletContextEvent) */ public void contextInitialized(ServletContextEvent arg0) { System.out.println("ProteoCache session start......"); - cc.Connection(); + db.Connect(); webjob_scheduler = Executors.newSingleThreadScheduledExecutor(); webjob_scheduler.scheduleAtFixedRate(new Runnable() { @Override public void run() { - cc.Parsing("test"); + try { + db.Parsing(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } } }, 0, 60, TimeUnit.SECONDS); @@ -40,7 +46,7 @@ public class ContextListener implements ServletContextListener { * @see ServletContextListener#contextDestroyed(ServletContextEvent) */ public void contextDestroyed(ServletContextEvent arg0) { - cc.Closing(); + db.Closing(); System.out.println("Shut down ProteoCache......"); webjob_scheduler.shutdownNow(); } diff --git a/server/compbio/listeners/LengthServlet.java b/server/compbio/listeners/LengthServlet.java index 8bcc886..76d0462 100644 --- a/server/compbio/listeners/LengthServlet.java +++ b/server/compbio/listeners/LengthServlet.java @@ -11,6 +11,7 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import compbio.statistic.StatisticsProt; +import compbio.statistic.CassandraRequester; /** * Servlet implementation class LengthServlet @@ -27,7 +28,7 @@ public class LengthServlet extends HttpServlet { final long startTime = System.currentTimeMillis(); String date1 = request.getParameter("data1"); String date2 = request.getParameter("data2"); - StatisticsProt sp = new StatisticsProt(); + CassandraRequester sp = new CassandraRequester(); if (null != request.getParameter("option")) { Calendar cal = Calendar.getInstance(); date1 = StatisticsProt.DateFormatYYMMDD(sp.earliestDate()); @@ -35,7 +36,7 @@ public class LengthServlet extends HttpServlet { } request.setAttribute("data1", date1); request.setAttribute("data2", date2); - request.setAttribute("result", sp.readLength(date1, date2)); + request.setAttribute("result", sp.extractExecutionTime(date1, date2)); request.setAttribute("flag", request.getParameter("option")); final long endTime = System.currentTimeMillis(); request.setAttribute("timeExecution", (endTime - startTime)); diff --git a/server/compbio/statistic/CassandraRequester.java b/server/compbio/statistic/CassandraRequester.java new file mode 100755 index 0000000..1906c97 --- /dev/null +++ b/server/compbio/statistic/CassandraRequester.java @@ -0,0 +1,229 @@ +package compbio.statistic; + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Collections; +import java.util.Date; +import java.util.Iterator; +import java.util.List; + +import compbio.cassandra.CassandraNativeConnector; +import compbio.cassandra.DataBase; + +public class CassandraRequester { + private CassandraNativeConnector DBInstance = new CassandraNativeConnector(); + private ArrayList<DataBase> query; + private static long currentDate = 0; + private static long earlestDate = 0; + + + /* + * query: execution time for the period from date1 till date2 + * */ + public List<DataBase> extractExecutionTime(String date1, String date2) { + if (!isThisDateValid(date1) || !isThisDateValid(date2)) { + System.out.println("Wrong date: point 3"); + return null; + } + SetDateRange(); + int nbins = 5; + long dateStart = DateParsing(date1); + long dateEnd = DateParsing(date2); + if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate) || dateStart > dateEnd) + return null; + if (dateStart < earlestDate) + dateStart = earlestDate; + if (dateEnd > currentDate) + dateStart = currentDate; + + System.out.println("CassandraRequester.extractExecutionTime: earlestDate = " + earlestDate + ", currentDate = " + currentDate); + + Calendar start = Calendar.getInstance(); + start.setTime(new Date(dateStart)); + Calendar end = Calendar.getInstance(); + end.setTime(new Date(dateEnd)); + query = new ArrayList<DataBase>(); + List<Integer> totalTime = new ArrayList<Integer>(); + for (int i = 0; i < nbins; i++) + totalTime.add(i, 0); + /* + for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) { + List<Integer> timeResult = new ArrayList<Integer>(); + SliceQuery<Long, String, String> result = HFactory.createSliceQuery(DBInstance.GetKeyspace(), LongSerializer.get(), + StringSerializer.get(), StringSerializer.get()); + result.setColumnFamily("ProteinData"); + result.setKey(date.getTime()); + result.setRange(null, null, false, Integer.MAX_VALUE); + QueryResult<ColumnSlice<String, String>> columnSlice = result.execute(); + List<HColumn<String, String>> col = columnSlice.get().getColumns(); + if (!col.isEmpty()) { + Iterator<HColumn<String, String>> itCol = col.iterator(); + for (int i = 0; i < nbins; i++) + timeResult.add(i, 0); + // split all jobs into nbins bins + while (itCol.hasNext()) { + String id = itCol.next().getName(); + long lenResult = CountID(id); + if (lenResult <= 30) + timeResult.set(0, timeResult.get(0) + 1); + else if (lenResult > 30 && lenResult <= 60) + timeResult.set(1, timeResult.get(1) + 1); + else if (lenResult > 60 && lenResult <= 120) + timeResult.set(2, timeResult.get(2) + 1); + else if (lenResult > 120 && lenResult <= 600) + timeResult.set(3, timeResult.get(3) + 1); + else { + timeResult.set(4, timeResult.get(4) + 1); + } + } + for (int i = 0; i < nbins; i++) + totalTime.set(i, totalTime.get(i) + timeResult.get(i)); + DataBase db = new DataBase(); + db.setTimeRez(timeResult); + db.setDate(DateFormat(date.getTime())); + query.add(db); + } + } + */ + DataBase db = new DataBase(); + db.setTimeTotalExec(totalTime); + query.add(db); + System.out.println("StatisticsProt.readLength: total number of dates = " + query.size()); + return query; + } + + /* + * convert String date into long date (miliseconds since the epoch start) + */ + private static long DateParsing(String datInput) { + if (datInput == null) { + return 0; + } + long dateWorkSt = 0; + SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); + try { + dateWorkSt = formatter.parse(datInput).getTime(); + } catch (ParseException e) { + e.printStackTrace(); + } + return dateWorkSt; + } + + /* + * convert String date:time into long date:time (miliseconds since the epoch start) + */ + private static long TimeConvert(String datInput) { + long dateWorkSt = 0; + if (datInput == null) { + return dateWorkSt; + } + SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss"); + try { + dateWorkSt = formatter.parse(datInput).getTime(); + } catch (ParseException e) { + e.printStackTrace(); + } + return dateWorkSt; + } + + // convert long to date in string format + private static String DateFormat(long inDate) { + SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy"); + String dateString = datformat.format(new Date(inDate)); + return dateString; + } + + /* + * convert ??? + */ + public static String DateFormatYYMMDD(long indate) { + SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd"); + String dateString = datformat.format(new Date(indate)); + return dateString; + } + + /* + * ??? + */ + public long CountID(String id) { + /* + SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(DBInstance.GetKeyspace(), StringSerializer.get(), + StringSerializer.get(), StringSerializer.get()); + sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100); + QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute(); + String datBegin = result.get().getColumnByName("DataBegin").getValue(); + String datEnd = result.get().getColumnByName("DataEnd").getValue(); + + long datBeginLong = TimeConvert(datBegin); + long datEndLong = TimeConvert(datEnd); + return (datEndLong - datBeginLong) / 1000; + */ + return 0; + } + + /* + * set earlest date and current dates. + * earlestDate is static and should be set at the 1st call + * currentDate should be re-calculated every time + */ + private static void SetDateRange() { + if (0 == earlestDate) { + StatisticsProt sp = new StatisticsProt(); + earlestDate = sp.earliestDate(); + System.out.println("Set earlest Date = " + earlestDate); + } + Calendar cal = Calendar.getInstance(); + currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH)); + } + + public boolean isThisDateValid(String dateToValidate) { + if (dateToValidate == null || dateToValidate.equals("")) { + System.out.println("Undefined date"); + return false; + } + SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd"); + try { + // if not valid, this will throw ParseException + sdf.setLenient(false); + Date date = sdf.parse(dateToValidate); + } catch (ParseException e) { + e.printStackTrace(); + return false; + } + return true; + } + + /* + * find the earliest date in the database + */ + public long earliestDate() { + /* + ArrayList<Long> dateSort = new ArrayList<Long>(); + int row_count = 10000; + RangeSlicesQuery<Long, String, String> result = HFactory.createRangeSlicesQuery(DBInstance.GetKeyspace(), LongSerializer.get(), + StringSerializer.get(), StringSerializer.get()); + result.setColumnFamily("ProteinData"); + result.setRange(null, null, false, Integer.MAX_VALUE); + result.setRowCount(row_count); + Long last_key = null; + while (true) { + result.setKeys(last_key, null); + QueryResult<OrderedRows<Long, String, String>> columnSlice = result.execute(); + OrderedRows<Long, String, String> rows = columnSlice.get(); + Iterator<Row<Long, String, String>> rowsIterator = rows.iterator(); + while (rowsIterator.hasNext()) { + Row<Long, String, String> row = rowsIterator.next(); + last_key = row.getKey(); + dateSort.add(last_key); + } + if (rows.getCount() < row_count) + break; + } + Collections.sort(dateSort); + return dateSort.get(0); + */ + return 0; + } +} diff --git a/server/compbio/statistic/StatisticsProt.java b/server/compbio/statistic/StatisticsProt.java index c0150b7..05375c0 100755 --- a/server/compbio/statistic/StatisticsProt.java +++ b/server/compbio/statistic/StatisticsProt.java @@ -9,21 +9,11 @@ import java.util.Date; import java.util.Iterator; import java.util.List; -import me.prettyprint.cassandra.serializers.LongSerializer; -import me.prettyprint.cassandra.serializers.StringSerializer; -import me.prettyprint.hector.api.beans.ColumnSlice; -import me.prettyprint.hector.api.beans.HColumn; -import me.prettyprint.hector.api.beans.OrderedRows; -import me.prettyprint.hector.api.beans.Row; -import me.prettyprint.hector.api.factory.HFactory; -import me.prettyprint.hector.api.query.QueryResult; -import me.prettyprint.hector.api.query.RangeSlicesQuery; -import me.prettyprint.hector.api.query.SliceQuery; -import compbio.cassandra.CassandraCreate; +import compbio.cassandra.CassandraNativeConnector; import compbio.cassandra.DataBase; public class StatisticsProt { - private CassandraCreate cc = new CassandraCreate(); + private CassandraNativeConnector cc = new CassandraNativeConnector(); private ArrayList<DataBase> query; private static long currentDate = 0; private static long earlestDate = 0; @@ -55,6 +45,7 @@ public class StatisticsProt { end.setTime(new Date(dateEnd)); query = new ArrayList<DataBase>(); int day = 0; + /* for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) { SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(), StringSerializer.get(), StringSerializer.get()); @@ -72,6 +63,7 @@ public class StatisticsProt { System.out.println("no data"); } } + */ System.out.println("StatisticsProt.readLength: total number of dates = " + query.size()); return query; } @@ -105,6 +97,7 @@ public class StatisticsProt { List<Integer> totalTime = new ArrayList<Integer>(); for (int i = 0; i < nbins; i++) totalTime.add(i, 0); + /* for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) { List<Integer> timeResult = new ArrayList<Integer>(); SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(), @@ -142,6 +135,7 @@ public class StatisticsProt { query.add(db); } } + */ DataBase db = new DataBase(); db.setTimeTotalExec(totalTime); query.add(db); @@ -154,6 +148,7 @@ public class StatisticsProt { * */ public List<DataBase> readProteins(String protIn) { query = new ArrayList<DataBase>(); + /* SliceQuery<String, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get()); result.setColumnFamily("ProteinRow"); @@ -172,6 +167,7 @@ public class StatisticsProt { query.add(db); } } + */ return query; } @@ -180,11 +176,12 @@ public class StatisticsProt { * */ public List<DataBase> readProtID(int counter) { query = new ArrayList<DataBase>(); - int row_count = 100000000; + int row_count = 100; + /* RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get()); result.setColumnFamily("ProteinRow"); - result.setRange(null, null, false, Integer.MAX_VALUE); + result.setRange(null, null, false, 100); result.setRowCount(row_count); String last_key = null; while (true) { @@ -196,13 +193,14 @@ public class StatisticsProt { Row<String, String, String> row = rowsIterator.next(); last_key = row.getKey(); List<HColumn<String, String>> clms = row.getColumnSlice().getColumns(); - int npred = 0; - for (HColumn<String, String> cln : clms) { - String name = cln.getName(); - if (name.matches("(.*)jnetpred")) { - ++npred; - } - } + //int npred = 0; + //for (HColumn<String, String> cln : clms) { + // String name = cln.getName(); + // if (name.matches("(.*)jnetpred")) { + // ++npred; + // } + //} + int npred = clms.size(); if (npred > counter) { DataBase db = new DataBase(); db.setProt(last_key); @@ -212,7 +210,7 @@ public class StatisticsProt { } if (rows.getCount() < row_count) break; - } + }*/ return query; } @@ -222,6 +220,7 @@ public class StatisticsProt { public List<DataBase> readPart(String protIn) { int row_count = 10000; query = new ArrayList<DataBase>(); + /* RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get()); result.setColumnFamily("ProteinRow"); @@ -266,6 +265,7 @@ public class StatisticsProt { if (rows.getCount() < row_count) break; } + */ return query; } @@ -323,6 +323,7 @@ public class StatisticsProt { * ??? */ public long CountID(String id) { + /* SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get()); sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100); @@ -333,6 +334,8 @@ public class StatisticsProt { long datBeginLong = TimeConvert(datBegin); long datEndLong = TimeConvert(datEnd); return (datEndLong - datBeginLong) / 1000; + */ + return 0; } /* @@ -373,6 +376,7 @@ public class StatisticsProt { public long earliestDate() { ArrayList<Long> dateSort = new ArrayList<Long>(); int row_count = 10000; + /* RangeSlicesQuery<Long, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), LongSerializer.get(), StringSerializer.get(), StringSerializer.get()); result.setColumnFamily("ProteinData"); @@ -391,7 +395,7 @@ public class StatisticsProt { } if (rows.getCount() < row_count) break; - } + }*/ Collections.sort(dateSort); return dateSort.get(0); }