<classpathentry kind="src" path="datadb"/>
<classpathentry kind="src" path="server"/>
<classpathentry excluding="testdata/" kind="src" path="testsrc"/>
- <classpathentry kind="lib" path="WEB-INF/lib/drmaa.jar"/>
- <classpathentry kind="lib" path="WEB-INF/lib/compbio-annotations-1.0.jar"/>
- <classpathentry kind="lib" path="WEB-INF/lib/compbio-util-1.4.jar"/>
- <classpathentry kind="lib" path="WEB-INF/lib/derby-10.8.2.2.jar"/>
- <classpathentry kind="lib" path="WEB-INF/lib/compbio-ga-1.1.jar"/>
<classpathentry kind="lib" path="testsrc/lib/testng-5.10-jdk15.jar"/>
- <classpathentry kind="lib" path="WEB-INF/lib/jstl-1.1.2.jar"/>
- <classpathentry kind="lib" path="WEB-INF/lib/standard-1.1.2.jar"/>
<classpathentry kind="lib" path="WEB-INF/lib/log4j-1.2.16.jar"/>
- <classpathentry kind="lib" path="WEB-INF/lib/cassandra-all-1.2.4.jar"/>
- <classpathentry kind="lib" path="WEB-INF/lib/cassandra-clientutil-1.2.4.jar"/>
- <classpathentry kind="lib" path="WEB-INF/lib/cassandra-thrift-1.2.4.jar"/>
- <classpathentry kind="lib" path="WEB-INF/lib/hector-0.7.0-20-sources.jar"/>
- <classpathentry kind="lib" path="WEB-INF/lib/hector-core-1.0-5.jar" sourcepath="/home/asherstnev/.m2/repository/me/prettyprint/hector-core/1.0-5/hector-core-1.0-5-sources.jar"/>
+ <classpathentry kind="lib" path="WEB-INF/lib/cassandra-driver-core-2.0.0-beta3-SNAPSHOT.jar" sourcepath="WEB-INF/lib/cassandra-driver-core-2.0.0-beta3-SNAPSHOT-sources.jar"/>
+ <classpathentry kind="lib" path="WEB-INF/lib/lz4-1.2.0.jar"/>
+ <classpathentry kind="lib" path="WEB-INF/lib/snappy-java-1.1.0.jar"/>
+ <classpathentry kind="lib" path="WEB-INF/lib/guava-15.0.jar" sourcepath="/home/asherstnev/.m2/repository/com/google/guava/guava/15.0/guava-15.0-sources.jar"/>
+ <classpathentry kind="lib" path="WEB-INF/lib/netty-3.6.6.Final.jar"/>
+ <classpathentry kind="lib" path="WEB-INF/lib/metrics-core-3.0.1.jar"/>
+ <classpathentry kind="lib" path="WEB-INF/lib/jackson-core-asl-1.9.13.jar"/>
+ <classpathentry kind="lib" path="WEB-INF/lib/jackson-mapper-asl-1.9.13.jar"/>
<classpathentry kind="con" path="org.eclipse.jst.server.core.container/org.eclipse.jst.server.tomcat.runtimeTarget/Apache Tomcat v7.0">
<attributes>
<attribute name="owner.project.facets" value="jst.web"/>
</attributes>
</classpathentry>
- <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/jdk1.7.0_25">
- <attributes>
- <attribute name="owner.project.facets" value="java"/>
- </attributes>
- </classpathentry>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/jdk1.7.0_45"/>
<classpathentry kind="output" path="WEB-INF/classes"/>
</classpath>
+++ /dev/null
-package compbio.cassandra;
-
-import java.util.Arrays;
-import java.util.List;
-
-import me.prettyprint.cassandra.serializers.LongSerializer;
-import me.prettyprint.cassandra.serializers.StringSerializer;
-import me.prettyprint.cassandra.service.ThriftKsDef;
-import me.prettyprint.hector.api.Cluster;
-import me.prettyprint.hector.api.Keyspace;
-import me.prettyprint.hector.api.beans.ColumnSlice;
-import me.prettyprint.hector.api.ddl.ColumnFamilyDefinition;
-import me.prettyprint.hector.api.ddl.ComparatorType;
-import me.prettyprint.hector.api.ddl.KeyspaceDefinition;
-import me.prettyprint.hector.api.factory.HFactory;
-import me.prettyprint.hector.api.mutation.Mutator;
-import me.prettyprint.hector.api.query.QueryResult;
-import me.prettyprint.hector.api.query.SliceQuery;
-
-public class CassandraCreate {
- private static Keyspace ksp;
- private static Cluster cluster;
- private static Mutator<Long> mutatorLong;
- private static Mutator<String> mutatorString;
- private static Mutator<String> mutatorLog;
- StringSerializer ss = StringSerializer.get();
- LongSerializer ls = LongSerializer.get();
-
- /*
- * connect to the cluster and look weather the dababase has any data inside
- */
- public void Connection() {
- cluster = HFactory.getOrCreateCluster("Protein Cluster", "127.0.0.1:9160");
- KeyspaceDefinition keyspaceDef = cluster.describeKeyspace("ProteinKeyspace");
- /*
- * If keyspace does not exist, the CFs don't exist either. => create
- * them.
- */
- if (keyspaceDef == null) { // create column family
- System.out.println("ProteinKeyspace has been null");
- ColumnFamilyDefinition cfProtein = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinRow",
- ComparatorType.ASCIITYPE);
- ColumnFamilyDefinition cfLog = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinLog", ComparatorType.ASCIITYPE);
- ColumnFamilyDefinition cfData = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinData",
- ComparatorType.ASCIITYPE);
-
- KeyspaceDefinition newKeyspace = HFactory.createKeyspaceDefinition("ProteinKeyspace", ThriftKsDef.DEF_STRATEGY_CLASS, 1,
- Arrays.asList(cfProtein, cfLog, cfData));
- /*
- * Add the schema to the cluster. "true" as the second param means
- * that Hector will be blocked until all nodes see the change.
- */
- cluster.addKeyspace(newKeyspace, true);
- cluster.addColumnFamily(cfProtein, true);
- cluster.addColumnFamily(cfLog, true);
- cluster.addColumnFamily(cfData, true);
- } else {
- System.out.println("Data loaded");
- }
- ksp = HFactory.createKeyspace("ProteinKeyspace", cluster);
- System.out.println("Cassandra has been connected");
- }
-
- /*
- * parsing data source and filling the database
- */
- public void Parsing(String source) {
- /*
- * CF ProteinRow store protein and prediction
- */
- mutatorString = HFactory.createMutator(ksp, ss);
-
- /*
- * ProteinLog stores logging info: IP, job id, start date and end date
- */
- mutatorLog = HFactory.createMutator(ksp, ss);
-
- /*
- * CF ProteinData store id and protein per data
- */
- mutatorLong = HFactory.createMutator(ksp, ls);
-
- if (true) {
- //if (source.equals("http")) {
- // get data from real Jpred production server
- System.out.println("Parsing web data source......");
- String datasrc = "http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat";
- String prefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
- JpredParserHTTP parser = new JpredParserHTTP(prefix);
- parser.Parsing(datasrc, 4);
- flushData();
- }
- if (true) {
- //if (source.equals("file")) {
- // get irtifical data generated for the DB stress tests
- System.out.println("Parsing local file data source......");
- String datasrc = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/data.dat";
- String prefix = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/Jpreddata";
- JpredParserLocalFile parser = new JpredParserLocalFile(prefix);
- parser.Parsing(datasrc, 190);
- flushData();
- }
- }
-
- public void flushData() {
- mutatorString.execute();
- mutatorLong.execute();
- mutatorLog.execute();
- // System.out.println("Flush new data...");
- }
-
- public void Closing() {
- cluster.getConnectionManager().shutdown();
- System.out.println("Cassandra has been shut down");
- }
-
- /*
- * check whether the job id exists in the DB
- */
- public boolean CheckID(String jobid) {
- SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(ksp, ss, ss, ss);
- sliceQuery.setColumnFamily("ProteinLog").setKey(jobid).setRange("", "", false, 100);
- QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
- if (result.get().getColumns().size() > 0) {
- return true;
- }
- return false;
- }
-
- /*
- * prepare data for insertion into the db
- */
- public void InsertData(long dataWork, String dataBegin, String dataEnd, String ip, String id, String statusEx, String statusFinal,
- String protein, List<FastaSequence> jnetpred) {
- mutatorLog.addInsertion(id, "ProteinLog", HFactory.createColumn("ip", ip, ss, ss))
- .addInsertion(id, "ProteinLog", HFactory.createColumn("DataBegin", dataBegin, ss, ss))
- .addInsertion(id, "ProteinLog", HFactory.createColumn("DataEnd", dataEnd, ss, ss))
- .addInsertion(id, "ProteinLog", HFactory.createColumn("Status ex", statusEx, ss, ss))
- .addInsertion(id, "ProteinLog", HFactory.createColumn("Status final", statusFinal, ss, ss))
- .addInsertion(id, "ProteinLog", HFactory.createColumn("Protein", protein, ss, ss));
- for (int i = 0; i < jnetpred.size(); i++) {
- String namepred = jnetpred.get(i).getId();
- String pred = jnetpred.get(i).getSequence().replaceAll("\n", "");
- mutatorString.addInsertion(protein, "ProteinRow", HFactory.createColumn(id + ";" + namepred, pred, ss, ss));
- }
- mutatorLong.addInsertion(dataWork, "ProteinData", HFactory.createColumn(id, protein, ss, ss));
- }
-
- public Keyspace GetKeyspace() {
- return ksp;
- }
-}
--- /dev/null
+package compbio.cassandra;
+
+import java.io.IOException;
+import java.util.List;
+
+import com.datastax.driver.core.Cluster;
+import com.datastax.driver.core.Host;
+import com.datastax.driver.core.Metadata;
+import com.datastax.driver.core.Session;
+import com.datastax.driver.core.ResultSet;
+
+public class CassandraNativeConnector {
+ private static Cluster cluster;
+ private static Session session;
+
+ /*
+ * private static Keyspace ksp; private static Mutator<Long> mutatorLong;
+ * private static Mutator<String> mutatorString; private static
+ * Mutator<String> mutatorLog; StringSerializer ss = StringSerializer.get();
+ * LongSerializer ls = LongSerializer.get();
+ */
+
+ /*
+	 * connect to the cluster and check whether the database has any data inside
+ */
+ public void Connect() {
+ // local cassandra cluster
+ cluster = Cluster.builder().addContactPoint("localhost").build();
+ // distributed cassandra cluster
+ /* cluster = Cluster.builder().addContactPoint("10.0.115.190").build(); */
+ Metadata metadata = cluster.getMetadata();
+ System.out.printf("Connected to cluster: %s\n", metadata.getClusterName());
+ for (Host host : metadata.getAllHosts()) {
+			System.out.printf("Datacenter: %s; Host: %s; Rack: %s\n", host.getDatacenter(), host.getAddress(), host.getRack());
+ }
+
+ session = cluster.connect();
+ session.execute("CREATE KEYSPACE IF NOT EXISTS ProteinKeyspace WITH replication = {'class':'SimpleStrategy', 'replication_factor':3};");
+ session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinRow (Protein ascii PRIMARY KEY, Predictions map<ascii,ascii>);");
+ session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinLog "
+ + "(JobID ascii PRIMARY KEY, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, ExecutionStatus ascii, Protein ascii);");
+ session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinData (jobtime bigint PRIMARY KEY, JobID ascii, Protein ascii);");
+
+ System.out.println("Cassandra connected");
+ }
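+	/*
+	 * Note: unquoted CQL identifiers are case-insensitive (stored in lower
+	 * case), so "ProteinKeyspace" above is the keyspace "proteinkeyspace".
+	 * A sketch of an alternative (an assumption, not part of this change-set):
+	 * bind the session to the keyspace once, so the statements below need no
+	 * "ProteinKeyspace." prefix:
+	 *
+	 *   session = cluster.connect("ProteinKeyspace");
+	 */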
+
+ /*
+ * parsing data source and filling the database
+ */
+ public void Parsing() throws IOException {
+ if (false) {
+ // if (source.equals("http")) {
+ // get data from real Jpred production server
+ System.out.println("Parsing web data source......");
+ String datasrc = "http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat";
+ String prefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
+ JpredParserHTTP parser = new JpredParserHTTP(prefix);
+ parser.Parsing(datasrc, 4);
+ }
+ if (true) {
+ // if (source.equals("file")) {
+			// get artificial data generated for the DB stress tests
+ System.out.println("Parsing local file data source......");
+ String datasrc = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/data.dat";
+ String prefix = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/Jpreddata";
+ JpredParserLocalFile parser = new JpredParserLocalFile(prefix);
+ parser.Parsing(datasrc, 190);
+ }
+ }
+
+ public void Closing() {
+ cluster.shutdown();
+ System.out.println("Cassandra has been shut down");
+ }
+
+ /*
+ * check whether the job id exists in the DB
+ */
+ public boolean CheckID(String jobid) {
+		// ProteinLog is keyed by JobID (ProteinData is keyed by jobtime)
+		String com = "SELECT JobID FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';";
+		System.out.println(com);
+		ResultSet results = session.execute(com);
+		// session.execute() never returns null; check whether any row came back
+		return !results.isExhausted();
+ }
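+	/*
+	 * A minimal sketch (an assumption, not part of this change-set): the same
+	 * check with a prepared statement, which avoids concatenating user input
+	 * into the CQL string:
+	 *
+	 *   PreparedStatement ps = session.prepare(
+	 *       "SELECT JobID FROM ProteinKeyspace.ProteinLog WHERE JobID = ?;");
+	 *   return !session.execute(ps.bind(jobid)).isExhausted();
+	 */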
+
+ /*
+ * prepare data for insertion into the db
+ */
+ public void InsertData(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx, String statusFinal,
+ String protein, List<FastaSequence> predictions) {
+
+ String check1 = "SELECT count(*) FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';";
+ //System.out.println(check1);
+ ResultSet results1 = session.execute(check1);
+		// count(*) always returns exactly one row; insert only jobs not yet logged
+		if (0 == results1.one().getLong(0)) {
+ String com1 = "INSERT INTO ProteinKeyspace.ProteinLog "
+ + "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)" + " VALUES ('" + jobid + "','" + ip + "','"
+ + startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx + "','" + protein + "');";
+ // System.out.println(com1);
+ session.execute(com1);
+
+ String com2 = "INSERT INTO ProteinKeyspace.ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid
+ + "','" + protein + "');";
+ // System.out.println(com2);
+ // session.execute(com2);
+
+			String allpredictions = "";
+			for (FastaSequence pred : predictions) {
+				String predictionname = pred.getId();
+				String prediction = pred.getSequence().replaceAll("\n", "");
+				// key map entries by job id as well, as the old Hector schema
+				// did, so predictions from different jobs do not collide
+				allpredictions += "'" + jobid + ";" + predictionname + "':'" + prediction + "',";
+			}
+			String final_prediction = "";
+			// strip the trailing comma; the string is empty when no predictions were parsed
+			if (!allpredictions.isEmpty()) {
+				final_prediction = allpredictions.substring(0, allpredictions.length() - 1);
+			}
+
+			String check2 = "SELECT count(*) FROM ProteinKeyspace.ProteinRow WHERE Protein = '" + protein + "';";
+			//System.out.println(check2);
+			ResultSet results2 = session.execute(check2);
+
+			// store per-job predictions in the Predictions map column declared
+			// in Connect(); count(*) always returns exactly one row
+			if (0 == results2.one().getLong(0)) {
+				String com3 = "INSERT INTO ProteinKeyspace.ProteinRow (Protein, Predictions) VALUES ('" + protein + "', {"
+						+ final_prediction + "});";
+				System.out.println(com3);
+				session.execute(com3);
+			} else {
+				String com4 = "UPDATE ProteinKeyspace.ProteinRow SET Predictions = Predictions + {" + final_prediction
+						+ "} WHERE Protein = '" + protein + "';";
+				System.out.println(com4);
+				session.execute(com4);
+			}
+ }
+ }
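+	/*
+	 * A sketch (an assumption, not part of this change-set): the log and data
+	 * inserts above could be sent atomically as one logged batch:
+	 *
+	 *   BatchStatement batch = new BatchStatement();
+	 *   batch.add(new SimpleStatement(com1));
+	 *   batch.add(new SimpleStatement(com2));
+	 *   session.execute(batch);
+	 */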
+
+}
package compbio.cassandra;
+import java.io.IOException;
+
public interface JpredParser {
	/*
	 * Does the actual parsing of the source file
	 */
- void Parsing(String source, int nDays);
+ void Parsing(String source, int nDays) throws IOException;
}
package compbio.cassandra;
import java.io.BufferedReader;
+import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import compbio.cassandra.JpredParser;
public class JpredParserHTTP implements JpredParser {
- private CassandraCreate cc = new CassandraCreate();
+ private CassandraNativeConnector cc = new CassandraNativeConnector();
private String dirprefix;
JpredParserHTTP() {
dirprefix = newsourceprefix;
}
- public void Parsing(String source, int nDays) {
+ public void Parsing(String source, int nDays) throws IOException {
Calendar cal = Calendar.getInstance();
cal.add(Calendar.DATE, -nDays);
for (int i = 0; i < nDays; ++i) {
int year = cal.get(Calendar.YEAR);
int day = cal.get(Calendar.DATE);
String date = year + "/" + month + "/" + day;
- if (0 < ParsingForDate(source, date)) {
- cc.flushData();
- }
+ ParsingForDate(source, date);
}
}
final FastaSequence fs = fr.next();
if (fs.getId().equals("QUERY") || fs.getId().equals(id))
newprotein = fs.getSequence().replaceAll("\n", "");
- else
+ else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) {
seqs.add(fs);
+ }
}
if (newprotein.equals("")) {
countUnclearFASTAid++;
++countinsertions;
++njobs;
// flush every 50 insertions
- if (0 == countinsertions % 50) {
- cc.flushData();
- njobs -= 50;
- }
+ //if (0 == countinsertions % 50) {
+ // cc.flushData();
+ // njobs -= 50;
+ //}
}
} catch (IOException e) {
e.printStackTrace();
import java.io.BufferedReader;
import java.io.File;
+import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.util.List;
public class JpredParserLocalFile implements JpredParser {
- private CassandraCreate cc = new CassandraCreate();
+ private CassandraNativeConnector cc = new CassandraNativeConnector();
private String dirprefix;
- public void setSource (String newsourceprefix) {
+ public void setSource(String newsourceprefix) {
this.dirprefix = newsourceprefix;
}
JpredParserLocalFile() {
this.dirprefix = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/data.dat";
}
-
+
JpredParserLocalFile(String sourceurl) {
this.dirprefix = sourceurl;
}
- public void Parsing(String source, int nDays) {
+ public void Parsing(String source, int nDays) throws IOException {
Calendar cal = Calendar.getInstance();
cal.add(Calendar.DATE, -nDays);
+ List<String> alljobs = new ArrayList<String>();
+ File file = new File(source);
+ BufferedReader alljobsfile = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
+ String line;
+
+ while ((line = alljobsfile.readLine()) != null) {
+ alljobs.add(line);
+ }
+ alljobsfile.close();
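+		// (a Java 7 try-with-resources form, e.g.
+		// try (BufferedReader r = new BufferedReader(new FileReader(file))) { ... },
+		// would also close the reader on an early IOException; plain close()
+		// is kept here to match the surrounding style)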
+
+ System.out.println("Inserting jobs for " + nDays + " days, " + alljobs.size() + " jobs in total");
+ final long startTime = System.currentTimeMillis();
for (int i = 0; i < nDays; ++i) {
cal.add(Calendar.DATE, 1);
int month = cal.get(Calendar.MONTH) + 1;
int year = cal.get(Calendar.YEAR);
int day = cal.get(Calendar.DATE);
String date = year + "/" + month + "/" + day;
- if (0 < ParsingForDate(source, date)) {
- cc.flushData();
- }
+ ParsingForDate(alljobs, date);
}
+ final long execTime = System.currentTimeMillis() - startTime;
+ System.out.println("Execution Time = " + execTime + " ms");
}
- private int ParsingForDate(String input, String date) {
+ private int ParsingForDate(List<String> input, String date) {
int totalcount = 0;
int countNoData = 0;
int countUnclearFASTAid = 0;
int njobs = 0;
System.out.println("Inserting jobs for " + date);
- try {
- File file = new File(input);
- BufferedReader alljobs = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
- String line;
-
- while ((line = alljobs.readLine()) != null) {
- if (line.matches(date + "(.*)jp_[^\\s]+")) {
- String[] table = line.split("\\s+");
- String id = table[table.length - 1];
- totalcount++;
- if (!cc.CheckID(id)) {
- String confilename = dirprefix + "/" + id + "/" + id + ".concise";
- File confile = new File(confilename);
- if (confile.exists()) {
- try {
- final FastaReader fr = new FastaReader(confilename);
- final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
- String newprotein = "";
- while (fr.hasNext()) {
- final FastaSequence fs = fr.next();
- if (fs.getId().equals("QUERY") || fs.getId().equals(id))
- newprotein = fs.getSequence().replaceAll("\n", "");
- else
- seqs.add(fs);
+ for (String in : input) {
+ if (in.matches(date + "(.*)jp_[^\\s]+")) {
+ String[] table = in.split("\\s+");
+ String starttime = table[0];
+ String finishtime = table[1];
+ String ip = table[2];
+ String id = table[table.length - 1];
+ totalcount++;
+ //if (!cc.CheckID(id)) {
+ if (true) {
+ String confilename = dirprefix + "/" + id + "/" + id + ".concise";
+ File confile = new File(confilename);
+ if (confile.exists()) {
+ try {
+ final FastaReader fr = new FastaReader(confilename);
+ final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
+ String newprotein = "";
+ while (fr.hasNext()) {
+ final FastaSequence fs = fr.next();
+ if (fs.getId().equals("QUERY") || fs.getId().equals(id))
+ newprotein = fs.getSequence().replaceAll("\n", "");
+ else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) {
+ seqs.add(fs);
}
- if (newprotein.equals("")) {
- countUnclearFASTAid++;
- } else {
- SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
- String dateInString1 = table[0].substring(0, table[0].indexOf(":"));
- long dateWork1 = 0;
- try {
- Date dat1 = formatter.parse(dateInString1);
- dateWork1 = dat1.getTime();
- } catch (ParseException e) {
- e.printStackTrace();
- }
- cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs);
- ++countinsertions;
- ++njobs;
- // flush every 50 insertions
- if (0 == countinsertions % 50) {
- cc.flushData();
- njobs -= 50;
- }
+ }
+ if (newprotein.equals("")) {
+ countUnclearFASTAid++;
+ } else {
+ SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
+ String dateInString1 = starttime.substring(0, starttime.indexOf(":"));
+ long dateWork1 = 0;
+ try {
+ Date dat = formatter.parse(dateInString1);
+ dateWork1 = dat.getTime();
+ } catch (ParseException e) {
+ e.printStackTrace();
}
- fr.close();
- } catch (IOException e) {
- e.printStackTrace();
+ cc.InsertData(dateWork1, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs);
+ ++countinsertions;
+ ++njobs;
+ // flush every 50 insertions
+ //if (0 == countinsertions % 50) {
+ // cc.flushData();
+ // njobs -= 50;
+ //}
}
- } else {
- countNoData++;
+ fr.close();
+ } catch (IOException e) {
+ e.printStackTrace();
}
} else {
- ++countinserted;
+ countNoData++;
}
} else {
- if (line.matches(date + "(.*)Sequence0/(.*)")) {
- ++counAlignments;
- } else {
- ++countStrange;
- }
+ ++countinserted;
+ }
+ } else {
+ if (in.matches(date + "(.*)Sequence0/(.*)")) {
+ ++counAlignments;
+ } else {
+ ++countStrange;
}
}
- alljobs.close();
+ }
+		// always-on summary block (the braces pair with the old try block's closing brace)
+		if (true) {
System.out.println("Total number of jobs = " + totalcount);
System.out.println(" " + countinserted + " jobs inserted already");
System.out.println(" " + counAlignments + " jalview jobs");
System.out.println(" " + countNoData + " jobs without *.concise.fasta file");
System.out.println(" " + countUnclearFASTAid + " jobs with unclear FASTA protein id in *.concise.fasta");
System.out.println(" " + countinsertions + " new job insertions\n");
- } catch (MalformedURLException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
}
return njobs;
}
+
}
package compbio.listeners;
+import java.io.IOException;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import javax.servlet.ServletContextListener;
import javax.servlet.annotation.WebListener;
-import compbio.cassandra.CassandraCreate;
+import compbio.cassandra.CassandraNativeConnector;
/**
* Application Lifecycle Listener implementation class ContextListener
@WebListener
public class ContextListener implements ServletContextListener {
private ScheduledExecutorService webjob_scheduler;
- CassandraCreate cc = new CassandraCreate();
+ CassandraNativeConnector db = new CassandraNativeConnector();
/**
* @see ServletContextListener#contextInitialized(ServletContextEvent)
*/
public void contextInitialized(ServletContextEvent arg0) {
System.out.println("ProteoCache session start......");
- cc.Connection();
+ db.Connect();
webjob_scheduler = Executors.newSingleThreadScheduledExecutor();
webjob_scheduler.scheduleAtFixedRate(new Runnable() {
@Override
public void run() {
- cc.Parsing("test");
+ try {
+ db.Parsing();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
}
}, 0, 60, TimeUnit.SECONDS);
* @see ServletContextListener#contextDestroyed(ServletContextEvent)
*/
public void contextDestroyed(ServletContextEvent arg0) {
- cc.Closing();
+ db.Closing();
System.out.println("Shut down ProteoCache......");
webjob_scheduler.shutdownNow();
}
import javax.servlet.http.HttpServletResponse;
import compbio.statistic.StatisticsProt;
+import compbio.statistic.CassandraRequester;
/**
* Servlet implementation class LengthServlet
final long startTime = System.currentTimeMillis();
String date1 = request.getParameter("data1");
String date2 = request.getParameter("data2");
- StatisticsProt sp = new StatisticsProt();
+ CassandraRequester sp = new CassandraRequester();
if (null != request.getParameter("option")) {
Calendar cal = Calendar.getInstance();
date1 = StatisticsProt.DateFormatYYMMDD(sp.earliestDate());
}
request.setAttribute("data1", date1);
request.setAttribute("data2", date2);
- request.setAttribute("result", sp.readLength(date1, date2));
+ request.setAttribute("result", sp.extractExecutionTime(date1, date2));
request.setAttribute("flag", request.getParameter("option"));
final long endTime = System.currentTimeMillis();
request.setAttribute("timeExecution", (endTime - startTime));
--- /dev/null
+package compbio.statistic;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Collections;
+import java.util.Date;
+import java.util.Iterator;
+import java.util.List;
+
+import compbio.cassandra.CassandraNativeConnector;
+import compbio.cassandra.DataBase;
+
+public class CassandraRequester {
+ private CassandraNativeConnector DBInstance = new CassandraNativeConnector();
+ private ArrayList<DataBase> query;
+ private static long currentDate = 0;
+ private static long earlestDate = 0;
+
+
+ /*
+ * query: execution time for the period from date1 till date2
+ * */
+ public List<DataBase> extractExecutionTime(String date1, String date2) {
+ if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
+ System.out.println("Wrong date: point 3");
+ return null;
+ }
+ SetDateRange();
+ int nbins = 5;
+ long dateStart = DateParsing(date1);
+ long dateEnd = DateParsing(date2);
+ if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate) || dateStart > dateEnd)
+ return null;
+ if (dateStart < earlestDate)
+ dateStart = earlestDate;
+ if (dateEnd > currentDate)
+			dateEnd = currentDate;
+
+ System.out.println("CassandraRequester.extractExecutionTime: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
+
+ Calendar start = Calendar.getInstance();
+ start.setTime(new Date(dateStart));
+ Calendar end = Calendar.getInstance();
+ end.setTime(new Date(dateEnd));
+ query = new ArrayList<DataBase>();
+ List<Integer> totalTime = new ArrayList<Integer>();
+ for (int i = 0; i < nbins; i++)
+ totalTime.add(i, 0);
+ /*
+ for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
+ List<Integer> timeResult = new ArrayList<Integer>();
+ SliceQuery<Long, String, String> result = HFactory.createSliceQuery(DBInstance.GetKeyspace(), LongSerializer.get(),
+ StringSerializer.get(), StringSerializer.get());
+ result.setColumnFamily("ProteinData");
+ result.setKey(date.getTime());
+ result.setRange(null, null, false, Integer.MAX_VALUE);
+ QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
+ List<HColumn<String, String>> col = columnSlice.get().getColumns();
+ if (!col.isEmpty()) {
+ Iterator<HColumn<String, String>> itCol = col.iterator();
+ for (int i = 0; i < nbins; i++)
+ timeResult.add(i, 0);
+ // split all jobs into nbins bins
+ while (itCol.hasNext()) {
+ String id = itCol.next().getName();
+ long lenResult = CountID(id);
+ if (lenResult <= 30)
+ timeResult.set(0, timeResult.get(0) + 1);
+ else if (lenResult > 30 && lenResult <= 60)
+ timeResult.set(1, timeResult.get(1) + 1);
+ else if (lenResult > 60 && lenResult <= 120)
+ timeResult.set(2, timeResult.get(2) + 1);
+ else if (lenResult > 120 && lenResult <= 600)
+ timeResult.set(3, timeResult.get(3) + 1);
+ else {
+ timeResult.set(4, timeResult.get(4) + 1);
+ }
+ }
+ for (int i = 0; i < nbins; i++)
+ totalTime.set(i, totalTime.get(i) + timeResult.get(i));
+ DataBase db = new DataBase();
+ db.setTimeRez(timeResult);
+ db.setDate(DateFormat(date.getTime()));
+ query.add(db);
+ }
+ }
+ */
+ DataBase db = new DataBase();
+ db.setTimeTotalExec(totalTime);
+ query.add(db);
+		System.out.println("CassandraRequester.extractExecutionTime: total number of dates = " + query.size());
+ return query;
+ }
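+	/*
+	 * A sketch (an assumption, not part of this change-set) of the per-day
+	 * loop above in CQL, once ProteinData is populated: for each day in
+	 * [dateStart, dateEnd]
+	 *
+	 *   SELECT JobID FROM ProteinKeyspace.ProteinData WHERE jobtime = <day in ms>;
+	 *
+	 * then bin CountID(JobID) into totalTime exactly as the commented-out
+	 * Hector code does.
+	 */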
+
+ /*
+	 * convert String date into long date (milliseconds since the epoch start)
+ */
+ private static long DateParsing(String datInput) {
+ if (datInput == null) {
+ return 0;
+ }
+ long dateWorkSt = 0;
+ SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
+ try {
+ dateWorkSt = formatter.parse(datInput).getTime();
+ } catch (ParseException e) {
+ e.printStackTrace();
+ }
+ return dateWorkSt;
+ }
+
+ /*
+	 * convert String date:time into long date:time (milliseconds since the epoch start)
+ */
+ private static long TimeConvert(String datInput) {
+ long dateWorkSt = 0;
+ if (datInput == null) {
+ return dateWorkSt;
+ }
+		// HH, not hh: the job timestamps are 24-hour times
+		SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:HH:mm:ss");
+ try {
+ dateWorkSt = formatter.parse(datInput).getTime();
+ } catch (ParseException e) {
+ e.printStackTrace();
+ }
+ return dateWorkSt;
+ }
+
+ // convert long to date in string format
+ private static String DateFormat(long inDate) {
+ SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
+ String dateString = datformat.format(new Date(inDate));
+ return dateString;
+ }
+
+ /*
+	 * convert long date (milliseconds since the epoch) into a yyyy/MM/dd string
+ */
+ public static String DateFormatYYMMDD(long indate) {
+ SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd");
+ String dateString = datformat.format(new Date(indate));
+ return dateString;
+ }
+
+ /*
+	 * job execution time in seconds (the Hector query is disabled for now, so this returns 0)
+ */
+ public long CountID(String id) {
+ /*
+ SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(DBInstance.GetKeyspace(), StringSerializer.get(),
+ StringSerializer.get(), StringSerializer.get());
+ sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
+ QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
+ String datBegin = result.get().getColumnByName("DataBegin").getValue();
+ String datEnd = result.get().getColumnByName("DataEnd").getValue();
+
+ long datBeginLong = TimeConvert(datBegin);
+ long datEndLong = TimeConvert(datEnd);
+ return (datEndLong - datBeginLong) / 1000;
+ */
+ return 0;
+ }
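+	/*
+	 * A sketch (an assumption, not part of this change-set) of the same
+	 * lookup against the new schema:
+	 *
+	 *   SELECT DataBegin, DataEnd FROM ProteinKeyspace.ProteinLog
+	 *   WHERE JobID = '<id>';
+	 *
+	 * then return (TimeConvert(DataEnd) - TimeConvert(DataBegin)) / 1000.
+	 */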
+
+ /*
+	 * set the earliest and current dates:
+	 * earlestDate is static and is set on the first call only,
+	 * currentDate is recalculated on every call
+ */
+ private static void SetDateRange() {
+ if (0 == earlestDate) {
+ StatisticsProt sp = new StatisticsProt();
+ earlestDate = sp.earliestDate();
+			System.out.println("Set earliest date = " + earlestDate);
+ }
+ Calendar cal = Calendar.getInstance();
+ currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH));
+ }
+
+ public boolean isThisDateValid(String dateToValidate) {
+ if (dateToValidate == null || dateToValidate.equals("")) {
+ System.out.println("Undefined date");
+ return false;
+ }
+ SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd");
+ try {
+ // if not valid, this will throw ParseException
+ sdf.setLenient(false);
+ Date date = sdf.parse(dateToValidate);
+ } catch (ParseException e) {
+ e.printStackTrace();
+ return false;
+ }
+ return true;
+ }
+
+ /*
+ * find the earliest date in the database
+ */
+ public long earliestDate() {
+ /*
+ ArrayList<Long> dateSort = new ArrayList<Long>();
+ int row_count = 10000;
+ RangeSlicesQuery<Long, String, String> result = HFactory.createRangeSlicesQuery(DBInstance.GetKeyspace(), LongSerializer.get(),
+ StringSerializer.get(), StringSerializer.get());
+ result.setColumnFamily("ProteinData");
+ result.setRange(null, null, false, Integer.MAX_VALUE);
+ result.setRowCount(row_count);
+ Long last_key = null;
+ while (true) {
+ result.setKeys(last_key, null);
+ QueryResult<OrderedRows<Long, String, String>> columnSlice = result.execute();
+ OrderedRows<Long, String, String> rows = columnSlice.get();
+ Iterator<Row<Long, String, String>> rowsIterator = rows.iterator();
+ while (rowsIterator.hasNext()) {
+ Row<Long, String, String> row = rowsIterator.next();
+ last_key = row.getKey();
+ dateSort.add(last_key);
+ }
+ if (rows.getCount() < row_count)
+ break;
+ }
+ Collections.sort(dateSort);
+ return dateSort.get(0);
+ */
+ return 0;
+ }
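+	/*
+	 * A sketch (an assumption, not part of this change-set): jobtime is the
+	 * partition key, and partitions are not stored in chronological order, so
+	 * the CQL version still has to scan the keys and take the minimum on the
+	 * client:
+	 *
+	 *   SELECT jobtime FROM ProteinKeyspace.ProteinData;
+	 *
+	 * iterate the rows and keep the smallest jobtime seen.
+	 */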
+}
import java.util.Iterator;
import java.util.List;
-import me.prettyprint.cassandra.serializers.LongSerializer;
-import me.prettyprint.cassandra.serializers.StringSerializer;
-import me.prettyprint.hector.api.beans.ColumnSlice;
-import me.prettyprint.hector.api.beans.HColumn;
-import me.prettyprint.hector.api.beans.OrderedRows;
-import me.prettyprint.hector.api.beans.Row;
-import me.prettyprint.hector.api.factory.HFactory;
-import me.prettyprint.hector.api.query.QueryResult;
-import me.prettyprint.hector.api.query.RangeSlicesQuery;
-import me.prettyprint.hector.api.query.SliceQuery;
-import compbio.cassandra.CassandraCreate;
+import compbio.cassandra.CassandraNativeConnector;
import compbio.cassandra.DataBase;
public class StatisticsProt {
-// private final static long MILLISECONDS_PER_DAY = 1000L * 60 * 60 * 24;
- private CassandraCreate cc = new CassandraCreate();
+ private CassandraNativeConnector cc = new CassandraNativeConnector();
private ArrayList<DataBase> query;
private static long currentDate = 0;
private static long earlestDate = 0;
- /* query: the period from date1 till date2 */
+ /*
+ * query: the period from date1 till date2
+	 */
public List<DataBase> readDetails(String date1, String date2) {
-
+
if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
System.out.println("Wrong date: point 1");
return null;
SetDateRange();
long dateStart = DateParsing(date1);
long dateEnd = DateParsing(date2);
- if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate)
- || dateStart > dateEnd)
+ if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate) || dateStart > dateEnd)
return null;
if (dateStart < earlestDate)
dateStart = earlestDate;
end.setTime(new Date(dateEnd));
query = new ArrayList<DataBase>();
int day = 0;
+ /*
for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
StringSerializer.get(), StringSerializer.get());
} else {
System.out.println("no data");
}
- // dateStart += MILLISECONDS_PER_DAY;
}
+ */
System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
return query;
}
/*
* query: execution time for the period from date1 till date2
	 */
public List<DataBase> readLength(String date1, String date2) {
if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
System.out.println("Wrong date: point 3");
return null;
}
SetDateRange();
+ int nbins = 5;
long dateStart = DateParsing(date1);
long dateEnd = DateParsing(date2);
- if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate)
- || dateStart > dateEnd)
+ if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate) || dateStart > dateEnd)
return null;
if (dateStart < earlestDate)
dateStart = earlestDate;
end.setTime(new Date(dateEnd));
query = new ArrayList<DataBase>();
List<Integer> totalTime = new ArrayList<Integer>();
- for (int i = 0; i < 4; i++)
+ for (int i = 0; i < nbins; i++)
totalTime.add(i, 0);
+ /*
for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
List<Integer> timeResult = new ArrayList<Integer>();
SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
List<HColumn<String, String>> col = columnSlice.get().getColumns();
if (!col.isEmpty()) {
Iterator<HColumn<String, String>> itCol = col.iterator();
- for (int i = 0; i < 4; i++)
+ for (int i = 0; i < nbins; i++)
timeResult.add(i, 0);
+ // split all jobs into nbins bins
while (itCol.hasNext()) {
String id = itCol.next().getName();
long lenResult = CountID(id);
- if (lenResult <= 30)
+ if (lenResult <= 30)
timeResult.set(0, timeResult.get(0) + 1);
else if (lenResult > 30 && lenResult <= 60)
timeResult.set(1, timeResult.get(1) + 1);
else if (lenResult > 60 && lenResult <= 120)
timeResult.set(2, timeResult.get(2) + 1);
- else {
+ else if (lenResult > 120 && lenResult <= 600)
timeResult.set(3, timeResult.get(3) + 1);
+ else {
+ timeResult.set(4, timeResult.get(4) + 1);
}
}
- for (int i = 0; i < 4; i++)
+ for (int i = 0; i < nbins; i++)
totalTime.set(i, totalTime.get(i) + timeResult.get(i));
DataBase db = new DataBase();
db.setTimeRez(timeResult);
db.setDate(DateFormat(date.getTime()));
query.add(db);
}
- // dateStart += MILLISECONDS_PER_DAY;
}
+ */
DataBase db = new DataBase();
db.setTimeTotalExec(totalTime);
query.add(db);
return query;
}
- /* query: protein sequence */
+ /*
+ * query: protein sequence
+	 */
public List<DataBase> readProteins(String protIn) {
query = new ArrayList<DataBase>();
+ /*
SliceQuery<String, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
StringSerializer.get(), StringSerializer.get());
result.setColumnFamily("ProteinRow");
query.add(db);
}
}
+ */
return query;
}
- // query by a protein sequence
+ /*
+ * query by a protein sequence
+	 */
public List<DataBase> readProtID(int counter) {
query = new ArrayList<DataBase>();
- int row_count = 100000000;
+ int row_count = 100;
+ /*
RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
StringSerializer.get(), StringSerializer.get());
result.setColumnFamily("ProteinRow");
- result.setRange(null, null, false, Integer.MAX_VALUE);
+ result.setRange(null, null, false, 100);
result.setRowCount(row_count);
String last_key = null;
while (true) {
Row<String, String, String> row = rowsIterator.next();
last_key = row.getKey();
List<HColumn<String, String>> clms = row.getColumnSlice().getColumns();
- int npred = 0;
- for (HColumn<String, String> cln : clms) {
- String name = cln.getName();
- if (name.matches("(.*)jnetpred")) {
- ++npred;
- }
- }
+ //int npred = 0;
+ //for (HColumn<String, String> cln : clms) {
+ // String name = cln.getName();
+ // if (name.matches("(.*)jnetpred")) {
+ // ++npred;
+ // }
+ //}
+ int npred = clms.size();
if (npred > counter) {
DataBase db = new DataBase();
db.setProt(last_key);
}
if (rows.getCount() < row_count)
break;
- }
+ }*/
return query;
}
- // query by a part of sequence
+ /*
+ * query by a part of sequence
+	 */
public List<DataBase> readPart(String protIn) {
int row_count = 10000;
query = new ArrayList<DataBase>();
+ /*
RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
StringSerializer.get(), StringSerializer.get());
result.setColumnFamily("ProteinRow");
if (rows.getCount() < row_count)
break;
}
+ */
return query;
}
- // convert String to Date
+ /*
+	 * convert String date into long date (milliseconds since the epoch start)
+ */
private static long DateParsing(String datInput) {
if (datInput == null) {
return 0;
return dateWorkSt;
}
- // convert String to Date
+ /*
+	 * convert String date:time into long date:time (milliseconds since the epoch start)
+ */
private static long TimeConvert(String datInput) {
long dateWorkSt = 0;
if (datInput == null) {
}
/*
- * private static String DateFormat1(long inDate) { SimpleDateFormat
- * datformat = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss"); String
- * dateString = datformat.format(new Date(inDate)); return dateString; }
+	 * convert long date (milliseconds since the epoch) into a yyyy/MM/dd string
*/
public static String DateFormatYYMMDD(long indate) {
SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd");
return dateString;
}
+ /*
+	 * job execution time in seconds (the Hector query is disabled for now, so this returns 0)
+ */
public long CountID(String id) {
+ /*
SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
StringSerializer.get(), StringSerializer.get());
sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
long datBeginLong = TimeConvert(datBegin);
long datEndLong = TimeConvert(datEnd);
return (datEndLong - datBeginLong) / 1000;
+ */
+ return 0;
}
+ /*
+	 * set the earliest and current dates:
+	 * earlestDate is static and is set on the first call only,
+	 * currentDate is recalculated on every call
+ */
private static void SetDateRange() {
if (0 == earlestDate) {
StatisticsProt sp = new StatisticsProt();
return true;
}
- // find the earliest date
+ /*
+ * find the earliest date in the database
+ */
public long earliestDate() {
ArrayList<Long> dateSort = new ArrayList<Long>();
int row_count = 10000;
+ /*
RangeSlicesQuery<Long, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), LongSerializer.get(),
StringSerializer.get(), StringSerializer.get());
result.setColumnFamily("ProteinData");
}
if (rows.getCount() < row_count)
break;
- }
+		}*/
+		// the Hector range scan above is disabled for now; avoid
+		// dateSort.get(0) on an empty list
+		if (dateSort.isEmpty())
+			return 0;
Collections.sort(dateSort);
return dateSort.get(0);
}