From: Sasha Sherstnev Date: Fri, 15 Nov 2013 11:31:27 +0000 (+0000) Subject: PROT-1 Prepare project for Maven build system X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=008be2cc63154bacbe1cbf5fa03ffbe49fdc09da;p=proteocache.git PROT-1 Prepare project for Maven build system --- diff --git a/README.txt b/README.txt new file mode 100644 index 0000000..0bc6883 --- /dev/null +++ b/README.txt @@ -0,0 +1 @@ +ProteoCache is a project for caching data from Jpred diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..ae3f19b --- /dev/null +++ b/pom.xml @@ -0,0 +1,22 @@ + + 4.0.0 + compbio + proteocache + war + 0.1.0-SNAPSHOT + proteocache + http://maven.apache.org + + + + + + + junit + junit + 3.8.1 + test + + + diff --git a/src/main/java/compbio/cassandra/CassandraNativeConnector.java b/src/main/java/compbio/cassandra/CassandraNativeConnector.java new file mode 100644 index 0000000..9d214ec --- /dev/null +++ b/src/main/java/compbio/cassandra/CassandraNativeConnector.java @@ -0,0 +1,101 @@ +package compbio.cassandra; + +import java.util.Calendar; + +import org.apache.log4j.Logger; + +import com.datastax.driver.core.Cluster; +import com.datastax.driver.core.Host; +import com.datastax.driver.core.Metadata; +import com.datastax.driver.core.ResultSet; +import com.datastax.driver.core.Row; + +import com.datastax.driver.core.Session; +import compbio.engine.ProteoCachePropertyHelperManager; +import compbio.util.PropertyHelper; + +public class CassandraNativeConnector { + private static Cluster cluster; + private static Session session; + private static final PropertyHelper ph = ProteoCachePropertyHelperManager.getPropertyHelper(); + private static Logger log = Logger.getLogger(CassandraNativeConnector.class); + + public static String CASSANDRA_HOSTNAME = "localhost"; + + public static Session getSession () { + return session; + } + + /* + * connect to the cluster and look whether all tables exist + */ + public void Connect() { + + String 
cassandrahostname = ph.getProperty("cassandra.host"); + if (null != cassandrahostname) { + CASSANDRA_HOSTNAME = cassandrahostname; + } + + cluster = Cluster.builder().addContactPoint(CASSANDRA_HOSTNAME).build(); + + Metadata metadata = cluster.getMetadata(); + System.out.printf("Connected to cluster: %s\n", metadata.getClusterName()); + for (Host host : metadata.getAllHosts()) { + System.out.printf("Datatacenter: %s; Host: %s; Rack: %s\n", host.getDatacenter(), host.getAddress(), host.getRack()); + } + session = cluster.connect(); + CreateMainTables(); + System.out.println("Cassandra connected"); + } + + private void CreateMainTables() { + session.execute("CREATE KEYSPACE IF NOT EXISTS ProteinKeyspace WITH replication = {'class':'SimpleStrategy', 'replication_factor':3};"); + session.execute("USE ProteinKeyspace"); + + session.execute("CREATE TABLE IF NOT EXISTS MainParameters " + + "(Name ascii, Value ascii, PRIMARY KEY(Name));"); + + session.execute("CREATE TABLE IF NOT EXISTS ProteinRow " + + "(Protein ascii, JobID ascii, Predictions map, PRIMARY KEY(JobID));"); + + session.execute("CREATE TABLE IF NOT EXISTS ProteinLog " + + "(JobID ascii, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, " + + "ExecutionStatus ascii, Protein ascii, PRIMARY KEY(JobID));"); + + session.execute("CREATE TABLE IF NOT EXISTS ProteinData " + + "(jobtime bigint, JobID ascii, Protein ascii, PRIMARY KEY(JobID));"); + + session.execute("CREATE TABLE IF NOT EXISTS JpredArchive " + + "(JobID ascii, Protein varchar, IP ascii, StartTime bigint, ExecTime int, alignment map, " + + "predictions map, ArchiveLink varchar, LOG varchar, PRIMARY KEY(JobID));"); + + session.execute("CREATE TABLE IF NOT EXISTS JobDateInfo " + + "(jobday bigint, Total bigint, Program varchar, Version varchar, PRIMARY KEY(jobday));"); + + session.execute("CREATE INDEX IF NOT EXISTS ProteinSeq ON ProteinRow (protein);"); + session.execute("CREATE INDEX IF NOT EXISTS JobDateStamp ON ProteinData (jobtime);"); + 
} + + public void Closing() { + session.shutdown(); + cluster.shutdown(); + System.out.println("Cassandra has been shut down"); + } + + /* + * getting earlest date of jobs from the db + */ + public static long getEarliestDateInDB() { + String com = "SELECT * FROM MainParameters WHERE Name = 'EarliestJobDate';"; + System.out.println("Command: " + com); + ResultSet results = session.execute(com); + + if (!results.isExhausted()) { + Row r = results.one(); + return Long.parseLong(r.getString("Value")); + } + Calendar cal = Calendar.getInstance(); + return cal.getTimeInMillis(); + } + +} diff --git a/src/main/java/compbio/cassandra/CassandraNewTableWriter.java b/src/main/java/compbio/cassandra/CassandraNewTableWriter.java new file mode 100644 index 0000000..0d68b1a --- /dev/null +++ b/src/main/java/compbio/cassandra/CassandraNewTableWriter.java @@ -0,0 +1,95 @@ +package compbio.cassandra; + +import java.util.Calendar; +import java.util.Date; + +import org.apache.log4j.Logger; + +import com.datastax.driver.core.Row; +import com.datastax.driver.core.Session; +import com.datastax.driver.core.ResultSet; +import com.datastax.driver.core.PreparedStatement; +import com.datastax.driver.core.BoundStatement; +import com.datastax.driver.core.exceptions.QueryExecutionException; + +import compbio.engine.ProteoCachePropertyHelperManager; +import compbio.cassandra.CassandraNativeConnector; + +public class CassandraNewTableWriter { + private Session session; + private static Logger log = Logger.getLogger(CassandraNativeConnector.class); + + public CassandraNewTableWriter() { + Session inis = CassandraNativeConnector.getSession(); + setSession(inis); + } + + public void setSession(Session s) { + assert s != null; + session = s; + } + + public boolean JobisNotInsterted(String jobid) { + ResultSet results1 = session.execute("SELECT * FROM ProteinLog WHERE JobID = '" + jobid + "';"); + if (results1.isExhausted()) { + return true; + } + return false; + } + + public boolean 
JobisNotArchived(String jobid) { + ResultSet results1 = session.execute("SELECT * FROM JpredArchive WHERE JobID = '" + jobid + "';"); + if (results1.isExhausted()) { + return true; + } + return false; + } + + /* + * fill new table + */ + public void FillNewTable() { + final long StartTime = System.currentTimeMillis(); + long erldate = CassandraNativeConnector.getEarliestDateInDB(); + Calendar runnicCal = Calendar.getInstance(); + runnicCal.setTime(new Date(erldate)); + Calendar endcal = Calendar.getInstance(); + long endTime = endcal.getTime().getTime(); + for (Date date = runnicCal.getTime(); date.getTime() < endTime; runnicCal.add(Calendar.DATE, 1), date = runnicCal.getTime()) { + try { + ResultSet results = session.execute("SELECT * FROM ProteinData WHERE jobtime = " + date.getTime() + ";"); + session.execute("INSERT INTO JobDateInfo " + "(jobday, Total)" + " VALUES (" + date.getTime() + "," + results.all().size() + + ");"); + } catch (QueryExecutionException e) { + e.printStackTrace(); + } + } + System.out.println("Table JobDateInfo filled: total time is " + (System.currentTimeMillis() - StartTime) + " msec"); + } + + /* + * fill a table with the database global parameters + */ + public void FillMainDBParameters() { + Date bubu = new Date(CassandraNativeConnector.getEarliestDateInDB()); + System.out.println("Old EarliestJobDate is " + bubu.toString()); + + String query1 = "SELECT jobtime FROM ProteinData LIMIT 2000000;"; + System.out.println("Query db: " + query1); + ResultSet results = session.execute(query1); + Calendar endcal = Calendar.getInstance(); + long newearliestday = endcal.getTime().getTime(); + while (!results.isExhausted()) { + Row r = results.one(); + long day = r.getLong("jobtime"); + if (day < newearliestday) { + newearliestday = day; + } + } + String com = "INSERT INTO MainParameters " + "(Name, Value)" + " VALUES ('EarliestJobDate','" + String.valueOf(newearliestday) + + "');"; + session.execute(com); + Date gaga = new Date(newearliestday); + 
System.out.println("New EarliestJobDate is " + gaga.toString()); + } +} diff --git a/src/main/java/compbio/cassandra/CassandraReader.java b/src/main/java/compbio/cassandra/CassandraReader.java new file mode 100644 index 0000000..af697a0 --- /dev/null +++ b/src/main/java/compbio/cassandra/CassandraReader.java @@ -0,0 +1,178 @@ +package compbio.cassandra; + +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.ArrayList; +import java.util.Map; + +import org.apache.log4j.Logger; + +import com.datastax.driver.core.Row; +import com.datastax.driver.core.Session; +import com.datastax.driver.core.ResultSet; + +public class CassandraReader { + private Session session; + private static Logger log = Logger.getLogger(CassandraNativeConnector.class); + + public CassandraReader() { + Session inis = CassandraNativeConnector.getSession(); + setSession (inis); + } + + public void setSession(Session s) { + assert s != null; + session = s; + } + + /* + * getting data from the db + */ + public List> ReadProteinDataTable() { + final long startTime = System.currentTimeMillis(); + String com = "SELECT DataBegin,DataEnd FROM ProteinLog;"; + System.out.println("Command: " + com); + ResultSet results = session.execute(com); + final long queryTime = System.currentTimeMillis(); + List rows = results.all(); + System.out.println("Query time is " + (queryTime - startTime) + " msec"); + + List> res = new ArrayList>(); + int c = 0; + for (Row r : rows) { + Pair pair = new Pair(r.getString("DataBegin"), r.getString("DataEnd")); + res.add(pair); + ++c; + } + final long endTime = System.currentTimeMillis(); + System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); + return res; + } + + /* + * getting data from the db JobDateInfo + */ + public long ReadDateTable(long queryDate) { + ResultSet results = session.execute("SELECT Total FROM JobDateInfo WHERE jobday = " + queryDate + ";"); + if (results.isExhausted()) + return 0; 
+ Row therow = results.one(); + long res = therow.getLong("Total"); + if (!results.isExhausted()) { + Date date = new Date (queryDate); + log.warn("CassandraReader.ReadDateTable: date row for " + date.toString () + " ("+ queryDate + ") duplicated "); + } + return res; + } + /* + * getting whole protein sequence from the db ProteinRow + */ + public List ReadWholeSequence(String queryProtein) { + final long startTime = System.currentTimeMillis(); + String com = "SELECT JobID, Predictions FROM ProteinRow WHERE Protein = '" + queryProtein + "';"; + System.out.println("Command: " + com); + ResultSet results = session.execute(com); + if (results.isExhausted()) + return null; + final long queryTime = System.currentTimeMillis(); + List rows = results.all(); + System.out.println("Query time is " + (queryTime - startTime) + " msec"); + System.out.println(" rows analysed, " + rows.size()); + List res = new ArrayList(); + int c = 0; + for (Row r : rows) { + StructureProteinPrediction structure = new StructureProteinPrediction(queryProtein, r.getString("JobID"), r.getMap( + "Predictions", String.class, String.class)); + res.add(structure); + ++c; + } + final long endTime = System.currentTimeMillis(); + System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); + return res; + } + + /* + * getting part of protein sequence from the db ProteinRow + */ + public List ReadPartOfSequence(String queryProtein) { + final long startTime = System.currentTimeMillis(); + String com = "SELECT * FROM ProteinRow;"; + System.out.println("Command: " + com); + ResultSet results = session.execute(com); + if (results.isExhausted()) + return null; + final long queryTime = System.currentTimeMillis(); + List rows = results.all(); + System.out.println("Query time is " + (queryTime - startTime) + " msec"); + System.out.println(" rows analysed, " + rows.size()); + List res = new ArrayList(); + int c = 0; + for (Row r : rows) { + String prot = r.getString("Protein"); + 
if (prot.matches("(.*)" + queryProtein + "(.*)")) { + StructureProteinPrediction structure = new StructureProteinPrediction(prot, r.getString("JobID"), r.getMap("Predictions", + String.class, String.class)); + res.add(structure); + ++c; + } + } + final long endTime = System.currentTimeMillis(); + System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); + return res; + } + + /* + * getting protein sequences by counter + */ + public Map ReadProteinSequenceByCounter() { + final long startTime = System.currentTimeMillis(); + String com = "SELECT Protein FROM ProteinRow;"; + System.out.println("Command: " + com); + ResultSet results = session.execute(com); + if (results.isExhausted()) + return null; + final long queryTime = System.currentTimeMillis(); + List rows = results.all(); + System.out.println("Query time is " + (queryTime - startTime) + " msec"); + System.out.println(" rows analysed, " + rows.size()); + Map res = new HashMap(); + int c = 0; + for (Row r : rows) { + String protein = r.getString("Protein"); + if (res.containsKey(protein)) + res.put(protein, res.get(protein) + 1); + else + res.put(protein, 1); + } + final long endTime = System.currentTimeMillis(); + System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); + return res; + } + + /* + * getting protein sequences by counter + */ + public StructureJobLog ReadJobLog(String jobid) { + final long startTime = System.currentTimeMillis(); + String com = "SELECT * FROM ProteinLog WHERE JobID = '" + jobid + "';"; + System.out.println("Command: " + com); + ResultSet results = session.execute(com); + if (results.isExhausted()) + return null; + final long queryTime = System.currentTimeMillis(); + Row row = results.one(); + String com1 = "SELECT * FROM ProteinRow WHERE JobID = '" + jobid + "' ALLOW FILTERING;"; + System.out.println("Command: " + com1); + ResultSet results1 = session.execute(com1); + if (results1.isExhausted()) + return 
null; + Row row1 = results1.one(); + StructureJobLog res = new StructureJobLog(row.getString("Protein"), row.getString("JobID"), row.getString("DataBegin"), + row.getString("DataEnd"), row.getString("ip"), row1.getMap("Predictions", String.class, String.class)); + System.out.println("Query time is " + (queryTime - startTime) + " msec"); + final long endTime = System.currentTimeMillis(); + System.out.println(" rows analysed, execution time is " + (endTime - startTime) + " msec"); + return res; + } +} diff --git a/src/main/java/compbio/cassandra/CassandraWriter.java b/src/main/java/compbio/cassandra/CassandraWriter.java new file mode 100644 index 0000000..48bbda7 --- /dev/null +++ b/src/main/java/compbio/cassandra/CassandraWriter.java @@ -0,0 +1,144 @@ +package compbio.cassandra; + +import java.util.List; + +import org.apache.log4j.Logger; + +import com.datastax.driver.core.Row; +import com.datastax.driver.core.Session; +import com.datastax.driver.core.ResultSet; + +import compbio.engine.JpredJob; +import compbio.engine.ProteoCachePropertyHelperManager; +import compbio.util.PropertyHelper; + +public class CassandraWriter { + private Session session; + private static final PropertyHelper ph = ProteoCachePropertyHelperManager.getPropertyHelper(); + private static Logger log = Logger.getLogger(CassandraNativeConnector.class); + + CassandraWriter() { + Session inis = CassandraNativeConnector.getSession(); + setSession(inis); + } + + public void setSession(Session s) { + assert s != null; + session = s; + } + + public boolean JobisNotInsterted(String jobid) { + ResultSet results1 = session.execute("SELECT * FROM ProteinLog WHERE JobID = '" + jobid + "';"); + if (results1.isExhausted()) { + return true; + } + return false; + } + + public boolean JobisNotArchived(String jobid) { + ResultSet results1 = session.execute("SELECT * FROM JpredArchive WHERE JobID = '" + jobid + "';"); + if (results1.isExhausted()) { + return true; + } + return false; + } + + /* + * inserting data 
into the tables for queries + */ + public int FormQueryTables(JpredJob job) { + if (JobisNotInsterted(job.getJobID())) { + String id = job.getJobID(); + String ip = job.getIP(); + String protein = job.getProtein(); + String finalstatus = job.getFinalStatus(); + String execstatus = job.getExecutionStatus(); + String com1 = "INSERT INTO ProteinLog " + "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)" + + " VALUES ('" + id + "','" + ip + "','" + job.getStartingTimeStr() + "','" + job.getEndTimeStr() + "','" + finalstatus + + "','" + execstatus + "','" + protein + "');"; + session.execute(com1); + + String com2 = "INSERT INTO ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + job.getStartingDate() + ",'" + id + + "','" + protein + "');"; + session.execute(com2); + + String allpredictions = ""; + List pr = job.getPredictions(); + for (FastaSequence pred : pr) { + String predictionname = pred.getId(); + String prediction = pred.getSequence().replaceAll("\n", ""); + allpredictions += "'" + predictionname + "':'" + prediction + "',"; + } + String final_prediction = ""; + if (!allpredictions.equals("")) { + final_prediction = allpredictions.substring(0, allpredictions.length() - 1); + } + + String check2 = "SELECT * FROM ProteinRow WHERE JobID = '" + job.getJobID() + "';"; + ResultSet results2 = session.execute(check2); + if (results2.isExhausted()) { + String com3 = "INSERT INTO ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('" + protein + "','" + id + "',{" + + final_prediction + "});"; + session.execute(com3); + } + + // update some internal query tables + String check3 = "SELECT * FROM MainParameters WHERE Name = 'EarliestJobDate';"; + ResultSet results3 = session.execute(check3); + boolean updateparameter = true; + if (!results3.isExhausted()) { + Row r = results3.one(); + if (job.getStartingDate() >= Long.parseLong(r.getString("Value"))) + updateparameter = false; + } + if (updateparameter) { + String com = "INSERT INTO 
MainParameters " + "(Name, Value)" + " VALUES ('EarliestJobDate','" + job.getStartingDateStr() + + "');"; + session.execute(com); + } + String check4 = "SELECT * FROM JobDateInfo WHERE jobday = " + job.getStartingDate() + ";"; + ResultSet results4 = session.execute(check4); + updateparameter = true; + int njobs = 1; + if (!results4.isExhausted()) { + Row r = results4.one(); + njobs += r.getLong("Total"); + } + String com = "INSERT INTO JobDateInfo " + "(jobday, Total)" + " VALUES (" + job.getStartingDate() + "," + njobs + ");"; + session.execute(com); + + return 1; + } + return 0; + } + + /* + * insert data from a real Jpred job: timing+IP, Execution Status, Final + * status, protein sequence, predictions, alignment, LOG and tar.gz files + */ + public int ArchiveData(JpredJob job, String archivepath) { + if (JobisNotArchived(job.getJobID())) { + String id = job.getJobID(); + String log = job.getLog().replaceAll("'", ""); + String com = "INSERT INTO JpredArchive (JobID, Protein, IP, StartTime, ExecTime,LOG, ArchiveLink) VALUES ('" + id + "','" + + job.getProtein() + "','" + job.getIP() + "'," + job.getStartingTime() + "," + job.getExecutionTime() + ",'" + log + + "','" + archivepath + "');"; + session.execute(com); + + List predictions = job.getPredictions(); + for (FastaSequence p : predictions) { + session.execute("UPDATE JpredArchive SET predictions = predictions + {'" + p.getId() + "':'" + + p.getSequence().replaceAll("\n", "") + "'} WHERE JobID = '" + id + "';"); + } + + List seqs = job.getAlignment(); + for (FastaSequence s : seqs) { + session.execute("UPDATE JpredArchive SET alignment = alignment + {'" + s.getId() + "':'" + + s.getSequence().replaceAll("\n", "") + "'} WHERE JobID = '" + id + "';"); + } + return 1; + } + return 0; + } + +} diff --git a/src/main/java/compbio/cassandra/DataBase.java b/src/main/java/compbio/cassandra/DataBase.java new file mode 100644 index 0000000..91eabf0 --- /dev/null +++ b/src/main/java/compbio/cassandra/DataBase.java @@ -0,0 
+1,117 @@ +package compbio.cassandra; + +import java.util.List; +import java.util.ArrayList; +import java.util.Collections; + +public class DataBase { + private String date; + private int total; + private int totalJobs; + private int totalId; + private String id; + private String prot; + private String jpred; + private List subProt; + private List timeRez; + private List timeTotalExec; + private StructureJobLog logInfo; + + public DataBase() { + } + + public DataBase(String dat, int total) { + this.date = dat; + this.total = total; + } + + public void setDate(String dat) { + this.date = dat; + } + + public String getDate() { + return date; + } + + public void setTotal(int tot) { + this.total = tot; + } + + public int getTotal() { + return total; + } + + + public void setTotalJobs(int totalJobs) { + this.totalJobs = totalJobs; + } + + public int getTotalJobs() { + return totalJobs; + } + + public void setTotalId(int totId) { + this.totalId = totId; + } + + public int getTotalId() { + return totalId; + } + + public void setProt(String prot) { + this.prot = prot; + } + + public String getProt() { + return prot; + } + + public void setJpred(String jpred) { + this.jpred = jpred; + } + + public String getJpred() { + return jpred; + } + + public void setId(String id) { + this.id = id; + } + + public String getId() { + return id; + } + + public void setSubProt(List subProt) { + this.subProt = subProt; + } + + public List getSubProt() { + return subProt; + } + + public void setTimeRez(List timeRez) { + this.timeRez = timeRez; + } + + public List getTimeRez() { + return timeRez; + } + + public void setTimeTotalExec(List timeTotalExec) { + this.timeTotalExec = timeTotalExec; + } + + public List getTimeTotalExec() { + return timeTotalExec; + } + + public void setLogInfo(StructureJobLog logInfo){ + this.logInfo = logInfo; + } + + public StructureJobLog getLogInfo() { + return logInfo; + } + +} diff --git a/src/main/java/compbio/cassandra/FastaReader.java 
b/src/main/java/compbio/cassandra/FastaReader.java new file mode 100644 index 0000000..4783b14 --- /dev/null +++ b/src/main/java/compbio/cassandra/FastaReader.java @@ -0,0 +1,173 @@ +package compbio.cassandra; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.InputStream; +import java.util.Iterator; +import java.util.Scanner; + +//import compbio.util.Util; + +/** + * Reads files with FASTA formatted sequences. All the information in the FASTA + * header is preserved including trailing white spaces. All the white spaces are + * removed from the sequence. + * + * Examples of the correct input: + * + *
+ * 
+ * >zedpshvyzg
+ * GCQDKNNIAELNEIMGTTRSPSDWQHMKGASPRAEIGLTGKKDSWWRHCCSKEFNKTPPPIHPDMKRWGWMWNRENFEKFLIDNFLNPPCPRLMLTKGTWWRHEDLCHEIFWSTLRWLCLGNQSFSAMIWGHLCECHRMIWWESNEHMFWLKFRRALKKMNSNGPCMGPDNREWMITNRMGKEFCGPAFAGDCQSCWRKCHKTNKICFNEKKGTPTKIDHEQKDIMDILKDIDNHRNWKQCQLWLLTSKSTDQESTTMLTWSTWRDFFIIIKQPFDHKCRGALDANGDFQIAAELKWPAPMIILRQNQKTMHDKSCHHFFTNRCPLMHTTRANDKQCSWHTRKQFICQQDFTTWQHRPDTHRILPSWCMSTRRKNHIKNTPALAFSTCEMGDLPNGWAPGTIILQRQFTQAIKLPQETTGWPRCDPKFDHWNMSKWLRQLLGRDDEMIPPQCD
+ * 
+ * >xovkactesa
+ * CPLSKWWNRRAFLSHTANHWMILMTWEGPHDGESKMRIAMMKWSPCKPTMSHFRCGLDAWAEPIRQIACESTFRM
+ * FCTTPRPIHKLTEMWGHMNGWTGAFCRQLECEWMMPPRHPHPCTSTFNNNKKRLIGQIPNEGKQLFINFQKPQHG
+ * FSESDIWIWKDNPTAWHEGLTIAGIGDGQHCWNWMPMPWSGAPTSNALIEFWTWLGMIGTRCKTQGMWWDAMNHH
+ * DQFELSANAHIAAHHMEKKMILKPDDRNLGDDTWMPPGKIWMRMFAKNTNACWPEGCRDDNEEDDCGTHNLHRMC
+ * 
+ * >ntazzewyvv
+ * CGCKIF D D NMKDNNRHG TDIKKHGFMH IRHPE KRDDC FDNHCIMPKHRRWGLWD
+ * EASINM	AQQWRSLPPSRIMKLNG	HGCDCMHSHMEAD	DTKQSGIKGTFWNG	HDAQWLCRWG	
+ * EFITEA	WWGRWGAITFFHAH	ENKNEIQECSDQNLKE	SRTTCEIID   TCHLFTRHLDGW 
+ *   RCEKCQANATHMTW ACTKSCAEQW  FCAKELMMN    
+ *   W        KQMGWRCKIFRKLFRDNCWID  FELPWWPICFCCKGLSTKSHSAHDGDQCRRW    WPDCARDWLGPGIRGEF   
+ *   FCTHICQQLQRNFWCGCFRWNIEKRMFEIFDDNMAAHWKKCMHFKFLIRIHRHGPITMKMTWCRSGCCFGKTRRLPDSSFISAFLDPKHHRDGSGMMMWSSEMRSCAIPDPQQAWNQGKWIGQIKDWNICFAWPIRENQQCWATPHEMPSGFHFILEKWDALAHPHMHIRQKKCWAWAFLSLMSSTHSDMATFQWAIPGHNIWSNWDNIICGWPRI
+ * 
+ *    > 12 d t y wi 		k	jbke  	
+ *   KLSHHDCD
+ *    N
+ *     H
+ *     HSKCTEPHCGNSHQMLHRDP
+ *     CCDQCQSWEAENWCASMRKAILF
+ * 
+ * 
+ * + * @author Peter Troshin + * @version 1.0 April 2011 + * + */ +public class FastaReader implements Iterator { + + private final Scanner input; + /** + * Delimiter for the scanner + */ + private final String DELIM = ">"; + + /** + * Header data can contain non-ASCII symbols and read in UTF8 + * + * @param inputFile + * the file containing the list of FASTA formatted sequences to + * read from + * @throws FileNotFoundException + * if the input file is not found + * @throws IllegalStateException + * if the close method was called on this instance + * + */ + public FastaReader(final String inputFile) throws FileNotFoundException { + input = new Scanner(new File(inputFile), "UTF8"); + input.useDelimiter(DELIM); + Runtime.getRuntime().addShutdownHook(new Thread() { + + @Override + public void run() { + if (input != null) { + input.close(); + } + } + }); + } + + /** + * This class will not close the incoming stream! So the client should do + * so. + * + * @param inputStream + * @throws FileNotFoundException + */ + public FastaReader(final InputStream inputStream) + throws FileNotFoundException { + input = new Scanner(inputStream); + input.useDelimiter(DELIM); + } + + /** + * {@inheritDoc} + * + * @throws IllegalStateException + * if the close method was called on this instance + */ + @Override + public boolean hasNext() { + return input.hasNext(); + } + + /** + * Reads the next FastaSequence from the input + * + * @throws AssertionError + * if the header or the sequence is missing + * @throws IllegalStateException + * if the close method was called on this instance + * @throws MismatchException + * - if there were no more FastaSequence's. 
+ */ + @Override + public FastaSequence next() { + String fastaHeader = input.next(); + while (fastaHeader.indexOf("\n") < 0 && input.hasNext()) { + fastaHeader = fastaHeader.concat(">"); + fastaHeader = fastaHeader.concat(input.next()); + } + return FastaReader.toFastaSequence(fastaHeader); + } + + /** + * Not implemented + */ + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + /** + * Call this method to close the connection to the input file if you want to + * free up the resources. The connection will be closed on the JVM shutdown + * if this method was not called explicitly. No further reading on this + * instance of the FastaReader will be possible after calling this method. + */ + public void close() { + input.close(); + } + + private static FastaSequence toFastaSequence(final String singleFastaEntry) { + + // assert !Util.isEmpty(singleFastaEntry) : + // "Empty String where FASTA sequence is expected!"; + + int nlineidx = singleFastaEntry.indexOf("\n"); + if (nlineidx < 0) { + throw new AssertionError( + "The FASTA sequence must contain the header information" + + " separated by the new line from the sequence. Given sequence does not appear to " + + "contain the header! Given data:\n " + + singleFastaEntry); + } + String header = singleFastaEntry.substring(0, nlineidx); + + // Get rid of the new line chars (should cover common cases) + header = header.replaceAll("\r", ""); + + String sequence = singleFastaEntry.substring(nlineidx); + + /* + * if (Util.isEmpty(sequence)) { throw new AssertionError( + * "Empty sequences are not allowed! Please make sure the " + + * " data is in the FASTA format! 
Given data:\n " + singleFastaEntry); } + */ + return new FastaSequence(header, sequence); + } +} diff --git a/src/main/java/compbio/cassandra/FastaSequence.java b/src/main/java/compbio/cassandra/FastaSequence.java new file mode 100644 index 0000000..61f49c7 --- /dev/null +++ b/src/main/java/compbio/cassandra/FastaSequence.java @@ -0,0 +1,179 @@ +package compbio.cassandra; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; + +//import compbio.util.SysPrefs; +//import compbio.util.annotation.Immutable; + +/** + * A FASTA formatted sequence. Please note that this class does not make any + * assumptions as to what sequence it stores e.g. it could be nucleotide, + * protein or even gapped alignment sequence! The only guarantee it makes is + * that the sequence does not contain white space characters e.g. spaces, new + * lines etc + * + * @author pvtroshin + * + * @version 1.0 September 2009 + */ + +@XmlAccessorType(XmlAccessType.FIELD) +//@Immutable +public class FastaSequence { + + /** + * Sequence id + */ + private String id; + + // TODO what about gapped sequence here! 
should be indicated + /** + * Returns the string representation of sequence + */ + private String sequence; + + FastaSequence() { + // Default constructor for JaxB + } + + /** + * Upon construction the any whitespace characters are removed from the + * sequence + * + * @param id + * @param sequence + */ + public FastaSequence(String id, String sequence) { + this.id = id; + this.sequence = sequence; + } + + /** + * Gets the value of id + * + * @return the value of id + */ + public String getId() { + return this.id; + } + + /** + * Gets the value of sequence + * + * @return the value of sequence + */ + public String getSequence() { + return this.sequence; + } + + public static int countMatchesInSequence(final String theString, + final String theRegExp) { + final Pattern p = Pattern.compile(theRegExp); + final Matcher m = p.matcher(theString); + int cnt = 0; + while (m.find()) { + cnt++; + } + return cnt; + } + + public String getFormattedFasta() { + return getFormatedSequence(80); + } + + /** + * + * @return one line name, next line sequence, no matter what the sequence + * length is + */ +/* public String getOnelineFasta() { + String fasta = ">" + getId() + SysPrefs.newlinechar; + fasta += getSequence() + SysPrefs.newlinechar; + return fasta; + } + + /** + * Format sequence per width letter in one string. Without spaces. 
+ * + * @return multiple line formated sequence, one line width letters length + * + */ + public String getFormatedSequence(final int width) { + if (sequence == null) { + return ""; + } + + assert width >= 0 : "Wrong width parameter "; + + final StringBuilder sb = new StringBuilder(sequence); + // int tail = nrOfWindows % WIN_SIZE; + // final int turns = (nrOfWindows - tail) / WIN_SIZE; + + int tailLen = sequence.length() % width; + // add up inserted new line chars + int nchunks = (sequence.length() - tailLen) / width; + int nlineCharcounter = 0; + int insPos = 0; + for (int i = 1; i <= nchunks; i++) { + insPos = width * i + nlineCharcounter; + // to prevent inserting new line in the very end of a sequence then + // it would have failed. + if (sb.length() <= insPos) { + break; + } + sb.insert(insPos, "\n"); + nlineCharcounter++; + } + // sb.insert(insPos + tailLen, "\n"); + return sb.toString(); + } + + /** + * + * @return sequence length + */ + public int getLength() { + return this.sequence.length(); + } + + /** + * Same as oneLineFasta + */ +// @Override +// public String toString() { +// return this.getOnelineFasta(); + // } + + @Override + public int hashCode() { + final int prime = 17; + int result = 1; + result = prime * result + ((id == null) ? 0 : id.hashCode()); + result = prime * result + + ((sequence == null) ? 
0 : sequence.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (!(obj instanceof FastaSequence)) { + return false; + } + FastaSequence fs = (FastaSequence) obj; + if (!fs.getId().equals(this.getId())) { + return false; + } + if (!fs.getSequence().equalsIgnoreCase(this.getSequence())) { + return false; + } + return true; + } + +} diff --git a/src/main/java/compbio/cassandra/JpredParser.java b/src/main/java/compbio/cassandra/JpredParser.java new file mode 100644 index 0000000..27020ec --- /dev/null +++ b/src/main/java/compbio/cassandra/JpredParser.java @@ -0,0 +1,16 @@ +package compbio.cassandra; + +import java.io.IOException; + +public interface JpredParser { + + /* + * Defines a source file with metainformation of Jpred Jobs + **/ + void setSource (String newsourceprefix); + + /* + * Makes real parsing of the source file + **/ + void Parsing(String source, int nDays) throws IOException; +} diff --git a/src/main/java/compbio/cassandra/JpredParserHTTP.java b/src/main/java/compbio/cassandra/JpredParserHTTP.java new file mode 100644 index 0000000..e53ddd8 --- /dev/null +++ b/src/main/java/compbio/cassandra/JpredParserHTTP.java @@ -0,0 +1,205 @@ +package compbio.cassandra; + +import java.io.BufferedReader; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLConnection; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; +import java.util.List; + +import compbio.cassandra.JpredParser; +import compbio.engine.JpredJob; + +public class JpredParserHTTP implements JpredParser { + private CassandraWriter cw = new CassandraWriter(); + private String dirprefix; + private List alignment; + private List predictions; + private int countNoData; + + public JpredParserHTTP() { + 
dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results"; + } + + public JpredParserHTTP(String sourceurl) { + dirprefix = sourceurl; + } + + public void setSource(String newsourceprefix) { + dirprefix = newsourceprefix; + } + + public void Parsing(String source, int nDays) throws IOException { + Calendar cal = Calendar.getInstance(); + cal.add(Calendar.DATE, -nDays); + for (int i = 0; i < nDays; ++i) { + cal.add(Calendar.DATE, 1); + String date = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DATE); + ParsingForDate(source, date); + } + } + + /* + * The method parses the Jpred output concise file in the FASTA format If + * there is a record with ID = QUERY or jobid, this a "one protein" job + * otherwise this is an alignment job + */ + private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException { + final FastaReader fr = new FastaReader(stream); + String protein = ""; + alignment = new ArrayList(); + predictions = new ArrayList(); + while (fr.hasNext()) { + final FastaSequence fs = fr.next(); + String seqid = fs.getId(); + String seq = fs.getSequence().replaceAll("\n", ""); + if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28") + || seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF") + || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM") || seqid.equals("JNETCONF")) { + predictions.add(fs); + } else { + alignment.add(fs); + if (seqid.equals("QUERY") || seqid.equals(jobid)) + protein = seq; + } + } + return protein; + } + + private String parseLogFile(final InputStream stream) throws IOException { + String out = ""; + BufferedReader buffer = new BufferedReader(new InputStreamReader(stream)); + String line; + while (null != (line = buffer.readLine())) { + out += line; + } + return out; + } + + private int analyseJob(String[] jobinfo) throws IOException { + 
boolean running = true; + boolean ConcisefileExists = false; + boolean LogfileExists = false; + JpredJob job = new JpredJob (jobinfo[jobinfo.length - 1], jobinfo[0], jobinfo[1]); + job.setIP(jobinfo[2]); + Date currDate = new Date(); + String maindir = dirprefix + "/" + job.getJobID() + "/"; + + //System.out.println("analyzing job " + job.getJobID()); + try { + URL dirurl = new URL(maindir); + HttpURLConnection httpConnection_dirurl = (HttpURLConnection) dirurl.openConnection(); + if (httpConnection_dirurl.getResponseCode() < 199 || 300 <= httpConnection_dirurl.getResponseCode()) { + return 0; + } + URL conciseurl = new URL(maindir + job.getJobID() + ".concise.fasta"); + URL archiveurl = new URL(maindir + job.getJobID() + ".tar.gz"); + URL logurl = new URL(maindir + "LOG"); + HttpURLConnection httpConnection_conciseurl = (HttpURLConnection) conciseurl.openConnection(); + HttpURLConnection httpConnection_logurl = (HttpURLConnection) logurl.openConnection(); + HttpURLConnection httpConnection_archiveurl = (HttpURLConnection) archiveurl.openConnection(); + if (199 < httpConnection_conciseurl.getResponseCode() && httpConnection_conciseurl.getResponseCode() < 300) { + ConcisefileExists = true; + running = false; + try { + job.setProtein(parsePredictions(conciseurl.openStream(), job.getJobID())); + } catch (IOException e) { + e.printStackTrace(); + } + } else { + // The job still can be running of failed... + ++countNoData; + } + if (199 < httpConnection_logurl.getResponseCode() && httpConnection_logurl.getResponseCode() < 300) { + LogfileExists = true; + job.setLog(parseLogFile(logurl.openStream())); + } else { + // The job has not been started at all... + job.setExecutionStatus("FAIL"); + job.setFinalStatus("STOPPED"); + running = false; + } + if (job.getLog().matches("(.*)TIMEOUT\\syour\\sjob\\stimed\\sout(.*)")) { + // blast job was too long (more than 3600 secs by default)... 
+ job.setExecutionStatus("FAIL"); + job.setFinalStatus("TIMEDOUT"); + running = false; + } else if (job.getLog().matches("(.*)Jpred\\serror:\\sDied\\sat(.*)")) { + // an internal Jpred error... + job.setExecutionStatus("FAIL"); + job.setFinalStatus("JPREDERROR"); + running = false; + } else if ((currDate.getTime() - job.getEndTime()) / 1000 > 3601 && LogfileExists && !ConcisefileExists) { + // the job was stopped with unknown reason... + job.setExecutionStatus("FAIL"); + job.setFinalStatus("STOPPED"); + running = false; + } + + httpConnection_conciseurl.disconnect(); + httpConnection_logurl.disconnect(); + httpConnection_archiveurl.disconnect(); + } catch (MalformedURLException e) { + e.printStackTrace(); + } + + if (!running) { + job.setAlignment(alignment); + job.setPredictions(predictions); + cw.FormQueryTables(job); + cw.ArchiveData(job, "undefined"); + return 1; + } + + return 0; + } + + private void ParsingForDate(String input, String date) { + int totalcount = 0; + int countinsertions = 0; + int countinserted = 0; + int countNotanalyzed = 0; + countNoData = 0; + + System.out.println("Inserting jobs for " + date); + try { + URL url = new URL(input); + URLConnection conn = url.openConnection(); + BufferedReader alljobs = new BufferedReader(new InputStreamReader(conn.getInputStream())); + String line; + + while ((line = alljobs.readLine()) != null) { + if (line.matches(date + ":(.*)jp_[^\\s]+")) { + totalcount++; + String[] job = line.split("\\s+"); + String jobid = job[job.length - 1]; + if (cw.JobisNotInsterted(jobid)) { + countinsertions += analyseJob(job); + } else { + ++countinserted; + } + } else { + ++countNotanalyzed; + } + } + alljobs.close(); + System.out.println("Total number of jobs = " + totalcount); + System.out.println(" " + countinserted + " jobs inserted already"); + System.out.println(" " + countNotanalyzed + " not analysed jobs"); + System.out.println(" " + countNoData + " jobs without *.concise.fasta file (RUNNING or FAILED)"); + 
System.out.println(" " + countinsertions + " new job insertions\n"); + } catch (MalformedURLException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + ; + } +} diff --git a/src/main/java/compbio/cassandra/JpredParserLocalFile.java b/src/main/java/compbio/cassandra/JpredParserLocalFile.java new file mode 100644 index 0000000..d4a22e6 --- /dev/null +++ b/src/main/java/compbio/cassandra/JpredParserLocalFile.java @@ -0,0 +1,134 @@ +package compbio.cassandra; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.MalformedURLException; +import java.io.FileInputStream; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; +import java.util.List; + +public class JpredParserLocalFile implements JpredParser { + private CassandraWriter cw = new CassandraWriter(); + private String dirprefix; + + public void setSource(String newsourceprefix) { + this.dirprefix = newsourceprefix; + } + + public JpredParserLocalFile() { + this.dirprefix = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/data.dat"; + } + + public JpredParserLocalFile(String sourceurl) { + this.dirprefix = sourceurl; + } + + public void Parsing(String source, int nDays) throws IOException { + Calendar cal = Calendar.getInstance(); + cal.add(Calendar.DATE, -nDays); + List alljobs = new ArrayList(); + File file = new File(source); + BufferedReader alljobsfile = new BufferedReader(new InputStreamReader(new FileInputStream(file))); + String line; + + while ((line = alljobsfile.readLine()) != null) { + alljobs.add(line); + } + alljobsfile.close(); + + System.out.println("Inserting jobs for " + nDays + " days, " + alljobs.size() + " jobs in total"); + final long startTime = System.currentTimeMillis(); + for (int i 
= 0; i < nDays; ++i) { + cal.add(Calendar.DATE, 1); + int month = cal.get(Calendar.MONTH) + 1; + int year = cal.get(Calendar.YEAR); + int day = cal.get(Calendar.DATE); + String date = year + "/" + month + "/" + day; + ParsingForDate(alljobs, date); + } + final long execTime = System.currentTimeMillis() - startTime; + System.out.println("Execution Time = " + execTime + " ms"); + } + + private void ParsingForDate(List input, String date) { + int totalcount = 0; + int countNoData = 0; + int countUnclearFASTAid = 0; + int countinsertions = 0; + int countinserted = 0; + int counAlignments = 0; + int countStrange = 0; + + System.out.println("Inserting jobs for " + date); + for (String in : input) { + if (in.matches(date + ":(.*)jp_[^\\s]+")) { + String[] table = in.split("\\s+"); + String starttime = table[0]; + String finishtime = table[1]; + String ip = table[2]; + String id = table[table.length - 1]; + totalcount++; + String confilename = dirprefix + "/" + id + "/" + id + ".concise"; + File confile = new File(confilename); + if (confile.exists()) { + try { + final FastaReader fr = new FastaReader(confilename); + final List seqs = new ArrayList(); + String newprotein = ""; + while (fr.hasNext()) { + final FastaSequence fs = fr.next(); + if (fs.getId().equals("QUERY") || fs.getId().equals(id)) + newprotein = fs.getSequence().replaceAll("\n", ""); + else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) { + seqs.add(fs); + } + } + if (newprotein.equals("")) { + countUnclearFASTAid++; + } else { + SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); + String dateInString1 = starttime.substring(0, starttime.indexOf(":")); + long insertdate = 0; + try { + Date dat = formatter.parse(dateInString1); + insertdate = dat.getTime(); + } catch (ParseException e) { + e.printStackTrace(); + } + //countinsertions += cw.FormQueryTables(insertdate, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs); + } + fr.close(); + } catch (IOException e) { + 
e.printStackTrace(); + } + } else { + countNoData++; + } + } else { + if (in.matches(date + "(.*)Sequence0/(.*)")) { + ++counAlignments; + } else { + ++countStrange; + } + } + } + if (true) { + System.out.println("Total number of jobs = " + totalcount); + System.out.println(" " + countinserted + " jobs inserted already"); + System.out.println(" " + counAlignments + " jalview jobs"); + System.out.println(" " + countStrange + " not analysed jobs"); + System.out.println(" " + countNoData + " jobs without *.concise.fasta file"); + System.out.println(" " + countUnclearFASTAid + " jobs with unclear FASTA protein id in *.concise.fasta"); + System.out.println(" " + countinsertions + " new job insertions\n"); + } + } + +} diff --git a/src/main/java/compbio/cassandra/Pair.java b/src/main/java/compbio/cassandra/Pair.java new file mode 100644 index 0000000..d942fea --- /dev/null +++ b/src/main/java/compbio/cassandra/Pair.java @@ -0,0 +1,24 @@ +package compbio.cassandra; + +public class Pair { + + private final K element0; + private final V element1; + + public static Pair createPair(K element0, V element1) { + return new Pair(element0, element1); + } + + public Pair(K element0, V element1) { + this.element0 = element0; + this.element1 = element1; + } + + public K getElement0() { + return element0; + } + + public V getElement1() { + return element1; + } +} \ No newline at end of file diff --git a/src/main/java/compbio/cassandra/StructureJobLog.java b/src/main/java/compbio/cassandra/StructureJobLog.java new file mode 100644 index 0000000..b2d0400 --- /dev/null +++ b/src/main/java/compbio/cassandra/StructureJobLog.java @@ -0,0 +1,45 @@ +package compbio.cassandra; + +import java.util.Map; + +public class StructureJobLog { + private String jobid; + private String dateStart; + private String dateEnd; + private String sequence; + private String ip; + private Map prediction; + + public StructureJobLog (String seq, String id, String dateStart, String dateEnd, String ip, Map pred) { + 
this.sequence = seq; + this.jobid = id; + this.dateStart = dateStart; + this.dateEnd = dateEnd; + this.ip = ip; + this.prediction = pred; + } + + public String getSequence () { + return sequence; + } + + public String getJobid () { + return jobid; + } + + public String getDateStart () { + return dateStart; + } + + public String getDateEnd () { + return dateEnd; + } + + public String getIp () { + return ip; + } + + public Map getPrediction () { + return prediction; + } +} diff --git a/src/main/java/compbio/cassandra/StructureProteinPrediction.java b/src/main/java/compbio/cassandra/StructureProteinPrediction.java new file mode 100644 index 0000000..936a67a --- /dev/null +++ b/src/main/java/compbio/cassandra/StructureProteinPrediction.java @@ -0,0 +1,28 @@ +package compbio.cassandra; + +import java.util.Map; + +public class StructureProteinPrediction { + private String sequence; + private String jobid; + private Map prediction; + + public StructureProteinPrediction (String seq, String id, Map pred) { + this.sequence = seq; + this.jobid = id; + this.prediction = pred; + } + + public String getSequence () { + return sequence; + } + + public String getJobid () { + return jobid; + } + + public Map getPrediction () { + return prediction; + } + +} diff --git a/src/main/java/compbio/cassandra/readers/CassandraReaderExecutionTime.java b/src/main/java/compbio/cassandra/readers/CassandraReaderExecutionTime.java new file mode 100644 index 0000000..347ebe3 --- /dev/null +++ b/src/main/java/compbio/cassandra/readers/CassandraReaderExecutionTime.java @@ -0,0 +1,104 @@ +package compbio.cassandra.readers; + + +import java.util.Calendar; +import java.util.List; +import java.util.ArrayList; + +import org.apache.log4j.Logger; + +import com.datastax.driver.core.Row; +import com.datastax.driver.core.Session; +import com.datastax.driver.core.ResultSet; +import com.datastax.driver.core.PreparedStatement; +import com.datastax.driver.core.BoundStatement; + +import 
compbio.cassandra.CassandraReader; +import compbio.cassandra.Pair; +import compbio.engine.ProteoCachePropertyHelperManager; +import compbio.util.PropertyHelper; + +public class CassandraReaderExecutionTime { + private Session session; + + public static String CASSANDRA_HOSTNAME = "localhost"; + public static boolean READ_WEB_JPRED = false; + public static boolean READ_LOCALFILE_JPRED = false; + + public void setSession(Session s) { + assert s != null; + session = s; + } + + private void setConditions() { + + } + + public boolean JobisNotInsterted(String jobid) { + ResultSet results1 = session.execute("SELECT * FROM ProteinLog WHERE JobID = '" + jobid + "';"); + if (results1.isExhausted()) { + return true; + } + return false; + } + + public boolean JobisNotArchived(String jobid) { + ResultSet results1 = session.execute("SELECT * FROM JpredArchive WHERE JobID = '" + jobid + "';"); + if (results1.isExhausted()) { + return true; + } + return false; + } + + /* + * getting data from the db + */ + public List> ReadProteinDataTable() { + final long startTime = System.currentTimeMillis(); + String com = "SELECT DataBegin,DataEnd FROM ProteinKeyspace.ProteinLog;"; + System.out.println("Command: " + com); + ResultSet results = session.execute(com); + final long queryTime = System.currentTimeMillis(); + List rows = results.all(); + System.out.println("Query time is " + (queryTime - startTime) + " msec"); + + List> res = new ArrayList>(); + int c = 0; + for (Row r : rows) { + Pair pair = new Pair(r.getString("DataBegin"), r.getString("DataEnd")); + res.add(pair); + ++c; + } + final long endTime = System.currentTimeMillis(); + System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); + return res; + } + + /* + * getting earlest date of jobs from the db + */ + public long getEarliestDateInDB() { + final long startTime = System.currentTimeMillis(); + String com = "SELECT jobtime FROM ProteinKeyspace.ProteinData;"; + 
System.out.println("Command: " + com); + ResultSet results = session.execute(com); + final long queryTime = System.currentTimeMillis(); + System.out.println("Query time is " + (queryTime - startTime) + " msec"); + + Calendar cal = Calendar.getInstance(); + long res = cal.getTimeInMillis(); + int c = 0; + while (!results.isExhausted()) { + Row r = results.one(); + long d1 = r.getLong("jobtime"); + if (res > d1) { + res = d1; + } + ++c; + } + final long endTime = System.currentTimeMillis(); + System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); + return res; + } + +} diff --git a/src/main/java/compbio/engine/Job.java b/src/main/java/compbio/engine/Job.java new file mode 100644 index 0000000..309e0e9 --- /dev/null +++ b/src/main/java/compbio/engine/Job.java @@ -0,0 +1,167 @@ +package compbio.engine; + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Date; + +public class Job { + private String jobid; + private String ExecutionStatus; + private String FinalStatus; + private String IP; + private String Email; + + private String StartingDateStr; + private long StartingDate; + private String EndTimeStr; + private long EndTime; + private String StartingTimeStr; + private long StartingTime; + private long ExecutionTime; + + static SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");; + static SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s"); + + public Job() { + ExecutionStatus = "OK"; + FinalStatus = "OK"; + jobid = ""; + + StartingDateStr = "1970/1/1"; + StartingTimeStr = "1970/1/1:0:0:0"; + EndTimeStr = "1970/1/1:0:0:0"; + StartingDate = 0L; + StartingTime = 0L; + EndTime = 0L; + ExecutionTime = 0L; + } + + public Job(String id, String date1, String date2) { + jobid = id; + ExecutionStatus = "OK"; + FinalStatus = "OK"; + setDates(date1, date2); + } + + protected void setDates (String d1, String d2) { + StartingTimeStr = d1; + StartingDateStr = 
d1.substring(0, d1.indexOf(":")); + StartingTime = convertTime(StartingTimeStr); + StartingDate = convertDate(StartingDateStr); + + EndTimeStr = d2; + EndTime = convertTime(d2); + ExecutionTime = EndTime - StartingTime; + } + + + protected long convertDate (String d) { + try { + if (null != d) { + Date startdate = dateformatter.parse(d); + return startdate.getTime(); + } + } catch (ParseException e) { + e.printStackTrace(); + } + return 0L; + } + + protected long convertTime (String d) { + try { + if (null != d) { + Date startdate = timeformatter.parse(d); + return startdate.getTime(); + } + } catch (ParseException e) { + e.printStackTrace(); + } + return 0L; + } + + + public void setStartingDate(String date) { + this.StartingDateStr = date; + StartingDate = convertDate(date); + StartingTime = convertTime(date); + if (0 < EndTime) { + ExecutionTime = EndTime - StartingTime; + } + } + + public void setEndTime(String date) { + this.EndTimeStr = date; + EndTime = convertTime(date); + if (0 < StartingTime) { + ExecutionTime = EndTime - StartingTime; + } + } + + public String getStartingDateStr() { + return StartingDateStr; + } + public String getStartingTimeStr() { + return StartingTimeStr; + } + public long getStartingDate() { + return StartingDate; + } + public long getStartingTime() { + return StartingTime; + } + + public String getEndTimeStr() { + return EndTimeStr; + } + public long getEndTime() { + return EndTime; + } + + public void setExecutionStatus(String status) { + ExecutionStatus = status; + } + + public void setFinalStatus(String status) { + FinalStatus = status; + } + + public String getExecutionStatus() { + return ExecutionStatus; + } + + public String getFinalStatus() { + return FinalStatus; + } + + public long getExecutionTime() { + return ExecutionTime; + } + + public void setIP(String IP) { + this.IP = IP; + } + + public String getIP() { + if (null != IP) + return IP; + return "127.0.0.1"; + } + + public void setEmail(String email) { + this.Email = 
email; + } + + public String getEmail() { + if (null != Email) + return Email; + return "user@localhost"; + } + + public void setJobId(String id) { + this.jobid = id; + } + + public String getJobID() { + return jobid; + } +} diff --git a/src/main/java/compbio/engine/JpredJob.java b/src/main/java/compbio/engine/JpredJob.java new file mode 100644 index 0000000..a606746 --- /dev/null +++ b/src/main/java/compbio/engine/JpredJob.java @@ -0,0 +1,64 @@ +package compbio.engine; + +import java.util.ArrayList; +import java.util.List; + +import compbio.cassandra.FastaSequence; + +public class JpredJob extends Job { + private String protein; + private String log; + private List predictions; + private List alignment; + + public JpredJob() { + super(); + } + + public JpredJob(String jobid, String date1, String date2) { + super(jobid, date1, date2); + this.protein = protein; + predictions = new ArrayList(); + alignment = new ArrayList(); + log = ""; + } + + public JpredJob(String jobid, String date1, String date2, List alignment, List predictions) { + super(jobid, date1, date2); + this.protein = protein; + this.alignment = alignment; + this.predictions = predictions; + } + + public void setProtein(String protein) { + this.protein = protein; + } + + public String getProtein() { + return protein; + } + + public void setLog(String log) { + this.log = log; + } + + public String getLog() { + return log; + } + + public void setAlignment(List al) { + this.alignment = al; + } + + public List getAlignment() { + return alignment; + } + + public void setPredictions(List pr) { + this.predictions = pr; + } + + public List getPredictions() { + return predictions; + } +} diff --git a/src/main/java/compbio/engine/ProteoCachePropertyHelperManager.java b/src/main/java/compbio/engine/ProteoCachePropertyHelperManager.java new file mode 100644 index 0000000..4615297 --- /dev/null +++ b/src/main/java/compbio/engine/ProteoCachePropertyHelperManager.java @@ -0,0 +1,150 @@ +/* Copyright (c) 2009 Peter 
Troshin + * + * JAva Bioinformatics Analysis Web Services (JABAWS) @version: 1.0 + * + * This library is free software; you can redistribute it and/or modify it under the terms of the + * Apache License version 2 as published by the Apache Software Foundation + * + * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without + * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the Apache + * License for more details. + * + * A copy of the license is in apache_license.txt. It is also available here: + * @see: http://www.apache.org/licenses/LICENSE-2.0.txt + * + * Any republication or derived work distributed in source code form + * must include this copyright and license notice. + */ + +package compbio.engine; + +import java.io.File; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; + +import org.apache.log4j.Logger; + +import compbio.util.PropertyHelper; +import compbio.util.Util; + +public final class ProteoCachePropertyHelperManager { + + private static Logger log = Logger.getLogger(ProteoCachePropertyHelperManager.class); + private static PropertyHelper ph = null; + + /** + * Ways to fix path problem: + * 1) find a path to WEB-INF directory based on the path to a known class. + * Then prepend this absolute path to the rest of paths + * pros: no input from user + * cons: relocation of the source may cause problems + * + * 2) Require users to add configuration directories to the class + * path and then load entries from it. + * pros: + * cons: Many paths needs to be added. Put significant burden on the user. + * Hard to tell web appl server to add these entries to its class path. + * + * 3) Ask for project source directory explicitly in the configuration. + * pros: + * cons: similar to 1, but this initial configuration file must reside in + * well known location! Why ask users what can be found automatically? 
+ * + * 4) Have everything in the location already in class path for tomcat. + * pros: + * cons: only classes and lib/*.jar are added, Eclipse will remove non + * classses from classes dir. + * + * Try 1 - succeed. + * + * @return an instance + */ + public static PropertyHelper getPropertyHelper() { + if (ph == null) { + try { + File MainPropertyFile = getResourceFromClasspath("conf/Proteocache.properties"); + ph = new PropertyHelper(MainPropertyFile); + } catch (IOException e) { + log.warn("Cannot read property files! Reason: " + e.getLocalizedMessage(), e.getCause()); + } + } + return ph; + } + + static File getResourceFromClasspath(String resourceName) { + assert !Util.isEmpty(resourceName); + String locPath = getLocalPath(); + File prop = new File(locPath + resourceName); + if (!prop.exists()) { + log.warn("Could not find a resource " + resourceName + " in the classpath!"); + } + return prop; + } + + /** + * Method return the absolute path to the project root directory. It assumes + * the following structure of the project: + * project-root: + * conf/settings + * binaries + * WEB-INF/classes/compbio/engine/conf/PropertyHelperManager.class + * If the structure changes it must be reflected in this method + * + * @return the local path + * @throws RuntimeException + * if cannot determine the local path + */ + public static String getLocalPath() { + String clname = ProteoCachePropertyHelperManager.class.getSimpleName(); + URL url = ProteoCachePropertyHelperManager.class.getResource(clname + ".class"); + File f = null; + try { + f = new File(url.toURI()); + // Iterate up the hierarchy to find a root project directory + for (int i = 0; i < 5; i++) { + f = f.getParentFile(); + } + } catch (URISyntaxException e) { + String mes = "Could not find resources path! Problems locating PropertyHelperManager class! 
"; + log.error(mes + e.getLocalizedMessage(), e.getCause()); + throw new RuntimeException(mes + e.getLocalizedMessage(), e.getCause()); + } catch (IllegalArgumentException e) { + // Classes are in the jar file, using different method to determine + // the path new File(INCORRECT URL) throws it + String mes = "It looks like classes are in the jar file. " + + "Attempting a different method to determinine the path to the resources"; + log.debug(mes + e.getLocalizedMessage(), e.getCause()); + try { + f = new File(ProteoCachePropertyHelperManager.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath()); + + // Iterate up the hierarchy to find a root project directory + // This time there is not need to walk up all class packages + // WEB_APPL_NAME\WEB-INF\lib\JAR-FILE-NAME + // jws2-1.0\WEB-INF\lib\full-jws2-1.0.jar + for (int i = 0; i < 3; i++) { + f = f.getParentFile(); + } + } catch (URISyntaxException e1) { + log.error("Could not find resources path! " + e1.getLocalizedMessage(), e1.getCause()); + throw new RuntimeException("Could not find resources path! 
", e1.getCause()); + } + } + log.debug("Project directory is: " + f.getAbsolutePath()); + return f.getAbsolutePath() + File.separator; + } + + public static int getIntProperty(String propValue) { + if (!Util.isEmpty(propValue)) { + return Integer.parseInt(propValue.trim()); + } + return -1; + } + + public static boolean getBooleanProperty(String propValue) { + if (!Util.isEmpty(propValue)) { + propValue = propValue.trim(); + return Boolean.parseBoolean(propValue); + } + return false; + }} diff --git a/src/main/java/compbio/engine/archive/Archive.java b/src/main/java/compbio/engine/archive/Archive.java new file mode 100644 index 0000000..2a1f410 --- /dev/null +++ b/src/main/java/compbio/engine/archive/Archive.java @@ -0,0 +1,5 @@ +package compbio.engine.archive; + +public class Archive { + +} diff --git a/src/main/java/compbio/engine/archive/ArchiveManager.java b/src/main/java/compbio/engine/archive/ArchiveManager.java new file mode 100644 index 0000000..dabaf3a --- /dev/null +++ b/src/main/java/compbio/engine/archive/ArchiveManager.java @@ -0,0 +1,133 @@ +package compbio.engine.archive; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.InputStream; +import java.util.Iterator; +import java.util.Scanner; + +//import compbio.util.Util; + +/** + * Manage files in ProteoCache Archive + * + * @author Alexander Sherstnev + * @version 1.0 November 2013 + * + */ +public class ArchiveManager implements Iterator { + Archive archive; + //private final Scanner input; + /** + * Delimiter for the scanner + */ + //private final String DELIM = ">"; + + /** + * Header data can contain non-ASCII symbols and read in UTF8 + * + * @param mainPath + * the absolute path to the ProteoCache job archive + * @throws FileNotFoundException + * if the input file is not found + * @throws IllegalStateException + * if the close method was called on this instance + * + */ + public ArchiveManager(final String mainPath) throws FileNotFoundException { + /* + input = new 
Scanner(new File(mainPath), "UTF8"); + input.useDelimiter(DELIM); + Runtime.getRuntime().addShutdownHook(new Thread() { + + @Override + public void run() { + if (input != null) { + input.close(); + } + } + }); + */ + } + + public ArchiveManager(Archive ar) { + archive = ar; + } + + + /** + * {@inheritDoc} + * + * @throws IllegalStateException + * if the close method was called on this instance + */ + @Override + public boolean hasNext() { + //return input.hasNext(); + return true; + } + + /** + * Reads the next FastaSequence from the input + * + * @throws AssertionError + * if the header or the sequence is missing + * @throws IllegalStateException + * if the close method was called on this instance + * @throws MismatchException + * - if there were no more FastaSequence's. + */ + @Override + public ArchivedJob next() { + String path = "bla-bla-bla"; + /* + String path = input.next(); + while (fastaHeader.indexOf("\n") < 0 && input.hasNext()) { + path = fastaHeader.concat(">"); + path = fastaHeader.concat(input.next()); + } + */ + return new ArchivedJob(path); + } + + /** + * Not implemented + */ + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + /** + * Call this method to close the connection to the input file if you want to + * free up the resources. The connection will be closed on the JVM shutdown + * if this method was not called explicitly. No further reading on this + * instance of the FastaReader will be possible after calling this method. + */ + public void close() { + //input.close(); + } + + private static ArchivedJob toFastaSequence(final String singleFastaEntry) { + + // assert !Util.isEmpty(singleFastaEntry) : + // "Empty String where FASTA sequence is expected!"; + + int nlineidx = singleFastaEntry.indexOf("\n"); + if (nlineidx < 0) { + throw new AssertionError( + "The FASTA sequence must contain the header information" + + " separated by the new line from the sequence. 
Given sequence does not appear to " + + "contain the header! Given data:\n " + + singleFastaEntry); + } + String header = singleFastaEntry.substring(0, nlineidx); + + /* + * if (Util.isEmpty(sequence)) { throw new AssertionError( + * "Empty sequences are not allowed! Please make sure the " + + * " data is in the FASTA format! Given data:\n " + singleFastaEntry); } + */ + return new ArchivedJob(header); + } +} diff --git a/src/main/java/compbio/engine/archive/ArchivedJob.java b/src/main/java/compbio/engine/archive/ArchivedJob.java new file mode 100644 index 0000000..3f0f7d5 --- /dev/null +++ b/src/main/java/compbio/engine/archive/ArchivedJob.java @@ -0,0 +1,44 @@ +package compbio.engine.archive; + +import java.io.FileOutputStream; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.channels.Channels; +import java.nio.channels.ReadableByteChannel; +import java.util.List; + +public class ArchivedJob { + String path; + int filesize; + List files; + + ArchivedJob (String path) { + this.path = path; + } + + public boolean getArchiveFromWS() { + return false; + } + + + public boolean getArchiveFromWeb (String webpath) throws IOException, MalformedURLException { + URL website = new URL(webpath); + ReadableByteChannel rbc = Channels.newChannel(website.openStream()); + FileOutputStream fos = new FileOutputStream(path); + fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE); + return true; + } + + + public int getSize() { + return filesize; + } + + public List unpack() { + if (null != files) { + + } + return files; + } +} diff --git a/src/main/java/compbio/listeners/ContextListener.java b/src/main/java/compbio/listeners/ContextListener.java new file mode 100644 index 0000000..ed15609 --- /dev/null +++ b/src/main/java/compbio/listeners/ContextListener.java @@ -0,0 +1,178 @@ +package compbio.listeners; + +import java.io.IOException; +import java.util.concurrent.Executors; +import 
java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +import javax.servlet.ServletContextEvent; +import javax.servlet.ServletContextListener; +import javax.servlet.annotation.WebListener; + +import org.apache.log4j.Logger; + +import com.datastax.driver.core.Session; + +import compbio.cassandra.CassandraNativeConnector; +import compbio.cassandra.CassandraNewTableWriter; +import compbio.cassandra.JpredParserHTTP; +import compbio.cassandra.JpredParserLocalFile; +import compbio.engine.ProteoCachePropertyHelperManager; +import compbio.util.PropertyHelper; +import compbio.util.Util; + +/** + * Application Lifecycle Listener implementation class ContextListener + * + */ +@WebListener +public class ContextListener implements ServletContextListener { + private ScheduledExecutorService webjob_scheduler; + private ScheduledExecutorService localjob_scheduler; + private ScheduledExecutorService update_scheduler; + CassandraNativeConnector db = new CassandraNativeConnector(); + static PropertyHelper ph = ProteoCachePropertyHelperManager.getPropertyHelper(); + private static Logger log = Logger.getLogger(ContextListener.class); + public static boolean READ_WEB_JPRED = false; + public static boolean READ_LOCALFILE_JPRED = false; + public static boolean FILL_NEWTABLE = false; + + private static boolean initBooleanValue(String key) { + assert key != null; + String status = ph.getProperty(key); + log.debug("Loading property: " + key + " with value: " + status); + if (Util.isEmpty(status)) { + return false; + } + return new Boolean(status.trim()).booleanValue(); + } + + /** + * @see ServletContextListener#contextInitialized(ServletContextEvent) + */ + public void contextInitialized(ServletContextEvent arg0) { + System.out.println("ProteoCache session start......"); + // connect to the db and create table if needed + db.Connect(); + final CassandraNewTableWriter updater = new CassandraNewTableWriter(); + + READ_WEB_JPRED = 
initBooleanValue("cassandra.jpred.web.update"); + READ_LOCALFILE_JPRED = initBooleanValue("cassandra.jpred.local.update"); + FILL_NEWTABLE = initBooleanValue("cassandra.newtables.update"); + + if (FILL_NEWTABLE) { + System.out.println("Initializating new table update scheduler"); + update_scheduler = Executors.newSingleThreadScheduledExecutor(); + update_scheduler.schedule(new Runnable() { + @Override + public void run() { + updater.FillNewTable(); + } + }, 10, TimeUnit.SECONDS); + } + + if (READ_WEB_JPRED) { + // get data from real Jpred production server + final String datasrc = "http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat"; + final String prefix = "http://www.compbio.dundee.ac.uk/www-jpred/results"; + final JpredParserHTTP parser = new JpredParserHTTP(prefix); + + int initialdelay = 300; + int updaterate = 600; + int newinitialdelay = ProteoCachePropertyHelperManager.getIntProperty(ph.getProperty("cassandra.jpred.web.inidelay")); + if (0 <= newinitialdelay) { + initialdelay = newinitialdelay; + } + int newupdaterate = ProteoCachePropertyHelperManager.getIntProperty(ph.getProperty("cassandra.jpred.web.updaterate")); + if (0 < newupdaterate) { + updaterate = newupdaterate; + } + final int updateperiod = ProteoCachePropertyHelperManager.getIntProperty(ph.getProperty("cassandra.jpred.web.period")); + + webjob_scheduler = Executors.newSingleThreadScheduledExecutor(); + System.out.println("Initializating web job scheduler"); + System.out.println(" initial delay = " + initialdelay + " seconds"); + System.out.println(" update rate = " + updaterate + " seconds"); + if (0 < updateperiod) + System.out.println(" update period = " + updateperiod + " days"); + else + System.out.println(" update period = 5 days"); + + webjob_scheduler.scheduleAtFixedRate(new Runnable() { + @Override + public void run() { + try { + if (0 < updateperiod) { + parser.Parsing(datasrc, updateperiod); + } else { + parser.Parsing(datasrc, 5); + } + } catch (IOException e) { 
+ // TODO Auto-generated catch block + e.printStackTrace(); + } + } + }, initialdelay, updaterate, TimeUnit.SECONDS); + } + + if (READ_LOCALFILE_JPRED) { + // get irtifical data generated for the DB stress tests + final String datasrc = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/data.dat"; + final String prefix = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/Jpreddata"; + final JpredParserLocalFile parser = new JpredParserLocalFile(prefix); + + int initialdelay = 300; + int updaterate = 600; + int newinitialdelay = ProteoCachePropertyHelperManager.getIntProperty(ph.getProperty("cassandra.jpred.local.inidelay")); + if (0 <= newinitialdelay) { + initialdelay = newinitialdelay; + } + int newupdaterate = ProteoCachePropertyHelperManager.getIntProperty(ph.getProperty("cassandra.jpred.local.updaterate")); + if (0 < newupdaterate) { + updaterate = newupdaterate; + } + final int updateperiod = ProteoCachePropertyHelperManager.getIntProperty(ph.getProperty("cassandra.jpred.local.period")); + + localjob_scheduler = Executors.newSingleThreadScheduledExecutor(); + System.out.println("Initializating local job scheduler"); + System.out.println(" initial delay = " + initialdelay + " seconds"); + System.out.println(" update rate = " + updaterate + " seconds"); + if (0 < updateperiod) + System.out.println(" update period = " + updateperiod + " days"); + else + System.out.println(" update period = 5 days"); + localjob_scheduler.scheduleAtFixedRate(new Runnable() { + @Override + public void run() { + try { + if (0 < updateperiod) { + parser.Parsing(datasrc, updateperiod); + } else { + parser.Parsing(datasrc, 100); + } + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + }, initialdelay, updaterate, TimeUnit.SECONDS); + } + + } + + /** + * @see ServletContextListener#contextDestroyed(ServletContextEvent) + */ + public void contextDestroyed(ServletContextEvent arg0) { + db.Closing(); + 
package compbio.listeners;

import java.io.IOException;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import compbio.cassandra.*;
import compbio.statistic.StatisticsProt;

/**
 * Servlet implementation class DetailList.
 *
 * NOTE(review): this servlet is currently a stub - doGet reads its two date
 * parameters but the actual query (readDetail) is commented out and nothing
 * is written to the response or forwarded to a view.
 */
@WebServlet("/DetailList")
public class DetailList extends HttpServlet {
	private static final long serialVersionUID = 1L;

	/**
	 * @see HttpServlet#HttpServlet()
	 */

	/**
	 * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse
	 *      response)
	 */
	protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		// "data1"/"data2" carry the date range; presumably yyyy/MM/dd like the
		// other servlets in this package - TODO confirm once wired up
		List result; // never assigned: the readDetail call below is disabled
		String date1 = request.getParameter("data1");
		String date2 = request.getParameter("data2");
		StatisticsProt sp = new StatisticsProt();
		// result = sp.readDetail(date1, date2);
	}

	/**
	 * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse
	 *      response)
	 */
	protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		// TODO Auto-generated method stub
	}

}
b/src/main/java/compbio/listeners/ServletJobsByDate.java @@ -0,0 +1,52 @@ +package compbio.listeners; + +import java.io.IOException; +import java.util.Calendar; +import java.util.List; + +import javax.servlet.RequestDispatcher; +import javax.servlet.ServletException; +import javax.servlet.annotation.WebServlet; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import compbio.cassandra.DataBase; +import compbio.statistic.CassandraRequester; +import compbio.statistic.StatisticsProt; + +@WebServlet("/ServletJobsByDate") +public class ServletJobsByDate extends HttpServlet { + private static final long serialVersionUID = 1L; + + /** + * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse + * response) + */ + protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + final long startTime = System.currentTimeMillis(); + String date1 = request.getParameter("data1"); + String date2 = request.getParameter("data2"); + CassandraRequester cr = new CassandraRequester(); + if (null != request.getParameter("option")) { + Calendar cal = Calendar.getInstance(); + date1 = StatisticsProt.DateFormatYYMMDD(cr.earliestDate()); + date2 = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH); + } + request.setAttribute("data1", date1); + request.setAttribute("data2", date2); + List res = cr.countJobs(date1, date2); + request.setAttribute("result", res); + final long endTime = System.currentTimeMillis(); + request.setAttribute("timeExecution", (endTime - startTime)); + System.out.println ("QueryServlet.doGet: total number of dates = " + res.size()); + request.setAttribute("flag", request.getParameter("option")); + RequestDispatcher rd = request.getRequestDispatcher("/ReportJobsByDate.jsp"); + rd.forward(request, response); + } + + protected void doPost(HttpServletRequest request, 
package compbio.listeners;

import java.io.IOException;

import javax.servlet.RequestDispatcher;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import compbio.statistic.CassandraRequester;

/**
 * Servlet that renders the stored log information of a single job.
 */
@WebServlet("/ServletLogInfo")
public class ServletLogInfo extends HttpServlet {
	private static final long serialVersionUID = 1L;

	/**
	 * Looks up the log record for the job id supplied in the "IdJob" request
	 * parameter, records the query duration, and forwards to the report JSP.
	 *
	 * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
	 */
	protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		final long begin = System.currentTimeMillis();
		final String jobId = request.getParameter("IdJob");
		final CassandraRequester requester = new CassandraRequester();
		request.setAttribute("result", requester.readJobLog(jobId));
		final long finish = System.currentTimeMillis();
		request.setAttribute("timeExecution", (finish - begin));
		request.setAttribute("IdJob", jobId);
		RequestDispatcher dispatcher = request.getRequestDispatcher("/ReportLogInfo.jsp");
		dispatcher.forward(request, response);
	}

	/**
	 * POST requests are handled exactly like GET requests.
	 *
	 * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
	 */
	protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		doGet(request, response);
	}

}
b/src/main/java/compbio/listeners/ServletSequenceProtein.java new file mode 100644 index 0000000..94df90c --- /dev/null +++ b/src/main/java/compbio/listeners/ServletSequenceProtein.java @@ -0,0 +1,56 @@ +package compbio.listeners; + +import java.io.IOException; + +import javax.servlet.RequestDispatcher; +import javax.servlet.ServletException; +import javax.servlet.annotation.WebServlet; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import compbio.statistic.CassandraRequester; +import compbio.statistic.StatisticsProt; + +/** + * Servlet implementation class ProtServlet + */ +@WebServlet("/ServletSequenceProtein") +public class ServletSequenceProtein extends HttpServlet { + private static final long serialVersionUID = 1L; + + /** + * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse + * response) + */ + protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + final long startTime = System.currentTimeMillis(); + String flag = request.getParameter("protein"); + String prot = request.getParameter("prot"); + String search = request.getParameter("Search"); + int counter = Integer.parseInt(request.getParameter("counterJob")); + CassandraRequester cr = new CassandraRequester(); + if (search.equals("Search counter")) { + request.setAttribute("result", cr.readProteinByCounter(counter)); + } else { + request.setAttribute("result", cr.readProteins(prot, flag)); + } + final long endTime = System.currentTimeMillis(); + request.setAttribute("timeExecution", (endTime - startTime)); + request.setAttribute("prot", prot); + request.setAttribute("flag", flag); + request.setAttribute("search", search); + request.setAttribute("counter", counter); + RequestDispatcher rd = request.getRequestDispatcher("/ReportSequenceProtein.jsp"); + rd.forward(request, response); + } + + /** + * @see 
HttpServlet#doPost(HttpServletRequest request, HttpServletResponse + * response) + */ + protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + doGet(request, response); + } + +} diff --git a/src/main/java/compbio/listeners/ServletTimeExecution.java b/src/main/java/compbio/listeners/ServletTimeExecution.java new file mode 100644 index 0000000..c380604 --- /dev/null +++ b/src/main/java/compbio/listeners/ServletTimeExecution.java @@ -0,0 +1,54 @@ +package compbio.listeners; + +import java.io.IOException; +import java.util.Calendar; + +import javax.servlet.RequestDispatcher; +import javax.servlet.ServletException; +import javax.servlet.annotation.WebServlet; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import compbio.statistic.StatisticsProt; +import compbio.statistic.CassandraRequester; + +/** + * Servlet implementation class LengthServlet + */ +@WebServlet("/ServletTimeExecution") +public class ServletTimeExecution extends HttpServlet { + private static final long serialVersionUID = 1L; + + /** + * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse + * response) + */ + protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + final long startTime = System.currentTimeMillis(); + String date1 = request.getParameter("data1"); + String date2 = request.getParameter("data2"); + CassandraRequester sp = new CassandraRequester(); + if (null != request.getParameter("option")) { + Calendar cal = Calendar.getInstance(); + date1 = StatisticsProt.DateFormatYYMMDD(sp.earliestDate()); + date2 = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH); + } + request.setAttribute("data1", date1); + request.setAttribute("data2", date2); + request.setAttribute("result", sp.extractExecutionTime(date1, 
date2)); + request.setAttribute("flag", request.getParameter("option")); + final long endTime = System.currentTimeMillis(); + request.setAttribute("timeExecution", (endTime - startTime)); + RequestDispatcher rd = request.getRequestDispatcher("/ReportTimeExecution.jsp"); + rd.forward(request, response); + } + + /** + * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse + * response) + */ + protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + doGet(request, response); + } +} diff --git a/src/main/java/compbio/statistic/CassandraRequester.java b/src/main/java/compbio/statistic/CassandraRequester.java new file mode 100755 index 0000000..2da38e7 --- /dev/null +++ b/src/main/java/compbio/statistic/CassandraRequester.java @@ -0,0 +1,287 @@ +package compbio.statistic; + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import compbio.cassandra.CassandraNativeConnector; +import compbio.cassandra.CassandraReader; +import compbio.cassandra.DataBase; +import compbio.cassandra.Pair; +import compbio.cassandra.StructureJobLog; +import compbio.cassandra.StructureProteinPrediction; + +public class CassandraRequester { + private CassandraReader db = new CassandraReader(); + private ArrayList query; + private static long currentDate = 0; + private static long earlestDate = 0; + + /* + * query: execution time for the period from date1 till date2 + */ + public List extractExecutionTime(String date1, String date2) { + if (null == date1) { + date1 = "1970/1/1"; + } + if (null == date2) { + date1 = "2100/1/1"; + } + if (!isThisDateValid(date1) || !isThisDateValid(date2)) { + System.out.println("Wrong date: point 3"); + return null; + } + SetDateRange(); + int nbins = 5; + long dateStart = DateParsing(date1); + long dateEnd = 
DateParsing(date2); + if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd) + return null; + if (dateStart < earlestDate) + dateStart = earlestDate; + if (dateEnd > currentDate) + dateStart = currentDate; + + Calendar start = Calendar.getInstance(); + start.setTime(new Date(dateStart)); + Calendar end = Calendar.getInstance(); + end.setTime(new Date(dateEnd)); + query = new ArrayList(); + List totalTime = new ArrayList(); + for (int i = 0; i < nbins; i++) + totalTime.add(i, 0); + List> res = db.ReadProteinDataTable(); + List> numres = new ArrayList>(); + + for (Pair entry : res) { + SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd"); + try { + Date jobstartdate = dateformatter.parse(entry.getElement0()); + long date = jobstartdate.getTime(); + if (dateStart <= date && date <= dateEnd) { + SimpleDateFormat datetimeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s"); + Date jobstarttime = datetimeformatter.parse(entry.getElement0()); + Date jobendtime = datetimeformatter.parse(entry.getElement1()); + long diff = (jobendtime.getTime() - jobstarttime.getTime()) / 1000; + Pair pair = new Pair(jobstartdate, Long.valueOf(diff)); + numres.add(pair); + } + } catch (ParseException e) { + e.printStackTrace(); + } + } + + for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) { + List timeResult = new ArrayList(); + for (int i = 0; i < nbins; i++) + timeResult.add(i, 0); + for (Pair p : numres) { + if (date.equals(p.getElement0())) { + long lenResult = p.getElement1().longValue(); + if (lenResult <= 30) + timeResult.set(0, timeResult.get(0) + 1); + else if (lenResult > 30 && lenResult <= 60) + timeResult.set(1, timeResult.get(1) + 1); + else if (lenResult > 60 && lenResult <= 120) + timeResult.set(2, timeResult.get(2) + 1); + else if (lenResult > 120 && lenResult <= 600) + timeResult.set(3, timeResult.get(3) + 1); + else { + timeResult.set(4, timeResult.get(4) + 1); + } + } + } + for 
(int i = 0; i < nbins; i++) + totalTime.set(i, totalTime.get(i) + timeResult.get(i)); + DataBase db = new DataBase(); + db.setTimeRez(timeResult); + db.setDate(DateFormat(date.getTime())); + query.add(db); + } + + DataBase db = new DataBase(); + db.setTimeTotalExec(totalTime); + query.add(db); + System.out.println("StatisticsProt.readLength: total number of dates = " + query.size()); + return query; + } + + /* + * query: total number of jobs for the period from date1 till date2 + */ + public List countJobs(String date1, String date2) { + if (null == date1) { + date1 = "1970/1/1"; + } + if (null == date2) { + date1 = "2100/1/1"; + } + if (!isThisDateValid(date1) || !isThisDateValid(date2)) { + System.out.println("Wrong date: point 3"); + return null; + } + SetDateRange(); + long dateStart = DateParsing(date1); + long dateEnd = DateParsing(date2); + if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd) + return null; + if (dateStart < earlestDate) + dateStart = earlestDate; + if (dateEnd > currentDate) + dateStart = currentDate; + + Calendar start = Calendar.getInstance(); + start.setTime(new Date(dateStart)); + Calendar end = Calendar.getInstance(); + end.setTime(new Date(dateEnd)); + query = new ArrayList(); + for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) { + long res = db.ReadDateTable(date.getTime()); + DataBase db = new DataBase(); + db.setTotal((int)res); + db.setDate(DateFormat(date.getTime())); + query.add(db); + } + System.out.println("StatisticsProt.readLength: total number of dates = " + query.size()); + return query; + } + /* + * query: protein sequence + * */ + public List readProteins(String protIn, String flag) { + query = new ArrayList(); + List res; + if (flag.equals("whole")) + res = db.ReadWholeSequence(protIn); + else + res = db.ReadPartOfSequence(protIn); + for (StructureProteinPrediction entry : res) { + Map pred = entry.getPrediction(); + Iterator it = 
pred.entrySet().iterator(); + while (it.hasNext()) { + DataBase db = new DataBase(); + db.setProt(entry.getSequence()); + Map.Entry pairs = (Map.Entry)it.next(); + db.setId(entry.getJobid()); + db.setJpred(pairs.getValue().toString()); + if (flag.equals("part")) + db.setSubProt(CreateSubprot (entry.getSequence(), protIn)); + query.add(db); + } + } + return query; + } + + /* + * query protein sequences with number of jobs + */ + public List readProteinByCounter(int minimalcounter) { + query = new ArrayList(); + Map map = db.ReadProteinSequenceByCounter(); + for (Map.Entry entry : map.entrySet()) { + if (entry.getValue() > minimalcounter) { + DataBase db = new DataBase(); + db.setTotalId(entry.getValue()); + db.setProt(entry.getKey()); + query.add(db); + } + } + return query; + } + + /* + * query jobs log info + */ + public DataBase readJobLog(String jobid) { + // query = new ArrayList(); + StructureJobLog res = db.ReadJobLog(jobid); + DataBase query = new DataBase(); + query.setLogInfo(res); + // query.setres); + return query; + } + /* + * create list of parts of protein sequence; + */ + private static List CreateSubprot (String protein, String subprot) { + List sub = new ArrayList(); + String subStr = protein; + while (subStr.length() > 0 && subStr.contains(subprot)) { + String first = subStr.substring(0, subStr.indexOf(subprot)); + if (first.length() > 0) + sub.add(first); + sub.add(subprot); + subStr = subStr.substring(subStr.indexOf(subprot) + subprot.length(), subStr.length()); + } + if (subStr.length() > 0) + sub.add(subStr); + return sub; + } + /* + * convert String date into long date (miliseconds since the epoch start) + */ + private static long DateParsing(String datInput) { + if (datInput == null) { + return 0; + } + long dateWorkSt = 0; + SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); + try { + dateWorkSt = formatter.parse(datInput).getTime(); + } catch (ParseException e) { + e.printStackTrace(); + } + return dateWorkSt; + } + + // 
convert long to date in string format + private static String DateFormat(long inDate) { + SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy"); + return datformat.format(new Date(inDate)); + } + + /* + * set earlest date and current dates. earlestDate is static and should be + * set at the 1st call currentDate should be re-calculated every time + */ + private static void SetDateRange() { + Calendar cal = Calendar.getInstance(); + currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH)); + if (0 == earlestDate) { + CassandraRequester cr = new CassandraRequester(); + earlestDate = cr.earliestDate(); + System.out.println("Set earlest Date = " + earlestDate); + } + } + + public boolean isThisDateValid(String dateToValidate) { + if (dateToValidate == null || dateToValidate.equals("")) { + System.out.println("Undefined date"); + return false; + } + SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd"); + try { + // if not valid, this will throw ParseException + sdf.setLenient(false); + Date date = sdf.parse(dateToValidate); + } catch (ParseException e) { + e.printStackTrace(); + return false; + } + return true; + } + + /* + * find the earliest date in the database + */ + public long earliestDate() { + earlestDate = CassandraNativeConnector.getEarliestDateInDB(); + return earlestDate; + } + +} diff --git a/src/main/java/compbio/statistic/StatisticsProt.java b/src/main/java/compbio/statistic/StatisticsProt.java new file mode 100755 index 0000000..05375c0 --- /dev/null +++ b/src/main/java/compbio/statistic/StatisticsProt.java @@ -0,0 +1,402 @@ +package compbio.statistic; + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Collections; +import java.util.Date; +import java.util.Iterator; +import java.util.List; + +import compbio.cassandra.CassandraNativeConnector; +import compbio.cassandra.DataBase; 
+ +public class StatisticsProt { + private CassandraNativeConnector cc = new CassandraNativeConnector(); + private ArrayList query; + private static long currentDate = 0; + private static long earlestDate = 0; + + /* + * query: the period from date1 till date2 + * */ + public List readDetails(String date1, String date2) { + + if (!isThisDateValid(date1) || !isThisDateValid(date2)) { + System.out.println("Wrong date: point 1"); + return null; + } + SetDateRange(); + long dateStart = DateParsing(date1); + long dateEnd = DateParsing(date2); + if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate) || dateStart > dateEnd) + return null; + if (dateStart < earlestDate) + dateStart = earlestDate; + if (dateEnd > currentDate) + dateStart = currentDate; + System.out.println("StatisticsProt.readDetails: earlestDate = " + earlestDate + ", currentDate = " + currentDate); + System.out.println("StatisticsProt.readDetails: Start date " + date1 + ": int representation = " + dateStart); + System.out.println("StatisticsProt.readDetails: End date " + date2 + ": int representation = " + dateEnd); + Calendar start = Calendar.getInstance(); + start.setTime(new Date(dateStart)); + Calendar end = Calendar.getInstance(); + end.setTime(new Date(dateEnd)); + query = new ArrayList(); + int day = 0; + /* + for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) { + SliceQuery result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(), + StringSerializer.get(), StringSerializer.get()); + result.setColumnFamily("ProteinData"); + result.setKey(date.getTime()); + result.setRange(null, null, false, Integer.MAX_VALUE); + QueryResult> columnSlice = result.execute(); + ++day; + System.out.print("Day " + day + ": dataStart = " + date + ": "); + if (!columnSlice.get().getColumns().isEmpty()) { + DataBase db = new DataBase(DateFormat(date.getTime()), 
columnSlice.get().getColumns().size()); + query.add(db); + System.out.println("data exist"); + } else { + System.out.println("no data"); + } + } + */ + System.out.println("StatisticsProt.readLength: total number of dates = " + query.size()); + return query; + } + + /* + * query: execution time for the period from date1 till date2 + * */ + public List readLength(String date1, String date2) { + if (!isThisDateValid(date1) || !isThisDateValid(date2)) { + System.out.println("Wrong date: point 3"); + return null; + } + SetDateRange(); + int nbins = 5; + long dateStart = DateParsing(date1); + long dateEnd = DateParsing(date2); + if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate) || dateStart > dateEnd) + return null; + if (dateStart < earlestDate) + dateStart = earlestDate; + if (dateEnd > currentDate) + dateStart = currentDate; + System.out.println("StatisticsProt.readLength: earlestDate = " + earlestDate + ", currentDate = " + currentDate); + System.out.println("StatisticsProt.readLength: Start date is " + date1 + ": int representation = " + dateStart); + System.out.println("StatisticsProt.readLength: End date is " + date2 + ": int representation = " + dateEnd); + Calendar start = Calendar.getInstance(); + start.setTime(new Date(dateStart)); + Calendar end = Calendar.getInstance(); + end.setTime(new Date(dateEnd)); + query = new ArrayList(); + List totalTime = new ArrayList(); + for (int i = 0; i < nbins; i++) + totalTime.add(i, 0); + /* + for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) { + List timeResult = new ArrayList(); + SliceQuery result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(), + StringSerializer.get(), StringSerializer.get()); + result.setColumnFamily("ProteinData"); + result.setKey(date.getTime()); + result.setRange(null, null, false, Integer.MAX_VALUE); + QueryResult> columnSlice = result.execute(); + List> col = 
columnSlice.get().getColumns(); + if (!col.isEmpty()) { + Iterator> itCol = col.iterator(); + for (int i = 0; i < nbins; i++) + timeResult.add(i, 0); + // split all jobs into nbins bins + while (itCol.hasNext()) { + String id = itCol.next().getName(); + long lenResult = CountID(id); + if (lenResult <= 30) + timeResult.set(0, timeResult.get(0) + 1); + else if (lenResult > 30 && lenResult <= 60) + timeResult.set(1, timeResult.get(1) + 1); + else if (lenResult > 60 && lenResult <= 120) + timeResult.set(2, timeResult.get(2) + 1); + else if (lenResult > 120 && lenResult <= 600) + timeResult.set(3, timeResult.get(3) + 1); + else { + timeResult.set(4, timeResult.get(4) + 1); + } + } + for (int i = 0; i < nbins; i++) + totalTime.set(i, totalTime.get(i) + timeResult.get(i)); + DataBase db = new DataBase(); + db.setTimeRez(timeResult); + db.setDate(DateFormat(date.getTime())); + query.add(db); + } + } + */ + DataBase db = new DataBase(); + db.setTimeTotalExec(totalTime); + query.add(db); + System.out.println("StatisticsProt.readLength: total number of dates = " + query.size()); + return query; + } + + /* + * query: protein sequence + * */ + public List readProteins(String protIn) { + query = new ArrayList(); + /* + SliceQuery result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(), + StringSerializer.get(), StringSerializer.get()); + result.setColumnFamily("ProteinRow"); + result.setKey(protIn); + result.setRange(null, null, false, Integer.MAX_VALUE); + QueryResult> columnSlice = result.execute(); + Iterator> it = columnSlice.get().getColumns().iterator(); + while (it.hasNext()) { + HColumn col = it.next(); + String name = col.getName(); + if (name.matches("(.*)jnetpred")) { + DataBase db = new DataBase(); + db.setProt(protIn); + db.setId(col.getName()); + db.setJpred(col.getValue()); + query.add(db); + } + } + */ + return query; + } + + /* + * query by a protein sequence + * */ + public List readProtID(int counter) { + query = new ArrayList(); + int 
row_count = 100; + /* + RangeSlicesQuery result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(), + StringSerializer.get(), StringSerializer.get()); + result.setColumnFamily("ProteinRow"); + result.setRange(null, null, false, 100); + result.setRowCount(row_count); + String last_key = null; + while (true) { + result.setKeys(last_key, null); + QueryResult> columnSlice = result.execute(); + OrderedRows rows = columnSlice.get(); + Iterator> rowsIterator = rows.iterator(); + while (rowsIterator.hasNext()) { + Row row = rowsIterator.next(); + last_key = row.getKey(); + List> clms = row.getColumnSlice().getColumns(); + //int npred = 0; + //for (HColumn cln : clms) { + // String name = cln.getName(); + // if (name.matches("(.*)jnetpred")) { + // ++npred; + // } + //} + int npred = clms.size(); + if (npred > counter) { + DataBase db = new DataBase(); + db.setProt(last_key); + db.setTotalId(npred); + query.add(db); + } + } + if (rows.getCount() < row_count) + break; + }*/ + return query; + } + + /* + * query by a part of sequence + * */ + public List readPart(String protIn) { + int row_count = 10000; + query = new ArrayList(); + /* + RangeSlicesQuery result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(), + StringSerializer.get(), StringSerializer.get()); + result.setColumnFamily("ProteinRow"); + result.setRange(null, null, false, Integer.MAX_VALUE); + result.setRowCount(row_count); + String last_key = null; + while (true) { + result.setKeys(last_key, null); + QueryResult> columnSlice = result.execute(); + OrderedRows rows = columnSlice.get(); + Iterator> rowsIterator = rows.iterator(); + while (rowsIterator.hasNext()) { + Row row = rowsIterator.next(); + last_key = row.getKey(); + if (last_key.matches("(.*)" + protIn + "(.*)")) { + Iterator> it = row.getColumnSlice().getColumns().iterator(); + while (it.hasNext()) { + HColumn col = it.next(); + List subProt = new ArrayList(); + String subStr = last_key; + while 
(subStr.length() > 0 && subStr.contains(protIn)) { + String first = subStr.substring(0, subStr.indexOf(protIn)); + if (first.length() > 0) + subProt.add(first); + subProt.add(protIn); + subStr = subStr.substring(subStr.indexOf(protIn) + protIn.length(), subStr.length()); + } + if (subStr.length() > 0) + subProt.add(subStr); + String name = col.getName(); + if (name.matches("(.*)jnetpred")) { + DataBase db = new DataBase(); + db.setProt(last_key); + db.setId(col.getName()); + db.setJpred(col.getValue()); + db.setSubProt(subProt); + query.add(db); + } + } + } + } + if (rows.getCount() < row_count) + break; + } + */ + return query; + } + + /* + * convert String date into long date (miliseconds since the epoch start) + */ + private static long DateParsing(String datInput) { + if (datInput == null) { + return 0; + } + long dateWorkSt = 0; + SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); + try { + dateWorkSt = formatter.parse(datInput).getTime(); + } catch (ParseException e) { + e.printStackTrace(); + } + return dateWorkSt; + } + + /* + * convert String date:time into long date:time (miliseconds since the epoch start) + */ + private static long TimeConvert(String datInput) { + long dateWorkSt = 0; + if (datInput == null) { + return dateWorkSt; + } + SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss"); + try { + dateWorkSt = formatter.parse(datInput).getTime(); + } catch (ParseException e) { + e.printStackTrace(); + } + return dateWorkSt; + } + + // convert long to date in string format + private static String DateFormat(long inDate) { + SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy"); + String dateString = datformat.format(new Date(inDate)); + return dateString; + } + + /* + * convert ??? + */ + public static String DateFormatYYMMDD(long indate) { + SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd"); + String dateString = datformat.format(new Date(indate)); + return dateString; + } + + /* + * ??? 
+ */ + public long CountID(String id) { + /* + SliceQuery sliceQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(), + StringSerializer.get(), StringSerializer.get()); + sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100); + QueryResult> result = sliceQuery.execute(); + String datBegin = result.get().getColumnByName("DataBegin").getValue(); + String datEnd = result.get().getColumnByName("DataEnd").getValue(); + + long datBeginLong = TimeConvert(datBegin); + long datEndLong = TimeConvert(datEnd); + return (datEndLong - datBeginLong) / 1000; + */ + return 0; + } + + /* + * set earlest date and current dates. + * earlestDate is static and should be set at the 1st call + * currentDate should be re-calculated every time + */ + private static void SetDateRange() { + if (0 == earlestDate) { + StatisticsProt sp = new StatisticsProt(); + earlestDate = sp.earliestDate(); + System.out.println("Set earlest Date = " + earlestDate); + } + Calendar cal = Calendar.getInstance(); + currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH)); + } + + public boolean isThisDateValid(String dateToValidate) { + if (dateToValidate == null || dateToValidate.equals("")) { + System.out.println("Undefined date"); + return false; + } + SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd"); + try { + // if not valid, this will throw ParseException + sdf.setLenient(false); + Date date = sdf.parse(dateToValidate); + } catch (ParseException e) { + e.printStackTrace(); + return false; + } + return true; + } + + /* + * find the earliest date in the database + */ + public long earliestDate() { + ArrayList dateSort = new ArrayList(); + int row_count = 10000; + /* + RangeSlicesQuery result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), LongSerializer.get(), + StringSerializer.get(), StringSerializer.get()); + result.setColumnFamily("ProteinData"); + result.setRange(null, null, 
false, Integer.MAX_VALUE); + result.setRowCount(row_count); + Long last_key = null; + while (true) { + result.setKeys(last_key, null); + QueryResult> columnSlice = result.execute(); + OrderedRows rows = columnSlice.get(); + Iterator> rowsIterator = rows.iterator(); + while (rowsIterator.hasNext()) { + Row row = rowsIterator.next(); + last_key = row.getKey(); + dateSort.add(last_key); + } + if (rows.getCount() < row_count) + break; + }*/ + Collections.sort(dateSort); + return dateSort.get(0); + } +} diff --git a/src/main/resources/META-INF/MANIFEST.MF b/src/main/resources/META-INF/MANIFEST.MF new file mode 100644 index 0000000..5e94951 --- /dev/null +++ b/src/main/resources/META-INF/MANIFEST.MF @@ -0,0 +1,3 @@ +Manifest-Version: 1.0 +Class-Path: + diff --git a/src/main/resources/doc/packages.odt b/src/main/resources/doc/packages.odt new file mode 100644 index 0000000..246b4bf Binary files /dev/null and b/src/main/resources/doc/packages.odt differ diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties new file mode 100644 index 0000000..d186883 --- /dev/null +++ b/src/main/resources/log4j.properties @@ -0,0 +1,66 @@ +## Uncomment to enable ProteoCache activity logging +## Valid log levels are: +## TRACE - log everything from below including very detailed messages (useful for debugging only) +## DEBUG - log everything from below including some minor events (useful for debugging only) +## INFO - log everything from below including some information messages +## WARN - log error and warnings +## ERROR - log errors and fatal events only +## FATAL - log fatal events only + +# To see debug messages in TldLocationsCache, uncomment the following line: +#org.apache.jasper.compiler.TldLocationsCache.level = FINE +org.apache.catalina.core.ContainerBase.[Catalina].[localhost].level = FINE + +## Uncomment this if you would like the system to log messages into stdout +#log4j.rootLogger=TRACE, rootfile 
+#log4j.appender.rootfile=org.apache.log4j.RollingFileAppender
+#log4j.appender.rootfile.File=JABAWSLogFilename.log
+#log4j.appender.rootfile.Target=System.out
+#log4j.appender.rootfile.layout=org.apache.log4j.PatternLayout
+#log4j.appender.rootfile.layout.ConversionPattern=%t %d %m%n
+
+## Uncomment to enable JWS2 activity logging to the file
+#logDir = .
+#log4j.logger.compbio=TRACE, ACTIVITY
+#log4j.appender.ACTIVITY=org.apache.log4j.RollingFileAppender
+################log4j.appender.ACTIVITY.File=${logDir}/activity.log
+#log4j.appender.ACTIVITY.File=${catalina.base}/logs/JABAWSLogFilename.log
+#log4j.appender.ACTIVITY.MaxFileSize=10MB
+#log4j.appender.ACTIVITY.MaxBackupIndex=10000
+#log4j.appender.ACTIVITY.layout=org.apache.log4j.PatternLayout
+#log4j.appender.ACTIVITY.layout.ConversionPattern=%d{MM-dd@HH:mm:ss} %-5p %3x - %m%n
+
+## Uncomment for separate local engine execution log (debugging only)
+#log4j.logger.compbio.engine.local.LocalExecutorService=INFO, C
+#log4j.appender.C=org.apache.log4j.FileAppender
+#log4j.appender.C.File=LocalExecutorService.log
+#log4j.appender.C.layout=org.apache.log4j.PatternLayout
+#log4j.appender.C.layout.ConversionPattern=%m%n
+
+##################################################################################################################################
+#log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+#log4j.appender.stdout.Target=System.out
+#log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+#log4j.appender.stdout.layout.ConversionPattern=%m%n
+#log4j.rootLogger=info, stdout
+
+log4j.rootLogger=ERROR, R
+log4j.appender.R=org.apache.log4j.FileAppender
+log4j.appender.R.File=${catalina.base}/logs/errors.log
+log4j.appender.R.layout=org.apache.log4j.PatternLayout
+log4j.appender.R.layout.ConversionPattern=%m%n %d{MM-dd@HH:mm:ss} %-5p (%13F:%L) %3x -
+
+# %d{ABSOLUTE} %5p %c{1}:%L -
+log4j.logger.compbio=DEBUG, B
+log4j.appender.B=org.apache.log4j.FileAppender 
+log4j.appender.B.File=${catalina.base}/logs/debugging.log +log4j.appender.B.layout=org.apache.log4j.PatternLayout +log4j.appender.B.layout.ConversionPattern=%m%n %d{MM-dd@HH:mm:ss} %-5p (%13F:%L) %3x - + +# %d{ABSOLUTE} %5p %c{1}:%L - +#log4j.logger.compbio.engine.local.LocalExecutorService=INFO, C +#log4j.appender.C=org.apache.log4j.FileAppender +#log4j.appender.C.File=LocalExecutorService.log +#log4j.appender.C.layout=org.apache.log4j.PatternLayout +#log4j.appender.C.layout.ConversionPattern=%m%n +# %d{MM-dd@HH:mm:ss} %-5p (%13F:%L) %3x - diff --git a/src/main/website/AllReports.jsp b/src/main/website/AllReports.jsp new file mode 100644 index 0000000..ea597a9 --- /dev/null +++ b/src/main/website/AllReports.jsp @@ -0,0 +1,11 @@ +<% + String query = request.getParameter("query"); + if (query.equals("date")) + response.sendRedirect("QueryJobsByDate.jsp"); + else if (query.equals("length")) + response.sendRedirect("QueryTimeExecition.jsp"); + else if (query.equals("protein")) + response.sendRedirect("QuerySequenceProtein.jsp"); + else if (query.equals("log")) + response.sendRedirect("QueryLogInfo.jsp"); +%> \ No newline at end of file diff --git a/src/main/website/Index.jsp b/src/main/website/Index.jsp new file mode 100644 index 0000000..56f3a6b --- /dev/null +++ b/src/main/website/Index.jsp @@ -0,0 +1,19 @@ +<%@ page language="java" contentType="text/html; charset=UTF-8" + pageEncoding="UTF-8"%> + + + + +Cassandra report + + +
+

Choose query type

+ Search by protein sequence
+ Search by date
+ Log information for current job
+ Usage statistics by job time execution
+ +
+ + \ No newline at end of file diff --git a/src/main/website/PrintResult.jsp b/src/main/website/PrintResult.jsp new file mode 100644 index 0000000..c2f2e81 --- /dev/null +++ b/src/main/website/PrintResult.jsp @@ -0,0 +1,21 @@ +<%@ page language="java" contentType="text/html; charset=UTF-8" + pageEncoding="UTF-8"%> + + + + +Report + + +

Report Results

+<% String rez = request.getAttribute("result").toString(); + String[] rezLines = rez.split(";"); + for (int i = 0; i < rezLines.length; i++) { +%> +<%= rezLines[i]+"
" %> +<% +} +%> +Click here to go Back + + \ No newline at end of file diff --git a/src/main/website/QueryJobsByDate.jsp b/src/main/website/QueryJobsByDate.jsp new file mode 100644 index 0000000..ae667fd --- /dev/null +++ b/src/main/website/QueryJobsByDate.jsp @@ -0,0 +1,24 @@ +<%@ page language="java" contentType="text/html; charset=UTF-8" + pageEncoding="UTF-8"%> + <%@page import="java.util.Calendar"%> + + + + + +Date period + + +
+

Enter time period

+ <% Calendar cal = Calendar.getInstance(); + String datecalBegin = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/1"; + String datecalEnd = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH); + %> +

from style=" width : 145px;"/> + to style=" width : 145px;"/>

+ Query for all dates
+ +
+ + \ No newline at end of file diff --git a/src/main/website/QueryLogInfo.jsp b/src/main/website/QueryLogInfo.jsp new file mode 100644 index 0000000..34f0f9f --- /dev/null +++ b/src/main/website/QueryLogInfo.jsp @@ -0,0 +1,17 @@ +<%@ page language="java" contentType="text/html; charset=UTF-8" + pageEncoding="UTF-8"%> + + + + +Log query + + +
+
+

Enter job ID

+
+

+
+ + \ No newline at end of file diff --git a/src/main/website/QuerySequenceProtein.jsp b/src/main/website/QuerySequenceProtein.jsp new file mode 100644 index 0000000..3d86bd6 --- /dev/null +++ b/src/main/website/QuerySequenceProtein.jsp @@ -0,0 +1,23 @@ +<%@ page language="java" contentType="text/html; charset=UTF-8" + pageEncoding="UTF-8"%> + + + + + +Sequence of protein + + +
+
+

Enter protein sequence

+

+ search whole
+ search part
+

+

Enter minimum number of jobs per protein

+
+

+
+ + diff --git a/src/main/website/QueryTimeExecition.jsp b/src/main/website/QueryTimeExecition.jsp new file mode 100644 index 0000000..aaf925f --- /dev/null +++ b/src/main/website/QueryTimeExecition.jsp @@ -0,0 +1,28 @@ +<%@ page language="java" contentType="text/html; charset=UTF-8" + pageEncoding="UTF-8"%> +<%@page import="java.util.Calendar"%> + + + + + +Date interval + + +
+

Enter time period

+ <%-- Build default period bounds for the form. NOTE: java.util.Calendar.MONTH
+      is zero-based, so +1 converts it to the human calendar month; without it
+      datecalBegin pointed at the previous month (datecalEnd already did this,
+      as does QueryJobsByDate.jsp). --%>
+ <%
+ Calendar cal = Calendar.getInstance();
+ String datecalBegin = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH);
+ String datecalEnd = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH);
+ %>
+

+ from + style="width: 145px;" /> to style="width: 145px;" /> +

+ Query for + all dates
+
+ + diff --git a/src/main/website/ReportJobsByDate.jsp b/src/main/website/ReportJobsByDate.jsp new file mode 100644 index 0000000..261a11b --- /dev/null +++ b/src/main/website/ReportJobsByDate.jsp @@ -0,0 +1,62 @@ +<%@page import="java.util.ArrayList"%> +<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%> +<%@ taglib uri="http://java.sun.com/jsp/jstl/functions" prefix="fn"%> +<%@ taglib uri="http://java.sun.com/jsp/jstl/fmt" prefix="fmt"%> +<%@ taglib uri="http://displaytag.sf.net" prefix="dt"%> + + +

Jobs statistics for the whole period

+
+ +

+ Jobs statistics for the Period: + + to + +

+
+
+

Time execution: ${timeExecution} ms

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DateNumber of Proteins
TotalFailedCancelledAbandoned
Total:${sum}000
${res.date}000
\ No newline at end of file diff --git a/src/main/website/ReportLogInfo.jsp b/src/main/website/ReportLogInfo.jsp new file mode 100644 index 0000000..97ec1f3 --- /dev/null +++ b/src/main/website/ReportLogInfo.jsp @@ -0,0 +1,24 @@ +<%@page import="java.util.ArrayList"%> +<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c" %> +<%@ taglib uri="http://java.sun.com/jsp/jstl/functions" prefix="fn" %> +<%@ taglib uri="http://java.sun.com/jsp/jstl/fmt" prefix="fmt" %> +<%@ taglib uri="http://displaytag.sf.net" prefix="dt" %> +

Jobs log information for: ${IdJob}

+

Time execution: ${timeExecution} ms

+

Date Start : ${result.logInfo.dateStart}
+ Date End : ${result.logInfo.dateEnd}
+ IP : ${result.logInfo.ip}

+ + + + + + + + + + + + + +
Sequence ${result.logInfo.sequence}
${pred.key} ${pred.value}
diff --git a/src/main/website/ReportSequenceProtein.jsp b/src/main/website/ReportSequenceProtein.jsp new file mode 100644 index 0000000..0459668 --- /dev/null +++ b/src/main/website/ReportSequenceProtein.jsp @@ -0,0 +1,76 @@ +<%@page import="java.util.ArrayList"%> +<%@ page trimDirectiveWhitespaces="true"%> +<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%> +<%@ taglib uri="http://java.sun.com/jsp/jstl/functions" prefix="fn"%> +<%@ taglib uri="http://java.sun.com/jsp/jstl/fmt" prefix="fmt"%> +<%@ taglib uri="http://displaytag.sf.net" prefix="dt"%> + + +

Dundee ProteoCache query result for jobs more than ${counter}

+
+ +

Dundee ProteoCache query result

+
+
+

Time execution: ${timeExecution} ms

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Number of jobsProtein
${res.totalId}${res.prot} +
IDPrediction
${res.id} + + + + ${seq} + + ${seq} + + +
diff --git a/src/main/website/ReportTimeExecution.jsp b/src/main/website/ReportTimeExecution.jsp new file mode 100644 index 0000000..e68dc64 --- /dev/null +++ b/src/main/website/ReportTimeExecution.jsp @@ -0,0 +1,68 @@ +<%@page import="java.util.ArrayList"%> +<%@ page trimDirectiveWhitespaces="true"%> +<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%> +<%@ taglib uri="http://java.sun.com/jsp/jstl/functions" prefix="fn"%> +<%@ taglib uri="http://java.sun.com/jsp/jstl/fmt" prefix="fmt"%> +<%@ taglib uri="http://displaytag.sf.net" prefix="dt"%> + + +

Time execution for the whole period

+
+ +

Time execution for the interval: ${data1} - ${data2}

+
+
+

Time execution: ${timeExecution} ms

+ + + + + + + + + + + + +

Total number of jobs: ${sum}

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 Dateless than 30 s30 s - 60 s1 min - 2 min2 min - 10 minmore than 10 min
Total:${total}
${res.date}${time}
diff --git a/src/main/website/StatisticsProtein.jsp b/src/main/website/StatisticsProtein.jsp new file mode 100644 index 0000000..06c7d72 --- /dev/null +++ b/src/main/website/StatisticsProtein.jsp @@ -0,0 +1,41 @@ +<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c" %> +<%@ taglib uri="http://java.sun.com/jsp/jstl/functions" prefix="fn" %> +<%@ taglib uri="http://java.sun.com/jsp/jstl/fmt" prefix="fmt" %> +<%@ taglib uri="http://displaytag.sf.net" prefix="dt" %> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DateNumber of Proteins
TotalFailedCancelledAbandoned
${res.date}000
Total:${sum}000
diff --git a/src/main/website/WEB-INF/classes/compbio/cassandra/CassandraNativeConnector.class b/src/main/website/WEB-INF/classes/compbio/cassandra/CassandraNativeConnector.class new file mode 100644 index 0000000..1d98043 Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/cassandra/CassandraNativeConnector.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/cassandra/CassandraNewTableWriter.class b/src/main/website/WEB-INF/classes/compbio/cassandra/CassandraNewTableWriter.class new file mode 100644 index 0000000..184a350 Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/cassandra/CassandraNewTableWriter.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/cassandra/CassandraReader.class b/src/main/website/WEB-INF/classes/compbio/cassandra/CassandraReader.class new file mode 100644 index 0000000..f4fde82 Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/cassandra/CassandraReader.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/cassandra/CassandraWriter.class b/src/main/website/WEB-INF/classes/compbio/cassandra/CassandraWriter.class new file mode 100644 index 0000000..47dc936 Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/cassandra/CassandraWriter.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/cassandra/DataBase.class b/src/main/website/WEB-INF/classes/compbio/cassandra/DataBase.class new file mode 100644 index 0000000..5d7478c Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/cassandra/DataBase.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/cassandra/FastaReader$1.class b/src/main/website/WEB-INF/classes/compbio/cassandra/FastaReader$1.class new file mode 100644 index 0000000..35bb7a5 Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/cassandra/FastaReader$1.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/cassandra/FastaReader.class 
b/src/main/website/WEB-INF/classes/compbio/cassandra/FastaReader.class new file mode 100644 index 0000000..fc9995b Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/cassandra/FastaReader.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/cassandra/FastaSequence.class b/src/main/website/WEB-INF/classes/compbio/cassandra/FastaSequence.class new file mode 100644 index 0000000..52ccb44 Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/cassandra/FastaSequence.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/cassandra/JpredParser.class b/src/main/website/WEB-INF/classes/compbio/cassandra/JpredParser.class new file mode 100644 index 0000000..54fd220 Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/cassandra/JpredParser.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/cassandra/JpredParserHTTP.class b/src/main/website/WEB-INF/classes/compbio/cassandra/JpredParserHTTP.class new file mode 100644 index 0000000..0237ce4 Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/cassandra/JpredParserHTTP.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/cassandra/JpredParserLocalFile.class b/src/main/website/WEB-INF/classes/compbio/cassandra/JpredParserLocalFile.class new file mode 100644 index 0000000..a50d47a Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/cassandra/JpredParserLocalFile.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/cassandra/Pair.class b/src/main/website/WEB-INF/classes/compbio/cassandra/Pair.class new file mode 100644 index 0000000..a3db949 Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/cassandra/Pair.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/cassandra/StructureJobLog.class b/src/main/website/WEB-INF/classes/compbio/cassandra/StructureJobLog.class new file mode 100644 index 0000000..5382acd Binary files /dev/null and 
b/src/main/website/WEB-INF/classes/compbio/cassandra/StructureJobLog.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/cassandra/StructureProteinPrediction.class b/src/main/website/WEB-INF/classes/compbio/cassandra/StructureProteinPrediction.class new file mode 100644 index 0000000..a12691f Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/cassandra/StructureProteinPrediction.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/cassandra/readers/CassandraReaderExecutionTime.class b/src/main/website/WEB-INF/classes/compbio/cassandra/readers/CassandraReaderExecutionTime.class new file mode 100644 index 0000000..caedb90 Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/cassandra/readers/CassandraReaderExecutionTime.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/engine/Job.class b/src/main/website/WEB-INF/classes/compbio/engine/Job.class new file mode 100644 index 0000000..db9bf65 Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/engine/Job.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/engine/JpredJob.class b/src/main/website/WEB-INF/classes/compbio/engine/JpredJob.class new file mode 100644 index 0000000..c139ac5 Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/engine/JpredJob.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/engine/ProteoCachePropertyHelperManager.class b/src/main/website/WEB-INF/classes/compbio/engine/ProteoCachePropertyHelperManager.class new file mode 100644 index 0000000..d51dd42 Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/engine/ProteoCachePropertyHelperManager.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/engine/archive/Archive.class b/src/main/website/WEB-INF/classes/compbio/engine/archive/Archive.class new file mode 100644 index 0000000..450c569 Binary files /dev/null and 
b/src/main/website/WEB-INF/classes/compbio/engine/archive/Archive.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/engine/archive/ArchiveManager.class b/src/main/website/WEB-INF/classes/compbio/engine/archive/ArchiveManager.class new file mode 100644 index 0000000..6b71816 Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/engine/archive/ArchiveManager.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/engine/archive/ArchivedJob.class b/src/main/website/WEB-INF/classes/compbio/engine/archive/ArchivedJob.class new file mode 100644 index 0000000..40fb97e Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/engine/archive/ArchivedJob.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/listeners/ContextListener$1.class b/src/main/website/WEB-INF/classes/compbio/listeners/ContextListener$1.class new file mode 100644 index 0000000..4df8088 Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/listeners/ContextListener$1.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/listeners/ContextListener$2.class b/src/main/website/WEB-INF/classes/compbio/listeners/ContextListener$2.class new file mode 100644 index 0000000..4b2dd66 Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/listeners/ContextListener$2.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/listeners/ContextListener$3.class b/src/main/website/WEB-INF/classes/compbio/listeners/ContextListener$3.class new file mode 100644 index 0000000..47d0065 Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/listeners/ContextListener$3.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/listeners/ContextListener.class b/src/main/website/WEB-INF/classes/compbio/listeners/ContextListener.class new file mode 100644 index 0000000..2aa7afb Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/listeners/ContextListener.class differ 
diff --git a/src/main/website/WEB-INF/classes/compbio/listeners/DetailList.class b/src/main/website/WEB-INF/classes/compbio/listeners/DetailList.class new file mode 100644 index 0000000..f09bc6d Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/listeners/DetailList.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/listeners/ServletJobsByDate.class b/src/main/website/WEB-INF/classes/compbio/listeners/ServletJobsByDate.class new file mode 100644 index 0000000..553aec6 Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/listeners/ServletJobsByDate.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/listeners/ServletLogInfo.class b/src/main/website/WEB-INF/classes/compbio/listeners/ServletLogInfo.class new file mode 100644 index 0000000..9101cf4 Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/listeners/ServletLogInfo.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/listeners/ServletSequenceProtein.class b/src/main/website/WEB-INF/classes/compbio/listeners/ServletSequenceProtein.class new file mode 100644 index 0000000..28d5a4a Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/listeners/ServletSequenceProtein.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/listeners/ServletTimeExecution.class b/src/main/website/WEB-INF/classes/compbio/listeners/ServletTimeExecution.class new file mode 100644 index 0000000..eaafb8b Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/listeners/ServletTimeExecution.class differ diff --git a/src/main/website/WEB-INF/classes/compbio/statistic/CassandraRequester.class b/src/main/website/WEB-INF/classes/compbio/statistic/CassandraRequester.class new file mode 100644 index 0000000..9bba9ac Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/statistic/CassandraRequester.class differ diff --git 
a/src/main/website/WEB-INF/classes/compbio/statistic/StatisticsProt.class b/src/main/website/WEB-INF/classes/compbio/statistic/StatisticsProt.class new file mode 100644 index 0000000..d8a9117 Binary files /dev/null and b/src/main/website/WEB-INF/classes/compbio/statistic/StatisticsProt.class differ diff --git a/src/main/website/WEB-INF/classes/lib/testng-5.10-jdk15.jar b/src/main/website/WEB-INF/classes/lib/testng-5.10-jdk15.jar new file mode 100644 index 0000000..67463a7 Binary files /dev/null and b/src/main/website/WEB-INF/classes/lib/testng-5.10-jdk15.jar differ diff --git a/src/main/website/WEB-INF/lib/cassandra-driver-core-2.0.0-beta3-SNAPSHOT-javadoc.jar b/src/main/website/WEB-INF/lib/cassandra-driver-core-2.0.0-beta3-SNAPSHOT-javadoc.jar new file mode 100644 index 0000000..15e8d71 Binary files /dev/null and b/src/main/website/WEB-INF/lib/cassandra-driver-core-2.0.0-beta3-SNAPSHOT-javadoc.jar differ diff --git a/src/main/website/WEB-INF/lib/cassandra-driver-core-2.0.0-beta3-SNAPSHOT-sources.jar b/src/main/website/WEB-INF/lib/cassandra-driver-core-2.0.0-beta3-SNAPSHOT-sources.jar new file mode 100644 index 0000000..d6ed818 Binary files /dev/null and b/src/main/website/WEB-INF/lib/cassandra-driver-core-2.0.0-beta3-SNAPSHOT-sources.jar differ diff --git a/src/main/website/WEB-INF/lib/cassandra-driver-core-2.0.0-beta3-SNAPSHOT.jar b/src/main/website/WEB-INF/lib/cassandra-driver-core-2.0.0-beta3-SNAPSHOT.jar new file mode 100644 index 0000000..fa7cac0 Binary files /dev/null and b/src/main/website/WEB-INF/lib/cassandra-driver-core-2.0.0-beta3-SNAPSHOT.jar differ diff --git a/src/main/website/WEB-INF/lib/displaytag-1.2.jar b/src/main/website/WEB-INF/lib/displaytag-1.2.jar new file mode 100644 index 0000000..4f379bd Binary files /dev/null and b/src/main/website/WEB-INF/lib/displaytag-1.2.jar differ diff --git a/src/main/website/WEB-INF/lib/guava-15.0.jar b/src/main/website/WEB-INF/lib/guava-15.0.jar new file mode 100644 index 0000000..eb9ef8a Binary files /dev/null 
and b/src/main/website/WEB-INF/lib/guava-15.0.jar differ diff --git a/src/main/website/WEB-INF/lib/jabaws-core-2.1.0.jar b/src/main/website/WEB-INF/lib/jabaws-core-2.1.0.jar new file mode 100644 index 0000000..ad5d733 Binary files /dev/null and b/src/main/website/WEB-INF/lib/jabaws-core-2.1.0.jar differ diff --git a/src/main/website/WEB-INF/lib/jabaws-core-src-2.1.0.jar b/src/main/website/WEB-INF/lib/jabaws-core-src-2.1.0.jar new file mode 100644 index 0000000..7c6cdb2 Binary files /dev/null and b/src/main/website/WEB-INF/lib/jabaws-core-src-2.1.0.jar differ diff --git a/src/main/website/WEB-INF/lib/jackson-core-asl-1.9.13.jar b/src/main/website/WEB-INF/lib/jackson-core-asl-1.9.13.jar new file mode 100644 index 0000000..bb4fe1d Binary files /dev/null and b/src/main/website/WEB-INF/lib/jackson-core-asl-1.9.13.jar differ diff --git a/src/main/website/WEB-INF/lib/jackson-mapper-asl-1.9.13.jar b/src/main/website/WEB-INF/lib/jackson-mapper-asl-1.9.13.jar new file mode 100644 index 0000000..0f2073f Binary files /dev/null and b/src/main/website/WEB-INF/lib/jackson-mapper-asl-1.9.13.jar differ diff --git a/src/main/website/WEB-INF/lib/jstl-1.2.jar b/src/main/website/WEB-INF/lib/jstl-1.2.jar new file mode 100644 index 0000000..0fd275e Binary files /dev/null and b/src/main/website/WEB-INF/lib/jstl-1.2.jar differ diff --git a/src/main/website/WEB-INF/lib/log4j-1.2.16.jar b/src/main/website/WEB-INF/lib/log4j-1.2.16.jar new file mode 100644 index 0000000..5429a90 Binary files /dev/null and b/src/main/website/WEB-INF/lib/log4j-1.2.16.jar differ diff --git a/src/main/website/WEB-INF/lib/lz4-1.2.0.jar b/src/main/website/WEB-INF/lib/lz4-1.2.0.jar new file mode 100644 index 0000000..9cdebd2 Binary files /dev/null and b/src/main/website/WEB-INF/lib/lz4-1.2.0.jar differ diff --git a/src/main/website/WEB-INF/lib/metrics-core-3.0.1.jar b/src/main/website/WEB-INF/lib/metrics-core-3.0.1.jar new file mode 100644 index 0000000..9b50dad Binary files /dev/null and 
b/src/main/website/WEB-INF/lib/metrics-core-3.0.1.jar differ diff --git a/src/main/website/WEB-INF/lib/netty-3.6.6.Final.jar b/src/main/website/WEB-INF/lib/netty-3.6.6.Final.jar new file mode 100644 index 0000000..35cb073 Binary files /dev/null and b/src/main/website/WEB-INF/lib/netty-3.6.6.Final.jar differ diff --git a/src/main/website/WEB-INF/lib/slf4j-api-1.7.5.jar b/src/main/website/WEB-INF/lib/slf4j-api-1.7.5.jar new file mode 100644 index 0000000..8766455 Binary files /dev/null and b/src/main/website/WEB-INF/lib/slf4j-api-1.7.5.jar differ diff --git a/src/main/website/WEB-INF/lib/slf4j-simple-1.7.5.jar b/src/main/website/WEB-INF/lib/slf4j-simple-1.7.5.jar new file mode 100644 index 0000000..9dece31 Binary files /dev/null and b/src/main/website/WEB-INF/lib/slf4j-simple-1.7.5.jar differ diff --git a/src/main/website/WEB-INF/lib/snappy-java-1.1.0.jar b/src/main/website/WEB-INF/lib/snappy-java-1.1.0.jar new file mode 100644 index 0000000..f7660c4 Binary files /dev/null and b/src/main/website/WEB-INF/lib/snappy-java-1.1.0.jar differ diff --git a/src/main/website/WEB-INF/web.xml b/src/main/website/WEB-INF/web.xml new file mode 100644 index 0000000..242a900 --- /dev/null +++ b/src/main/website/WEB-INF/web.xml @@ -0,0 +1,360 @@ + + +ProteoCache +ProteoCache - Results repository for the Dundee Resource + + + Index.jsp + + + + + compbio.listeners.ContextListener + + + + + + 20 + + + + + This is a standard tomcat 'default' servlet for making listings + listings + org.apache.catalina.servlets.DefaultServlet + + debug + 0 + + + readonly + true + + + listings + true + + 1 + + + + + + + + + + + + + + + + Administrator pages + + /DisplayStat + /Joblist + /AnnualStat + /conf/* + /binaries/* + /testsrc/* + /lib/* + + + + + admin + + + + + + + + unsupported HTTP methods + /* + PUT + DELETE + TRACE + OPTIONS + + + + + + + BASIC + ProteCache administrators + + + + + + The role that is required to log in and view ProteCache internals + admin + + + diff --git 
a/src/main/website/conf/Engine.properties b/src/main/website/conf/Engine.properties new file mode 100644 index 0000000..dd2a0db --- /dev/null +++ b/src/main/website/conf/Engine.properties @@ -0,0 +1,38 @@ +################################################################################# +# enable or disable engine true | false +# OPTIONAL defaults to true +engine.local.enable=true + +################################################################################# +# Directory to use for temporary files storage +# OPTIONAL defaults to java temporary directory +# Relative path within the project will be converted to absolute at runtime +#local.tmp.directory=/cluster/gjb_lab/fc/www-jws2/jaba/local_jobsout +local.tmp.directory=jobsout + +################################################################################# +# Number of threads for tasks execution (valid values between 1 and 2x cpu, +# where x is the number of cores available in the system) +# OPTIONAL defaults to the number of cores for core number <=4 and +# number of cores-1 for greater core numbers +engine.local.thread.number=2 + +################################################################################# +# Enable/disable cluster statistics collector +local.stat.collector.enable=true +# Frequency of statistics collecting (in minutes) +# normal rate: once a day = 24 * 60 = 1440 +local.stat.collector.update.frequency=1 + +# Maximum amount of time the job is considered running in hours +# Optional defaults to 24 hours +local.stat.maxruntime=6 + +################################################################################# +# Maximum amount of time the job directory is living (in hours), +# -1 means the directories are never deleted +# Default is one week, i.e. 
168 hours +local.jobdir.maxlifespan=168 +# Frequency of cleaning job directory (in minutes) +# normal rate: once a day = 24 * 60 = 1440 +local.jobdir.cleaning.frequency=1 diff --git a/src/main/website/conf/GA.properties b/src/main/website/conf/GA.properties new file mode 100644 index 0000000..904765c --- /dev/null +++ b/src/main/website/conf/GA.properties @@ -0,0 +1,19 @@ +# This file contains settings for Google Analytics (GA) Statistics Engine. +# Feel free to remove/delete this file if you do not want us to see that you +# use ProteoCache. +# However, we would appreciate it greatly if you could leave it on. ProteoCache +# usage statistics are collected for funding and reporting purposes, and no +# private information is collected. The data sent by ProteoCache is as follows: +# 1) The IP address of the ProteoCache server machine. +# 2) The name of the web service that was called. +# 3) A few details of the system such as java version, user language, +# color depth, screen resolution and character encoding. 

# All calls to GA are completed asynchronously and create very little +# overhead. Thus they do not influence the server response time or performance. 
+ +# Values yes or true = enable GA, any other value will disable it +enable.ga=no +# If set to yes, replaces your server name/IP address with 127.0.0.1 +anonymize.ip=no + diff --git a/src/main/website/conf/Proteocache.properties b/src/main/website/conf/Proteocache.properties new file mode 100644 index 0000000..ae10be4 --- /dev/null +++ b/src/main/website/conf/Proteocache.properties @@ -0,0 +1,28 @@ +################################################################################# +# Cassandra host or IP +# test server is 10.0.115.190 +cassandra.host=localhost + +cassandra.newtables.update=false + +################################################################################# +# Jpred sources +# real Jpred web-server +cassandra.jpred.web.update=true +cassandra.jpred.web.inidelay=0 +cassandra.jpred.web.updaterate=30 + +# update time period (in days) +# by default for the past 100 days +cassandra.jpred.web.period=5 + + +################################################################################# +# local test job source +cassandra.jpred.local.update=false +cassandra.jpred.local.inidelay=10 +cassandra.jpred.local.updaterate=200 + +# update time period (in days) +# by default for the past 100 days +cassandra.jpred.local.period=300