From: Sasha Sherstnev Date: Fri, 8 Nov 2013 13:59:53 +0000 (+0000) Subject: Merge branch 'master' into servlets X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=27060ede81676b3e84f13e2ea2f25836d8c3f6fa;p=proteocache.git Merge branch 'master' into servlets Conflicts: .classpath datadb/compbio/cassandra/CassandraNativeConnector.java datadb/compbio/cassandra/JpredParserHTTP.java datadb/compbio/cassandra/JpredParserLocalFile.java server/compbio/listeners/ContextListener.java server/compbio/statistic/CassandraRequester.java website/QuerySequenceProtein.jsp website/ReportLength.jsp website/ReportNew.jsp --- 27060ede81676b3e84f13e2ea2f25836d8c3f6fa diff --cc .classpath index 46c02bc,394889c..d69819a --- a/.classpath +++ b/.classpath @@@ -15,7 -15,7 +15,8 @@@ + + diff --cc datadb/compbio/cassandra/CassandraNativeConnector.java index 5593594,7109c78..6e6992d --- a/datadb/compbio/cassandra/CassandraNativeConnector.java +++ b/datadb/compbio/cassandra/CassandraNativeConnector.java @@@ -2,62 -2,63 +2,103 @@@ package compbio.cassandra import java.io.IOException; import java.util.Calendar; +import java.util.HashMap; import java.util.List; import java.util.ArrayList; +import java.util.Map; + ++import org.apache.log4j.Logger; + import com.datastax.driver.core.Cluster; import com.datastax.driver.core.Host; import com.datastax.driver.core.Metadata; import com.datastax.driver.core.Row; import com.datastax.driver.core.Session; import com.datastax.driver.core.ResultSet; ++import com.datastax.driver.core.PreparedStatement; ++import com.datastax.driver.core.BoundStatement; ++ ++import compbio.engine.ProteoCachePropertyHelperManager; ++import compbio.util.PropertyHelper; ++import compbio.util.Util; public class CassandraNativeConnector { private static Cluster cluster; private static Session session; ++ private static final PropertyHelper ph = ProteoCachePropertyHelperManager.getPropertyHelper(); ++ private static Logger log = Logger.getLogger(CassandraNativeConnector.class); ++ ++ public static String CASSANDRA_HOSTNAME = "localhost"; ++ public static boolean READ_WEB_JPRED = false; ++ public static boolean READ_LOCALFILE_JPRED = false; ++ ++ private static boolean initBooleanValue(String key) { ++ assert key != null; ++ String status = ph.getProperty(key); ++ log.debug("Loading property: " + key + " with value: " + status); ++ if (Util.isEmpty(status)) { ++ return false; ++ } ++ return new Boolean(status.trim()).booleanValue(); ++ } + /* -- * connect to the cluster and look weather the dababase has any data inside ++ * connect to the cluster and look whether all tables exist */ public void Connect() { -- // local cassandra cluster -- cluster = Cluster.builder().addContactPoint("localhost").build(); -- // distributed cassandra cluster -- /* cluster = Cluster.builder().addContactPoint("10.0.115.190").build(); */ ++ ++ String cassandrahostname = ph.getProperty("cassandra.host"); ++ if (null != cassandrahostname) { ++ CASSANDRA_HOSTNAME = cassandrahostname; ++ } ++ READ_WEB_JPRED = initBooleanValue("cassandra.jpred.web"); ++ READ_LOCALFILE_JPRED = initBooleanValue("cassandra.jpred.local"); ++ ++ cluster = Cluster.builder().addContactPoint(CASSANDRA_HOSTNAME).build(); ++ Metadata metadata = cluster.getMetadata(); System.out.printf("Connected to cluster: %s\n", metadata.getClusterName()); for (Host host : metadata.getAllHosts()) { System.out.printf("Datatacenter: %s; Host: %s; Rack: %s\n", host.getDatacenter(), host.getAddress(), host.getRack()); } -- session = cluster.connect(); ++ CreateTables(); ++ System.out.println("Cassandra connected"); ++ } ++ ++ private void CreateTables() { session.execute("CREATE KEYSPACE IF NOT EXISTS ProteinKeyspace WITH replication = {'class':'SimpleStrategy', 'replication_factor':3};"); -- session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinRow (Protein ascii, JobID ascii, Predictions map, PRIMARY KEY(JobID));"); -- session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinLog " -- + "(JobID ascii, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, ExecutionStatus ascii, Protein ascii, PRIMARY KEY(JobID));"); -- session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinData (jobtime bigint, JobID ascii, Protein ascii, PRIMARY KEY(JobID));"); - session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.JpredArchive " + - "(JobID ascii, Protein varchar, IP ascii, StartTime bigint, ExecTime int, alignment map, predictions map, archive blob, LOG varchar, PRIMARY KEY(JobID));"); ++ session.execute("USE ProteinKeyspace"); -- session.execute("CREATE INDEX IF NOT EXISTS ProteinSeq ON ProteinKeyspace.ProteinRow (protein);"); -- session.execute("CREATE INDEX IF NOT EXISTS JobDateStamp ON ProteinKeyspace.ProteinData (jobtime);"); ++ session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinRow " ++ + "(Protein ascii, JobID ascii, Predictions map, PRIMARY KEY(JobID));"); ++ session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinLog " ++ + "(JobID ascii, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, " ++ + "ExecutionStatus ascii, Protein ascii, PRIMARY KEY(JobID));"); ++ session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinData " ++ + "(jobtime bigint, JobID ascii, Protein ascii, PRIMARY KEY(JobID));"); ++ session.execute("CREATE COLUMNFAMILY IF NOT EXISTS JpredArchive " ++ + "(JobID ascii, Protein varchar, IP ascii, StartTime bigint, ExecTime int, alignment map, " ++ + "predictions map, archive blob, LOG varchar, PRIMARY KEY(JobID));"); -- System.out.println("Cassandra connected"); ++ session.execute("CREATE INDEX IF NOT EXISTS ProteinSeq ON ProteinRow (protein);"); ++ session.execute("CREATE INDEX IF NOT EXISTS JobDateStamp ON ProteinData (jobtime);"); } /* * parsing data source and filling the database */ public void Parsing() throws IOException { -- if (true) { ++ if (READ_WEB_JPRED) { // if (source.equals("http")) { // get data from real Jpred production server System.out.println("Parsing web data source......"); String datasrc = "http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat"; String prefix = "http://www.compbio.dundee.ac.uk/www-jpred/results"; JpredParserHTTP parser = new JpredParserHTTP(prefix); -- parser.Parsing(datasrc, 4); ++ parser.Parsing(datasrc, 5); } -- if (false) { ++ if (READ_LOCALFILE_JPRED) { // if (source.equals("file")) { // get irtifical data generated for the DB stress tests System.out.println("Parsing local file data source......"); @@@ -74,21 -75,24 +115,37 @@@ System.out.println("Cassandra has been shut down"); } - /* - * inserting data into the db - */ - public void FormQueryTables(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx, String statusFinal, - String protein, List predictions) { ++ public boolean JobisNotInsterted(String jobid) { ++ ResultSet results1 = session.execute("SELECT * FROM ProteinLog WHERE JobID = '" + jobid + "';"); ++ if (results1.isExhausted()) { ++ return true; ++ } ++ return false; ++ } + - String check1 = "SELECT * FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';"; - ResultSet results1 = session.execute(check1); ++ public boolean JobisNotArchived(String jobid) { ++ ResultSet results1 = session.execute("SELECT * FROM JpredArchive WHERE JobID = '" + jobid + "';"); + if (results1.isExhausted()) { - String com1 = "INSERT INTO ProteinKeyspace.ProteinLog " - + "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)" + " VALUES ('" + jobid + "','" + ip + "','" - + startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx + "','" + protein + "');"; - session.execute(com1); ++ return true; ++ } ++ return false; ++ } + - String com2 = "INSERT INTO ProteinKeyspace.ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid + /* - * inserting data into the db ++ * inserting data into the tables for queries + */ - public void InsertData(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx, String statusFinal, - String protein, List predictions) { - String check1 = "SELECT * FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';"; - ResultSet results1 = session.execute(check1); - if (results1.isExhausted()) { - String com1 = "INSERT INTO ProteinKeyspace.ProteinLog " - + "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)" + " VALUES ('" + jobid + "','" + ip + "','" - + startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx + "','" + protein + "');"; - session.execute(com1); - String com2 = "INSERT INTO ProteinKeyspace.ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid ++ public int FormQueryTables(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx, ++ String statusFinal, String protein, List predictions) { ++ if (JobisNotInsterted(jobid)) { ++ String com1 = "INSERT INTO ProteinLog " + "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)" ++ + " VALUES ('" + jobid + "','" + ip + "','" + startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx + "','" + protein + "');"; ++ session.execute(com1); ++ ++ String com2 = "INSERT INTO ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid + "','" + protein ++ + "');"; session.execute(com2); + String allpredictions = ""; for (FastaSequence pred : predictions) { String predictionname = pred.getId(); @@@ -99,23 -103,61 +156,55 @@@ if (null != allpredictions) { final_prediction = allpredictions.substring(0, allpredictions.length() - 1); } - String check2 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "' ALLOW FILTERING;"; + - String check2 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "';"; ++ String check2 = "SELECT * FROM ProteinRow WHERE JobID = '" + jobid + "';"; ResultSet results2 = session.execute(check2); if (results2.isExhausted()) { - String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('" - + protein + "','" + jobid + "',{" + final_prediction + "});"; - String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('" + protein + "','" - + jobid + "',{" + final_prediction + "});"; ++ String com3 = "INSERT INTO ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('" + protein + "','" + jobid + "',{" ++ + final_prediction + "});"; session.execute(com3); } - String check3 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "';"; ++ return 1; } ++ return 0; + } + - public void ArchiveData(long starttime, int exectime, String ip, String jobid, String statusEx, String statusFinal, - String protein, List predictions, List seqs, String LogFile) { - - String check1 = "SELECT * FROM ProteinKeyspace.JpredArchive WHERE JobID = '" + jobid + "';"; - ResultSet results1 = session.execute(check1); - if (results1.isExhausted()) { - String allpredictions = ""; - for (FastaSequence pred : predictions) { - String predictionname = pred.getId(); - String prediction = pred.getSequence().replaceAll("\n", ""); - allpredictions += "'" + predictionname + "':'" + prediction + "',"; - } - String final_allpredictions = ""; - if (null != allpredictions) { - final_allpredictions = allpredictions.substring(0, allpredictions.length() - 1); ++ /* ++ * insert data from a real Jpred job: timing+IP, Execution Status, Final ++ * status, protein sequence, predictions, alignment, LOG and tar.gz files ++ */ ++ public int ArchiveData(long starttime, long exectime, String ip, String jobid, String statusEx, String statusFinal, String protein, ++ List predictions, List seqs, String LogFile, String archivepath) { ++ if (JobisNotArchived(jobid)) { ++ String log = LogFile.replaceAll("'", ""); ++ session.execute("INSERT INTO JpredArchive (JobID, Protein, IP, StartTime, ExecTime,LOG) VALUES ('" + jobid + "','" + protein ++ + "','" + ip + "'," + starttime + "," + exectime + ",'" + log + "');"); ++ if (false) { ++ PreparedStatement statement = session.prepare("INSERT INTO JpredArchive (JobID, archive) VALUES (?,?);"); ++ BoundStatement boundStatement = new BoundStatement(statement); ++ session.execute(boundStatement.bind(jobid, archivepath)); + } - String alignment = ""; - for (FastaSequence seq : seqs) { - String predictionname = seq.getId(); - String prediction = seq.getSequence().replaceAll("\n", ""); - alignment += "'" + predictionname + "':'" + prediction + "',"; ++ ++ for (FastaSequence p : predictions) { ++ session.execute("UPDATE JpredArchive SET predictions = predictions + {'" + p.getId() + "':'" ++ + p.getSequence().replaceAll("\n", "") + "'} WHERE JobID = '" + jobid + "';"); + } - String final_alignment = ""; - if (null != allpredictions) { - final_alignment = alignment.substring(0, allpredictions.length() - 1); ++ ++ for (FastaSequence s : seqs) { ++ session.execute("UPDATE JpredArchive SET alignment = alignment + {'" + s.getId() + "':'" ++ + s.getSequence().replaceAll("\n", "") + "'} WHERE JobID = '" + jobid + "';"); + } - - String com1 = "INSERT INTO ProteinKeyspace.JpredArchive " - + "(JobID, Protein, IP, StartTime, ExecTime, alignment, predictions, LOG))" - + " VALUES ('" - + jobid + "','" + protein + "','" + ip + "'," + starttime + "," + exectime - + "',[" + final_allpredictions + "],[" + final_alignment + "],'" + LogFile + "]);"; - session.execute(com1); ++ return 1; + } ++ return 0; } - - /* * getting data from the db */ public List> ReadProteinDataTable() { final long startTime = System.currentTimeMillis(); -- String com = "SELECT DataBegin,DataEnd FROM ProteinKeyspace.ProteinLog;"; ++ String com = "SELECT DataBegin,DataEnd FROM ProteinLog;"; System.out.println("Command: " + com); ResultSet results = session.execute(com); final long queryTime = System.currentTimeMillis(); @@@ -130,146 -172,20 +219,149 @@@ ++c; } final long endTime = System.currentTimeMillis(); - System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); + System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); return res; } - + /* + * getting data from the db ProteinData + */ + public Integer ReadDateTable(long queryDate) { + final long startTime = System.currentTimeMillis(); - String com = "SELECT jobtime, JobID FROM ProteinKeyspace.ProteinData WHERE jobtime = " + queryDate + ";"; ++ String com = "SELECT jobtime, JobID FROM ProteinData WHERE jobtime = " + queryDate + ";"; + System.out.println("Command: " + com); + ResultSet results = session.execute(com); - if (results.isExhausted()) - return null; + final long queryTime = System.currentTimeMillis(); ++ System.out.println("Query time is " + (queryTime - startTime) + " msec"); ++ if (results.isExhausted()) ++ return 0; + List rows = results.all(); - System.out.println ("Query time is " + (queryTime - startTime) + " msec"); ++ final long endTime = System.currentTimeMillis(); ++ System.out.println("Processing time is " + (endTime - queryTime) + " msec"); + return rows.size(); + } + + /* + * getting whole protein sequence from the db ProteinRow + */ + public List ReadWholeSequence(String queryProtein) { + final long startTime = System.currentTimeMillis(); - String com = "SELECT JobID, Predictions FROM ProteinKeyspace.ProteinRow WHERE Protein = '" + queryProtein + "';"; ++ String com = "SELECT JobID, Predictions FROM ProteinRow WHERE Protein = '" + queryProtein + "';"; + System.out.println("Command: " + com); + ResultSet results = session.execute(com); + if (results.isExhausted()) + return null; + final long queryTime = System.currentTimeMillis(); + List rows = results.all(); - System.out.println ("Query time is " + (queryTime - startTime) + " msec"); - System.out.println (" rows analysed, " + rows.size()); ++ System.out.println("Query time is " + (queryTime - startTime) + " msec"); ++ System.out.println(" rows analysed, " + rows.size()); + List res = new ArrayList(); + int c = 0; + for (Row r : rows) { - StructureProteinPrediction structure = new StructureProteinPrediction(queryProtein, r.getString("JobID"), r.getMap("Predictions", String.class, String.class)); ++ StructureProteinPrediction structure = new StructureProteinPrediction(queryProtein, r.getString("JobID"), r.getMap( ++ "Predictions", String.class, String.class)); + res.add(structure); + ++c; + } + final long endTime = System.currentTimeMillis(); - System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); ++ System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); + return res; + } - ++ + /* + * getting part of protein sequence from the db ProteinRow + */ - public List ReadPartOfSequence(String queryProtein) { ++ public List ReadPartOfSequence(String queryProtein) { + final long startTime = System.currentTimeMillis(); - String com = "SELECT * FROM ProteinKeyspace.ProteinRow;"; ++ String com = "SELECT * FROM ProteinRow;"; + System.out.println("Command: " + com); + ResultSet results = session.execute(com); + if (results.isExhausted()) + return null; + final long queryTime = System.currentTimeMillis(); + List rows = results.all(); - System.out.println ("Query time is " + (queryTime - startTime) + " msec"); - System.out.println (" rows analysed, " + rows.size()); - List res = new ArrayList(); ++ System.out.println("Query time is " + (queryTime - startTime) + " msec"); ++ System.out.println(" rows analysed, " + rows.size()); ++ List res = new ArrayList(); + int c = 0; + for (Row r : rows) { + String prot = r.getString("Protein"); + if (prot.matches("(.*)" + queryProtein + "(.*)")) { - // System.out.println(prot); - StructureProteinPrediction structure = new StructureProteinPrediction(prot, r.getString("JobID"), r.getMap("Predictions", String.class, String.class)); ++ StructureProteinPrediction structure = new StructureProteinPrediction(prot, r.getString("JobID"), r.getMap("Predictions", ++ String.class, String.class)); + res.add(structure); + ++c; + } + } + final long endTime = System.currentTimeMillis(); - System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); ++ System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); + return res; + } - ++ + /* + * getting protein sequences by counter + */ - public Map ReadProteinDataByCounter() { ++ public Map ReadProteinDataByCounter() { + final long startTime = System.currentTimeMillis(); - String com = "SELECT Protein FROM ProteinKeyspace.ProteinRow;"; ++ String com = "SELECT Protein FROM ProteinRow;"; + System.out.println("Command: " + com); + ResultSet results = session.execute(com); + if (results.isExhausted()) + return null; + final long queryTime = System.currentTimeMillis(); + List rows = results.all(); - System.out.println ("Query time is " + (queryTime - startTime) + " msec"); - System.out.println (" rows analysed, " + rows.size()); ++ System.out.println("Query time is " + (queryTime - startTime) + " msec"); ++ System.out.println(" rows analysed, " + rows.size()); + Map res = new HashMap(); + int c = 0; + for (Row r : rows) { + String protein = r.getString("Protein"); - if (res.containsKey(protein)) ++ if (res.containsKey(protein)) + res.put(protein, res.get(protein) + 1); + else + res.put(protein, 1); + } + final long endTime = System.currentTimeMillis(); - System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); ++ System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); + return res; + } - - ++ + /* + * getting protein sequences by counter + */ + public StructureJobLog ReadJobLog(String jobid) { + final long startTime = System.currentTimeMillis(); + String com = "SELECT * FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';"; + System.out.println("Command: " + com); + ResultSet results = session.execute(com); + if (results.isExhausted()) + return null; + final long queryTime = System.currentTimeMillis(); + Row row = results.one(); + String com1 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "' ALLOW FILTERING;"; + System.out.println("Command: " + com1); + ResultSet results1 = session.execute(com1); + if (results1.isExhausted()) + return null; + Row row1 = results1.one(); - StructureJobLog res = new StructureJobLog(row.getString("Protein"), row.getString("JobID"), row.getString("DataBegin"), row.getString("DataEnd"), row.getString("ip"), row1.getMap("Predictions", String.class, String.class)); - System.out.println ("Query time is " + (queryTime - startTime) + " msec"); ++ StructureJobLog res = new StructureJobLog(row.getString("Protein"), row.getString("JobID"), row.getString("DataBegin"), ++ row.getString("DataEnd"), row.getString("ip"), row1.getMap("Predictions", String.class, String.class)); ++ System.out.println("Query time is " + (queryTime - startTime) + " msec"); + final long endTime = System.currentTimeMillis(); - System.out.println (" rows analysed, execution time is " + (endTime - startTime) + " msec"); ++ System.out.println(" rows analysed, execution time is " + (endTime - startTime) + " msec"); + return res; + } - ++ + /* * getting earlest date of jobs from the db */ public long getEarliestDateInDB() { final long startTime = System.currentTimeMillis(); - String com = "SELECT jobtime,JobID FROM ProteinKeyspace.ProteinData;"; - String com = "SELECT jobtime FROM ProteinKeyspace.ProteinData;"; ++ String com = "SELECT jobtime,JobID FROM ProteinData;"; System.out.println("Command: " + com); ResultSet results = session.execute(com); final long queryTime = System.currentTimeMillis(); - System.out.println ("Query time is " + (queryTime - startTime) + " msec"); - System.out.println("Query time is " + (queryTime - startTime) + " msec"); ++ System.out.println("Query time is " + (queryTime - startTime) + " msec"); Calendar cal = Calendar.getInstance(); long res = cal.getTimeInMillis(); diff --cc datadb/compbio/cassandra/JpredParserHTTP.java index b780ddf,51fc8e3..27f66cc --- a/datadb/compbio/cassandra/JpredParserHTTP.java +++ b/datadb/compbio/cassandra/JpredParserHTTP.java @@@ -1,8 -1,8 +1,11 @@@ package compbio.cassandra; import java.io.BufferedReader; ++import java.io.DataInputStream; ++import java.io.EOFException; import java.io.FileNotFoundException; import java.io.IOException; ++import java.io.InputStream; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.MalformedURLException; @@@ -20,6 -20,6 +23,9 @@@ import compbio.cassandra.JpredParser public class JpredParserHTTP implements JpredParser { private CassandraNativeConnector cc = new CassandraNativeConnector(); private String dirprefix; ++ private List alignment; ++ private List predictions; ++ private String jnetpred; JpredParserHTTP() { dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results"; @@@ -46,7 -46,7 +52,55 @@@ } } -- private int ParsingForDate(String input, String date) { ++ private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException { ++ final FastaReader fr = new FastaReader(stream); ++ String query = ""; ++ alignment = new ArrayList(); ++ predictions = new ArrayList(); ++ while (fr.hasNext()) { ++ final FastaSequence fs = fr.next(); ++ String seqid = fs.getId(); ++ String seq = fs.getSequence().replaceAll("\n", ""); ++ if (seqid.equals("QUERY") || seqid.equals(jobid)) { ++ query = seq; ++ alignment.add(fs); ++ } else if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28") ++ || seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF") ++ || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM")) { ++ predictions.add(fs); ++ if (seqid.equals("jnetpred")) ++ jnetpred = seq; ++ } else { ++ alignment.add(fs); ++ } ++ } ++ return query; ++ } ++ ++ private String parseLogFile(final InputStream stream) throws IOException { ++ String out = ""; ++ BufferedReader buffer = new BufferedReader(new InputStreamReader(stream)); ++ String line; ++ while (null != (line = buffer.readLine())) { ++ out += line; ++ } ++ return out; ++ } ++ ++ private List parseArchiveFile(final InputStream stream) throws IOException { ++ DataInputStream data_in = new DataInputStream(stream); ++ List out = new ArrayList(); ++ while (true) { ++ try { ++ out.add(data_in.readByte()); ++ } catch (EOFException eof) { ++ break; ++ } ++ } ++ return out; ++ } ++ ++ private void ParsingForDate(String input, String date) { int totalcount = 0; int countNoData = 0; int countUnclearFASTAid = 0; @@@ -54,7 -54,7 +108,6 @@@ int countinserted = 0; int counAlignments = 0; int countStrange = 0; -- int njobs = 0; System.out.println("Inserting jobs for " + date); try { @@@ -64,56 -64,51 +117,64 @@@ String line; while ((line = alljobs.readLine()) != null) { -- if (line.matches(date + "(.*)jp_[^\\s]+")) { ++ if (line.matches(date + ":(.*)jp_[^\\s]+")) { String[] table = line.split("\\s+"); // Format of a record: // starttime endtime ip email jobid (directory) -- // 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172 unknown_email jp_J9HBCBT ++ // 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172 ++ // unknown_email jp_J9HBCBT String id = table[table.length - 1]; totalcount++; -- String datalink = dirprefix + "/" + id + "/" + id + ".concise.fasta"; -- URL urltable = new URL(datalink); -- HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection(); -- int responsecode = httpConnection.getResponseCode(); -- if (199 < responsecode && responsecode < 300) { -- try { -- final FastaReader fr = new FastaReader(urltable.openStream()); -- final List seqs = new ArrayList(); -- String newprotein = ""; -- while (fr.hasNext()) { -- final FastaSequence fs = fr.next(); -- if (fs.getId().equals("QUERY") || fs.getId().equals(id)) -- newprotein = fs.getSequence().replaceAll("\n", ""); -- else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) { -- seqs.add(fs); -- } -- } -- if (newprotein.equals("")) { -- countUnclearFASTAid++; -- } else { -- SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); -- String dateInString1 = table[0].substring(0, table[0].indexOf(":")); -- long dateWork1 = 0; -- try { -- Date dat1 = formatter.parse(dateInString1); -- dateWork1 = dat1.getTime(); -- } catch (ParseException e) { -- e.printStackTrace(); ++ if (cc.JobisNotInsterted(id)) { ++ URL dataurl = new URL(dirprefix + "/" + id + "/" + id + ".concise.fasta"); ++ URL archiveurl = new URL(dirprefix + "/" + id + "/" + id + ".tar.gz"); ++ URL logurl = new URL(dirprefix + "/" + id + "/LOG"); ++ HttpURLConnection httpConnection1 = (HttpURLConnection) dataurl.openConnection(); ++ HttpURLConnection httpConnection2 = (HttpURLConnection) logurl.openConnection(); ++ HttpURLConnection httpConnection3 = (HttpURLConnection) archiveurl.openConnection(); ++ int response1 = httpConnection1.getResponseCode(); ++ int response2 = httpConnection2.getResponseCode(); ++ if (199 < response1 && response1 < 300) { ++ try { ++ String protein = parsePredictions(dataurl.openStream(), id); ++ if (protein.equals("")) { ++ countUnclearFASTAid++; ++ } else { ++ SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd"); ++ SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s"); ++ String startdatestring = table[0].substring(0, table[0].indexOf(":")); ++ try { ++ Date startdate = dateformatter.parse(startdatestring); ++ Date starttime = timeformatter.parse(table[0]); ++ Date endtime = timeformatter.parse(table[1]); ++ String ip = table[2]; ++ String execstatus = "OK"; ++ String finalstatus = "OK"; ++ countinsertions += cc.FormQueryTables(startdate.getTime(), table[0], table[1], ip, id, execstatus, ++ finalstatus, protein, predictions); ++ ++ long exectime = (endtime.getTime() - starttime.getTime()) / 1000; ++ String log = ""; ++ if (199 < response2 && response2 < 300) { ++ log = parseLogFile(logurl.openStream()); ++ } ++ cc.ArchiveData(startdate.getTime(), exectime, ip, id, execstatus, finalstatus, protein, ++ predictions, alignment, log, archiveurl.toString()); ++ } catch (ParseException e) { ++ e.printStackTrace(); ++ } } - cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs); - ++countinsertions; - ++njobs; - // flush every 50 insertions - // if (0 == countinsertions % 50) { - // cc.flushData(); - // njobs -= 50; - // } - cc.FormQueryTables(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs); - ++countinsertions; - ++njobs; ++ } catch (IOException e) { ++ e.printStackTrace(); } -- } catch (IOException e) { -- e.printStackTrace(); ++ } else { ++ countNoData++; } ++ httpConnection1.disconnect(); ++ httpConnection2.disconnect(); ++ httpConnection3.disconnect(); } else { -- countNoData++; ++ ++countinserted; } } else { if (line.matches(date + "(.*)Sequence0/(.*)")) { @@@ -136,6 -131,6 +197,6 @@@ } catch (IOException e) { e.printStackTrace(); } -- return njobs; ++ ; } } diff --cc datadb/compbio/cassandra/JpredParserLocalFile.java index 281a20b,54e50ab..4b254ae --- a/datadb/compbio/cassandra/JpredParserLocalFile.java +++ b/datadb/compbio/cassandra/JpredParserLocalFile.java @@@ -58,7 -58,7 +58,7 @@@ public class JpredParserLocalFile imple System.out.println("Execution Time = " + execTime + " ms"); } -- private int ParsingForDate(List input, String date) { ++ private void ParsingForDate(List input, String date) { int totalcount = 0; int countNoData = 0; int countUnclearFASTAid = 0; @@@ -66,11 -66,11 +66,10 @@@ int countinserted = 0; int counAlignments = 0; int countStrange = 0; System.out.println("Inserting jobs for " + date); for (String in : input) { -- if (in.matches(date + "(.*)jp_[^\\s]+")) { ++ if (in.matches(date + ":(.*)jp_[^\\s]+")) { String[] table = in.split("\\s+"); String starttime = table[0]; String finishtime = table[1]; @@@ -104,9 -104,9 +103,7 @@@ } catch (ParseException e) { e.printStackTrace(); } - cc.InsertData(insertdate, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs); - ++countinsertions; - ++njobs; - cc.FormQueryTables(insertdate, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs); - ++countinsertions; - ++njobs; ++ countinsertions += cc.FormQueryTables(insertdate, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs); } fr.close(); } catch (IOException e) { @@@ -132,7 -132,7 +129,6 @@@ System.out.println(" " + countUnclearFASTAid + " jobs with unclear FASTA protein id in *.concise.fasta"); System.out.println(" " + countinsertions + " new job insertions\n"); } -- return njobs; } }