import java.io.IOException;
import java.util.Calendar;
+import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;
+import java.util.Map;
+
++import org.apache.log4j.Logger;
+
import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.Host;
import com.datastax.driver.core.Metadata;
import com.datastax.driver.core.Row;
import com.datastax.driver.core.Session;
import com.datastax.driver.core.ResultSet;
++import com.datastax.driver.core.PreparedStatement;
++import com.datastax.driver.core.BoundStatement;
++
++import compbio.engine.ProteoCachePropertyHelperManager;
++import compbio.util.PropertyHelper;
++import compbio.util.Util;
public class CassandraNativeConnector {
private static Cluster cluster;
private static Session session;
++ private static final PropertyHelper ph = ProteoCachePropertyHelperManager.getPropertyHelper();
++ private static Logger log = Logger.getLogger(CassandraNativeConnector.class);
++
++ public static String CASSANDRA_HOSTNAME = "localhost";
++ public static boolean READ_WEB_JPRED = false;
++ public static boolean READ_LOCALFILE_JPRED = false;
++
++ private static boolean initBooleanValue(String key) {
++ assert key != null;
++ String status = ph.getProperty(key);
++ log.debug("Loading property: " + key + " with value: " + status);
++ if (Util.isEmpty(status)) {
++ return false;
++ }
++ return new Boolean(status.trim()).booleanValue();
++ }
+
/*
-- * connect to the cluster and look weather the dababase has any data inside
++ * connect to the cluster and look whether all tables exist
*/
public void Connect() {
-- // local cassandra cluster
-- cluster = Cluster.builder().addContactPoint("localhost").build();
-- // distributed cassandra cluster
-- /* cluster = Cluster.builder().addContactPoint("10.0.115.190").build(); */
++
++ String cassandrahostname = ph.getProperty("cassandra.host");
++ if (null != cassandrahostname) {
++ CASSANDRA_HOSTNAME = cassandrahostname;
++ }
++ READ_WEB_JPRED = initBooleanValue("cassandra.jpred.web");
++ READ_LOCALFILE_JPRED = initBooleanValue("cassandra.jpred.local");
++
++ cluster = Cluster.builder().addContactPoint(CASSANDRA_HOSTNAME).build();
++
Metadata metadata = cluster.getMetadata();
System.out.printf("Connected to cluster: %s\n", metadata.getClusterName());
for (Host host : metadata.getAllHosts()) {
System.out.printf("Datatacenter: %s; Host: %s; Rack: %s\n", host.getDatacenter(), host.getAddress(), host.getRack());
}
--
session = cluster.connect();
++ CreateTables();
++ System.out.println("Cassandra connected");
++ }
++
++ private void CreateTables() {
session.execute("CREATE KEYSPACE IF NOT EXISTS ProteinKeyspace WITH replication = {'class':'SimpleStrategy', 'replication_factor':3};");
-- session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinRow (Protein ascii, JobID ascii, Predictions map<ascii,ascii>, PRIMARY KEY(JobID));");
-- session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinLog "
-- + "(JobID ascii, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, ExecutionStatus ascii, Protein ascii, PRIMARY KEY(JobID));");
-- session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinData (jobtime bigint, JobID ascii, Protein ascii, PRIMARY KEY(JobID));");
- session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.JpredArchive " +
- "(JobID ascii, Protein varchar, IP ascii, StartTime bigint, ExecTime int, alignment map<ascii,ascii>, predictions map<ascii,ascii>, archive blob, LOG varchar, PRIMARY KEY(JobID));");
++ session.execute("USE ProteinKeyspace");
-- session.execute("CREATE INDEX IF NOT EXISTS ProteinSeq ON ProteinKeyspace.ProteinRow (protein);");
-- session.execute("CREATE INDEX IF NOT EXISTS JobDateStamp ON ProteinKeyspace.ProteinData (jobtime);");
++ session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinRow "
++ + "(Protein ascii, JobID ascii, Predictions map<ascii,ascii>, PRIMARY KEY(JobID));");
++ session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinLog "
++ + "(JobID ascii, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, "
++ + "ExecutionStatus ascii, Protein ascii, PRIMARY KEY(JobID));");
++ session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinData "
++ + "(jobtime bigint, JobID ascii, Protein ascii, PRIMARY KEY(JobID));");
++ session.execute("CREATE COLUMNFAMILY IF NOT EXISTS JpredArchive "
++ + "(JobID ascii, Protein varchar, IP ascii, StartTime bigint, ExecTime int, alignment map<ascii,ascii>, "
++ + "predictions map<ascii,ascii>, archive blob, LOG varchar, PRIMARY KEY(JobID));");
-- System.out.println("Cassandra connected");
++ session.execute("CREATE INDEX IF NOT EXISTS ProteinSeq ON ProteinRow (protein);");
++ session.execute("CREATE INDEX IF NOT EXISTS JobDateStamp ON ProteinData (jobtime);");
}
/*
* parsing data source and filling the database
*/
public void Parsing() throws IOException {
-- if (true) {
++ if (READ_WEB_JPRED) {
// if (source.equals("http")) {
// get data from real Jpred production server
System.out.println("Parsing web data source......");
String datasrc = "http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat";
String prefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
JpredParserHTTP parser = new JpredParserHTTP(prefix);
-- parser.Parsing(datasrc, 4);
++ parser.Parsing(datasrc, 5);
}
-- if (false) {
++ if (READ_LOCALFILE_JPRED) {
// if (source.equals("file")) {
// get artificial data generated for the DB stress tests
System.out.println("Parsing local file data source......");
System.out.println("Cassandra has been shut down");
}
- /*
- * inserting data into the db
- */
- public void FormQueryTables(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx, String statusFinal,
- String protein, List<FastaSequence> predictions) {
++ public boolean JobisNotInsterted(String jobid) {
++ ResultSet results1 = session.execute("SELECT * FROM ProteinLog WHERE JobID = '" + jobid + "';");
++ if (results1.isExhausted()) {
++ return true;
++ }
++ return false;
++ }
+
- String check1 = "SELECT * FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';";
- ResultSet results1 = session.execute(check1);
++ public boolean JobisNotArchived(String jobid) {
++ ResultSet results1 = session.execute("SELECT * FROM JpredArchive WHERE JobID = '" + jobid + "';");
+ if (results1.isExhausted()) {
- String com1 = "INSERT INTO ProteinKeyspace.ProteinLog "
- + "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)" + " VALUES ('" + jobid + "','" + ip + "','"
- + startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx + "','" + protein + "');";
- session.execute(com1);
++ return true;
++ }
++ return false;
++ }
+
- String com2 = "INSERT INTO ProteinKeyspace.ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid
+ /*
- * inserting data into the db
++ * inserting data into the tables for queries
+ */
- public void InsertData(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx, String statusFinal,
- String protein, List<FastaSequence> predictions) {
- String check1 = "SELECT * FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';";
- ResultSet results1 = session.execute(check1);
- if (results1.isExhausted()) {
- String com1 = "INSERT INTO ProteinKeyspace.ProteinLog "
- + "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)" + " VALUES ('" + jobid + "','" + ip + "','"
- + startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx + "','" + protein + "');";
- session.execute(com1);
- String com2 = "INSERT INTO ProteinKeyspace.ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid
++ public int FormQueryTables(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx,
++ String statusFinal, String protein, List<FastaSequence> predictions) {
++ if (JobisNotInsterted(jobid)) {
++ String com1 = "INSERT INTO ProteinLog " + "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)"
++ + " VALUES ('" + jobid + "','" + ip + "','" + startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx
+ "','" + protein + "');";
++ session.execute(com1);
++
++ String com2 = "INSERT INTO ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid + "','" + protein
++ + "');";
session.execute(com2);
+
String allpredictions = "";
for (FastaSequence pred : predictions) {
String predictionname = pred.getId();
if (null != allpredictions) {
final_prediction = allpredictions.substring(0, allpredictions.length() - 1);
}
- String check2 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "' ALLOW FILTERING;";
+
- String check2 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "';";
++ String check2 = "SELECT * FROM ProteinRow WHERE JobID = '" + jobid + "';";
ResultSet results2 = session.execute(check2);
if (results2.isExhausted()) {
- String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('"
- + protein + "','" + jobid + "',{" + final_prediction + "});";
- String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('" + protein + "','"
- + jobid + "',{" + final_prediction + "});";
++ String com3 = "INSERT INTO ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('" + protein + "','" + jobid + "',{"
++ + final_prediction + "});";
session.execute(com3);
}
- String check3 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "';";
++ return 1;
}
++ return 0;
+ }
+
- public void ArchiveData(long starttime, int exectime, String ip, String jobid, String statusEx, String statusFinal,
- String protein, List<FastaSequence> predictions, List<FastaSequence> seqs, String LogFile) {
-
- String check1 = "SELECT * FROM ProteinKeyspace.JpredArchive WHERE JobID = '" + jobid + "';";
- ResultSet results1 = session.execute(check1);
- if (results1.isExhausted()) {
- String allpredictions = "";
- for (FastaSequence pred : predictions) {
- String predictionname = pred.getId();
- String prediction = pred.getSequence().replaceAll("\n", "");
- allpredictions += "'" + predictionname + "':'" + prediction + "',";
- }
- String final_allpredictions = "";
- if (null != allpredictions) {
- final_allpredictions = allpredictions.substring(0, allpredictions.length() - 1);
++ /*
++ * insert data from a real Jpred job: timing+IP, Execution Status, Final
++ * status, protein sequence, predictions, alignment, LOG and tar.gz files
++ */
++ public int ArchiveData(long starttime, long exectime, String ip, String jobid, String statusEx, String statusFinal, String protein,
++ List<FastaSequence> predictions, List<FastaSequence> seqs, String LogFile, String archivepath) {
++ if (JobisNotArchived(jobid)) {
++ String log = LogFile.replaceAll("'", "");
++ session.execute("INSERT INTO JpredArchive (JobID, Protein, IP, StartTime, ExecTime,LOG) VALUES ('" + jobid + "','" + protein
++ + "','" + ip + "'," + starttime + "," + exectime + ",'" + log + "');");
++ if (false) {
++ PreparedStatement statement = session.prepare("INSERT INTO JpredArchive (JobID, archive) VALUES (?,?);");
++ BoundStatement boundStatement = new BoundStatement(statement);
++ session.execute(boundStatement.bind(jobid, archivepath));
+ }
- String alignment = "";
- for (FastaSequence seq : seqs) {
- String predictionname = seq.getId();
- String prediction = seq.getSequence().replaceAll("\n", "");
- alignment += "'" + predictionname + "':'" + prediction + "',";
++
++ for (FastaSequence p : predictions) {
++ session.execute("UPDATE JpredArchive SET predictions = predictions + {'" + p.getId() + "':'"
++ + p.getSequence().replaceAll("\n", "") + "'} WHERE JobID = '" + jobid + "';");
+ }
- String final_alignment = "";
- if (null != allpredictions) {
- final_alignment = alignment.substring(0, allpredictions.length() - 1);
++
++ for (FastaSequence s : seqs) {
++ session.execute("UPDATE JpredArchive SET alignment = alignment + {'" + s.getId() + "':'"
++ + s.getSequence().replaceAll("\n", "") + "'} WHERE JobID = '" + jobid + "';");
+ }
-
- String com1 = "INSERT INTO ProteinKeyspace.JpredArchive "
- + "(JobID, Protein, IP, StartTime, ExecTime, alignment, predictions, LOG))"
- + " VALUES ('"
- + jobid + "','" + protein + "','" + ip + "'," + starttime + "," + exectime
- + "',[" + final_allpredictions + "],[" + final_alignment + "],'" + LogFile + "]);";
- session.execute(com1);
++ return 1;
+ }
++ return 0;
}
-
-
/*
* getting data from the db
*/
public List<Pair<String, String>> ReadProteinDataTable() {
final long startTime = System.currentTimeMillis();
-- String com = "SELECT DataBegin,DataEnd FROM ProteinKeyspace.ProteinLog;";
++ String com = "SELECT DataBegin,DataEnd FROM ProteinLog;";
System.out.println("Command: " + com);
ResultSet results = session.execute(com);
final long queryTime = System.currentTimeMillis();
++c;
}
final long endTime = System.currentTimeMillis();
- System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
+ System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
return res;
}
-
+
/*
+ * getting data from the db ProteinData
+ */
+ public Integer ReadDateTable(long queryDate) {
+ final long startTime = System.currentTimeMillis();
- String com = "SELECT jobtime, JobID FROM ProteinKeyspace.ProteinData WHERE jobtime = " + queryDate + ";";
++ String com = "SELECT jobtime, JobID FROM ProteinData WHERE jobtime = " + queryDate + ";";
+ System.out.println("Command: " + com);
+ ResultSet results = session.execute(com);
- if (results.isExhausted())
- return null;
+ final long queryTime = System.currentTimeMillis();
++ System.out.println("Query time is " + (queryTime - startTime) + " msec");
++ if (results.isExhausted())
++ return 0;
+ List<Row> rows = results.all();
- System.out.println ("Query time is " + (queryTime - startTime) + " msec");
++ final long endTime = System.currentTimeMillis();
++ System.out.println("Processing time is " + (endTime - queryTime) + " msec");
+ return rows.size();
+ }
+
+ /*
+ * getting whole protein sequence from the db ProteinRow
+ */
+ public List<StructureProteinPrediction> ReadWholeSequence(String queryProtein) {
+ final long startTime = System.currentTimeMillis();
- String com = "SELECT JobID, Predictions FROM ProteinKeyspace.ProteinRow WHERE Protein = '" + queryProtein + "';";
++ String com = "SELECT JobID, Predictions FROM ProteinRow WHERE Protein = '" + queryProtein + "';";
+ System.out.println("Command: " + com);
+ ResultSet results = session.execute(com);
+ if (results.isExhausted())
+ return null;
+ final long queryTime = System.currentTimeMillis();
+ List<Row> rows = results.all();
- System.out.println ("Query time is " + (queryTime - startTime) + " msec");
- System.out.println (" rows analysed, " + rows.size());
++ System.out.println("Query time is " + (queryTime - startTime) + " msec");
++ System.out.println(" rows analysed, " + rows.size());
+ List<StructureProteinPrediction> res = new ArrayList<StructureProteinPrediction>();
+ int c = 0;
+ for (Row r : rows) {
- StructureProteinPrediction structure = new StructureProteinPrediction(queryProtein, r.getString("JobID"), r.getMap("Predictions", String.class, String.class));
++ StructureProteinPrediction structure = new StructureProteinPrediction(queryProtein, r.getString("JobID"), r.getMap(
++ "Predictions", String.class, String.class));
+ res.add(structure);
+ ++c;
+ }
+ final long endTime = System.currentTimeMillis();
- System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
++ System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
+ return res;
+ }
-
++
+ /*
+ * getting part of protein sequence from the db ProteinRow
+ */
- public List<StructureProteinPrediction> ReadPartOfSequence(String queryProtein) {
++ public List<StructureProteinPrediction> ReadPartOfSequence(String queryProtein) {
+ final long startTime = System.currentTimeMillis();
- String com = "SELECT * FROM ProteinKeyspace.ProteinRow;";
++ String com = "SELECT * FROM ProteinRow;";
+ System.out.println("Command: " + com);
+ ResultSet results = session.execute(com);
+ if (results.isExhausted())
+ return null;
+ final long queryTime = System.currentTimeMillis();
+ List<Row> rows = results.all();
- System.out.println ("Query time is " + (queryTime - startTime) + " msec");
- System.out.println (" rows analysed, " + rows.size());
- List<StructureProteinPrediction> res = new ArrayList<StructureProteinPrediction>();
++ System.out.println("Query time is " + (queryTime - startTime) + " msec");
++ System.out.println(" rows analysed, " + rows.size());
++ List<StructureProteinPrediction> res = new ArrayList<StructureProteinPrediction>();
+ int c = 0;
+ for (Row r : rows) {
+ String prot = r.getString("Protein");
+ if (prot.matches("(.*)" + queryProtein + "(.*)")) {
- // System.out.println(prot);
- StructureProteinPrediction structure = new StructureProteinPrediction(prot, r.getString("JobID"), r.getMap("Predictions", String.class, String.class));
++ StructureProteinPrediction structure = new StructureProteinPrediction(prot, r.getString("JobID"), r.getMap("Predictions",
++ String.class, String.class));
+ res.add(structure);
+ ++c;
+ }
+ }
+ final long endTime = System.currentTimeMillis();
- System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
++ System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
+ return res;
+ }
-
++
+ /*
+ * getting protein sequences by counter
+ */
- public Map<String, Integer> ReadProteinDataByCounter() {
++ public Map<String, Integer> ReadProteinDataByCounter() {
+ final long startTime = System.currentTimeMillis();
- String com = "SELECT Protein FROM ProteinKeyspace.ProteinRow;";
++ String com = "SELECT Protein FROM ProteinRow;";
+ System.out.println("Command: " + com);
+ ResultSet results = session.execute(com);
+ if (results.isExhausted())
+ return null;
+ final long queryTime = System.currentTimeMillis();
+ List<Row> rows = results.all();
- System.out.println ("Query time is " + (queryTime - startTime) + " msec");
- System.out.println (" rows analysed, " + rows.size());
++ System.out.println("Query time is " + (queryTime - startTime) + " msec");
++ System.out.println(" rows analysed, " + rows.size());
+ Map<String, Integer> res = new HashMap<String, Integer>();
+ int c = 0;
+ for (Row r : rows) {
+ String protein = r.getString("Protein");
- if (res.containsKey(protein))
++ if (res.containsKey(protein))
+ res.put(protein, res.get(protein) + 1);
+ else
+ res.put(protein, 1);
+ }
+ final long endTime = System.currentTimeMillis();
- System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
++ System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
+ return res;
+ }
-
-
++
+ /*
+ * getting protein sequences by counter
+ */
+ public StructureJobLog ReadJobLog(String jobid) {
+ final long startTime = System.currentTimeMillis();
+ String com = "SELECT * FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';";
+ System.out.println("Command: " + com);
+ ResultSet results = session.execute(com);
+ if (results.isExhausted())
+ return null;
+ final long queryTime = System.currentTimeMillis();
+ Row row = results.one();
+ String com1 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "' ALLOW FILTERING;";
+ System.out.println("Command: " + com1);
+ ResultSet results1 = session.execute(com1);
+ if (results1.isExhausted())
+ return null;
+ Row row1 = results1.one();
- StructureJobLog res = new StructureJobLog(row.getString("Protein"), row.getString("JobID"), row.getString("DataBegin"), row.getString("DataEnd"), row.getString("ip"), row1.getMap("Predictions", String.class, String.class));
- System.out.println ("Query time is " + (queryTime - startTime) + " msec");
++ StructureJobLog res = new StructureJobLog(row.getString("Protein"), row.getString("JobID"), row.getString("DataBegin"),
++ row.getString("DataEnd"), row.getString("ip"), row1.getMap("Predictions", String.class, String.class));
++ System.out.println("Query time is " + (queryTime - startTime) + " msec");
+ final long endTime = System.currentTimeMillis();
- System.out.println (" rows analysed, execution time is " + (endTime - startTime) + " msec");
++ System.out.println(" rows analysed, execution time is " + (endTime - startTime) + " msec");
+ return res;
+ }
-
++
+ /*
* getting earliest date of jobs from the db
*/
public long getEarliestDateInDB() {
final long startTime = System.currentTimeMillis();
- String com = "SELECT jobtime,JobID FROM ProteinKeyspace.ProteinData;";
- String com = "SELECT jobtime FROM ProteinKeyspace.ProteinData;";
++ String com = "SELECT jobtime,JobID FROM ProteinData;";
System.out.println("Command: " + com);
ResultSet results = session.execute(com);
final long queryTime = System.currentTimeMillis();
- System.out.println ("Query time is " + (queryTime - startTime) + " msec");
- System.out.println("Query time is " + (queryTime - startTime) + " msec");
++ System.out.println("Query time is " + (queryTime - startTime) + " msec");
Calendar cal = Calendar.getInstance();
long res = cal.getTimeInMillis();
package compbio.cassandra;
import java.io.BufferedReader;
++import java.io.DataInputStream;
++import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
++import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
public class JpredParserHTTP implements JpredParser {
private CassandraNativeConnector cc = new CassandraNativeConnector();
private String dirprefix;
++ private List<FastaSequence> alignment;
++ private List<FastaSequence> predictions;
++ private String jnetpred;
JpredParserHTTP() {
dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
}
}
-- private int ParsingForDate(String input, String date) {
++ private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException {
++ final FastaReader fr = new FastaReader(stream);
++ String query = "";
++ alignment = new ArrayList<FastaSequence>();
++ predictions = new ArrayList<FastaSequence>();
++ while (fr.hasNext()) {
++ final FastaSequence fs = fr.next();
++ String seqid = fs.getId();
++ String seq = fs.getSequence().replaceAll("\n", "");
++ if (seqid.equals("QUERY") || seqid.equals(jobid)) {
++ query = seq;
++ alignment.add(fs);
++ } else if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28")
++ || seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF")
++ || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM")) {
++ predictions.add(fs);
++ if (seqid.equals("jnetpred"))
++ jnetpred = seq;
++ } else {
++ alignment.add(fs);
++ }
++ }
++ return query;
++ }
++
++ private String parseLogFile(final InputStream stream) throws IOException {
++ String out = "";
++ BufferedReader buffer = new BufferedReader(new InputStreamReader(stream));
++ String line;
++ while (null != (line = buffer.readLine())) {
++ out += line;
++ }
++ return out;
++ }
++
++ private List<Byte> parseArchiveFile(final InputStream stream) throws IOException {
++ DataInputStream data_in = new DataInputStream(stream);
++ List<Byte> out = new ArrayList<Byte>();
++ while (true) {
++ try {
++ out.add(data_in.readByte());
++ } catch (EOFException eof) {
++ break;
++ }
++ }
++ return out;
++ }
++
++ private void ParsingForDate(String input, String date) {
int totalcount = 0;
int countNoData = 0;
int countUnclearFASTAid = 0;
int countinserted = 0;
int counAlignments = 0;
int countStrange = 0;
-- int njobs = 0;
System.out.println("Inserting jobs for " + date);
try {
String line;
while ((line = alljobs.readLine()) != null) {
-- if (line.matches(date + "(.*)jp_[^\\s]+")) {
++ if (line.matches(date + ":(.*)jp_[^\\s]+")) {
String[] table = line.split("\\s+");
// Format of a record:
// starttime endtime ip email jobid (directory)
-- // 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172 unknown_email jp_J9HBCBT
++ // 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172
++ // unknown_email jp_J9HBCBT
String id = table[table.length - 1];
totalcount++;
-- String datalink = dirprefix + "/" + id + "/" + id + ".concise.fasta";
-- URL urltable = new URL(datalink);
-- HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection();
-- int responsecode = httpConnection.getResponseCode();
-- if (199 < responsecode && responsecode < 300) {
-- try {
-- final FastaReader fr = new FastaReader(urltable.openStream());
-- final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
-- String newprotein = "";
-- while (fr.hasNext()) {
-- final FastaSequence fs = fr.next();
-- if (fs.getId().equals("QUERY") || fs.getId().equals(id))
-- newprotein = fs.getSequence().replaceAll("\n", "");
-- else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) {
-- seqs.add(fs);
-- }
-- }
-- if (newprotein.equals("")) {
-- countUnclearFASTAid++;
-- } else {
-- SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
-- String dateInString1 = table[0].substring(0, table[0].indexOf(":"));
-- long dateWork1 = 0;
-- try {
-- Date dat1 = formatter.parse(dateInString1);
-- dateWork1 = dat1.getTime();
-- } catch (ParseException e) {
-- e.printStackTrace();
++ if (cc.JobisNotInsterted(id)) {
++ URL dataurl = new URL(dirprefix + "/" + id + "/" + id + ".concise.fasta");
++ URL archiveurl = new URL(dirprefix + "/" + id + "/" + id + ".tar.gz");
++ URL logurl = new URL(dirprefix + "/" + id + "/LOG");
++ HttpURLConnection httpConnection1 = (HttpURLConnection) dataurl.openConnection();
++ HttpURLConnection httpConnection2 = (HttpURLConnection) logurl.openConnection();
++ HttpURLConnection httpConnection3 = (HttpURLConnection) archiveurl.openConnection();
++ int response1 = httpConnection1.getResponseCode();
++ int response2 = httpConnection2.getResponseCode();
++ if (199 < response1 && response1 < 300) {
++ try {
++ String protein = parsePredictions(dataurl.openStream(), id);
++ if (protein.equals("")) {
++ countUnclearFASTAid++;
++ } else {
++ SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
++ SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
++ String startdatestring = table[0].substring(0, table[0].indexOf(":"));
++ try {
++ Date startdate = dateformatter.parse(startdatestring);
++ Date starttime = timeformatter.parse(table[0]);
++ Date endtime = timeformatter.parse(table[1]);
++ String ip = table[2];
++ String execstatus = "OK";
++ String finalstatus = "OK";
++ countinsertions += cc.FormQueryTables(startdate.getTime(), table[0], table[1], ip, id, execstatus,
++ finalstatus, protein, predictions);
++
++ long exectime = (endtime.getTime() - starttime.getTime()) / 1000;
++ String log = "";
++ if (199 < response2 && response2 < 300) {
++ log = parseLogFile(logurl.openStream());
++ }
++ cc.ArchiveData(startdate.getTime(), exectime, ip, id, execstatus, finalstatus, protein,
++ predictions, alignment, log, archiveurl.toString());
++ } catch (ParseException e) {
++ e.printStackTrace();
++ }
}
- cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs);
- ++countinsertions;
- ++njobs;
- // flush every 50 insertions
- // if (0 == countinsertions % 50) {
- // cc.flushData();
- // njobs -= 50;
- // }
- cc.FormQueryTables(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs);
- ++countinsertions;
- ++njobs;
++ } catch (IOException e) {
++ e.printStackTrace();
}
-- } catch (IOException e) {
-- e.printStackTrace();
++ } else {
++ countNoData++;
}
++ httpConnection1.disconnect();
++ httpConnection2.disconnect();
++ httpConnection3.disconnect();
} else {
-- countNoData++;
++ ++countinserted;
}
} else {
if (line.matches(date + "(.*)Sequence0/(.*)")) {
} catch (IOException e) {
e.printStackTrace();
}
-- return njobs;
++ ;
}
}