From: Sasha Sherstnev Date: Fri, 8 Nov 2013 12:11:18 +0000 (+0000) Subject: Improved cassandra support X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=91de317f0e7f8697c3b2d036c7f22a5d6df6cabc;p=proteocache.git Improved cassandra support --- diff --git a/datadb/compbio/cassandra/CassandraNativeConnector.java b/datadb/compbio/cassandra/CassandraNativeConnector.java index 7109c78..d87f89e 100644 --- a/datadb/compbio/cassandra/CassandraNativeConnector.java +++ b/datadb/compbio/cassandra/CassandraNativeConnector.java @@ -2,8 +2,12 @@ package compbio.cassandra; import java.io.IOException; import java.util.Calendar; +import java.util.HashMap; import java.util.List; import java.util.ArrayList; +import java.util.Map; + +import org.apache.log4j.Logger; import com.datastax.driver.core.Cluster; import com.datastax.driver.core.Host; @@ -11,54 +15,90 @@ import com.datastax.driver.core.Metadata; import com.datastax.driver.core.Row; import com.datastax.driver.core.Session; import com.datastax.driver.core.ResultSet; +import com.datastax.driver.core.PreparedStatement; +import com.datastax.driver.core.BoundStatement; + +import compbio.engine.ProteoCachePropertyHelperManager; +import compbio.util.PropertyHelper; +import compbio.util.Util; public class CassandraNativeConnector { private static Cluster cluster; private static Session session; + private static final PropertyHelper ph = ProteoCachePropertyHelperManager.getPropertyHelper(); + private static Logger log = Logger.getLogger(CassandraNativeConnector.class); + + public static String CASSANDRA_HOSTNAME = "localhost"; + public static boolean READ_WEB_JPRED = false; + public static boolean READ_LOCALFILE_JPRED = false; + + private static boolean initBooleanValue(String key) { + assert key != null; + String status = ph.getProperty(key); + log.debug("Loading property: " + key + " with value: " + status); + if (Util.isEmpty(status)) { + return false; + } + return new Boolean(status.trim()).booleanValue(); + } /* - * connect to the cluster and look weather the dababase has any data inside + * connect to the cluster and look whether all tables exist */ public void Connect() { - // local cassandra cluster - cluster = Cluster.builder().addContactPoint("localhost").build(); - // distributed cassandra cluster - /* cluster = Cluster.builder().addContactPoint("10.0.115.190").build(); */ + + String cassandrahostname = ph.getProperty("cassandra.host"); + if (null != cassandrahostname) { + CASSANDRA_HOSTNAME = cassandrahostname; + } + READ_WEB_JPRED = initBooleanValue("cassandra.jpred.web"); + READ_LOCALFILE_JPRED = initBooleanValue("cassandra.jpred.local"); + + cluster = Cluster.builder().addContactPoint(CASSANDRA_HOSTNAME).build(); + Metadata metadata = cluster.getMetadata(); System.out.printf("Connected to cluster: %s\n", metadata.getClusterName()); for (Host host : metadata.getAllHosts()) { System.out.printf("Datatacenter: %s; Host: %s; Rack: %s\n", host.getDatacenter(), host.getAddress(), host.getRack()); } - session = cluster.connect(); + CreateTables(); + System.out.println("Cassandra connected"); + } + + private void CreateTables() { session.execute("CREATE KEYSPACE IF NOT EXISTS ProteinKeyspace WITH replication = {'class':'SimpleStrategy', 'replication_factor':3};"); - session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinRow (Protein ascii, JobID ascii, Predictions map, PRIMARY KEY(JobID));"); - session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinLog " - + "(JobID ascii, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, ExecutionStatus ascii, Protein ascii, PRIMARY KEY(JobID));"); - session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinData (jobtime bigint, JobID ascii, Protein ascii, PRIMARY KEY(JobID));"); - session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.JpredArchive " + - "(JobID ascii, Protein varchar, IP ascii, StartTime bigint, ExecTime int, alignment map, predictions map, archive blob, LOG varchar, PRIMARY KEY(JobID));"); + session.execute("USE ProteinKeyspace"); - session.execute("CREATE INDEX IF NOT EXISTS ProteinSeq ON ProteinKeyspace.ProteinRow (protein);"); - session.execute("CREATE INDEX IF NOT EXISTS JobDateStamp ON ProteinKeyspace.ProteinData (jobtime);"); + session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinRow " + + "(Protein ascii, JobID ascii, Predictions map, PRIMARY KEY(JobID));"); + session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinLog " + + "(JobID ascii, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, " + + "ExecutionStatus ascii, Protein ascii, PRIMARY KEY(JobID));"); + session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinData " + + "(jobtime bigint, JobID ascii, Protein ascii, PRIMARY KEY(JobID));"); + session.execute("CREATE COLUMNFAMILY IF NOT EXISTS JpredArchive " + + "(JobID ascii, Protein varchar, IP ascii, StartTime bigint, ExecTime int, alignment map, " + + "predictions map, archive blob, LOG varchar, PRIMARY KEY(JobID));"); - System.out.println("Cassandra connected"); + session.execute("CREATE INDEX IF NOT EXISTS ProteinSeq ON ProteinRow (protein);"); + session.execute("CREATE INDEX IF NOT EXISTS JobDateStamp ON ProteinData (jobtime);"); } /* * parsing data source and filling the database */ public void Parsing() throws IOException { - if (true) { + if (READ_WEB_JPRED) { // if (source.equals("http")) { // get data from real Jpred production server System.out.println("Parsing web data source......"); String datasrc = "http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat"; String prefix = "http://www.compbio.dundee.ac.uk/www-jpred/results"; JpredParserHTTP parser = new JpredParserHTTP(prefix); - parser.Parsing(datasrc, 4); + parser.Parsing(datasrc, 5); } - if (false) { + if (READ_LOCALFILE_JPRED) { // if (source.equals("file")) { // get irtifical data generated for the DB stress tests System.out.println("Parsing local file data source......"); @@ -75,22 +115,35 @@ public class CassandraNativeConnector { System.out.println("Cassandra has been shut down"); } - /* - * inserting data into the db - */ - public void FormQueryTables(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx, String statusFinal, - String protein, List predictions) { + public boolean JobisNotInsterted(String jobid) { + ResultSet results1 = session.execute("SELECT * FROM ProteinLog WHERE JobID = '" + jobid + "';"); + if (results1.isExhausted()) { + return true; + } + return false; + } - String check1 = "SELECT * FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';"; - ResultSet results1 = session.execute(check1); + public boolean JobisNotArchived(String jobid) { + ResultSet results1 = session.execute("SELECT * FROM JpredArchive WHERE JobID = '" + jobid + "';"); if (results1.isExhausted()) { - String com1 = "INSERT INTO ProteinKeyspace.ProteinLog " - + "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)" + " VALUES ('" + jobid + "','" + ip + "','" - + startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx + "','" + protein + "');"; - session.execute(com1); + return true; + } + return false; + } - String com2 = "INSERT INTO ProteinKeyspace.ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid + /* + * inserting data into the tables for queries + */ + public int FormQueryTables(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx, + String statusFinal, String protein, List predictions) { + if (JobisNotInsterted(jobid)) { + String com1 = "INSERT INTO ProteinLog " + "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)" + + " VALUES ('" + jobid + "','" + ip + "','" + startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx + "','" + protein + "');"; + session.execute(com1); + + String com2 = "INSERT INTO ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid + "','" + protein + + "');"; session.execute(com2); String allpredictions = ""; @@ -104,54 +157,48 @@ public class CassandraNativeConnector { final_prediction = allpredictions.substring(0, allpredictions.length() - 1); } - String check2 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "';"; + String check2 = "SELECT * FROM ProteinRow WHERE JobID = '" + jobid + "';"; ResultSet results2 = session.execute(check2); if (results2.isExhausted()) { - String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('" + protein + "','" - + jobid + "',{" + final_prediction + "});"; + String com3 = "INSERT INTO ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('" + protein + "','" + jobid + "',{" + + final_prediction + "});"; session.execute(com3); } + return 1; } + return 0; } - public void ArchiveData(long starttime, int exectime, String ip, String jobid, String statusEx, String statusFinal, - String protein, List predictions, List seqs, String LogFile) { - - String check1 = "SELECT * FROM ProteinKeyspace.JpredArchive WHERE JobID = '" + jobid + "';"; - ResultSet results1 = session.execute(check1); - if (results1.isExhausted()) { - String allpredictions = ""; - for (FastaSequence pred : predictions) { - String predictionname = pred.getId(); - String prediction = pred.getSequence().replaceAll("\n", ""); - allpredictions += "'" + predictionname + "':'" + prediction + "',"; - } - String final_allpredictions = ""; - if (null != allpredictions) { - final_allpredictions = allpredictions.substring(0, allpredictions.length() - 1); + /* + * insert data from a real Jpred job: timing+IP, Execution Status, Final + * status, protein sequence, predictions, alignment, LOG and tar.gz files + */ + public int ArchiveData(long starttime, long exectime, String ip, String jobid, String statusEx, String statusFinal, String protein, + List predictions, List seqs, String LogFile, String archivepath) { + if (JobisNotArchived(jobid)) { + String log = LogFile.replaceAll("'", ""); + session.execute("INSERT INTO JpredArchive (JobID, Protein, IP, StartTime, ExecTime,LOG) VALUES ('" + jobid + "','" + protein + + "','" + ip + "'," + starttime + "," + exectime + ",'" + log + "');"); + if (false) { + PreparedStatement statement = session.prepare("INSERT INTO JpredArchive (JobID, archive) VALUES (?,?);"); + BoundStatement boundStatement = new BoundStatement(statement); + session.execute(boundStatement.bind(jobid, archivepath)); } - String alignment = ""; - for (FastaSequence seq : seqs) { - String predictionname = seq.getId(); - String prediction = seq.getSequence().replaceAll("\n", ""); - alignment += "'" + predictionname + "':'" + prediction + "',"; + + for (FastaSequence p : predictions) { + session.execute("UPDATE JpredArchive SET predictions = predictions + {'" + p.getId() + "':'" + + p.getSequence().replaceAll("\n", "") + "'} WHERE JobID = '" + jobid + "';"); } - String final_alignment = ""; - if (null != allpredictions) { - final_alignment = alignment.substring(0, allpredictions.length() - 1); + + for (FastaSequence s : seqs) { + session.execute("UPDATE JpredArchive SET alignment = alignment + {'" + s.getId() + "':'" + + s.getSequence().replaceAll("\n", "") + "'} WHERE JobID = '" + jobid + "';"); } - - String com1 = "INSERT INTO ProteinKeyspace.JpredArchive " - + "(JobID, Protein, IP, StartTime, ExecTime, alignment, predictions, LOG))" - + " VALUES ('" - + jobid + "','" + protein + "','" + ip + "'," + starttime + "," + exectime - + "',[" + final_allpredictions + "],[" + final_alignment + "],'" + LogFile + "]);"; - session.execute(com1); + return 1; } + return 0; } - - /* * getting data from the db */ @@ -177,6 +224,132 @@ public class CassandraNativeConnector { } /* + * getting data from the db ProteinData + */ + public Integer ReadDateTable(long queryDate) { + final long startTime = System.currentTimeMillis(); + String com = "SELECT jobtime, JobID FROM ProteinKeyspace.ProteinData WHERE jobtime = " + queryDate + ";"; + System.out.println("Command: " + com); + ResultSet results = session.execute(com); + final long queryTime = System.currentTimeMillis(); + System.out.println("Query time is " + (queryTime - startTime) + " msec"); + if (results.isExhausted()) + return 0; + List rows = results.all(); + final long endTime = System.currentTimeMillis(); + System.out.println ("Processing time is " + (endTime - queryTime) + " msec"); + return rows.size(); + } + + /* + * getting whole protein sequence from the db ProteinRow + */ + public List ReadWholeSequence(String queryProtein) { + final long startTime = System.currentTimeMillis(); + String com = "SELECT JobID, Predictions FROM ProteinKeyspace.ProteinRow WHERE Protein = '" + queryProtein + "';"; + System.out.println("Command: " + com); + ResultSet results = session.execute(com); + if (results.isExhausted()) + return null; + final long queryTime = System.currentTimeMillis(); + List rows = results.all(); + System.out.println ("Query time is " + (queryTime - startTime) + " msec"); + System.out.println (" rows analysed, " + rows.size()); + List res = new ArrayList(); + int c = 0; + for (Row r : rows) { + StructureProteinPrediction structure = new StructureProteinPrediction(queryProtein, r.getString("JobID"), r.getMap("Predictions", String.class, String.class)); + res.add(structure); + ++c; + } + final long endTime = System.currentTimeMillis(); + System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); + return res; + } + + /* + * getting part of protein sequence from the db ProteinRow + */ + public List ReadPartOfSequence(String queryProtein) { + final long startTime = System.currentTimeMillis(); + String com = "SELECT * FROM ProteinKeyspace.ProteinRow;"; + System.out.println("Command: " + com); + ResultSet results = session.execute(com); + if (results.isExhausted()) + return null; + final long queryTime = System.currentTimeMillis(); + List rows = results.all(); + System.out.println ("Query time is " + (queryTime - startTime) + " msec"); + System.out.println (" rows analysed, " + rows.size()); + List res = new ArrayList(); + int c = 0; + for (Row r : rows) { + String prot = r.getString("Protein"); + if (prot.matches("(.*)" + queryProtein + "(.*)")) { + StructureProteinPrediction structure = new StructureProteinPrediction(prot, r.getString("JobID"), r.getMap("Predictions", String.class, String.class)); + res.add(structure); + ++c; + } + } + final long endTime = System.currentTimeMillis(); + System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); + return res; + } + + /* + * getting protein sequences by counter + */ + public Map ReadProteinDataByCounter() { + final long startTime = System.currentTimeMillis(); + String com = "SELECT Protein FROM ProteinKeyspace.ProteinRow;"; + System.out.println("Command: " + com); + ResultSet results = session.execute(com); + if (results.isExhausted()) + return null; + final long queryTime = System.currentTimeMillis(); + List rows = results.all(); + System.out.println ("Query time is " + (queryTime - startTime) + " msec"); + System.out.println (" rows analysed, " + rows.size()); + Map res = new HashMap(); + int c = 0; + for (Row r : rows) { + String protein = r.getString("Protein"); + if (res.containsKey(protein)) + res.put(protein, res.get(protein) + 1); + else + res.put(protein, 1); + } + final long endTime = System.currentTimeMillis(); + System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec"); + return res; + } + + /* + * getting protein sequences by counter + */ + public StructureJobLog ReadJobLog(String jobid) { + final long startTime = System.currentTimeMillis(); + String com = "SELECT * FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';"; + System.out.println("Command: " + com); + ResultSet results = session.execute(com); + if (results.isExhausted()) + return null; + final long queryTime = System.currentTimeMillis(); + Row row = results.one(); + String com1 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "' ALLOW FILTERING;"; + System.out.println("Command: " + com1); + ResultSet results1 = session.execute(com1); + if (results1.isExhausted()) + return null; + Row row1 = results1.one(); + StructureJobLog res = new StructureJobLog(row.getString("Protein"), row.getString("JobID"), row.getString("DataBegin"), row.getString("DataEnd"), row.getString("ip"), row1.getMap("Predictions", String.class, String.class)); + System.out.println ("Query time is " + (queryTime - startTime) + " msec"); + final long endTime = System.currentTimeMillis(); + System.out.println (" rows analysed, execution time is " + (endTime - startTime) + " msec"); + return res; + } + + /* * getting earlest date of jobs from the db */ public long getEarliestDateInDB() { diff --git a/datadb/compbio/cassandra/JpredParserHTTP.java b/datadb/compbio/cassandra/JpredParserHTTP.java index 51fc8e3..27f66cc 100644 --- a/datadb/compbio/cassandra/JpredParserHTTP.java +++ b/datadb/compbio/cassandra/JpredParserHTTP.java @@ -1,8 +1,11 @@ package compbio.cassandra; import java.io.BufferedReader; +import java.io.DataInputStream; +import java.io.EOFException; import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InputStream; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.MalformedURLException; @@ -20,6 +23,9 @@ import compbio.cassandra.JpredParser; public class JpredParserHTTP implements JpredParser { private CassandraNativeConnector cc = new CassandraNativeConnector(); private String dirprefix; + private List alignment; + private List predictions; + private String jnetpred; JpredParserHTTP() { dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results"; @@ -46,7 +52,55 @@ public class JpredParserHTTP implements JpredParser { } } - private int ParsingForDate(String input, String date) { + private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException { + final FastaReader fr = new FastaReader(stream); + String query = ""; + alignment = new ArrayList(); + predictions = new ArrayList(); + while (fr.hasNext()) { + final FastaSequence fs = fr.next(); + String seqid = fs.getId(); + String seq = fs.getSequence().replaceAll("\n", ""); + if (seqid.equals("QUERY") || seqid.equals(jobid)) { + query = seq; + alignment.add(fs); + } else if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28") + || seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF") + || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM")) { + predictions.add(fs); + if (seqid.equals("jnetpred")) + jnetpred = seq; + } else { + alignment.add(fs); + } + } + return query; + } + + private String parseLogFile(final InputStream stream) throws IOException { + String out = ""; + BufferedReader buffer = new BufferedReader(new InputStreamReader(stream)); + String line; + while (null != (line = buffer.readLine())) { + out += line; + } + return out; + } + + private List parseArchiveFile(final InputStream stream) throws IOException { + DataInputStream data_in = new DataInputStream(stream); + List out = new ArrayList(); + while (true) { + try { + out.add(data_in.readByte()); + } catch (EOFException eof) { + break; + } + } + return out; + } + + private void ParsingForDate(String input, String date) { int totalcount = 0; int countNoData = 0; int countUnclearFASTAid = 0; @@ -54,7 +108,6 @@ public class JpredParserHTTP implements JpredParser { int countinserted = 0; int counAlignments = 0; int countStrange = 0; - int njobs = 0; System.out.println("Inserting jobs for " + date); try { @@ -64,51 +117,64 @@ public class JpredParserHTTP implements JpredParser { String line; while ((line = alljobs.readLine()) != null) { - if (line.matches(date + "(.*)jp_[^\\s]+")) { + if (line.matches(date + ":(.*)jp_[^\\s]+")) { String[] table = line.split("\\s+"); // Format of a record: // starttime endtime ip email jobid (directory) - // 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172 unknown_email jp_J9HBCBT + // 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172 + // unknown_email jp_J9HBCBT String id = table[table.length - 1]; totalcount++; - String datalink = dirprefix + "/" + id + "/" + id + ".concise.fasta"; - URL urltable = new URL(datalink); - HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection(); - int responsecode = httpConnection.getResponseCode(); - if (199 < responsecode && responsecode < 300) { - try { - final FastaReader fr = new FastaReader(urltable.openStream()); - final List seqs = new ArrayList(); - String newprotein = ""; - while (fr.hasNext()) { - final FastaSequence fs = fr.next(); - if (fs.getId().equals("QUERY") || fs.getId().equals(id)) - newprotein = fs.getSequence().replaceAll("\n", ""); - else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) { - seqs.add(fs); - } - } - if (newprotein.equals("")) { - countUnclearFASTAid++; - } else { - SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); - String dateInString1 = table[0].substring(0, table[0].indexOf(":")); - long dateWork1 = 0; - try { - Date dat1 = formatter.parse(dateInString1); - dateWork1 = dat1.getTime(); - } catch (ParseException e) { - e.printStackTrace(); + if (cc.JobisNotInsterted(id)) { + URL dataurl = new URL(dirprefix + "/" + id + "/" + id + ".concise.fasta"); + URL archiveurl = new URL(dirprefix + "/" + id + "/" + id + ".tar.gz"); + URL logurl = new URL(dirprefix + "/" + id + "/LOG"); + HttpURLConnection httpConnection1 = (HttpURLConnection) dataurl.openConnection(); + HttpURLConnection httpConnection2 = (HttpURLConnection) logurl.openConnection(); + HttpURLConnection httpConnection3 = (HttpURLConnection) archiveurl.openConnection(); + int response1 = httpConnection1.getResponseCode(); + int response2 = httpConnection2.getResponseCode(); + if (199 < response1 && response1 < 300) { + try { + String protein = parsePredictions(dataurl.openStream(), id); + if (protein.equals("")) { + countUnclearFASTAid++; + } else { + SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd"); + SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s"); + String startdatestring = table[0].substring(0, table[0].indexOf(":")); + try { + Date startdate = dateformatter.parse(startdatestring); + Date starttime = timeformatter.parse(table[0]); + Date endtime = timeformatter.parse(table[1]); + String ip = table[2]; + String execstatus = "OK"; + String finalstatus = "OK"; + countinsertions += cc.FormQueryTables(startdate.getTime(), table[0], table[1], ip, id, execstatus, + finalstatus, protein, predictions); + + long exectime = (endtime.getTime() - starttime.getTime()) / 1000; + String log = ""; + if (199 < response2 && response2 < 300) { + log = parseLogFile(logurl.openStream()); + } + cc.ArchiveData(startdate.getTime(), exectime, ip, id, execstatus, finalstatus, protein, + predictions, alignment, log, archiveurl.toString()); + } catch (ParseException e) { + e.printStackTrace(); + } } - cc.FormQueryTables(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs); - ++countinsertions; - ++njobs; + } catch (IOException e) { + e.printStackTrace(); } - } catch (IOException e) { - e.printStackTrace(); + } else { + countNoData++; } + httpConnection1.disconnect(); + httpConnection2.disconnect(); + httpConnection3.disconnect(); } else { - countNoData++; + ++countinserted; } } else { if (line.matches(date + "(.*)Sequence0/(.*)")) { @@ -131,6 +197,6 @@ public class JpredParserHTTP implements JpredParser { } catch (IOException e) { e.printStackTrace(); } - return njobs; + ; } } diff --git a/datadb/compbio/cassandra/JpredParserLocalFile.java b/datadb/compbio/cassandra/JpredParserLocalFile.java index 54e50ab..4b254ae 100644 --- a/datadb/compbio/cassandra/JpredParserLocalFile.java +++ b/datadb/compbio/cassandra/JpredParserLocalFile.java @@ -58,7 +58,7 @@ public class JpredParserLocalFile implements JpredParser { System.out.println("Execution Time = " + execTime + " ms"); } - private int ParsingForDate(List input, String date) { + private void ParsingForDate(List input, String date) { int totalcount = 0; int countNoData = 0; int countUnclearFASTAid = 0; @@ -66,11 +66,10 @@ public class JpredParserLocalFile implements JpredParser { int countinserted = 0; int counAlignments = 0; int countStrange = 0; - int njobs = 0; System.out.println("Inserting jobs for " + date); for (String in : input) { - if (in.matches(date + "(.*)jp_[^\\s]+")) { + if (in.matches(date + ":(.*)jp_[^\\s]+")) { String[] table = in.split("\\s+"); String starttime = table[0]; String finishtime = table[1]; @@ -104,9 +103,7 @@ public class JpredParserLocalFile implements JpredParser { } catch (ParseException e) { e.printStackTrace(); } - cc.FormQueryTables(insertdate, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs); - ++countinsertions; - ++njobs; + countinsertions += cc.FormQueryTables(insertdate, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs); } fr.close(); } catch (IOException e) { @@ -132,7 +129,6 @@ public class JpredParserLocalFile implements JpredParser { System.out.println(" " + countUnclearFASTAid + " jobs with unclear FASTA protein id in *.concise.fasta"); System.out.println(" " + countinsertions + " new job insertions\n"); } - return njobs; } }