Improved cassandra support
author Sasha Sherstnev <a.sherstnev@dundee.ac.uk>
Fri, 8 Nov 2013 12:11:18 +0000 (12:11 +0000)
committer Sasha Sherstnev <a.sherstnev@dundee.ac.uk>
Fri, 8 Nov 2013 12:11:18 +0000 (12:11 +0000)
datadb/compbio/cassandra/CassandraNativeConnector.java
datadb/compbio/cassandra/JpredParserHTTP.java
datadb/compbio/cassandra/JpredParserLocalFile.java

index 7109c78..d87f89e 100644 (file)
@@ -2,8 +2,12 @@ package compbio.cassandra;
 
 import java.io.IOException;
 import java.util.Calendar;
+import java.util.HashMap;
 import java.util.List;
 import java.util.ArrayList;
+import java.util.Map;
+
+import org.apache.log4j.Logger;
 
 import com.datastax.driver.core.Cluster;
 import com.datastax.driver.core.Host;
@@ -11,54 +15,90 @@ import com.datastax.driver.core.Metadata;
 import com.datastax.driver.core.Row;
 import com.datastax.driver.core.Session;
 import com.datastax.driver.core.ResultSet;
+import com.datastax.driver.core.PreparedStatement;
+import com.datastax.driver.core.BoundStatement;
+
+import compbio.engine.ProteoCachePropertyHelperManager;
+import compbio.util.PropertyHelper;
+import compbio.util.Util;
 
 public class CassandraNativeConnector {
        private static Cluster cluster;
        private static Session session;
+       private static final PropertyHelper ph = ProteoCachePropertyHelperManager.getPropertyHelper(); // source of the "cassandra.*" settings read in Connect()
+       private static Logger log = Logger.getLogger(CassandraNativeConnector.class);
+
+       public static String CASSANDRA_HOSTNAME = "localhost"; // contact point; Connect() overrides it with "cassandra.host" when that property is set
+       public static boolean READ_WEB_JPRED = false; // set by Connect() from "cassandra.jpred.web"; gates the web branch of Parsing()
+       public static boolean READ_LOCALFILE_JPRED = false; // set by Connect() from "cassandra.jpred.local"; gates the local-file branch of Parsing()
+
+       /**
+        * Reads a boolean flag from the property helper.
+        *
+        * @param key
+        *            name of the property to look up; must not be null
+        * @return true only when the property has a value whose trimmed text is
+        *         "true" (case is ignored); false when Util.isEmpty() reports the
+        *         value as empty or when it holds any other text
+        */
+       private static boolean initBooleanValue(String key) {
+               assert key != null;
+               String status = ph.getProperty(key);
+               log.debug("Loading property: " + key + " with value: " + status);
+               if (Util.isEmpty(status)) {
+                       return false;
+               }
+               // Boolean.parseBoolean gives the same answer as the deprecated
+               // Boolean(String) constructor without allocating a wrapper object
+               return Boolean.parseBoolean(status.trim());
+       }
 
        /*
-        * connect to the cluster and look weather the dababase has any data inside
+        * connect to the cluster and look whether all tables exist
+        *
+        * Reads "cassandra.host", "cassandra.jpred.web" and
+        * "cassandra.jpred.local" from the property helper into the static
+        * fields, builds the cluster with CASSANDRA_HOSTNAME as contact point,
+        * prints the cluster name and its hosts, opens the session and then
+        * calls CreateTables().
         */
        public void Connect() {
-               // local cassandra cluster
-               cluster = Cluster.builder().addContactPoint("localhost").build();
-               // distributed cassandra cluster
-               /* cluster = Cluster.builder().addContactPoint("10.0.115.190").build(); */
+
+               String cassandrahostname = ph.getProperty("cassandra.host");
+               if (null != cassandrahostname) { // keep the current CASSANDRA_HOSTNAME value when the property is absent
+                       CASSANDRA_HOSTNAME = cassandrahostname;
+               }
+               READ_WEB_JPRED = initBooleanValue("cassandra.jpred.web");
+               READ_LOCALFILE_JPRED = initBooleanValue("cassandra.jpred.local");
+
+               cluster = Cluster.builder().addContactPoint(CASSANDRA_HOSTNAME).build();
+
                Metadata metadata = cluster.getMetadata();
                System.out.printf("Connected to cluster: %s\n", metadata.getClusterName());
                for (Host host : metadata.getAllHosts()) {
                        System.out.printf("Datatacenter: %s; Host: %s; Rack: %s\n", host.getDatacenter(), host.getAddress(), host.getRack());
                }
-
                session = cluster.connect();
+               CreateTables(); // needs the session opened on the previous line
+               System.out.println("Cassandra connected");
+       }
+
+       private void CreateTables() { // creates the keyspace, the four tables and the two indexes unless they already exist
                session.execute("CREATE KEYSPACE IF NOT EXISTS ProteinKeyspace WITH replication = {'class':'SimpleStrategy', 'replication_factor':3};");
-               session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinRow (Protein ascii, JobID ascii, Predictions map<ascii,ascii>, PRIMARY KEY(JobID));");
-               session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinLog "
-                               + "(JobID ascii, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, ExecutionStatus ascii, Protein ascii, PRIMARY KEY(JobID));");
-               session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinData (jobtime bigint, JobID ascii, Protein ascii, PRIMARY KEY(JobID));");
-               session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.JpredArchive " + 
-               "(JobID ascii, Protein varchar, IP ascii, StartTime bigint, ExecTime int, alignment map<ascii,ascii>, predictions map<ascii,ascii>, archive blob, LOG varchar, PRIMARY KEY(JobID));");
+               session.execute("USE ProteinKeyspace"); // the statements below name their tables without the keyspace prefix
 
-               session.execute("CREATE INDEX IF NOT EXISTS ProteinSeq ON ProteinKeyspace.ProteinRow (protein);");
-               session.execute("CREATE INDEX IF NOT EXISTS JobDateStamp ON ProteinKeyspace.ProteinData (jobtime);");
+               session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinRow "
+                               + "(Protein ascii, JobID ascii, Predictions map<ascii,ascii>, PRIMARY KEY(JobID));");
+               session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinLog "
+                               + "(JobID ascii, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, "
+                               + "ExecutionStatus ascii, Protein ascii, PRIMARY KEY(JobID));");
+               session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinData "
+                               + "(jobtime bigint, JobID ascii, Protein ascii, PRIMARY KEY(JobID));");
+               session.execute("CREATE COLUMNFAMILY IF NOT EXISTS JpredArchive "
+                               + "(JobID ascii, Protein varchar, IP ascii, StartTime bigint, ExecTime int, alignment map<ascii,ascii>, "
+                               + "predictions map<ascii,ascii>, archive blob, LOG varchar, PRIMARY KEY(JobID));"); // NOTE(review): ExecTime is int here while ArchiveData() takes a long exectime - confirm the value always fits
 
-               System.out.println("Cassandra connected");
+               session.execute("CREATE INDEX IF NOT EXISTS ProteinSeq ON ProteinRow (protein);"); // index on the Protein column, which ReadWholeSequence() filters on
+               session.execute("CREATE INDEX IF NOT EXISTS JobDateStamp ON ProteinData (jobtime);"); // index on the jobtime column, which ReadDateTable() filters on
        }
 
        /*
         * parsing data source and filling the database
         */
        public void Parsing() throws IOException {
-               if (true) {
+               if (READ_WEB_JPRED) {
                        // if (source.equals("http")) {
                        // get data from real Jpred production server
                        System.out.println("Parsing web data source......");
                        String datasrc = "http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat";
                        String prefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
                        JpredParserHTTP parser = new JpredParserHTTP(prefix);
-                       parser.Parsing(datasrc, 4);
+                       parser.Parsing(datasrc, 5);
                }
-               if (false) {
+               if (READ_LOCALFILE_JPRED) {
                        // if (source.equals("file")) {
                        // get irtifical data generated for the DB stress tests
                        System.out.println("Parsing local file data source......");
@@ -75,22 +115,35 @@ public class CassandraNativeConnector {
                System.out.println("Cassandra has been shut down");
        }
 
-       /*
-        * inserting data into the db
-        */
-       public void FormQueryTables(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx, String statusFinal,
-                       String protein, List<FastaSequence> predictions) {
+       /**
+        * Checks whether a job is still missing from the ProteinLog table.
+        *
+        * @param jobid
+        *            Jpred job identifier to look up; must not be null
+        * @return true if ProteinLog holds no row with this JobID
+        */
+       public boolean JobisNotInsterted(String jobid) {
+               // the job id is bound as a parameter instead of being spliced into the
+               // CQL text, so a quote inside the id cannot break or alter the query
+               PreparedStatement statement = session.prepare("SELECT JobID FROM ProteinLog WHERE JobID = ?;");
+               BoundStatement boundStatement = new BoundStatement(statement);
+               ResultSet results1 = session.execute(boundStatement.bind(jobid));
+               return results1.isExhausted();
+       }
 
-               String check1 = "SELECT * FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';";
-               ResultSet results1 = session.execute(check1);
+       /**
+        * Checks whether a job is still missing from the JpredArchive table.
+        *
+        * @param jobid
+        *            Jpred job identifier to look up; must not be null
+        * @return true if JpredArchive holds no row with this JobID
+        */
+       public boolean JobisNotArchived(String jobid) {
+               // the job id is bound as a parameter instead of being spliced into the
+               // CQL text, so a quote inside the id cannot break or alter the query
+               PreparedStatement statement = session.prepare("SELECT JobID FROM JpredArchive WHERE JobID = ?;");
+               ResultSet results1 = session.execute(new BoundStatement(statement).bind(jobid));
                if (results1.isExhausted()) {
-                       String com1 = "INSERT INTO ProteinKeyspace.ProteinLog "
-                                       + "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)" + " VALUES ('" + jobid + "','" + ip + "','"
-                                       + startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx + "','" + protein + "');";
-                       session.execute(com1);
+                       return true;
+               }
+               return false;
+       }
 
-                       String com2 = "INSERT INTO ProteinKeyspace.ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid
+       /*
+        * inserting data into the tables for queries
+        */
+       public int FormQueryTables(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx,
+                       String statusFinal, String protein, List<FastaSequence> predictions) {
+               if (JobisNotInsterted(jobid)) {
+                       String com1 = "INSERT INTO ProteinLog " + "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)"
+                                       + " VALUES ('" + jobid + "','" + ip + "','" + startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx
                                        + "','" + protein + "');";
+                       session.execute(com1);
+
+                       String com2 = "INSERT INTO ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid + "','" + protein
+                                       + "');";
                        session.execute(com2);
 
                        String allpredictions = "";
@@ -104,54 +157,48 @@ public class CassandraNativeConnector {
                                final_prediction = allpredictions.substring(0, allpredictions.length() - 1);
                        }
 
-                       String check2 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "';";
+                       String check2 = "SELECT * FROM ProteinRow WHERE JobID = '" + jobid + "';";
                        ResultSet results2 = session.execute(check2);
                        if (results2.isExhausted()) {
-                               String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('" + protein + "','"
-                                               + jobid + "',{" + final_prediction + "});";
+                               String com3 = "INSERT INTO ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('" + protein + "','" + jobid + "',{"
+                                               + final_prediction + "});";
                                session.execute(com3);
                        }
+                       return 1;
                }
+               return 0;
        }
 
-       public void ArchiveData(long starttime, int exectime, String ip, String jobid, String statusEx, String statusFinal,
-                       String protein, List<FastaSequence> predictions, List<FastaSequence> seqs, String LogFile) {
-
-               String check1 = "SELECT * FROM ProteinKeyspace.JpredArchive WHERE JobID = '" + jobid + "';";
-               ResultSet results1 = session.execute(check1);
-               if (results1.isExhausted()) {
-                       String allpredictions = "";
-                       for (FastaSequence pred : predictions) {
-                               String predictionname = pred.getId();
-                               String prediction = pred.getSequence().replaceAll("\n", "");
-                               allpredictions += "'" + predictionname + "':'" + prediction + "',";
-                       }
-                       String final_allpredictions = "";
-                       if (null != allpredictions) {
-                               final_allpredictions = allpredictions.substring(0, allpredictions.length() - 1);
+       /*
+        * insert data from a real Jpred job: timing+IP, Execution Status, Final
+        * status, protein sequence, predictions, alignment, LOG and tar.gz files
+        *
+        * Returns 1 when the job was written and 0 when JobisNotArchived()
+        * reports that the job id is already present in JpredArchive. The base
+        * row is inserted first; each prediction and each alignment sequence is
+        * then added to its map column by a separate UPDATE. statusEx and
+        * statusFinal are accepted but not used in this body, and archivepath
+        * is only referenced inside the disabled "if (false)" branch.
+        */
+       public int ArchiveData(long starttime, long exectime, String ip, String jobid, String statusEx, String statusFinal, String protein,
+                       List<FastaSequence> predictions, List<FastaSequence> seqs, String LogFile, String archivepath) {
+               if (JobisNotArchived(jobid)) {
+                       String log = LogFile.replaceAll("'", ""); // drop single quotes so the LOG text cannot end the CQL string literal; this local shadows the class logger
+                       session.execute("INSERT INTO JpredArchive (JobID, Protein, IP, StartTime, ExecTime,LOG) VALUES ('" + jobid + "','" + protein
+                                       + "','" + ip + "'," + starttime + "," + exectime + ",'" + log + "');");
+                       if (false) { // disabled: nothing is written to the archive column at the moment
+                               PreparedStatement statement = session.prepare("INSERT INTO JpredArchive (JobID, archive) VALUES (?,?);");
+                               BoundStatement boundStatement = new BoundStatement(statement);
+                               session.execute(boundStatement.bind(jobid, archivepath));
                        }
-                       String alignment = "";
-                       for (FastaSequence seq : seqs) {
-                               String predictionname = seq.getId();
-                               String prediction = seq.getSequence().replaceAll("\n", "");
-                               alignment += "'" + predictionname + "':'" + prediction + "',";
+
+                       for (FastaSequence p : predictions) { // one UPDATE per prediction adds a single id -> sequence entry to the predictions map
+                               session.execute("UPDATE JpredArchive SET predictions = predictions + {'" + p.getId() + "':'"
+                                               + p.getSequence().replaceAll("\n", "") + "'} WHERE JobID = '" + jobid + "';");
                        }
-                       String final_alignment = "";
-                       if (null != allpredictions) {
-                               final_alignment = alignment.substring(0, allpredictions.length() - 1);
+
+                       for (FastaSequence s : seqs) { // same scheme for the alignment map
+                               session.execute("UPDATE JpredArchive SET alignment = alignment + {'" + s.getId() + "':'"
+                                               + s.getSequence().replaceAll("\n", "") + "'} WHERE JobID = '" + jobid + "';");
                        }
-                       
-                       String com1 = "INSERT INTO ProteinKeyspace.JpredArchive "
-                                       + "(JobID, Protein, IP, StartTime, ExecTime, alignment, predictions, LOG))"
-                                       + " VALUES ('" 
-                                       + jobid + "','" + protein + "','" + ip + "'," + starttime + "," + exectime
-                                       + "',[" + final_allpredictions + "],[" + final_alignment + "],'" + LogFile + "]);";
-                       session.execute(com1);
+                       return 1;
                }
+               return 0;
        }
 
-       
-       
        /*
         * getting data from the db
         */
@@ -177,6 +224,132 @@ public class CassandraNativeConnector {
        }
 
        /*
+        * getting data from the db ProteinData
+        */
+       public Integer ReadDateTable(long queryDate) {
+               final long startTime = System.currentTimeMillis();
+               String com = "SELECT jobtime, JobID FROM ProteinKeyspace.ProteinData WHERE jobtime = " + queryDate + ";";
+               System.out.println("Command: " + com);
+               ResultSet results = session.execute(com);
+               final long queryTime = System.currentTimeMillis();
+               System.out.println("Query time is " + (queryTime - startTime) + " msec");
+               if (results.isExhausted())
+                       return 0;
+               List<Row> rows = results.all();
+               final long endTime = System.currentTimeMillis();
+               System.out.println ("Processing time is " + (endTime - queryTime) + " msec");        
+               return rows.size();
+       }
+
+       /*
+        * looks up every ProteinRow entry whose Protein column equals the query
+        * sequence and wraps each hit into a StructureProteinPrediction;
+        * returns null when the query yields no rows
+        */
+       public List<StructureProteinPrediction> ReadWholeSequence(String queryProtein) {
+               final long begin = System.currentTimeMillis();
+               final String cql = "SELECT JobID, Predictions FROM ProteinKeyspace.ProteinRow WHERE Protein = '" + queryProtein + "';";
+               System.out.println("Command: " + cql);
+               final ResultSet found = session.execute(cql);
+               if (found.isExhausted()) {
+                       return null;
+               }
+               final long afterQuery = System.currentTimeMillis();
+               final List<Row> hits = found.all();
+               System.out.println("Query time is " + (afterQuery - begin) + " msec");
+               System.out.println(" rows analysed,  " + hits.size());
+               final List<StructureProteinPrediction> structures = new ArrayList<StructureProteinPrediction>();
+               for (Row hit : hits) {
+                       final String jobid = hit.getString("JobID");
+                       final Map<String, String> preds = hit.getMap("Predictions", String.class, String.class);
+                       structures.add(new StructureProteinPrediction(queryProtein, jobid, preds));
+               }
+               final long finished = System.currentTimeMillis();
+               // every fetched row yields exactly one structure, so the list size is the row count
+               System.out.println(structures.size() + " rows analysed, execution time is " + (finished - begin) + " msec");
+               return structures;
+       }
+
+       /*
+        * getting part of protein sequence from the db ProteinRow
+        *
+        * Fetches every row of ProteinRow and keeps those whose Protein value
+        * matches the pattern "(.*)" + queryProtein + "(.*)"; queryProtein is
+        * therefore interpreted as a regular expression, not as a literal
+        * substring. Rows without a Protein value are skipped. Returns null when
+        * the table holds no rows.
+        */
+       public List<StructureProteinPrediction> ReadPartOfSequence(String queryProtein) {
+               final long startTime = System.currentTimeMillis();
+               String com = "SELECT * FROM ProteinKeyspace.ProteinRow;";
+               System.out.println("Command: " + com);
+               ResultSet results = session.execute(com);
+               if (results.isExhausted())
+                       return null;
+               final long queryTime = System.currentTimeMillis();
+               List<Row> rows = results.all();
+               System.out.println("Query time is " + (queryTime - startTime) + " msec");
+               System.out.println(" rows analysed,  " + rows.size());
+               // compile the pattern once; String.matches() would recompile it for every row
+               final java.util.regex.Pattern wanted = java.util.regex.Pattern.compile("(.*)" + queryProtein + "(.*)");
+               List<StructureProteinPrediction> res = new ArrayList<StructureProteinPrediction>();
+               for (Row r : rows) {
+                       String prot = r.getString("Protein");
+                       // a row without a Protein value cannot match; skip it instead of
+                       // failing with a NullPointerException
+                       if (null != prot && wanted.matcher(prot).matches()) {
+                               StructureProteinPrediction structure = new StructureProteinPrediction(prot, r.getString("JobID"), r.getMap("Predictions", String.class, String.class));
+                               res.add(structure);
+                       }
+               }
+               final long endTime = System.currentTimeMillis();
+               System.out.println(res.size() + " rows analysed, execution time is " + (endTime - startTime) + " msec");
+               return res;
+       }
+
+       /*
+        * getting protein sequences by counter
+        *
+        * Fetches the Protein column of every ProteinRow entry and returns a map
+        * from each distinct sequence to the number of rows that carry it.
+        * Returns null when the table holds no rows.
+        */
+       public Map<String, Integer> ReadProteinDataByCounter() {
+               final long startTime = System.currentTimeMillis();
+               String com = "SELECT Protein FROM ProteinKeyspace.ProteinRow;";
+               System.out.println("Command: " + com);
+               ResultSet results = session.execute(com);
+               if (results.isExhausted())
+                       return null;
+               final long queryTime = System.currentTimeMillis();
+               List<Row> rows = results.all();
+               System.out.println("Query time is " + (queryTime - startTime) + " msec");
+               System.out.println(" rows analysed,  " + rows.size());
+               Map<String, Integer> res = new HashMap<String, Integer>();
+               int c = 0;
+               for (Row r : rows) {
+                       String protein = r.getString("Protein");
+                       Integer seen = res.get(protein);
+                       res.put(protein, null == seen ? 1 : seen + 1);
+                       // count the processed rows; the counter used to stay at 0, so the
+                       // summary line below always reported "0 rows analysed"
+                       ++c;
+               }
+               final long endTime = System.currentTimeMillis();
+               System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
+               return res;
+       }
+
+       /*
+        * getting the log record of a single job together with its predictions
+        *
+        * Reads the job's row from ProteinLog and its row from ProteinRow and
+        * combines them into a StructureJobLog. Returns null when the job id is
+        * missing from either table.
+        */
+       public StructureJobLog ReadJobLog(String jobid) {
+               final long begin = System.currentTimeMillis();
+               final String logQuery = "SELECT * FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';";
+               System.out.println("Command: " + logQuery);
+               final ResultSet logResults = session.execute(logQuery);
+               if (logResults.isExhausted()) {
+                       return null;
+               }
+               final long afterQuery = System.currentTimeMillis();
+               final Row logRow = logResults.one();
+               final String rowQuery = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "' ALLOW FILTERING;";
+               System.out.println("Command: " + rowQuery);
+               final ResultSet rowResults = session.execute(rowQuery);
+               if (rowResults.isExhausted()) {
+                       return null;
+               }
+               final Row predictionRow = rowResults.one();
+               final String protein = logRow.getString("Protein");
+               final String id = logRow.getString("JobID");
+               final String dataBegin = logRow.getString("DataBegin");
+               final String dataEnd = logRow.getString("DataEnd");
+               final String ip = logRow.getString("ip");
+               final Map<String, String> preds = predictionRow.getMap("Predictions", String.class, String.class);
+               final StructureJobLog joblog = new StructureJobLog(protein, id, dataBegin, dataEnd, ip, preds);
+               System.out.println("Query time is " + (afterQuery - begin) + " msec");
+               final long finished = System.currentTimeMillis();
+               System.out.println(" rows analysed, execution time is " + (finished - begin) + " msec");
+               return joblog;
+       }
+
+       /*
         * getting earlest date of jobs from the db
         */
        public long getEarliestDateInDB() {
index 51fc8e3..27f66cc 100644 (file)
@@ -1,8 +1,11 @@
 package compbio.cassandra;
 
 import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.EOFException;
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.net.HttpURLConnection;
 import java.net.MalformedURLException;
@@ -20,6 +23,9 @@ import compbio.cassandra.JpredParser;
 public class JpredParserHTTP implements JpredParser {
        private CassandraNativeConnector cc = new CassandraNativeConnector();
        private String dirprefix;
+       private List<FastaSequence> alignment;
+       private List<FastaSequence> predictions;
+       private String jnetpred;
 
        JpredParserHTTP() {
                dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
@@ -46,7 +52,55 @@ public class JpredParserHTTP implements JpredParser {
                }
        }
 
-       private int ParsingForDate(String input, String date) {
+       /*
+        * Reads a FASTA stream and sorts its records into the alignment and
+        * predictions fields, both of which are replaced by fresh lists. A record
+        * named "QUERY" or named like the job id supplies the returned query
+        * sequence and is stored with the alignment; records carrying one of the
+        * known prediction names go to predictions (the "jnetpred" sequence is
+        * also kept in the jnetpred field); every other record is stored with the
+        * alignment. Returns "" when no query record was found.
+        */
+       private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException {
+               final FastaReader reader = new FastaReader(stream);
+               String query = "";
+               alignment = new ArrayList<FastaSequence>();
+               predictions = new ArrayList<FastaSequence>();
+               while (reader.hasNext()) {
+                       final FastaSequence entry = reader.next();
+                       final String name = entry.getId();
+                       final String residues = entry.getSequence().replaceAll("\n", "");
+                       if (name.equals("QUERY") || name.equals(jobid)) {
+                               query = residues;
+                               alignment.add(entry);
+                               continue;
+                       }
+                       final boolean isPrediction = name.equals("jnetpred") || name.equals("Lupas_21") || name.equals("Lupas_14")
+                                       || name.equals("Lupas_28") || name.equals("JNETSOL25") || name.equals("JNETSOL5")
+                                       || name.equals("JNETSOL0") || name.equals("JNETCONF") || name.equals("JNETHMM")
+                                       || name.equals("JNETPSSM");
+                       if (!isPrediction) {
+                               alignment.add(entry);
+                               continue;
+                       }
+                       predictions.add(entry);
+                       if (name.equals("jnetpred")) {
+                               jnetpred = residues;
+                       }
+               }
+               return query;
+       }
+
+       /**
+        * Reads a job LOG stream into a single string.
+        *
+        * Lines are concatenated without any separator, so the line breaks of the
+        * LOG file are not preserved in the result. The stream is closed before
+        * the method returns, also when reading fails.
+        *
+        * @param stream
+        *            stream positioned at the start of the LOG text
+        * @return all lines of the stream joined together; "" for an empty stream
+        * @throws IOException
+        *             if reading from the stream fails
+        */
+       private String parseLogFile(final InputStream stream) throws IOException {
+               // StringBuilder avoids re-copying the whole text for every appended line
+               StringBuilder out = new StringBuilder();
+               BufferedReader buffer = new BufferedReader(new InputStreamReader(stream));
+               try {
+                       String line;
+                       while (null != (line = buffer.readLine())) {
+                               out.append(line);
+                       }
+               } finally {
+                       // the caller passes the result of URL.openStream() and keeps no
+                       // reference to it, so nobody else can close it
+                       buffer.close();
+               }
+               return out.toString();
+       }
+
+       /**
+        * Reads a stream to its end and returns its content byte by byte.
+        *
+        * @param stream
+        *            stream to drain; it is not closed by this method
+        * @return every byte of the stream in reading order; an empty list for an
+        *         empty stream
+        * @throws IOException
+        *             if reading from the stream fails
+        */
+       private List<Byte> parseArchiveFile(final InputStream stream) throws IOException {
+               List<Byte> out = new ArrayList<Byte>();
+               // read in chunks: read() returns -1 at the end of the stream, so no
+               // EOFException is needed to leave the loop and the stream is not hit
+               // once per byte
+               byte[] chunk = new byte[8192];
+               int nread;
+               while (-1 != (nread = stream.read(chunk))) {
+                       for (int i = 0; i < nread; i++) {
+                               out.add(chunk[i]);
+                       }
+               }
+               return out;
+       }
+
+       private void ParsingForDate(String input, String date) {
                int totalcount = 0;
                int countNoData = 0;
                int countUnclearFASTAid = 0;
@@ -54,7 +108,6 @@ public class JpredParserHTTP implements JpredParser {
                int countinserted = 0;
                int counAlignments = 0;
                int countStrange = 0;
-               int njobs = 0;
 
                System.out.println("Inserting jobs for " + date);
                try {
@@ -64,51 +117,64 @@ public class JpredParserHTTP implements JpredParser {
                        String line;
 
                        while ((line = alljobs.readLine()) != null) {
-                               if (line.matches(date + "(.*)jp_[^\\s]+")) {
+                               if (line.matches(date + ":(.*)jp_[^\\s]+")) {
                                        String[] table = line.split("\\s+");
                                        // Format of a record:
                                        // starttime endtime ip email jobid (directory)
-                                       // 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172 unknown_email jp_J9HBCBT
+                                       // 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172
+                                       // unknown_email jp_J9HBCBT
                                        String id = table[table.length - 1];
                                        totalcount++;
-                                       String datalink = dirprefix + "/" + id + "/" + id + ".concise.fasta";
-                                       URL urltable = new URL(datalink);
-                                       HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection();
-                                       int responsecode = httpConnection.getResponseCode();
-                                       if (199 < responsecode && responsecode < 300) {
-                                               try {
-                                                       final FastaReader fr = new FastaReader(urltable.openStream());
-                                                       final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
-                                                       String newprotein = "";
-                                                       while (fr.hasNext()) {
-                                                               final FastaSequence fs = fr.next();
-                                                               if (fs.getId().equals("QUERY") || fs.getId().equals(id))
-                                                                       newprotein = fs.getSequence().replaceAll("\n", "");
-                                                               else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) {
-                                                                       seqs.add(fs);
-                                                               }
-                                                       }
-                                                       if (newprotein.equals("")) {
-                                                               countUnclearFASTAid++;
-                                                       } else {
-                                                               SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
-                                                               String dateInString1 = table[0].substring(0, table[0].indexOf(":"));
-                                                               long dateWork1 = 0;
-                                                               try {
-                                                                       Date dat1 = formatter.parse(dateInString1);
-                                                                       dateWork1 = dat1.getTime();
-                                                               } catch (ParseException e) {
-                                                                       e.printStackTrace();
+                                       if (cc.JobisNotInsterted(id)) {
+                                               URL dataurl = new URL(dirprefix + "/" + id + "/" + id + ".concise.fasta");
+                                               URL archiveurl = new URL(dirprefix + "/" + id + "/" + id + ".tar.gz");
+                                               URL logurl = new URL(dirprefix + "/" + id + "/LOG");
+                                               HttpURLConnection httpConnection1 = (HttpURLConnection) dataurl.openConnection();
+                                               HttpURLConnection httpConnection2 = (HttpURLConnection) logurl.openConnection();
+                                               HttpURLConnection httpConnection3 = (HttpURLConnection) archiveurl.openConnection();
+                                               int response1 = httpConnection1.getResponseCode();
+                                               int response2 = httpConnection2.getResponseCode();
+                                               if (199 < response1 && response1 < 300) {
+                                                       try {
+                                                               String protein = parsePredictions(dataurl.openStream(), id);
+                                                               if (protein.equals("")) {
+                                                                       countUnclearFASTAid++;
+                                                               } else {
+                                                                       SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
+                                                                       SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
+                                                                       String startdatestring = table[0].substring(0, table[0].indexOf(":"));
+                                                                       try {
+                                                                               Date startdate = dateformatter.parse(startdatestring);
+                                                                               Date starttime = timeformatter.parse(table[0]);
+                                                                               Date endtime = timeformatter.parse(table[1]);
+                                                                               String ip = table[2];
+                                                                               String execstatus = "OK";
+                                                                               String finalstatus = "OK";
+                                                                               countinsertions += cc.FormQueryTables(startdate.getTime(), table[0], table[1], ip, id, execstatus,
+                                                                                               finalstatus, protein, predictions);
+
+                                                                               long exectime = (endtime.getTime() - starttime.getTime()) / 1000;
+                                                                               String log = "";
+                                                                               if (199 < response2 && response2 < 300) {
+                                                                                       log = parseLogFile(logurl.openStream());
+                                                                               }
+                                                                               cc.ArchiveData(startdate.getTime(), exectime, ip, id, execstatus, finalstatus, protein,
+                                                                                               predictions, alignment, log, archiveurl.toString());
+                                                                       } catch (ParseException e) {
+                                                                               e.printStackTrace();
+                                                                       }
                                                                }
-                                                               cc.FormQueryTables(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs);
-                                                               ++countinsertions;
-                                                               ++njobs;
+                                                       } catch (IOException e) {
+                                                               e.printStackTrace();
                                                        }
-                                               } catch (IOException e) {
-                                                       e.printStackTrace();
+                                               } else {
+                                                       countNoData++;
                                                }
+                                               httpConnection1.disconnect();
+                                               httpConnection2.disconnect();
+                                               httpConnection3.disconnect();
                                        } else {
-                                               countNoData++;
+                                               ++countinserted;
                                        }
                                } else {
                                        if (line.matches(date + "(.*)Sequence0/(.*)")) {
@@ -131,6 +197,6 @@ public class JpredParserHTTP implements JpredParser {
                } catch (IOException e) {
                        e.printStackTrace();
                }
-               return njobs;
+               ;
        }
 }
index 54e50ab..4b254ae 100644 (file)
@@ -58,7 +58,7 @@ public class JpredParserLocalFile implements JpredParser {
                System.out.println("Execution Time = " + execTime + " ms");
        }
 
-       private int ParsingForDate(List<String> input, String date) {
+       private void ParsingForDate(List<String> input, String date) {
                int totalcount = 0;
                int countNoData = 0;
                int countUnclearFASTAid = 0;
@@ -66,11 +66,10 @@ public class JpredParserLocalFile implements JpredParser {
                int countinserted = 0;
                int counAlignments = 0;
                int countStrange = 0;
-               int njobs = 0;
 
                System.out.println("Inserting jobs for " + date);
                for (String in : input) {
-                       if (in.matches(date + "(.*)jp_[^\\s]+")) {
+                       if (in.matches(date + ":(.*)jp_[^\\s]+")) {
                                String[] table = in.split("\\s+");
                                String starttime = table[0];
                                String finishtime = table[1];
@@ -104,9 +103,7 @@ public class JpredParserLocalFile implements JpredParser {
                                                        } catch (ParseException e) {
                                                                e.printStackTrace();
                                                        }
-                                                       cc.FormQueryTables(insertdate, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs);
-                                                       ++countinsertions;
-                                                       ++njobs;
+                                                       countinsertions += cc.FormQueryTables(insertdate, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs);
                                                }
                                                fr.close();
                                        } catch (IOException e) {
@@ -132,7 +129,6 @@ public class JpredParserLocalFile implements JpredParser {
                        System.out.println("   " + countUnclearFASTAid + " jobs with unclear FASTA protein id in *.concise.fasta");
                        System.out.println("   " + countinsertions + " new job insertions\n");
                }
-               return njobs;
        }
 
 }