Merge branch 'master' into servlets
author Sasha Sherstnev <a.sherstnev@dundee.ac.uk>
Fri, 8 Nov 2013 13:59:53 +0000 (13:59 +0000)
committer Sasha Sherstnev <a.sherstnev@dundee.ac.uk>
Fri, 8 Nov 2013 13:59:53 +0000 (13:59 +0000)
Conflicts:
.classpath
datadb/compbio/cassandra/CassandraNativeConnector.java
datadb/compbio/cassandra/JpredParserHTTP.java
datadb/compbio/cassandra/JpredParserLocalFile.java
server/compbio/listeners/ContextListener.java
server/compbio/statistic/CassandraRequester.java
website/QuerySequenceProtein.jsp
website/ReportLength.jsp
website/ReportNew.jsp

1  2 
.classpath
datadb/compbio/cassandra/CassandraNativeConnector.java
datadb/compbio/cassandra/JpredParserHTTP.java
datadb/compbio/cassandra/JpredParserLocalFile.java

diff --cc .classpath
@@@ -15,7 -15,7 +15,8 @@@
        <classpathentry kind="lib" path="WEB-INF/lib/jackson-core-asl-1.9.13.jar"/>
        <classpathentry kind="lib" path="WEB-INF/lib/jackson-mapper-asl-1.9.13.jar"/>
        <classpathentry kind="lib" path="WEB-INF/lib/displaytag-1.2.jar"/>
+       <classpathentry kind="lib" path="WEB-INF/lib/jabaws-core-2.1.0.jar" sourcepath="WEB-INF/lib/jabaws-core-src-2.1.0.jar"/>
 +      <classpathentry kind="lib" path="WEB-INF/lib/jstl-1.2.jar"/>
        <classpathentry kind="con" path="org.eclipse.jst.server.core.container/org.eclipse.jst.server.tomcat.runtimeTarget/Apache Tomcat v7.0">
                <attributes>
                        <attribute name="owner.project.facets" value="jst.web"/>
@@@ -2,62 -2,63 +2,103 @@@ package compbio.cassandra
  
  import java.io.IOException;
  import java.util.Calendar;
 +import java.util.HashMap;
  import java.util.List;
  import java.util.ArrayList;
 +import java.util.Map;
 +
++import org.apache.log4j.Logger;
  import com.datastax.driver.core.Cluster;
  import com.datastax.driver.core.Host;
  import com.datastax.driver.core.Metadata;
  import com.datastax.driver.core.Row;
  import com.datastax.driver.core.Session;
  import com.datastax.driver.core.ResultSet;
++import com.datastax.driver.core.PreparedStatement;
++import com.datastax.driver.core.BoundStatement;
++
++import compbio.engine.ProteoCachePropertyHelperManager;
++import compbio.util.PropertyHelper;
++import compbio.util.Util;
  
  public class CassandraNativeConnector {
        private static Cluster cluster;
        private static Session session;
++      private static final PropertyHelper ph = ProteoCachePropertyHelperManager.getPropertyHelper();
++      private static Logger log = Logger.getLogger(CassandraNativeConnector.class);
++
++      public static String CASSANDRA_HOSTNAME = "localhost";
++      public static boolean READ_WEB_JPRED = false;
++      public static boolean READ_LOCALFILE_JPRED = false;
++
++      private static boolean initBooleanValue(String key) {
++              assert key != null;
++              String status = ph.getProperty(key);
++              log.debug("Loading property: " + key + " with value: " + status);
++              if (Util.isEmpty(status)) {
++                      return false;
++              }
++              return new Boolean(status.trim()).booleanValue();
++      }
        /*
--       * connect to the cluster and look weather the dababase has any data inside
++       * connect to the cluster and look whether all tables exist
         */
        public void Connect() {
--              // local cassandra cluster
--              cluster = Cluster.builder().addContactPoint("localhost").build();
--              // distributed cassandra cluster
--              /* cluster = Cluster.builder().addContactPoint("10.0.115.190").build(); */
++
++              String cassandrahostname = ph.getProperty("cassandra.host");
++              if (null != cassandrahostname) {
++                      CASSANDRA_HOSTNAME = cassandrahostname;
++              }
++              READ_WEB_JPRED = initBooleanValue("cassandra.jpred.web");
++              READ_LOCALFILE_JPRED = initBooleanValue("cassandra.jpred.local");
++
++              cluster = Cluster.builder().addContactPoint(CASSANDRA_HOSTNAME).build();
++
                Metadata metadata = cluster.getMetadata();
                System.out.printf("Connected to cluster: %s\n", metadata.getClusterName());
                for (Host host : metadata.getAllHosts()) {
                        System.out.printf("Datatacenter: %s; Host: %s; Rack: %s\n", host.getDatacenter(), host.getAddress(), host.getRack());
                }
--
                session = cluster.connect();
++              CreateTables();
++              System.out.println("Cassandra connected");
++      }
++
++      private void CreateTables() {
                session.execute("CREATE KEYSPACE IF NOT EXISTS ProteinKeyspace WITH replication = {'class':'SimpleStrategy', 'replication_factor':3};");
--              session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinRow (Protein ascii, JobID ascii, Predictions map<ascii,ascii>, PRIMARY KEY(JobID));");
--              session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinLog "
--                              + "(JobID ascii, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, ExecutionStatus ascii, Protein ascii, PRIMARY KEY(JobID));");
--              session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.ProteinData (jobtime bigint, JobID ascii, Protein ascii, PRIMARY KEY(JobID));");
 -              session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinKeyspace.JpredArchive " + 
 -              "(JobID ascii, Protein varchar, IP ascii, StartTime bigint, ExecTime int, alignment map<ascii,ascii>, predictions map<ascii,ascii>, archive blob, LOG varchar, PRIMARY KEY(JobID));");
++              session.execute("USE ProteinKeyspace");
  
--              session.execute("CREATE INDEX IF NOT EXISTS ProteinSeq ON ProteinKeyspace.ProteinRow (protein);");
--              session.execute("CREATE INDEX IF NOT EXISTS JobDateStamp ON ProteinKeyspace.ProteinData (jobtime);");
++              session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinRow "
++                              + "(Protein ascii, JobID ascii, Predictions map<ascii,ascii>, PRIMARY KEY(JobID));");
++              session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinLog "
++                              + "(JobID ascii, DataBegin ascii, DataEnd ascii, ip ascii, FinalStatus ascii, "
++                              + "ExecutionStatus ascii, Protein ascii, PRIMARY KEY(JobID));");
++              session.execute("CREATE COLUMNFAMILY IF NOT EXISTS ProteinData "
++                              + "(jobtime bigint, JobID ascii, Protein ascii, PRIMARY KEY(JobID));");
++              session.execute("CREATE COLUMNFAMILY IF NOT EXISTS JpredArchive "
++                              + "(JobID ascii, Protein varchar, IP ascii, StartTime bigint, ExecTime int, alignment map<ascii,ascii>, "
++                              + "predictions map<ascii,ascii>, archive blob, LOG varchar, PRIMARY KEY(JobID));");
  
--              System.out.println("Cassandra connected");
++              session.execute("CREATE INDEX IF NOT EXISTS ProteinSeq ON ProteinRow (protein);");
++              session.execute("CREATE INDEX IF NOT EXISTS JobDateStamp ON ProteinData (jobtime);");
        }
  
        /*
         * parsing data source and filling the database
         */
        public void Parsing() throws IOException {
--              if (true) {
++              if (READ_WEB_JPRED) {
                        // if (source.equals("http")) {
                        // get data from real Jpred production server
                        System.out.println("Parsing web data source......");
                        String datasrc = "http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat";
                        String prefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
                        JpredParserHTTP parser = new JpredParserHTTP(prefix);
--                      parser.Parsing(datasrc, 4);
++                      parser.Parsing(datasrc, 5);
                }
--              if (false) {
++              if (READ_LOCALFILE_JPRED) {
                        // if (source.equals("file")) {
                        // get artificial data generated for the DB stress tests
                        System.out.println("Parsing local file data source......");
                System.out.println("Cassandra has been shut down");
        }
  
 -      /*
 -       * inserting data into the db
 -       */
 -      public void FormQueryTables(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx, String statusFinal,
 -                      String protein, List<FastaSequence> predictions) {
++      public boolean JobisNotInsterted(String jobid) {
++              ResultSet results1 = session.execute("SELECT * FROM ProteinLog WHERE JobID = '" + jobid + "';");
++              if (results1.isExhausted()) {
++                      return true;
++              }
++              return false;
++      }
 -              String check1 = "SELECT * FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';";
 -              ResultSet results1 = session.execute(check1);
++      public boolean JobisNotArchived(String jobid) {
++              ResultSet results1 = session.execute("SELECT * FROM JpredArchive WHERE JobID = '" + jobid + "';");
+               if (results1.isExhausted()) {
 -                      String com1 = "INSERT INTO ProteinKeyspace.ProteinLog "
 -                                      + "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)" + " VALUES ('" + jobid + "','" + ip + "','"
 -                                      + startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx + "','" + protein + "');";
 -                      session.execute(com1);
++                      return true;
++              }
++              return false;
++      }
 -                      String com2 = "INSERT INTO ProteinKeyspace.ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid
 +      /*
-        * inserting data into the db
++       * inserting data into the tables for queries
 +       */
-       public void InsertData(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx, String statusFinal,
-                       String protein, List<FastaSequence> predictions) {
-               String check1 = "SELECT * FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';";
-               ResultSet results1 = session.execute(check1);
-               if (results1.isExhausted()) {
-                       String com1 = "INSERT INTO ProteinKeyspace.ProteinLog "
-                                       + "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)" + " VALUES ('" + jobid + "','" + ip + "','"
-                                       + startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx + "','" + protein + "');";
-                       session.execute(com1);
-                       String com2 = "INSERT INTO ProteinKeyspace.ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid
++      public int FormQueryTables(long jobtime, String startdate, String enddate, String ip, String jobid, String statusEx,
++                      String statusFinal, String protein, List<FastaSequence> predictions) {
++              if (JobisNotInsterted(jobid)) {
++                      String com1 = "INSERT INTO ProteinLog " + "(JobID, IP, DataBegin, DataEnd, FinalStatus, ExecutionStatus, Protein)"
++                                      + " VALUES ('" + jobid + "','" + ip + "','" + startdate + "','" + enddate + "','" + statusFinal + "','" + statusEx
                                        + "','" + protein + "');";
++                      session.execute(com1);
++
++                      String com2 = "INSERT INTO ProteinData " + "(jobtime, JobID, Protein)" + " VALUES (" + jobtime + ",'" + jobid + "','" + protein
++                                      + "');";
                        session.execute(com2);
                        String allpredictions = "";
                        for (FastaSequence pred : predictions) {
                                String predictionname = pred.getId();
                        if (null != allpredictions) {
                                final_prediction = allpredictions.substring(0, allpredictions.length() - 1);
                        }
-                       String check2 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "' ALLOW FILTERING;";
 -                      String check2 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "';";
++                      String check2 = "SELECT * FROM ProteinRow WHERE JobID = '" + jobid + "';";
                        ResultSet results2 = session.execute(check2);
                        if (results2.isExhausted()) {
-                               String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('" 
-                       + protein + "','" + jobid + "',{" + final_prediction + "});";
 -                              String com3 = "INSERT INTO ProteinKeyspace.ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('" + protein + "','"
 -                                              + jobid + "',{" + final_prediction + "});";
++                              String com3 = "INSERT INTO ProteinRow " + "(Protein, JobID, Predictions)" + " VALUES ('" + protein + "','" + jobid + "',{"
++                                              + final_prediction + "});";
                                session.execute(com3);
                        }
-                       String check3 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "';";
++                      return 1;
                }
++              return 0;
+       }
 -      public void ArchiveData(long starttime, int exectime, String ip, String jobid, String statusEx, String statusFinal,
 -                      String protein, List<FastaSequence> predictions, List<FastaSequence> seqs, String LogFile) {
 -
 -              String check1 = "SELECT * FROM ProteinKeyspace.JpredArchive WHERE JobID = '" + jobid + "';";
 -              ResultSet results1 = session.execute(check1);
 -              if (results1.isExhausted()) {
 -                      String allpredictions = "";
 -                      for (FastaSequence pred : predictions) {
 -                              String predictionname = pred.getId();
 -                              String prediction = pred.getSequence().replaceAll("\n", "");
 -                              allpredictions += "'" + predictionname + "':'" + prediction + "',";
 -                      }
 -                      String final_allpredictions = "";
 -                      if (null != allpredictions) {
 -                              final_allpredictions = allpredictions.substring(0, allpredictions.length() - 1);
++      /*
++       * insert data from a real Jpred job: timing+IP, Execution Status, Final
++       * status, protein sequence, predictions, alignment, LOG and tar.gz files
++       */
++      public int ArchiveData(long starttime, long exectime, String ip, String jobid, String statusEx, String statusFinal, String protein,
++                      List<FastaSequence> predictions, List<FastaSequence> seqs, String LogFile, String archivepath) {
++              if (JobisNotArchived(jobid)) {
++                      String log = LogFile.replaceAll("'", "");
++                      session.execute("INSERT INTO JpredArchive (JobID, Protein, IP, StartTime, ExecTime,LOG) VALUES ('" + jobid + "','" + protein
++                                      + "','" + ip + "'," + starttime + "," + exectime + ",'" + log + "');");
++                      if (false) {
++                              PreparedStatement statement = session.prepare("INSERT INTO JpredArchive (JobID, archive) VALUES (?,?);");
++                              BoundStatement boundStatement = new BoundStatement(statement);
++                              session.execute(boundStatement.bind(jobid, archivepath));
+                       }
 -                      String alignment = "";
 -                      for (FastaSequence seq : seqs) {
 -                              String predictionname = seq.getId();
 -                              String prediction = seq.getSequence().replaceAll("\n", "");
 -                              alignment += "'" + predictionname + "':'" + prediction + "',";
++
++                      for (FastaSequence p : predictions) {
++                              session.execute("UPDATE JpredArchive SET predictions = predictions + {'" + p.getId() + "':'"
++                                              + p.getSequence().replaceAll("\n", "") + "'} WHERE JobID = '" + jobid + "';");
+                       }
 -                      String final_alignment = "";
 -                      if (null != allpredictions) {
 -                              final_alignment = alignment.substring(0, allpredictions.length() - 1);
++
++                      for (FastaSequence s : seqs) {
++                              session.execute("UPDATE JpredArchive SET alignment = alignment + {'" + s.getId() + "':'"
++                                              + s.getSequence().replaceAll("\n", "") + "'} WHERE JobID = '" + jobid + "';");
+                       }
 -                      
 -                      String com1 = "INSERT INTO ProteinKeyspace.JpredArchive "
 -                                      + "(JobID, Protein, IP, StartTime, ExecTime, alignment, predictions, LOG))"
 -                                      + " VALUES ('" 
 -                                      + jobid + "','" + protein + "','" + ip + "'," + starttime + "," + exectime
 -                                      + "',[" + final_allpredictions + "],[" + final_alignment + "],'" + LogFile + "]);";
 -                      session.execute(com1);
++                      return 1;
+               }
++              return 0;
        }
  
 -      
 -      
        /*
         * getting data from the db
         */
        public List<Pair<String, String>> ReadProteinDataTable() {
                final long startTime = System.currentTimeMillis();
--              String com = "SELECT DataBegin,DataEnd FROM ProteinKeyspace.ProteinLog;";
++              String com = "SELECT DataBegin,DataEnd FROM ProteinLog;";
                System.out.println("Command: " + com);
                ResultSet results = session.execute(com);
                final long queryTime = System.currentTimeMillis();
                        ++c;
                }
                final long endTime = System.currentTimeMillis();
-               System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
+               System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
                return res;
        }
-       
        /*
 +       * getting data from the db ProteinData
 +       */
 +      public Integer ReadDateTable(long queryDate) {
 +              final long startTime = System.currentTimeMillis();
-               String com = "SELECT jobtime, JobID FROM ProteinKeyspace.ProteinData WHERE jobtime = " + queryDate + ";";
++              String com = "SELECT jobtime, JobID FROM ProteinData WHERE jobtime = " + queryDate + ";";
 +              System.out.println("Command: " + com);
 +              ResultSet results = session.execute(com);
-               if (results.isExhausted())
-                       return null;
 +              final long queryTime = System.currentTimeMillis();
++              System.out.println("Query time is " + (queryTime - startTime) + " msec");
++              if (results.isExhausted())
++                      return 0;
 +              List<Row> rows = results.all();
-               System.out.println ("Query time is " + (queryTime - startTime) + " msec");        
++              final long endTime = System.currentTimeMillis();
++              System.out.println("Processing time is " + (endTime - queryTime) + " msec");
 +              return rows.size();
 +      }
 +
 +      /*
 +       * getting whole protein sequence from the db ProteinRow
 +       */
 +      public List<StructureProteinPrediction> ReadWholeSequence(String queryProtein) {
 +              final long startTime = System.currentTimeMillis();
-               String com = "SELECT JobID, Predictions FROM ProteinKeyspace.ProteinRow WHERE Protein = '" + queryProtein + "';";
++              String com = "SELECT JobID, Predictions FROM ProteinRow WHERE Protein = '" + queryProtein + "';";
 +              System.out.println("Command: " + com);
 +              ResultSet results = session.execute(com);
 +              if (results.isExhausted())
 +                      return null;
 +              final long queryTime = System.currentTimeMillis();
 +              List<Row> rows = results.all();
-               System.out.println ("Query time is " + (queryTime - startTime) + " msec");   
-               System.out.println (" rows analysed,  " + rows.size());
++              System.out.println("Query time is " + (queryTime - startTime) + " msec");
++              System.out.println(" rows analysed,  " + rows.size());
 +              List<StructureProteinPrediction> res = new ArrayList<StructureProteinPrediction>();
 +              int c = 0;
 +              for (Row r : rows) {
-                       StructureProteinPrediction structure = new StructureProteinPrediction(queryProtein, r.getString("JobID"), r.getMap("Predictions", String.class, String.class));         
++                      StructureProteinPrediction structure = new StructureProteinPrediction(queryProtein, r.getString("JobID"), r.getMap(
++                                      "Predictions", String.class, String.class));
 +                      res.add(structure);
 +                      ++c;
 +              }
 +              final long endTime = System.currentTimeMillis();
-               System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
++              System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
 +              return res;
 +      }
-       
++
 +      /*
 +       * getting part of protein sequence from the db ProteinRow
 +       */
-       public List<StructureProteinPrediction>  ReadPartOfSequence(String queryProtein) {
++      public List<StructureProteinPrediction> ReadPartOfSequence(String queryProtein) {
 +              final long startTime = System.currentTimeMillis();
-               String com = "SELECT * FROM ProteinKeyspace.ProteinRow;";
++              String com = "SELECT * FROM ProteinRow;";
 +              System.out.println("Command: " + com);
 +              ResultSet results = session.execute(com);
 +              if (results.isExhausted())
 +                      return null;
 +              final long queryTime = System.currentTimeMillis();
 +              List<Row> rows = results.all();
-               System.out.println ("Query time is " + (queryTime - startTime) + " msec");   
-               System.out.println (" rows analysed,  " + rows.size());
-               List<StructureProteinPrediction>  res = new ArrayList<StructureProteinPrediction>();
++              System.out.println("Query time is " + (queryTime - startTime) + " msec");
++              System.out.println(" rows analysed,  " + rows.size());
++              List<StructureProteinPrediction> res = new ArrayList<StructureProteinPrediction>();
 +              int c = 0;
 +              for (Row r : rows) {
 +                      String prot = r.getString("Protein");
 +                      if (prot.matches("(.*)" + queryProtein + "(.*)")) {
-                       //      System.out.println(prot);
-                               StructureProteinPrediction structure = new StructureProteinPrediction(prot, r.getString("JobID"), r.getMap("Predictions", String.class, String.class));         
++                              StructureProteinPrediction structure = new StructureProteinPrediction(prot, r.getString("JobID"), r.getMap("Predictions",
++                                              String.class, String.class));
 +                              res.add(structure);
 +                              ++c;
 +                      }
 +              }
 +              final long endTime = System.currentTimeMillis();
-               System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
++              System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
 +              return res;
 +      }
-       
++
 +      /*
 +       * counting how many times each protein sequence occurs in ProteinRow
 +       */
-       public Map<String, Integer>  ReadProteinDataByCounter() {
++      public Map<String, Integer> ReadProteinDataByCounter() {
 +              final long startTime = System.currentTimeMillis();
-               String com = "SELECT Protein FROM ProteinKeyspace.ProteinRow;";
++              String com = "SELECT Protein FROM ProteinRow;";
 +              System.out.println("Command: " + com);
 +              ResultSet results = session.execute(com);
 +              if (results.isExhausted())
 +                      return null;
 +              final long queryTime = System.currentTimeMillis();
 +              List<Row> rows = results.all();
-               System.out.println ("Query time is " + (queryTime - startTime) + " msec");   
-               System.out.println (" rows analysed,  " + rows.size());
++              System.out.println("Query time is " + (queryTime - startTime) + " msec");
++              System.out.println(" rows analysed,  " + rows.size());
 +              Map<String, Integer> res = new HashMap<String, Integer>();
 +              int c = 0;
 +              for (Row r : rows) {
 +                      String protein = r.getString("Protein");
-                       if (res.containsKey(protein)) 
++                      if (res.containsKey(protein))
 +                              res.put(protein, res.get(protein) + 1);
 +                      else
 +                              res.put(protein, 1);
 +              }
 +              final long endTime = System.currentTimeMillis();
-               System.out.println (c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
++              System.out.println(c + " rows analysed, execution time is " + (endTime - startTime) + " msec");
 +              return res;
 +      }
-       
-       
++
 +      /*
 +       * getting the log record and predictions of a single job by its JobID
 +       */
 +      public StructureJobLog ReadJobLog(String jobid) {
 +              final long startTime = System.currentTimeMillis();
 +              String com = "SELECT * FROM ProteinKeyspace.ProteinLog WHERE JobID = '" + jobid + "';";
 +              System.out.println("Command: " + com);
 +              ResultSet results = session.execute(com);
 +              if (results.isExhausted())
 +                      return null;
 +              final long queryTime = System.currentTimeMillis();
 +              Row row = results.one();
 +              String com1 = "SELECT * FROM ProteinKeyspace.ProteinRow WHERE JobID = '" + jobid + "' ALLOW FILTERING;";
 +              System.out.println("Command: " + com1);
 +              ResultSet results1 = session.execute(com1);
 +              if (results1.isExhausted())
 +                      return null;
 +              Row row1 = results1.one();
-               StructureJobLog res = new StructureJobLog(row.getString("Protein"), row.getString("JobID"), row.getString("DataBegin"), row.getString("DataEnd"), row.getString("ip"), row1.getMap("Predictions", String.class, String.class));
-               System.out.println ("Query time is " + (queryTime - startTime) + " msec");   
++              StructureJobLog res = new StructureJobLog(row.getString("Protein"), row.getString("JobID"), row.getString("DataBegin"),
++                              row.getString("DataEnd"), row.getString("ip"), row1.getMap("Predictions", String.class, String.class));
++              System.out.println("Query time is " + (queryTime - startTime) + " msec");
 +              final long endTime = System.currentTimeMillis();
-               System.out.println (" rows analysed, execution time is " + (endTime - startTime) + " msec");
++              System.out.println(" rows analysed, execution time is " + (endTime - startTime) + " msec");
 +              return res;
 +      }
-       
++
 +      /*
         * getting earliest date of jobs from the db
         */
        public long getEarliestDateInDB() {
                final long startTime = System.currentTimeMillis();
-               String com = "SELECT jobtime,JobID FROM ProteinKeyspace.ProteinData;";
 -              String com = "SELECT jobtime FROM ProteinKeyspace.ProteinData;";
++              String com = "SELECT jobtime,JobID FROM ProteinData;";
                System.out.println("Command: " + com);
                ResultSet results = session.execute(com);
                final long queryTime = System.currentTimeMillis();
-               System.out.println ("Query time is  " + (queryTime - startTime) + " msec");
 -              System.out.println("Query time is " + (queryTime - startTime) + " msec");
++              System.out.println("Query time is  " + (queryTime - startTime) + " msec");
  
                Calendar cal = Calendar.getInstance();
                long res = cal.getTimeInMillis();
@@@ -1,8 -1,8 +1,11 @@@
  package compbio.cassandra;
  
  import java.io.BufferedReader;
++import java.io.DataInputStream;
++import java.io.EOFException;
  import java.io.FileNotFoundException;
  import java.io.IOException;
++import java.io.InputStream;
  import java.io.InputStreamReader;
  import java.net.HttpURLConnection;
  import java.net.MalformedURLException;
@@@ -20,6 -20,6 +23,9 @@@ import compbio.cassandra.JpredParser
  public class JpredParserHTTP implements JpredParser {
        private CassandraNativeConnector cc = new CassandraNativeConnector();
        private String dirprefix;
++      private List<FastaSequence> alignment;
++      private List<FastaSequence> predictions;
++      private String jnetpred;
  
        JpredParserHTTP() {
                dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
                }
        }
  
--      private int ParsingForDate(String input, String date) {
++      private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException {
++              final FastaReader fr = new FastaReader(stream);
++              String query = "";
++              alignment = new ArrayList<FastaSequence>();
++              predictions = new ArrayList<FastaSequence>();
++              while (fr.hasNext()) {
++                      final FastaSequence fs = fr.next();
++                      String seqid = fs.getId();
++                      String seq = fs.getSequence().replaceAll("\n", "");
++                      if (seqid.equals("QUERY") || seqid.equals(jobid)) {
++                              query = seq;
++                              alignment.add(fs);
++                      } else if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28")
++                                      || seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF")
++                                      || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM")) {
++                              predictions.add(fs);
++                              if (seqid.equals("jnetpred"))
++                                      jnetpred = seq;
++                      } else {
++                              alignment.add(fs);
++                      }
++              }
++              return query;
++      }
++
++      private String parseLogFile(final InputStream stream) throws IOException {
++              String out = "";
++              BufferedReader buffer = new BufferedReader(new InputStreamReader(stream));
++              String line;
++              while (null != (line = buffer.readLine())) {
++                      out += line;
++              }
++              return out;
++      }
++
++      private List<Byte> parseArchiveFile(final InputStream stream) throws IOException {
++              DataInputStream data_in = new DataInputStream(stream);
++              List<Byte> out = new ArrayList<Byte>();
++              while (true) {
++                      try {
++                              out.add(data_in.readByte());
++                      } catch (EOFException eof) {
++                              break;
++                      }
++              }
++              return out;
++      }
++
++      private void ParsingForDate(String input, String date) {
                int totalcount = 0;
                int countNoData = 0;
                int countUnclearFASTAid = 0;
                int countinserted = 0;
                int counAlignments = 0;
                int countStrange = 0;
--              int njobs = 0;
  
                System.out.println("Inserting jobs for " + date);
                try {
                        String line;
  
                        while ((line = alljobs.readLine()) != null) {
--                              if (line.matches(date + "(.*)jp_[^\\s]+")) {
++                              if (line.matches(date + ":(.*)jp_[^\\s]+")) {
                                        String[] table = line.split("\\s+");
                                        // Format of a record:
                                        // starttime endtime ip email jobid (directory)
--                                      // 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172 unknown_email jp_J9HBCBT
++                                      // 2013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172
++                                      // unknown_email jp_J9HBCBT
                                        String id = table[table.length - 1];
                                        totalcount++;
--                                      String datalink = dirprefix + "/" + id + "/" + id + ".concise.fasta";
--                                      URL urltable = new URL(datalink);
--                                      HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection();
--                                      int responsecode = httpConnection.getResponseCode();
--                                      if (199 < responsecode && responsecode < 300) {
--                                              try {
--                                                      final FastaReader fr = new FastaReader(urltable.openStream());
--                                                      final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
--                                                      String newprotein = "";
--                                                      while (fr.hasNext()) {
--                                                              final FastaSequence fs = fr.next();
--                                                              if (fs.getId().equals("QUERY") || fs.getId().equals(id))
--                                                                      newprotein = fs.getSequence().replaceAll("\n", "");
--                                                              else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) {
--                                                                      seqs.add(fs);
--                                                              }
--                                                      }
--                                                      if (newprotein.equals("")) {
--                                                              countUnclearFASTAid++;
--                                                      } else {
--                                                              SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
--                                                              String dateInString1 = table[0].substring(0, table[0].indexOf(":"));
--                                                              long dateWork1 = 0;
--                                                              try {
--                                                                      Date dat1 = formatter.parse(dateInString1);
--                                                                      dateWork1 = dat1.getTime();
--                                                              } catch (ParseException e) {
--                                                                      e.printStackTrace();
++                                      if (cc.JobisNotInsterted(id)) {
++                                              URL dataurl = new URL(dirprefix + "/" + id + "/" + id + ".concise.fasta");
++                                              URL archiveurl = new URL(dirprefix + "/" + id + "/" + id + ".tar.gz");
++                                              URL logurl = new URL(dirprefix + "/" + id + "/LOG");
++                                              HttpURLConnection httpConnection1 = (HttpURLConnection) dataurl.openConnection();
++                                              HttpURLConnection httpConnection2 = (HttpURLConnection) logurl.openConnection();
++                                              HttpURLConnection httpConnection3 = (HttpURLConnection) archiveurl.openConnection();
++                                              int response1 = httpConnection1.getResponseCode();
++                                              int response2 = httpConnection2.getResponseCode();
++                                              if (199 < response1 && response1 < 300) {
++                                                      try {
++                                                              String protein = parsePredictions(dataurl.openStream(), id);
++                                                              if (protein.equals("")) {
++                                                                      countUnclearFASTAid++;
++                                                              } else {
++                                                                      SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
++                                                                      SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
++                                                                      String startdatestring = table[0].substring(0, table[0].indexOf(":"));
++                                                                      try {
++                                                                              Date startdate = dateformatter.parse(startdatestring);
++                                                                              Date starttime = timeformatter.parse(table[0]);
++                                                                              Date endtime = timeformatter.parse(table[1]);
++                                                                              String ip = table[2];
++                                                                              String execstatus = "OK";
++                                                                              String finalstatus = "OK";
++                                                                              countinsertions += cc.FormQueryTables(startdate.getTime(), table[0], table[1], ip, id, execstatus,
++                                                                                              finalstatus, protein, predictions);
++
++                                                                              long exectime = (endtime.getTime() - starttime.getTime()) / 1000;
++                                                                              String log = "";
++                                                                              if (199 < response2 && response2 < 300) {
++                                                                                      log = parseLogFile(logurl.openStream());
++                                                                              }
++                                                                              cc.ArchiveData(startdate.getTime(), exectime, ip, id, execstatus, finalstatus, protein,
++                                                                                              predictions, alignment, log, archiveurl.toString());
++                                                                      } catch (ParseException e) {
++                                                                              e.printStackTrace();
++                                                                      }
                                                                }
-                                                               cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs);
-                                                               ++countinsertions;
-                                                               ++njobs;
-                                                               // flush every 50 insertions
-                                                               // if (0 == countinsertions % 50) {
-                                                               // cc.flushData();
-                                                               // njobs -= 50;
-                                                               // }
 -                                                              cc.FormQueryTables(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs);
 -                                                              ++countinsertions;
 -                                                              ++njobs;
++                                                      } catch (IOException e) {
++                                                              e.printStackTrace();
                                                        }
--                                              } catch (IOException e) {
--                                                      e.printStackTrace();
++                                              } else {
++                                                      countNoData++;
                                                }
++                                              httpConnection1.disconnect();
++                                              httpConnection2.disconnect();
++                                              httpConnection3.disconnect();
                                        } else {
--                                              countNoData++;
++                                              ++countinserted;
                                        }
                                } else {
                                        if (line.matches(date + "(.*)Sequence0/(.*)")) {
                } catch (IOException e) {
                        e.printStackTrace();
                }
--              return njobs;
++              ;
        }
  }
@@@ -58,7 -58,7 +58,7 @@@ public class JpredParserLocalFile imple
                System.out.println("Execution Time = " + execTime + " ms");
        }
  
--      private int ParsingForDate(List<String> input, String date) {
++      private void ParsingForDate(List<String> input, String date) {
                int totalcount = 0;
                int countNoData = 0;
                int countUnclearFASTAid = 0;
                int countinserted = 0;
                int counAlignments = 0;
                int countStrange = 0;
  
                System.out.println("Inserting jobs for " + date);
                for (String in : input) {
--                      if (in.matches(date + "(.*)jp_[^\\s]+")) {
++                      if (in.matches(date + ":(.*)jp_[^\\s]+")) {
                                String[] table = in.split("\\s+");
                                String starttime = table[0];
                                String finishtime = table[1];
                                                        } catch (ParseException e) {
                                                                e.printStackTrace();
                                                        }
-                                                       cc.InsertData(insertdate, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs);
-                                                       ++countinsertions;
-                                                       ++njobs;
 -                                                      cc.FormQueryTables(insertdate, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs);
 -                                                      ++countinsertions;
 -                                                      ++njobs;
++                                                      countinsertions += cc.FormQueryTables(insertdate, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs);
                                                }
                                                fr.close();
                                        } catch (IOException e) {
                        System.out.println("   " + countUnclearFASTAid + " jobs with unclear FASTA protein id in *.concise.fasta");
                        System.out.println("   " + countinsertions + " new job insertions\n");
                }
--              return njobs;
        }
  
  }