First working code for archiving Jpred jobs
[proteocache.git] / datadb / compbio / cassandra / JpredParserHTTP.java
index bf4c460..ac9ed4f 100644 (file)
@@ -1,8 +1,6 @@
 package compbio.cassandra;
 
 import java.io.BufferedReader;
-import java.io.DataInputStream;
-import java.io.EOFException;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
@@ -11,21 +9,30 @@ import java.net.HttpURLConnection;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.net.URLConnection;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.Date;
 import java.util.List;
 
 import compbio.cassandra.JpredParser;
+import compbio.data.sequence.FastaReader;
+import compbio.data.sequence.FastaSequence;
+import compbio.engine.JpredJob;
+import compbio.engine.ProteoCachePropertyHelperManager;
+import compbio.engine.archive.Archive;
+import compbio.engine.archive.ArchivedJob;
+import compbio.util.PropertyHelper;
+import compbio.util.Util;
 
 public class JpredParserHTTP implements JpredParser {
        private CassandraWriter cw = new CassandraWriter();
+       private static Archive archive;
        private String dirprefix;
        private List<FastaSequence> alignment;
        private List<FastaSequence> predictions;
        private int countNoData;
+       private static boolean archiving = false;
+       private static final PropertyHelper ph = ProteoCachePropertyHelperManager.getPropertyHelper();
 
        public JpredParserHTTP() {
                dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
@@ -39,16 +46,26 @@ public class JpredParserHTTP implements JpredParser {
                dirprefix = newsourceprefix;
        }
 
+       private boolean initBooleanValue(String key) { // looks up `key` via ph and interprets the value as a boolean
+               assert key != null;
+               String status = ph.getProperty(key);
+               if (Util.isEmpty(status)) { // no usable value (as judged by Util.isEmpty) => feature is off
+                       return false;
+               }
+               return Boolean.parseBoolean(status.trim()); // true only for "true" (case-insensitive); avoids the deprecated Boolean(String) constructor
+       }
+
        public void Parsing(String source, int nDays) throws IOException {
                Calendar cal = Calendar.getInstance();
                cal.add(Calendar.DATE, -nDays);
+               archiving = initBooleanValue("archive.enable");
+               if (archiving) {
+                       archive = new Archive();
+               }
                for (int i = 0; i < nDays; ++i) {
                        cal.add(Calendar.DATE, 1);
-                       int month = cal.get(Calendar.MONTH) + 1;
-                       int year = cal.get(Calendar.YEAR);
-                       int day = cal.get(Calendar.DATE);
-                       String date = year + "/" + month + "/" + day;
-                       ParsingForDate(source, date);
+                       String date = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DATE);
+                       ParsingOneDay(source, date);
                }
        }
 
@@ -60,8 +77,6 @@ public class JpredParserHTTP implements JpredParser {
        private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException {
                final FastaReader fr = new FastaReader(stream);
                String protein = "";
-               alignment = new ArrayList<FastaSequence>();
-               predictions = new ArrayList<FastaSequence>();
                while (fr.hasNext()) {
                        final FastaSequence fs = fr.next();
                        String seqid = fs.getId();
@@ -89,49 +104,16 @@ public class JpredParserHTTP implements JpredParser {
                return out;
        }
 
-       private List<Byte> parseArchiveFile(final InputStream stream) throws IOException {
-               DataInputStream data_in = new DataInputStream(stream);
-               List<Byte> out = new ArrayList<Byte>();
-               while (true) {
-                       try {
-                               out.add(data_in.readByte());
-                       } catch (EOFException eof) {
-                               break;
-                       }
-               }
-               return out;
-       }
-
-       private int analyseJob(String[] job) throws IOException {
+       private int analyseJob(String[] jobinfo) throws IOException {
+               alignment = new ArrayList<FastaSequence>();
+               predictions = new ArrayList<FastaSequence>();
                boolean running = true;
                boolean ConcisefileExists = false;
                boolean LogfileExists = false;
-               String id = job[job.length - 1];
-               String startdatestring = job[0].substring(0, job[0].indexOf(":"));
-               Date startdate = new Date(0);
-               Date starttime = new Date(0);
-               Date endtime = new Date(0);
+               JpredJob job = new JpredJob(jobinfo[jobinfo.length - 1], jobinfo[0], jobinfo[1]);
+               job.setIP(jobinfo[2]);
                Date currDate = new Date();
-               String ip = job[2];
-               String execstatus = "OK";
-               String finalstatus = "OK";
-               String protein = "";
-               long exectime = 0;
-               String log = "";
-               String maindir = dirprefix + "/" + id + "/";
-               String concisefile = dirprefix + "/" + id + "/" + id + ".concise.fasta";
-               String archivefile = dirprefix + "/" + id + "/" + id + ".tar.gz";
-               String logfile = dirprefix + "/" + id + "/LOG";
-               SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
-               SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
-               try {
-                       startdate = dateformatter.parse(startdatestring);
-                       starttime = timeformatter.parse(job[0]);
-                       endtime = timeformatter.parse(job[1]);
-                       exectime = (endtime.getTime() - starttime.getTime()) / 1000;
-               } catch (ParseException e) {
-                       e.printStackTrace();
-               }
+               String maindir = dirprefix + "/" + job.getJobID() + "/";
 
                try {
                        URL dirurl = new URL(maindir);
@@ -139,71 +121,77 @@ public class JpredParserHTTP implements JpredParser {
                        if (httpConnection_dirurl.getResponseCode() < 199 || 300 <= httpConnection_dirurl.getResponseCode()) {
                                return 0;
                        }
-                       URL conciseurl = new URL(concisefile);
-                       URL archiveurl = new URL(archivefile);
-                       URL logurl = new URL(logfile);
+                       URL conciseurl = new URL(maindir + job.getJobID() + ".concise.fasta");
+                       URL logurl = new URL(maindir + "LOG");
                        HttpURLConnection httpConnection_conciseurl = (HttpURLConnection) conciseurl.openConnection();
                        HttpURLConnection httpConnection_logurl = (HttpURLConnection) logurl.openConnection();
-                       HttpURLConnection httpConnection_archiveurl = (HttpURLConnection) archiveurl.openConnection();
                        if (199 < httpConnection_conciseurl.getResponseCode() && httpConnection_conciseurl.getResponseCode() < 300) {
                                ConcisefileExists = true;
                                running = false;
                                try {
-                                       protein = parsePredictions(conciseurl.openStream(), id);
+                                       job.setProtein(parsePredictions(conciseurl.openStream(), job.getJobID()));
                                } catch (IOException e) {
                                        e.printStackTrace();
                                }
                        } else {
                                // The job still can be running of failed...
                                ++countNoData;
-                               alignment = new ArrayList<FastaSequence>();
-                               predictions = new ArrayList<FastaSequence>();
                        }
                        if (199 < httpConnection_logurl.getResponseCode() && httpConnection_logurl.getResponseCode() < 300) {
                                LogfileExists = true;
-                               log = parseLogFile(logurl.openStream());
+                               job.setLog(parseLogFile(logurl.openStream()));
                        } else {
                                // The job has not been started at all...
-                               execstatus = "FAIL";
-                               finalstatus = "STOPPED";
+                               job.setExecutionStatus("FAIL");
+                               job.setFinalStatus("STOPPED");
                                running = false;
                        }
-                       if (log.matches("(.*)TIMEOUT\\syour\\sjob\\stimed\\sout(.*)")) {
+                       if (job.getLog().matches("(.*)TIMEOUT\\syour\\sjob\\stimed\\sout(.*)")) {
                                // blast job was too long (more than 3600 secs by default)...
-                               execstatus = "FAIL";
-                               finalstatus = "TIMEDOUT";
+                               job.setExecutionStatus("FAIL");
+                               job.setFinalStatus("TIMEDOUT");
                                running = false;
-                       } else if (log.matches("(.*)Jpred\\serror:\\sDied\\sat(.*)")) {
+                       } else if (job.getLog().matches("(.*)Jpred\\serror:\\sDied\\sat(.*)")) {
                                // an internal Jpred error...
-                               execstatus = "FAIL";
-                               finalstatus = "JPREDERROR";
+                               job.setExecutionStatus("FAIL");
+                               job.setFinalStatus("JPREDERROR");
                                running = false;
-                       } else if ((currDate.getTime() - endtime.getTime()) / 1000 > 3601 && LogfileExists && !ConcisefileExists) {
+                       } else if ((currDate.getTime() - job.getEndTime()) / 1000 > 3601 && LogfileExists && !ConcisefileExists) {
                                // the job was stopped with unknown reason...
-                               execstatus = "FAIL";
-                               finalstatus = "STOPPED";
+                               job.setExecutionStatus("FAIL");
+                               job.setFinalStatus("STOPPED");
                                running = false;
                        }
 
                        httpConnection_conciseurl.disconnect();
                        httpConnection_logurl.disconnect();
-                       httpConnection_archiveurl.disconnect();
                } catch (MalformedURLException e) {
                        e.printStackTrace();
                }
 
                if (!running) {
-                       long t = startdate.getTime();
-                       cw.FormQueryTables(t, job[0], job[1], ip, id, execstatus, finalstatus, protein, predictions);
-                       cw.ArchiveData(t, exectime, ip, id, execstatus, finalstatus, protein, predictions, alignment, log, archivefile);
+                       job.setAlignment(alignment);
+                       job.setPredictions(predictions);
+                       cw.FormQueryTables(job);
+                       // archiving the job
+                       if (archiving) {
+                               ArchivedJob ajob = new ArchivedJob(job.getJobID());
+                               String arlink = archive.createJob(job.getJobID());
+                               if (job.getFinalStatus().equals("OK")) {
+                                       ajob.setArchivePath(arlink);
+                                       ajob.copyArchiveFromWeb(maindir + job.getJobID() + ".tar.gz");
+                                       cw.ArchiveData(job, arlink);
+                               } else {
+                                       cw.ArchiveData(job, "undefined");
+                               }
+                       }
                        return 1;
-               } else
-                       System.out.println("job " + id + " is running");
+               }
 
                return 0;
        }
 
-       private void ParsingForDate(String input, String date) {
+       private void ParsingOneDay(String input, String date) {
                int totalcount = 0;
                int countinsertions = 0;
                int countinserted = 0;
@@ -244,4 +232,4 @@ public class JpredParserHTTP implements JpredParser {
                }
                ;
        }
-}
+};