Allow incomplete jobs (those with a failed status) to be reported
[proteocache.git] / datadb / compbio / cassandra / JpredParserHTTP.java
index bf4c460..b84ddfd 100644 (file)
@@ -1,8 +1,6 @@
 package compbio.cassandra;
 
 import java.io.BufferedReader;
-import java.io.DataInputStream;
-import java.io.EOFException;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
@@ -17,15 +15,29 @@ import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.Date;
 import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import compbio.cassandra.JpredParser;
+import compbio.data.sequence.FastaReader;
+import compbio.data.sequence.FastaSequence;
+import compbio.engine.JpredJob;
+import compbio.engine.ProteoCachePropertyHelperManager;
+import compbio.engine.archive.Archive;
+import compbio.engine.archive.ArchivedJob;
+import compbio.util.PropertyHelper;
+import compbio.util.Util;
 
 public class JpredParserHTTP implements JpredParser {
        private CassandraWriter cw = new CassandraWriter();
+       private static Archive archive;
        private String dirprefix;
        private List<FastaSequence> alignment;
        private List<FastaSequence> predictions;
        private int countNoData;
+       private static boolean archiving = false;
+       private static final PropertyHelper ph = ProteoCachePropertyHelperManager.getPropertyHelper();
+       static SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
 
        public JpredParserHTTP() {
                dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
@@ -39,16 +51,26 @@ public class JpredParserHTTP implements JpredParser {
                dirprefix = newsourceprefix;
        }
 
+       /** Reads the boolean property {@code key}; a value Util.isEmpty rejects counts as false. */
+       private boolean initBooleanValue(String key) {
+               assert key != null;
+               final String status = ph.getProperty(key);
+               // Boolean.parseBoolean yields the same result as the deprecated
+               // Boolean(String) constructor, without allocating a wrapper object.
+               return !Util.isEmpty(status) && Boolean.parseBoolean(status.trim());
+       }
+
        public void Parsing(String source, int nDays) throws IOException {
                Calendar cal = Calendar.getInstance();
                cal.add(Calendar.DATE, -nDays);
+               archiving = initBooleanValue("archive.enable"); // re-read from the "archive.enable" property on every call
+               if (archiving) {
+                       archive = new Archive(); // only created when archiving is switched on
+               }
                for (int i = 0; i < nDays; ++i) {
                        cal.add(Calendar.DATE, 1);
-                       int month = cal.get(Calendar.MONTH) + 1;
-                       int year = cal.get(Calendar.YEAR);
-                       int day = cal.get(Calendar.DATE);
-                       String date = year + "/" + month + "/" + day;
-                       ParsingForDate(source, date);
+                       String date = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DATE); // year/month/day; Calendar.MONTH is 0-based, hence +1
+                       ParsingOneDay(source, date); // one pass per calendar day, ending with today
                }
        }
 
@@ -60,8 +82,6 @@ public class JpredParserHTTP implements JpredParser {
        private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException {
                final FastaReader fr = new FastaReader(stream);
                String protein = "";
-               alignment = new ArrayList<FastaSequence>();
-               predictions = new ArrayList<FastaSequence>();
                while (fr.hasNext()) {
                        final FastaSequence fs = fr.next();
                        String seqid = fs.getId();
@@ -79,56 +99,49 @@ public class JpredParserHTTP implements JpredParser {
                return protein;
        }
 
-       private String parseLogFile(final InputStream stream) throws IOException {
+       /** Returns the first FASTA sequence of the stream, "alignment" if more follow, "" if none. */
+       private String parseSeqFile(final InputStream stream, String jobid) throws FileNotFoundException {
+               final FastaReader fr = new FastaReader(stream);
+               if (!fr.hasNext()) {
+                       // no record at all (e.g. an empty .seq file): skip next(), which presumably throws here
+                       return "";
+               }
+               final String protein = fr.next().getSequence().replaceAll("\n", "");
+               // a second record means this is an alignment job, not a single sequence
+               return fr.hasNext() ? "alignment" : protein;
+       }
+
+       private String parseLogFile(final InputStream stream, JpredJob job) throws IOException { // returns the whole LOG as one string; stores the program version on job
                String out = "";
                BufferedReader buffer = new BufferedReader(new InputStreamReader(stream));
                String line;
-               while (null != (line = buffer.readLine())) {
-                       out += line;
+               if (null != (line = buffer.readLine()) && (out = line).contains("version")) { // out stays "" (never null) for an empty LOG
+                       Matcher matcher = Pattern.compile("((\\d|\\.)+)").matcher(out);
+                       if (matcher.find())
+                               job.setProgramVersion(matcher.group(0)); // first run of digits and dots on the "version" line
                }
-               return out;
-       }
-
-       private List<Byte> parseArchiveFile(final InputStream stream) throws IOException {
-               DataInputStream data_in = new DataInputStream(stream);
-               List<Byte> out = new ArrayList<Byte>();
-               while (true) {
-                       try {
-                               out.add(data_in.readByte());
-                       } catch (EOFException eof) {
-                               break;
-                       }
+               while (null != (line = buffer.readLine())) {
+                       out += line; // remaining lines are joined without separators
                }
                return out;
        }
 
-       private int analyseJob(String[] job) throws IOException {
+       private int analyseJob(String[] jobinfo) throws IOException {
+               alignment = new ArrayList<FastaSequence>();
+               predictions = new ArrayList<FastaSequence>();
                boolean running = true;
                boolean ConcisefileExists = false;
                boolean LogfileExists = false;
-               String id = job[job.length - 1];
-               String startdatestring = job[0].substring(0, job[0].indexOf(":"));
-               Date startdate = new Date(0);
-               Date starttime = new Date(0);
-               Date endtime = new Date(0);
+               JpredJob job = new JpredJob(jobinfo[jobinfo.length - 1], jobinfo[0], jobinfo[1]);
+               job.setIP(jobinfo[2]);
+               job.setProgramName("Jpred");
                Date currDate = new Date();
-               String ip = job[2];
-               String execstatus = "OK";
-               String finalstatus = "OK";
-               String protein = "";
-               long exectime = 0;
-               String log = "";
-               String maindir = dirprefix + "/" + id + "/";
-               String concisefile = dirprefix + "/" + id + "/" + id + ".concise.fasta";
-               String archivefile = dirprefix + "/" + id + "/" + id + ".tar.gz";
-               String logfile = dirprefix + "/" + id + "/LOG";
-               SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
-               SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
+               String maindir = dirprefix + "/" + job.getJobID() + "/";
+
                try {
-                       startdate = dateformatter.parse(startdatestring);
-                       starttime = timeformatter.parse(job[0]);
-                       endtime = timeformatter.parse(job[1]);
-                       exectime = (endtime.getTime() - starttime.getTime()) / 1000;
+                       Date finishTime = timeformatter.parse(jobinfo[1]);
+                       long delay = currDate.getTime() / 1000 - finishTime.getTime() / 1000;
+                       if (delay < 120) return 0;
                } catch (ParseException e) {
                        e.printStackTrace();
                }
@@ -139,71 +152,91 @@ public class JpredParserHTTP implements JpredParser {
                        if (httpConnection_dirurl.getResponseCode() < 199 || 300 <= httpConnection_dirurl.getResponseCode()) {
                                return 0;
                        }
-                       URL conciseurl = new URL(concisefile);
-                       URL archiveurl = new URL(archivefile);
-                       URL logurl = new URL(logfile);
+                       URL conciseurl = new URL(maindir + job.getJobID() + ".concise.fasta");
+                       URL logurl = new URL(maindir + "LOG");
                        HttpURLConnection httpConnection_conciseurl = (HttpURLConnection) conciseurl.openConnection();
                        HttpURLConnection httpConnection_logurl = (HttpURLConnection) logurl.openConnection();
-                       HttpURLConnection httpConnection_archiveurl = (HttpURLConnection) archiveurl.openConnection();
                        if (199 < httpConnection_conciseurl.getResponseCode() && httpConnection_conciseurl.getResponseCode() < 300) {
                                ConcisefileExists = true;
                                running = false;
-                               try {
-                                       protein = parsePredictions(conciseurl.openStream(), id);
+                               try {                           
+                                       job.setProtein(parsePredictions(conciseurl.openStream(), job.getJobID()));
                                } catch (IOException e) {
                                        e.printStackTrace();
                                }
                        } else {
                                // The job still can be running of failed...
                                ++countNoData;
-                               alignment = new ArrayList<FastaSequence>();
-                               predictions = new ArrayList<FastaSequence>();
                        }
                        if (199 < httpConnection_logurl.getResponseCode() && httpConnection_logurl.getResponseCode() < 300) {
                                LogfileExists = true;
-                               log = parseLogFile(logurl.openStream());
+                               job.setLog(parseLogFile(logurl.openStream(), job));
                        } else {
                                // The job has not been started at all...
-                               execstatus = "FAIL";
-                               finalstatus = "STOPPED";
+                               System.out.println ("WARNING! Job " + job.getJobID() + " has status FAIL/STOPPED");
+                               job.setExecutionStatus("FAIL");
+                               job.setFinalStatus("STOPPED");
                                running = false;
                        }
-                       if (log.matches("(.*)TIMEOUT\\syour\\sjob\\stimed\\sout(.*)")) {
+                       if (job.getLog().matches("(.*)TIMEOUT\\syour\\sjob\\stimed\\sout(.*)")) {
                                // blast job was too long (more than 3600 secs by default)...
-                               execstatus = "FAIL";
-                               finalstatus = "TIMEDOUT";
+                               job.setExecutionStatus("FAIL");
+                               job.setFinalStatus("TIMEDOUT");
                                running = false;
-                       } else if (log.matches("(.*)Jpred\\serror:\\sDied\\sat(.*)")) {
+                       } else if (job.getLog().matches("(.*)Jpred\\serror:\\sDied\\sat(.*)")) {
                                // an internal Jpred error...
-                               execstatus = "FAIL";
-                               finalstatus = "JPREDERROR";
+                               job.setExecutionStatus("FAIL");
+                               job.setFinalStatus("JPREDERROR");
                                running = false;
-                       } else if ((currDate.getTime() - endtime.getTime()) / 1000 > 3601 && LogfileExists && !ConcisefileExists) {
+                       } else if ((currDate.getTime() - job.getEndTime()) / 1000 > 3601 && LogfileExists && !ConcisefileExists) {
                                // the job was stopped with unknown reason...
-                               execstatus = "FAIL";
-                               finalstatus = "STOPPED";
+                               job.setExecutionStatus("FAIL");
+                               job.setFinalStatus("STOPPED");
                                running = false;
                        }
 
                        httpConnection_conciseurl.disconnect();
                        httpConnection_logurl.disconnect();
-                       httpConnection_archiveurl.disconnect();
                } catch (MalformedURLException e) {
                        e.printStackTrace();
                }
 
                if (!running) {
-                       long t = startdate.getTime();
-                       cw.FormQueryTables(t, job[0], job[1], ip, id, execstatus, finalstatus, protein, predictions);
-                       cw.ArchiveData(t, exectime, ip, id, execstatus, finalstatus, protein, predictions, alignment, log, archivefile);
+                       // logging the job
+                       job.setAlignment(alignment);
+                       job.setPredictions(predictions);
+                       if (job.getExecutionStatus().equals("FAIL")) {
+                               URL sequrl = new URL(maindir + job.getJobID() + ".seq");
+                               HttpURLConnection httpConnection_sequrl = (HttpURLConnection) sequrl.openConnection();
+                               if (199 < httpConnection_sequrl.getResponseCode() && httpConnection_sequrl.getResponseCode() < 300) {
+                                       try {
+                                               job.setProtein(parseSeqFile(sequrl.openStream(), job.getJobID()));
+                                       } catch (IOException e) {
+                                               e.printStackTrace();
+                                       }
+                               }
+                       }
+                       cw.FormQueryTables(job);
+
+                       // archiving the job
+                       if (archiving) {
+                               ArchivedJob ajob = new ArchivedJob(job.getJobID());
+                               String arlink = archive.createJob(job.getJobID());
+                               if (job.getFinalStatus().equals("OK")) {
+                                       ajob.setArchivePath(arlink);
+                                       ajob.copyArchiveFromWeb(maindir + job.getJobID() + ".tar.gz");
+                                       cw.ArchiveData(job, arlink);
+                               } else {
+                                       cw.ArchiveData(job, "undefined");
+                               }
+                       }
                        return 1;
-               } else
-                       System.out.println("job " + id + " is running");
+               }
 
                return 0;
        }
 
-       private void ParsingForDate(String input, String date) {
+       private void ParsingOneDay(String input, String date) {
                int totalcount = 0;
                int countinsertions = 0;
                int countinserted = 0;
@@ -244,4 +277,4 @@ public class JpredParserHTTP implements JpredParser {
                }
                ;
        }
-}
+};