Add delay for reading new jobs
[proteocache.git] / datadb / compbio / cassandra / JpredParserHTTP.java
index 2167a2d..b84ddfd 100644 (file)
@@ -9,22 +9,35 @@ import java.net.HttpURLConnection;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.net.URLConnection;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.Date;
 import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import compbio.cassandra.JpredParser;
 import compbio.data.sequence.FastaReader;
 import compbio.data.sequence.FastaSequence;
 import compbio.engine.JpredJob;
+import compbio.engine.ProteoCachePropertyHelperManager;
+import compbio.engine.archive.Archive;
+import compbio.engine.archive.ArchivedJob;
+import compbio.util.PropertyHelper;
+import compbio.util.Util;
 
 public class JpredParserHTTP implements JpredParser {
        private CassandraWriter cw = new CassandraWriter();
+       private static Archive archive; // assigned in Parsing() when archiving is enabled; static, so shared by all instances
        private String dirprefix;
        private List<FastaSequence> alignment;
        private List<FastaSequence> predictions;
        private int countNoData;
+       private static boolean archiving = false; // overwritten in Parsing() from the "archive.enable" property
+       private static final PropertyHelper ph = ProteoCachePropertyHelperManager.getPropertyHelper(); // property source queried by initBooleanValue()
+       static SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s"); // parses jobinfo[1] in analyseJob(); NOTE(review): SimpleDateFormat is not synchronized and this instance is static and non-final - confirm single-threaded use
 
        public JpredParserHTTP() {
                dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
@@ -38,13 +51,26 @@ public class JpredParserHTTP implements JpredParser {
                dirprefix = newsourceprefix;
        }
 
+       private boolean initBooleanValue(String key) { // looks up property `key` in ph and interprets it as a boolean
+               assert key != null;
+               String status = ph.getProperty(key);
+               if (Util.isEmpty(status)) { // no usable value configured: treat the switch as off
+                       return false;
+               }
+               return Boolean.parseBoolean(status.trim()); // true only for "true" (case-insensitive); same rule as new Boolean(String) without the deprecated constructor
+       }
+
        public void Parsing(String source, int nDays) throws IOException {
                Calendar cal = Calendar.getInstance();
                cal.add(Calendar.DATE, -nDays);
+               archiving = initBooleanValue("archive.enable"); // re-read the archive switch on every call; the flag is static, so the new value is seen by all instances
+               if (archiving) {
+                       archive = new Archive(); // replaces the shared archive handle that analyseJob() calls createJob() on
+               }
                for (int i = 0; i < nDays; ++i) {
                        cal.add(Calendar.DATE, 1);
                        String date = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DATE);
-                       ParsingForDate(source, date);
+                       ParsingOneDay(source, date); // date is "year/month/day" without zero padding; called once per day, ending with today
                }
        }
 
@@ -56,8 +82,6 @@ public class JpredParserHTTP implements JpredParser {
        private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException {
                final FastaReader fr = new FastaReader(stream);
                String protein = "";
-               alignment = new ArrayList<FastaSequence>();
-               predictions = new ArrayList<FastaSequence>();
                while (fr.hasNext()) {
                        final FastaSequence fs = fr.next();
                        String seqid = fs.getId();
@@ -75,26 +99,53 @@ public class JpredParserHTTP implements JpredParser {
                return protein;
        }
 
-       private String parseLogFile(final InputStream stream) throws IOException {
+       private String parseSeqFile(final InputStream stream, String jobid) throws FileNotFoundException { // returns the first FASTA sequence with newlines removed, "alignment" if more records follow, "" if there are none; jobid is not used
+               final FastaReader fr = new FastaReader(stream);
+               if (!fr.hasNext()) {
+                       // no record at all: guard next() with hasNext(), as parsePredictions() does
+                       return "";
+               }
+               final String protein = fr.next().getSequence().replaceAll("\n", "");
+               if (fr.hasNext()) { // more than one record: this is an alignment job...
+                       return "alignment";
+               }
+               return protein;
+       }
+
+       private String parseLogFile(final InputStream stream, JpredJob job) throws IOException { // returns all LOG lines concatenated without separators ("" for an empty LOG); sets job's program version when the first line mentions "version"
                String out = "";
                BufferedReader buffer = new BufferedReader(new InputStreamReader(stream));
                String line;
+               out = buffer.readLine(); // the first line is kept as the start of the returned text
+               if (out == null) return ""; // empty LOG: return "" rather than null, as the method did before the version check was added
+               Matcher matcher = Pattern.compile("((\\d|\\.)+)").matcher(out);
+               if (out.contains("version") && matcher.find())
+                       job.setProgramVersion(matcher.group(0)); // first run of digits and dots on the version line
                while (null != (line = buffer.readLine())) {
-                       out += line;
+                       out += line;            
                }
                return out;
        }
 
        private int analyseJob(String[] jobinfo) throws IOException {
+               alignment = new ArrayList<FastaSequence>();
+               predictions = new ArrayList<FastaSequence>();
                boolean running = true;
                boolean ConcisefileExists = false;
                boolean LogfileExists = false;
-               JpredJob job = new JpredJob (jobinfo[jobinfo.length - 1], jobinfo[0], jobinfo[1]);
+               JpredJob job = new JpredJob(jobinfo[jobinfo.length - 1], jobinfo[0], jobinfo[1]);
                job.setIP(jobinfo[2]);
+               job.setProgramName("Jpred");
                Date currDate = new Date();
                String maindir = dirprefix + "/" + job.getJobID() + "/";
 
-               //System.out.println("analyzing job " + job.getJobID());
+               try {
+                       Date finishTime = timeformatter.parse(jobinfo[1]);
+                       long delay = currDate.getTime() / 1000 - finishTime.getTime() / 1000;
+                       if (delay < 120) return 0;
+               } catch (ParseException e) {
+                       e.printStackTrace();
+               }
+
                try {
                        URL dirurl = new URL(maindir);
                        HttpURLConnection httpConnection_dirurl = (HttpURLConnection) dirurl.openConnection();
@@ -102,15 +153,13 @@ public class JpredParserHTTP implements JpredParser {
                                return 0;
                        }
                        URL conciseurl = new URL(maindir + job.getJobID() + ".concise.fasta");
-                       URL archiveurl = new URL(maindir + job.getJobID() + ".tar.gz");
                        URL logurl = new URL(maindir + "LOG");
                        HttpURLConnection httpConnection_conciseurl = (HttpURLConnection) conciseurl.openConnection();
                        HttpURLConnection httpConnection_logurl = (HttpURLConnection) logurl.openConnection();
-                       HttpURLConnection httpConnection_archiveurl = (HttpURLConnection) archiveurl.openConnection();
                        if (199 < httpConnection_conciseurl.getResponseCode() && httpConnection_conciseurl.getResponseCode() < 300) {
                                ConcisefileExists = true;
                                running = false;
-                               try {
+                               try {                           
                                        job.setProtein(parsePredictions(conciseurl.openStream(), job.getJobID()));
                                } catch (IOException e) {
                                        e.printStackTrace();
@@ -121,9 +170,10 @@ public class JpredParserHTTP implements JpredParser {
                        }
                        if (199 < httpConnection_logurl.getResponseCode() && httpConnection_logurl.getResponseCode() < 300) {
                                LogfileExists = true;
-                               job.setLog(parseLogFile(logurl.openStream()));
+                               job.setLog(parseLogFile(logurl.openStream(), job));
                        } else {
                                // The job has not been started at all...
+                               System.out.println ("WARNING! Job " + job.getJobID() + " has status FAIL/STOPPED");
                                job.setExecutionStatus("FAIL");
                                job.setFinalStatus("STOPPED");
                                running = false;
@@ -147,23 +197,46 @@ public class JpredParserHTTP implements JpredParser {
 
                        httpConnection_conciseurl.disconnect();
                        httpConnection_logurl.disconnect();
-                       httpConnection_archiveurl.disconnect();
                } catch (MalformedURLException e) {
                        e.printStackTrace();
                }
 
                if (!running) {
+                       // logging the job
                        job.setAlignment(alignment);
                        job.setPredictions(predictions);
+                       if (job.getExecutionStatus().equals("FAIL")) {
+                               URL sequrl = new URL(maindir + job.getJobID() + ".seq");
+                               HttpURLConnection httpConnection_sequrl = (HttpURLConnection) sequrl.openConnection();
+                               if (199 < httpConnection_sequrl.getResponseCode() && httpConnection_sequrl.getResponseCode() < 300) {
+                                       try {
+                                               job.setProtein(parseSeqFile(sequrl.openStream(), job.getJobID()));
+                                       } catch (IOException e) {
+                                               e.printStackTrace();
+                                       }
+                               }
+                       }
                        cw.FormQueryTables(job);
-                       cw.ArchiveData(job, "undefined");
+
+                       // archiving the job
+                       if (archiving) {
+                               ArchivedJob ajob = new ArchivedJob(job.getJobID());
+                               String arlink = archive.createJob(job.getJobID());
+                               if (job.getFinalStatus().equals("OK")) {
+                                       ajob.setArchivePath(arlink);
+                                       ajob.copyArchiveFromWeb(maindir + job.getJobID() + ".tar.gz");
+                                       cw.ArchiveData(job, arlink);
+                               } else {
+                                       cw.ArchiveData(job, "undefined");
+                               }
+                       }
                        return 1;
                }
 
                return 0;
        }
 
-       private void ParsingForDate(String input, String date) {
+       private void ParsingOneDay(String input, String date) {
                int totalcount = 0;
                int countinsertions = 0;
                int countinserted = 0;
@@ -204,4 +277,4 @@ public class JpredParserHTTP implements JpredParser {
                }
                ;
        }
-}
+};