package compbio.cassandra; import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; import java.util.Date; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import compbio.cassandra.JpredParser; import compbio.data.sequence.FastaReader; import compbio.data.sequence.FastaSequence; import compbio.engine.JpredJob; import compbio.engine.ProteoCachePropertyHelperManager; import compbio.engine.archive.Archive; import compbio.engine.archive.ArchivedJob; import compbio.util.PropertyHelper; import compbio.util.Util; public class JpredParserHTTP implements JpredParser { private CassandraWriter cw = new CassandraWriter(); private static Archive archive; private String dirprefix; private List alignment; private List predictions; private int countNoData; private static boolean archiving = false; private static final PropertyHelper ph = ProteoCachePropertyHelperManager.getPropertyHelper(); static SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s"); public JpredParserHTTP() { dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results"; } public JpredParserHTTP(String sourceurl) { dirprefix = sourceurl; } public void setSource(String newsourceprefix) { dirprefix = newsourceprefix; } private boolean initBooleanValue(String key) { assert key != null; String status = ph.getProperty(key); if (Util.isEmpty(status)) { return false; } return new Boolean(status.trim()).booleanValue(); } public void Parsing(String source, int nDays) throws IOException { Calendar cal = Calendar.getInstance(); cal.add(Calendar.DATE, -nDays); archiving = initBooleanValue("archive.enable"); if (archiving) { archive = new Archive(); } for (int i = 0; i < nDays; ++i) { cal.add(Calendar.DATE, 1); String date = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DATE); ParsingOneDay(source, date); } } /* * The method parses the Jpred output concise file in the FASTA format If * there is a record with ID = QUERY or jobid, this a "one protein" job * otherwise this is an alignment job */ private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException { final FastaReader fr = new FastaReader(stream); String protein = ""; while (fr.hasNext()) { final FastaSequence fs = fr.next(); String seqid = fs.getId(); String seq = fs.getSequence().replaceAll("\n", ""); if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28") || seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF") || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM") || seqid.equals("JNETCONF")) { predictions.add(fs); } else { alignment.add(fs); if (seqid.equals("QUERY") || seqid.equals(jobid)) protein = seq; } } return protein; } private String parseSeqFile(final InputStream stream, String jobid) throws FileNotFoundException { final FastaReader fr = new FastaReader(stream); String protein = ""; final FastaSequence fs = fr.next(); protein = fs.getSequence().replaceAll("\n", ""); if (fr.hasNext()) { // this is an aligment job... return "alignment"; } return protein; } private String parseLogFile(final InputStream stream, JpredJob job) throws IOException { String out = ""; BufferedReader buffer = new BufferedReader(new InputStreamReader(stream)); String line; if (null != (out = buffer.readLine()) && (out.contains("version"))) { Matcher matcher = Pattern.compile("((\\d|\\.)+)").matcher(out); if (matcher.find()) job.setProgramVersion(matcher.group(0)); } while (null != (line = buffer.readLine())) { out += line; } return out; } private int analyseJob(String[] jobinfo) throws IOException { alignment = new ArrayList(); predictions = new ArrayList(); boolean running = true; boolean ConcisefileExists = false; boolean LogfileExists = false; JpredJob job = new JpredJob(jobinfo[jobinfo.length - 1], jobinfo[0], jobinfo[1]); job.setIP(jobinfo[2]); job.setProgramName("Jpred"); Date currDate = new Date(); String maindir = dirprefix + "/" + job.getJobID() + "/"; try { Date finishTime = timeformatter.parse(jobinfo[1]); long delay = currDate.getTime() / 1000 - finishTime.getTime() / 1000; if (delay < 120) return 0; } catch (ParseException e) { e.printStackTrace(); } try { URL dirurl = new URL(maindir); HttpURLConnection httpConnection_dirurl = (HttpURLConnection) dirurl.openConnection(); if (httpConnection_dirurl.getResponseCode() < 199 || 300 <= httpConnection_dirurl.getResponseCode()) { return 0; } URL conciseurl = new URL(maindir + job.getJobID() + ".concise.fasta"); URL logurl = new URL(maindir + "LOG"); HttpURLConnection httpConnection_conciseurl = (HttpURLConnection) conciseurl.openConnection(); HttpURLConnection httpConnection_logurl = (HttpURLConnection) logurl.openConnection(); if (199 < httpConnection_conciseurl.getResponseCode() && httpConnection_conciseurl.getResponseCode() < 300) { ConcisefileExists = true; running = false; try { job.setProtein(parsePredictions(conciseurl.openStream(), job.getJobID())); } catch (IOException e) { e.printStackTrace(); } } else { // The job still can be running of failed... ++countNoData; } if (199 < httpConnection_logurl.getResponseCode() && httpConnection_logurl.getResponseCode() < 300) { LogfileExists = true; job.setLog(parseLogFile(logurl.openStream(), job)); } else { // The job has not been started at all... System.out.println ("WARNING! Job " + job.getJobID() + " has status FAIL/STOPPED"); job.setExecutionStatus("FAIL"); job.setFinalStatus("STOPPED"); running = false; } if (job.getLog().matches("(.*)TIMEOUT\\syour\\sjob\\stimed\\sout(.*)")) { // blast job was too long (more than 3600 secs by default)... job.setExecutionStatus("FAIL"); job.setFinalStatus("TIMEDOUT"); running = false; } else if (job.getLog().matches("(.*)Jpred\\serror:\\sDied\\sat(.*)")) { // an internal Jpred error... job.setExecutionStatus("FAIL"); job.setFinalStatus("JPREDERROR"); running = false; } else if ((currDate.getTime() - job.getEndTime()) / 1000 > 3601 && LogfileExists && !ConcisefileExists) { // the job was stopped with unknown reason... job.setExecutionStatus("FAIL"); job.setFinalStatus("STOPPED"); running = false; } httpConnection_conciseurl.disconnect(); httpConnection_logurl.disconnect(); } catch (MalformedURLException e) { e.printStackTrace(); } if (!running) { // logging the job job.setAlignment(alignment); job.setPredictions(predictions); if (job.getExecutionStatus().equals("FAIL")) { URL sequrl = new URL(maindir + job.getJobID() + ".seq"); HttpURLConnection httpConnection_sequrl = (HttpURLConnection) sequrl.openConnection(); if (199 < httpConnection_sequrl.getResponseCode() && httpConnection_sequrl.getResponseCode() < 300) { try { job.setProtein(parseSeqFile(sequrl.openStream(), job.getJobID())); } catch (IOException e) { e.printStackTrace(); } } } cw.FormQueryTables(job); // archiving the job if (archiving) { ArchivedJob ajob = new ArchivedJob(job.getJobID()); String arlink = archive.createJob(job.getJobID()); if (job.getFinalStatus().equals("OK")) { ajob.setArchivePath(arlink); ajob.copyArchiveFromWeb(maindir + job.getJobID() + ".tar.gz"); cw.ArchiveData(job, arlink); } else { cw.ArchiveData(job, "undefined"); } } return 1; } return 0; } private void ParsingOneDay(String input, String date) { int totalcount = 0; int countinsertions = 0; int countinserted = 0; int countNotanalyzed = 0; countNoData = 0; System.out.println("Inserting jobs for " + date); try { URL url = new URL(input); URLConnection conn = url.openConnection(); BufferedReader alljobs = new BufferedReader(new InputStreamReader(conn.getInputStream())); String line; while ((line = alljobs.readLine()) != null) { if (line.matches(date + ":(.*)jp_[^\\s]+")) { totalcount++; String[] job = line.split("\\s+"); String jobid = job[job.length - 1]; if (cw.JobisNotInsterted(jobid)) { countinsertions += analyseJob(job); } else { ++countinserted; } } else { ++countNotanalyzed; } } alljobs.close(); System.out.println("Total number of jobs = " + totalcount); System.out.println(" " + countinserted + " jobs inserted already"); System.out.println(" " + countNotanalyzed + " not analysed jobs"); System.out.println(" " + countNoData + " jobs without *.concise.fasta file (RUNNING or FAILED)"); System.out.println(" " + countinsertions + " new job insertions\n"); } catch (MalformedURLException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } ; } };