package compbio.cassandra; import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; import java.util.Calendar; import java.util.Date; import java.util.List; import compbio.cassandra.JpredParser; import compbio.engine.JpredJob; public class JpredParserHTTP implements JpredParser { private CassandraWriter cw = new CassandraWriter(); private String dirprefix; private List alignment; private List predictions; private int countNoData; public JpredParserHTTP() { dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results"; } public JpredParserHTTP(String sourceurl) { dirprefix = sourceurl; } public void setSource(String newsourceprefix) { dirprefix = newsourceprefix; } public void Parsing(String source, int nDays) throws IOException { Calendar cal = Calendar.getInstance(); cal.add(Calendar.DATE, -nDays); for (int i = 0; i < nDays; ++i) { cal.add(Calendar.DATE, 1); String date = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DATE); ParsingForDate(source, date); } } /* * The method parses the Jpred output concise file in the FASTA format If * there is a record with ID = QUERY or jobid, this a "one protein" job * otherwise this is an alignment job */ private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException { final FastaReader fr = new FastaReader(stream); String protein = ""; alignment = new ArrayList(); predictions = new ArrayList(); while (fr.hasNext()) { final FastaSequence fs = fr.next(); String seqid = fs.getId(); String seq = fs.getSequence().replaceAll("\n", ""); if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28") || seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF") || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM") || seqid.equals("JNETCONF")) { predictions.add(fs); } else { alignment.add(fs); if (seqid.equals("QUERY") || seqid.equals(jobid)) protein = seq; } } return protein; } private String parseLogFile(final InputStream stream) throws IOException { String out = ""; BufferedReader buffer = new BufferedReader(new InputStreamReader(stream)); String line; while (null != (line = buffer.readLine())) { out += line; } return out; } private int analyseJob(String[] jobinfo) throws IOException { boolean running = true; boolean ConcisefileExists = false; boolean LogfileExists = false; JpredJob job = new JpredJob (jobinfo[jobinfo.length - 1], jobinfo[0], jobinfo[1]); job.setIP(jobinfo[2]); Date currDate = new Date(); String maindir = dirprefix + "/" + job.getJobID() + "/"; //System.out.println("analyzing job " + job.getJobID()); try { URL dirurl = new URL(maindir); HttpURLConnection httpConnection_dirurl = (HttpURLConnection) dirurl.openConnection(); if (httpConnection_dirurl.getResponseCode() < 199 || 300 <= httpConnection_dirurl.getResponseCode()) { return 0; } URL conciseurl = new URL(maindir + job.getJobID() + ".concise.fasta"); URL archiveurl = new URL(maindir + job.getJobID() + ".tar.gz"); URL logurl = new URL(maindir + "LOG"); HttpURLConnection httpConnection_conciseurl = (HttpURLConnection) conciseurl.openConnection(); HttpURLConnection httpConnection_logurl = (HttpURLConnection) logurl.openConnection(); HttpURLConnection httpConnection_archiveurl = (HttpURLConnection) archiveurl.openConnection(); if (199 < httpConnection_conciseurl.getResponseCode() && httpConnection_conciseurl.getResponseCode() < 300) { ConcisefileExists = true; running = false; try { job.setProtein(parsePredictions(conciseurl.openStream(), job.getJobID())); } catch (IOException e) { e.printStackTrace(); } } else { // The job still can be running of failed... ++countNoData; } if (199 < httpConnection_logurl.getResponseCode() && httpConnection_logurl.getResponseCode() < 300) { LogfileExists = true; job.setLog(parseLogFile(logurl.openStream())); } else { // The job has not been started at all... job.setExecutionStatus("FAIL"); job.setFinalStatus("STOPPED"); running = false; } if (job.getLog().matches("(.*)TIMEOUT\\syour\\sjob\\stimed\\sout(.*)")) { // blast job was too long (more than 3600 secs by default)... job.setExecutionStatus("FAIL"); job.setFinalStatus("TIMEDOUT"); running = false; } else if (job.getLog().matches("(.*)Jpred\\serror:\\sDied\\sat(.*)")) { // an internal Jpred error... job.setExecutionStatus("FAIL"); job.setFinalStatus("JPREDERROR"); running = false; } else if ((currDate.getTime() - job.getEndTime()) / 1000 > 3601 && LogfileExists && !ConcisefileExists) { // the job was stopped with unknown reason... job.setExecutionStatus("FAIL"); job.setFinalStatus("STOPPED"); running = false; } httpConnection_conciseurl.disconnect(); httpConnection_logurl.disconnect(); httpConnection_archiveurl.disconnect(); } catch (MalformedURLException e) { e.printStackTrace(); } if (!running) { job.setAlignment(alignment); job.setPredictions(predictions); cw.FormQueryTables(job); cw.ArchiveData(job, "undefined"); return 1; } return 0; } private void ParsingForDate(String input, String date) { int totalcount = 0; int countinsertions = 0; int countinserted = 0; int countNotanalyzed = 0; countNoData = 0; System.out.println("Inserting jobs for " + date); try { URL url = new URL(input); URLConnection conn = url.openConnection(); BufferedReader alljobs = new BufferedReader(new InputStreamReader(conn.getInputStream())); String line; while ((line = alljobs.readLine()) != null) { if (line.matches(date + ":(.*)jp_[^\\s]+")) { totalcount++; String[] job = line.split("\\s+"); String jobid = job[job.length - 1]; if (cw.JobisNotInsterted(jobid)) { countinsertions += analyseJob(job); } else { ++countinserted; } } else { ++countNotanalyzed; } } alljobs.close(); System.out.println("Total number of jobs = " + totalcount); System.out.println(" " + countinserted + " jobs inserted already"); System.out.println(" " + countNotanalyzed + " not analysed jobs"); System.out.println(" " + countNoData + " jobs without *.concise.fasta file (RUNNING or FAILED)"); System.out.println(" " + countinsertions + " new job insertions\n"); } catch (MalformedURLException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } ; } }