package compbio.cassandra;
import java.io.BufferedReader;
-import java.io.DataInputStream;
-import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import compbio.cassandra.JpredParser;
+import compbio.engine.JpredJob;
public class JpredParserHTTP implements JpredParser {
private CassandraWriter cw = new CassandraWriter();
private String dirprefix;
private List<FastaSequence> alignment;
private List<FastaSequence> predictions;
- private String jnetpred;
+ private int countNoData;
public JpredParserHTTP() {
dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
cal.add(Calendar.DATE, -nDays);
for (int i = 0; i < nDays; ++i) {
cal.add(Calendar.DATE, 1);
- int month = cal.get(Calendar.MONTH) + 1;
- int year = cal.get(Calendar.YEAR);
- int day = cal.get(Calendar.DATE);
- String date = year + "/" + month + "/" + day;
+ String date = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DATE);
ParsingForDate(source, date);
}
}
+ /**
+  * Parses a Jpred concise output file (FASTA format) read from {@code stream}.
+  * <p>
+  * Records whose ID is one of the Jpred annotation names (jnetpred,
+  * Lupas_14/21/28, JNETSOL0/5/25, JNETCONF, JNETHMM, JNETPSSM) are collected
+  * in {@code predictions}; every other record is collected in
+  * {@code alignment}. Both fields are re-created on every call.
+  * <p>
+  * If there is a record with ID "QUERY" or with ID equal to {@code jobid},
+  * this is a "one protein" job; otherwise this is an alignment job.
+  *
+  * @param stream input stream with the content of the concise FASTA file
+  * @param jobid  Jpred job ID, accepted as an alternative ID of the query record
+  * @return the sequence (line breaks removed) of the last record with ID
+  *         "QUERY" or {@code jobid}, or an empty string if there is no such record
+  * @throws FileNotFoundException declared by the signature; presumably raised
+  *         by {@code FastaReader} - TODO confirm
+  */
private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException {
final FastaReader fr = new FastaReader(stream);
- String query = "";
+ String protein = "";
alignment = new ArrayList<FastaSequence>();
predictions = new ArrayList<FastaSequence>();
while (fr.hasNext()) {
final FastaSequence fs = fr.next();
String seqid = fs.getId();
String seq = fs.getSequence().replaceAll("\n", "");
- if (seqid.equals("QUERY") || seqid.equals(jobid)) {
- query = seq;
- alignment.add(fs);
- } else if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28")
+ if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28")
|| seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF")
|| seqid.equals("JNETHMM") || seqid.equals("JNETPSSM")) {
predictions.add(fs);
- if (seqid.equals("jnetpred"))
- jnetpred = seq;
} else {
alignment.add(fs);
+ if (seqid.equals("QUERY") || seqid.equals(jobid))
+ protein = seq;
}
}
- return query;
+ return protein;
}
private String parseLogFile(final InputStream stream) throws IOException {
return out;
}
- private List<Byte> parseArchiveFile(final InputStream stream) throws IOException {
- DataInputStream data_in = new DataInputStream(stream);
- List<Byte> out = new ArrayList<Byte>();
- while (true) {
- try {
- out.add(data_in.readByte());
- } catch (EOFException eof) {
- break;
+    /**
+     * Inspects the result directory of one Jpred job over HTTP and, if the job
+     * is no longer running, stores it through the Cassandra writer.
+     * <p>
+     * The job counts as finished when its concise FASTA file is available. It
+     * counts as failed when the LOG file is missing, when the log reports a
+     * timeout or an internal Jpred error, or when a LOG file exists without a
+     * concise file more than 3601 seconds after the job's end time. Each job
+     * without a concise file increments {@code countNoData}.
+     *
+     * @param jobinfo whitespace-separated fields of one line of the job list:
+     *                the last element is the job ID, elements 0 and 1 are passed
+     *                to the {@code JpredJob} constructor (presumably start and
+     *                end time - confirm against JpredJob) and element 2 is the
+     *                client IP
+     * @return 1 if the job was stored, 0 if the job directory is not reachable,
+     *         the job is still considered running, or a URL is malformed
+     * @throws IOException if an HTTP request or reading the LOG file fails
+     */
+    private int analyseJob(String[] jobinfo) throws IOException {
+        boolean running = true;
+        boolean conciseFileExists = false;
+        boolean logFileExists = false;
+        JpredJob job = new JpredJob(jobinfo[jobinfo.length - 1], jobinfo[0], jobinfo[1]);
+        job.setIP(jobinfo[2]);
+        Date currDate = new Date();
+        String maindir = dirprefix + "/" + job.getJobID() + "/";
+        // These fields are otherwise only re-created by parsePredictions; reset
+        // them here so that a job without a concise file is not stored with the
+        // alignment and predictions of the previously analysed job.
+        alignment = new ArrayList<FastaSequence>();
+        predictions = new ArrayList<FastaSequence>();
+
+        try {
+            URL dirurl = new URL(maindir);
+            HttpURLConnection httpConnection_dirurl = (HttpURLConnection) dirurl.openConnection();
+            int dirResponse;
+            try {
+                dirResponse = httpConnection_dirurl.getResponseCode();
+            } finally {
+                httpConnection_dirurl.disconnect();
+            }
+            // Only 2xx counts as success, consistent with the file checks below.
+            if (dirResponse < 200 || 300 <= dirResponse) {
+                return 0;
+            }
+            URL conciseurl = new URL(maindir + job.getJobID() + ".concise.fasta");
+            URL logurl = new URL(maindir + "LOG");
+            HttpURLConnection httpConnection_conciseurl = (HttpURLConnection) conciseurl.openConnection();
+            HttpURLConnection httpConnection_logurl = (HttpURLConnection) logurl.openConnection();
+            try {
+                int conciseResponse = httpConnection_conciseurl.getResponseCode();
+                if (199 < conciseResponse && conciseResponse < 300) {
+                    conciseFileExists = true;
+                    running = false;
+                    try {
+                        InputStream conciseStream = conciseurl.openStream();
+                        try {
+                            job.setProtein(parsePredictions(conciseStream, job.getJobID()));
+                        } finally {
+                            conciseStream.close();
+                        }
+                    } catch (IOException e) {
+                        e.printStackTrace();
+                    }
+                } else {
+                    // The job can still be running or it has failed...
+                    ++countNoData;
+                }
+                int logResponse = httpConnection_logurl.getResponseCode();
+                if (199 < logResponse && logResponse < 300) {
+                    logFileExists = true;
+                    InputStream logStream = logurl.openStream();
+                    try {
+                        job.setLog(parseLogFile(logStream));
+                    } finally {
+                        logStream.close();
+                    }
+                } else {
+                    // The job has not been started at all...
+                    job.setExecutionStatus("FAIL");
+                    job.setFinalStatus("STOPPED");
+                    running = false;
+                }
+                // The log may never have been set (no LOG file); treat a missing
+                // log as empty instead of dereferencing null.
+                String log = job.getLog();
+                if (log == null) {
+                    log = "";
+                }
+                if (log.matches("(.*)TIMEOUT\\syour\\sjob\\stimed\\sout(.*)")) {
+                    // blast job was too long (more than 3600 secs by default)...
+                    job.setExecutionStatus("FAIL");
+                    job.setFinalStatus("TIMEDOUT");
+                    running = false;
+                } else if (log.matches("(.*)Jpred\\serror:\\sDied\\sat(.*)")) {
+                    // an internal Jpred error...
+                    job.setExecutionStatus("FAIL");
+                    job.setFinalStatus("JPREDERROR");
+                    running = false;
+                } else if ((currDate.getTime() - job.getEndTime()) / 1000 > 3601 && logFileExists && !conciseFileExists) {
+                    // the job was stopped for an unknown reason...
+                    job.setExecutionStatus("FAIL");
+                    job.setFinalStatus("STOPPED");
+                    running = false;
}
+            } finally {
+                // Release the connections even if a request above has failed.
+                httpConnection_conciseurl.disconnect();
+                httpConnection_logurl.disconnect();
+            }
+        } catch (MalformedURLException e) {
+            e.printStackTrace();
}
- return out;
+
+        if (!running) {
+            job.setAlignment(alignment);
+            job.setPredictions(predictions);
+            cw.FormQueryTables(job);
+            // NOTE(review): the archive location is stored as "undefined"; the
+            // <jobid>.tar.gz URL is no longer passed on - confirm this is intended.
+            cw.ArchiveData(job, "undefined");
+            return 1;
+        }
+
+        return 0;
}
private void ParsingForDate(String input, String date) {
int totalcount = 0;
- int countNoData = 0;
- int countUnclearFASTAid = 0;
int countinsertions = 0;
int countinserted = 0;
- int counAlignments = 0;
- int countStrange = 0;
+ int countNotanalyzed = 0;
+ countNoData = 0;
System.out.println("Inserting jobs for " + date);
try {
while ((line = alljobs.readLine()) != null) {
if (line.matches(date + ":(.*)jp_[^\\s]+")) {
- String[] table = line.split("\\s+");
- // Format of a record:
- // starttime endtime ip email jobid (directory)
- // 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172
- // unknown_email jp_J9HBCBT
- String id = table[table.length - 1];
totalcount++;
- if (cw.JobisNotInsterted(id)) {
- URL dataurl = new URL(dirprefix + "/" + id + "/" + id + ".concise.fasta");
- URL archiveurl = new URL(dirprefix + "/" + id + "/" + id + ".tar.gz");
- URL logurl = new URL(dirprefix + "/" + id + "/LOG");
- HttpURLConnection httpConnection1 = (HttpURLConnection) dataurl.openConnection();
- HttpURLConnection httpConnection2 = (HttpURLConnection) logurl.openConnection();
- HttpURLConnection httpConnection3 = (HttpURLConnection) archiveurl.openConnection();
- int response1 = httpConnection1.getResponseCode();
- int response2 = httpConnection2.getResponseCode();
- if (199 < response1 && response1 < 300) {
- try {
- String protein = parsePredictions(dataurl.openStream(), id);
- if (protein.equals("")) {
- countUnclearFASTAid++;
- } else {
- SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
- SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
- String startdatestring = table[0].substring(0, table[0].indexOf(":"));
- try {
- Date startdate = dateformatter.parse(startdatestring);
- Date starttime = timeformatter.parse(table[0]);
- Date endtime = timeformatter.parse(table[1]);
- String ip = table[2];
- String execstatus = "OK";
- String finalstatus = "OK";
- countinsertions += cw.FormQueryTables(startdate.getTime(), table[0], table[1], ip, id, execstatus,
- finalstatus, protein, predictions);
-
- long exectime = (endtime.getTime() - starttime.getTime()) / 1000;
- String log = "";
- if (199 < response2 && response2 < 300) {
- log = parseLogFile(logurl.openStream());
- }
- cw.ArchiveData(startdate.getTime(), exectime, ip, id, execstatus, finalstatus, protein,
- predictions, alignment, log, archiveurl.toString());
- } catch (ParseException e) {
- e.printStackTrace();
- }
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
- } else {
- countNoData++;
- }
- httpConnection1.disconnect();
- httpConnection2.disconnect();
- httpConnection3.disconnect();
+ String[] job = line.split("\\s+");
+ String jobid = job[job.length - 1];
+ if (cw.JobisNotInsterted(jobid)) {
+ countinsertions += analyseJob(job);
} else {
++countinserted;
}
} else {
- if (line.matches(date + "(.*)Sequence0/(.*)")) {
- ++counAlignments;
- } else {
- ++countStrange;
- }
+ ++countNotanalyzed;
}
}
alljobs.close();
System.out.println("Total number of jobs = " + totalcount);
System.out.println(" " + countinserted + " jobs inserted already");
- System.out.println(" " + counAlignments + " jalview jobs");
- System.out.println(" " + countStrange + " not analysed jobs");
- System.out.println(" " + countNoData + " jobs without *.concise.fasta file");
- System.out.println(" " + countUnclearFASTAid + " jobs with unclear FASTA protein id in *.concise.fasta");
+ System.out.println(" " + countNotanalyzed + " not analysed jobs");
+ System.out.println(" " + countNoData + " jobs without *.concise.fasta file (RUNNING or FAILED)");
System.out.println(" " + countinsertions + " new job insertions\n");
} catch (MalformedURLException e) {
e.printStackTrace();