private String dirprefix; // base URL of the Jpred results area; per-job URLs are built as dirprefix + "/" + jobid + "/..."
private List<FastaSequence> alignment; // records of the most recently parsed concise file that are not prediction tracks
private List<FastaSequence> predictions; // prediction-track records (jnetpred, Lupas_*, JNETSOL*, ...) of the most recently parsed concise file
- private String jnetpred;
+ private int countNoData; // jobs whose *.concise.fasta request did not return a 2xx status; reset to 0 at the start of ParsingForDate
/*
 * Creates a parser that looks for job results under the public Jpred
 * results URL of the Dundee compbio server.
 */
public JpredParserHTTP() {
    this.dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
}
}
+ /*
+ * Parses a Jpred "concise" output file, which is in FASTA format, and
+ * splits its records between the two instance lists. Both lists are
+ * replaced by fresh empty lists at the start of every call.
+ *
+ * Records whose id is a known prediction track (jnetpred, Lupas_14,
+ * Lupas_21, Lupas_28, JNETSOL0, JNETSOL5, JNETSOL25, JNETCONF, JNETHMM,
+ * JNETPSSM) are appended to "predictions"; every other record is appended
+ * to "alignment".
+ *
+ * If a record with id "QUERY" or with id equal to jobid is present, this is
+ * a "one protein" job and the sequence of that record (with "\n" removed)
+ * is returned; if several such records exist the last one wins. Otherwise
+ * this is an alignment job and the empty string is returned.
+ *
+ * stream - the content of the *.concise.fasta file
+ * jobid  - the Jpred job id, accepted as an alternative id of the query record
+ *
+ * NOTE(review): "JNETCONF" is tested twice in the prediction-track
+ * condition; the second test is redundant.
+ * NOTE(review): the FastaReader is never closed in this method - confirm
+ * that the caller closes the stream.
+ */
private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException {
final FastaReader fr = new FastaReader(stream);
- String query = "";
+ String protein = "";
alignment = new ArrayList<FastaSequence>();
predictions = new ArrayList<FastaSequence>();
while (fr.hasNext()) {
final FastaSequence fs = fr.next();
String seqid = fs.getId();
String seq = fs.getSequence().replaceAll("\n", "");
- if (seqid.equals("QUERY") || seqid.equals(jobid)) {
- query = seq;
- alignment.add(fs);
- } else if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28")
+ if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28")
|| seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF")
- || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM")) {
+ || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM") || seqid.equals("JNETCONF")) {
predictions.add(fs);
- if (seqid.equals("jnetpred"))
- jnetpred = seq;
} else {
alignment.add(fs);
+ if (seqid.equals("QUERY") || seqid.equals(jobid))
+ protein = seq;
}
}
- return query;
+ return protein;
}
private String parseLogFile(final InputStream stream) throws IOException {
return out;
}
+ private int analyseJob(String[] job) throws IOException {
+ boolean running = true;
+ boolean ConcisefileExists = false;
+ boolean LogfileExists = false;
+ String id = job[job.length - 1];
+ String startdatestring = job[0].substring(0, job[0].indexOf(":"));
+ Date startdate = new Date(0);
+ Date starttime = new Date(0);
+ Date endtime = new Date(0);
+ Date currDate = new Date();
+ String ip = job[2];
+ String execstatus = "OK";
+ String finalstatus = "OK";
+ String protein = "";
+ long exectime = 0;
+ String log = "";
+ String maindir = dirprefix + "/" + id + "/";
+ String concisefile = dirprefix + "/" + id + "/" + id + ".concise.fasta";
+ String archivefile = dirprefix + "/" + id + "/" + id + ".tar.gz";
+ String logfile = dirprefix + "/" + id + "/LOG";
+ SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
+ SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
+ try {
+ startdate = dateformatter.parse(startdatestring);
+ starttime = timeformatter.parse(job[0]);
+ endtime = timeformatter.parse(job[1]);
+ exectime = (endtime.getTime() - starttime.getTime()) / 1000;
+ } catch (ParseException e) {
+ e.printStackTrace();
+ }
+
+ try {
+ URL dirurl = new URL(maindir);
+ HttpURLConnection httpConnection_dirurl = (HttpURLConnection) dirurl.openConnection();
+ if (httpConnection_dirurl.getResponseCode() < 199 || 300 <= httpConnection_dirurl.getResponseCode()) {
+ return 0;
+ }
+ URL conciseurl = new URL(concisefile);
+ URL archiveurl = new URL(archivefile);
+ URL logurl = new URL(logfile);
+ HttpURLConnection httpConnection_conciseurl = (HttpURLConnection) conciseurl.openConnection();
+ HttpURLConnection httpConnection_logurl = (HttpURLConnection) logurl.openConnection();
+ HttpURLConnection httpConnection_archiveurl = (HttpURLConnection) archiveurl.openConnection();
+ if (199 < httpConnection_conciseurl.getResponseCode() && httpConnection_conciseurl.getResponseCode() < 300) {
+ ConcisefileExists = true;
+ running = false;
+ try {
+ protein = parsePredictions(conciseurl.openStream(), id);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ } else {
+ // The job still can be running of failed...
+ ++countNoData;
+ alignment = new ArrayList<FastaSequence>();
+ predictions = new ArrayList<FastaSequence>();
+ }
+ if (199 < httpConnection_logurl.getResponseCode() && httpConnection_logurl.getResponseCode() < 300) {
+ LogfileExists = true;
+ log = parseLogFile(logurl.openStream());
+ } else {
+ // The job has not been started at all...
+ execstatus = "FAIL";
+ finalstatus = "STOPPED";
+ running = false;
+ }
+ if (log.matches("(.*)TIMEOUT\\syour\\sjob\\stimed\\sout(.*)")) {
+ // blast job was too long (more than 3600 secs by default)...
+ execstatus = "FAIL";
+ finalstatus = "TIMEDOUT";
+ running = false;
+ } else if (log.matches("(.*)Jpred\\serror:\\sDied\\sat(.*)")) {
+ // an internal Jpred error...
+ execstatus = "FAIL";
+ finalstatus = "JPREDERROR";
+ running = false;
+ } else if ((currDate.getTime() - endtime.getTime()) / 1000 > 3601 && LogfileExists && !ConcisefileExists) {
+ // the job was stopped with unknown reason...
+ execstatus = "FAIL";
+ finalstatus = "STOPPED";
+ running = false;
+ }
+
+ httpConnection_conciseurl.disconnect();
+ httpConnection_logurl.disconnect();
+ httpConnection_archiveurl.disconnect();
+ } catch (MalformedURLException e) {
+ e.printStackTrace();
+ }
+
+ if (!running) {
+ long t = startdate.getTime();
+ cw.FormQueryTables(t, job[0], job[1], ip, id, execstatus, finalstatus, protein, predictions);
+ cw.ArchiveData(t, exectime, ip, id, execstatus, finalstatus, protein, predictions, alignment, log, archivefile);
+ return 1;
+ } else
+ System.out.println("job " + id + " is running");
+
+ return 0;
+ }
+
private void ParsingForDate(String input, String date) {
int totalcount = 0;
- int countNoData = 0;
- int countUnclearFASTAid = 0;
int countinsertions = 0;
int countinserted = 0;
- int counAlignments = 0;
- int countStrange = 0;
+ int countNotanalyzed = 0;
+ countNoData = 0;
System.out.println("Inserting jobs for " + date);
try {
while ((line = alljobs.readLine()) != null) {
if (line.matches(date + ":(.*)jp_[^\\s]+")) {
- String[] table = line.split("\\s+");
- // Format of a record:
- // starttime endtime ip email jobid (directory)
- // 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172
- // unknown_email jp_J9HBCBT
- String id = table[table.length - 1];
totalcount++;
- if (cw.JobisNotInsterted(id)) {
- URL dataurl = new URL(dirprefix + "/" + id + "/" + id + ".concise.fasta");
- URL archiveurl = new URL(dirprefix + "/" + id + "/" + id + ".tar.gz");
- URL logurl = new URL(dirprefix + "/" + id + "/LOG");
- HttpURLConnection httpConnection1 = (HttpURLConnection) dataurl.openConnection();
- HttpURLConnection httpConnection2 = (HttpURLConnection) logurl.openConnection();
- HttpURLConnection httpConnection3 = (HttpURLConnection) archiveurl.openConnection();
- int response1 = httpConnection1.getResponseCode();
- int response2 = httpConnection2.getResponseCode();
- if (199 < response1 && response1 < 300) {
- try {
- String protein = parsePredictions(dataurl.openStream(), id);
- if (protein.equals("")) {
- countUnclearFASTAid++;
- } else {
- SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
- SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
- String startdatestring = table[0].substring(0, table[0].indexOf(":"));
- try {
- Date startdate = dateformatter.parse(startdatestring);
- Date starttime = timeformatter.parse(table[0]);
- Date endtime = timeformatter.parse(table[1]);
- String ip = table[2];
- String execstatus = "OK";
- String finalstatus = "OK";
- countinsertions += cw.FormQueryTables(startdate.getTime(), table[0], table[1], ip, id, execstatus,
- finalstatus, protein, predictions);
-
- long exectime = (endtime.getTime() - starttime.getTime()) / 1000;
- String log = "";
- if (199 < response2 && response2 < 300) {
- log = parseLogFile(logurl.openStream());
- }
- cw.ArchiveData(startdate.getTime(), exectime, ip, id, execstatus, finalstatus, protein,
- predictions, alignment, log, archiveurl.toString());
- } catch (ParseException e) {
- e.printStackTrace();
- }
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
- } else {
- countNoData++;
- }
- httpConnection1.disconnect();
- httpConnection2.disconnect();
- httpConnection3.disconnect();
+ String[] job = line.split("\\s+");
+ String jobid = job[job.length - 1];
+ if (cw.JobisNotInsterted(jobid)) {
+ countinsertions += analyseJob(job);
} else {
++countinserted;
}
} else {
- if (line.matches(date + "(.*)Sequence0/(.*)")) {
- ++counAlignments;
- } else {
- ++countStrange;
- }
+ ++countNotanalyzed;
}
}
alljobs.close();
System.out.println("Total number of jobs = " + totalcount);
System.out.println(" " + countinserted + " jobs inserted already");
- System.out.println(" " + counAlignments + " jalview jobs");
- System.out.println(" " + countStrange + " not analysed jobs");
- System.out.println(" " + countNoData + " jobs without *.concise.fasta file");
- System.out.println(" " + countUnclearFASTAid + " jobs with unclear FASTA protein id in *.concise.fasta");
+ System.out.println(" " + countNotanalyzed + " not analysed jobs");
+ System.out.println(" " + countNoData + " jobs without *.concise.fasta file (RUNNING or FAILED)");
System.out.println(" " + countinsertions + " new job insertions\n");
} catch (MalformedURLException e) {
e.printStackTrace();