package compbio.cassandra;
import java.io.BufferedReader;
-import java.io.DataInputStream;
-import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import compbio.cassandra.JpredParser;
+import compbio.data.sequence.FastaReader;
+import compbio.data.sequence.FastaSequence;
+import compbio.engine.JpredJob;
+import compbio.engine.ProteoCachePropertyHelperManager;
+import compbio.engine.archive.Archive;
+import compbio.engine.archive.ArchivedJob;
+import compbio.util.PropertyHelper;
+import compbio.util.Util;
public class JpredParserHTTP implements JpredParser {
// Writer that receives finished jobs (see the FormQueryTables / ArchiveData calls in analyseJob).
private CassandraWriter cw = new CassandraWriter();
// Created in Parsing() when archiving is enabled.
// NOTE(review): static, so the instance is shared by every JpredParserHTTP object.
+ private static Archive archive;
// Base URL; analyseJob appends "/<job id>/" to it to locate a job's result directory.
private String dirprefix;
// Both lists are re-created at the start of analyseJob and attached to the job before it is stored.
// Presumably they are filled by parsePredictions; its body is not fully visible here, so confirm.
private List<FastaSequence> alignment;
private List<FastaSequence> predictions;
// Incremented in analyseJob when the concise FASTA file of a job is not available.
private int countNoData;
// Set in Parsing() from the "archive.enable" property; false until then.
+ private static boolean archiving = false;
// Property source read by initBooleanValue.
+ private static final PropertyHelper ph = ProteoCachePropertyHelperManager.getPropertyHelper();
public JpredParserHTTP() {
dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
dirprefix = newsourceprefix;
}
+ /**
+  * Reads a boolean flag from the property helper {@code ph}.
+  *
+  * @param key name of the property to look up; must not be null
+  * @return {@code false} when {@code Util.isEmpty} reports the property value
+  *         as empty; otherwise {@code true} only if the trimmed value equals
+  *         "true" ignoring case
+  */
+ private boolean initBooleanValue(String key) {
+     assert key != null;
+     String status = ph.getProperty(key);
+     if (Util.isEmpty(status)) {
+         return false;
+     }
+     // Boolean.parseBoolean gives the same result as the deprecated
+     // Boolean(String) constructor without allocating a wrapper object.
+     return Boolean.parseBoolean(status.trim());
+ }
+
/**
 * Processes the most recent nDays calendar days, one day per iteration,
 * ending with the current day.
 *
 * @param source passed through unchanged to the per-day parsing routine
 * @param nDays  number of days to process; nothing is processed when it is
 *               zero or negative
 * @throws IOException declared by the signature; the visible lines do not
 *                     show where it originates
 */
public void Parsing(String source, int nDays) throws IOException {
Calendar cal = Calendar.getInstance();
// Step back nDays; the loop advances one day before each use, so the dates
// visited run from (today - nDays + 1) up to and including today.
cal.add(Calendar.DATE, -nDays);
// Archiving is switched on by the "archive.enable" property.
// NOTE(review): archiving and archive are static fields written from an
// instance method, so the setting is shared by all instances.
+ archiving = initBooleanValue("archive.enable");
+ if (archiving) {
+ archive = new Archive();
+ }
for (int i = 0; i < nDays; ++i) {
cal.add(Calendar.DATE, 1);
- int month = cal.get(Calendar.MONTH) + 1;
- int year = cal.get(Calendar.YEAR);
- int day = cal.get(Calendar.DATE);
- String date = year + "/" + month + "/" + day;
- ParsingForDate(source, date);
// Date string is "year/month/day" without zero padding; Calendar.MONTH is
// zero-based, hence the + 1.
+ String date = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DATE);
+ ParsingOneDay(source, date);
}
}
private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException {
final FastaReader fr = new FastaReader(stream);
String protein = "";
- alignment = new ArrayList<FastaSequence>();
- predictions = new ArrayList<FastaSequence>();
while (fr.hasNext()) {
final FastaSequence fs = fr.next();
String seqid = fs.getId();
return out;
}
- private List<Byte> parseArchiveFile(final InputStream stream) throws IOException {
- DataInputStream data_in = new DataInputStream(stream);
- List<Byte> out = new ArrayList<Byte>();
- while (true) {
- try {
- out.add(data_in.readByte());
- } catch (EOFException eof) {
- break;
- }
- }
- return out;
- }
-
/**
 * Inspects the result directory of one Jpred job over HTTP and, once the job
 * is no longer considered running, hands it to the CassandraWriter (and to
 * the archive when archiving is enabled).
 *
 * The input array is read as follows: element 0 and element 1 are the start
 * and end time strings, element 2 is the client IP and the last element is
 * the job id. The removed code parsed elements 0 and 1 with the pattern
 * "yyyy/MM/dd:H:m:s"; presumably the JpredJob constructor does the same now
 * (confirm against JpredJob).
 *
 * @param jobinfo fields of one job record (the parameter was named job
 *                before this change)
 * @return 1 when the job was treated as finished and passed to
 *         cw.FormQueryTables; 0 when the job directory did not answer with
 *         an accepted status code or the job is still treated as running
 * @throws IOException if opening a connection or reading a response fails;
 *                     only MalformedURLException is handled locally
 */
- private int analyseJob(String[] job) throws IOException {
+ private int analyseJob(String[] jobinfo) throws IOException {
// Fresh result lists for every job, so data from a previous job is never reused.
+ alignment = new ArrayList<FastaSequence>();
+ predictions = new ArrayList<FastaSequence>();
// A job counts as running until one of the checks below proves otherwise.
boolean running = true;
boolean ConcisefileExists = false;
boolean LogfileExists = false;
- String id = job[job.length - 1];
- String startdatestring = job[0].substring(0, job[0].indexOf(":"));
- Date startdate = new Date(0);
- Date starttime = new Date(0);
- Date endtime = new Date(0);
+ JpredJob job = new JpredJob(jobinfo[jobinfo.length - 1], jobinfo[0], jobinfo[1]);
+ job.setIP(jobinfo[2]);
Date currDate = new Date();
- String ip = job[2];
- String execstatus = "OK";
- String finalstatus = "OK";
- String protein = "";
- long exectime = 0;
- String log = "";
- String maindir = dirprefix + "/" + id + "/";
- String concisefile = dirprefix + "/" + id + "/" + id + ".concise.fasta";
- String archivefile = dirprefix + "/" + id + "/" + id + ".tar.gz";
- String logfile = dirprefix + "/" + id + "/LOG";
- SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
- SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
- try {
- startdate = dateformatter.parse(startdatestring);
- starttime = timeformatter.parse(job[0]);
- endtime = timeformatter.parse(job[1]);
- exectime = (endtime.getTime() - starttime.getTime()) / 1000;
- } catch (ParseException e) {
- e.printStackTrace();
- }
// Every file of the job lives under <dirprefix>/<job id>/.
+ String maindir = dirprefix + "/" + job.getJobID() + "/";
try {
URL dirurl = new URL(maindir);
// NOTE(review): httpConnection_dirurl is not declared in the lines visible
// here (presumably opened from dirurl just above; confirm). The lower bound
// "< 199" lets status 199 through, whereas the checks further down accept
// only 200..299; confirm whether "< 200" was intended.
if (httpConnection_dirurl.getResponseCode() < 199 || 300 <= httpConnection_dirurl.getResponseCode()) {
return 0;
}
- URL conciseurl = new URL(concisefile);
- URL archiveurl = new URL(archivefile);
- URL logurl = new URL(logfile);
+ URL conciseurl = new URL(maindir + job.getJobID() + ".concise.fasta");
+ URL logurl = new URL(maindir + "LOG");
HttpURLConnection httpConnection_conciseurl = (HttpURLConnection) conciseurl.openConnection();
HttpURLConnection httpConnection_logurl = (HttpURLConnection) logurl.openConnection();
- HttpURLConnection httpConnection_archiveurl = (HttpURLConnection) archiveurl.openConnection();
// Concise FASTA answered with 2xx: predictions exist, so the job has finished.
if (199 < httpConnection_conciseurl.getResponseCode() && httpConnection_conciseurl.getResponseCode() < 300) {
ConcisefileExists = true;
running = false;
// NOTE(review): the stream returned by openStream() is not closed in this
// method; confirm that parsePredictions closes it.
try {
- protein = parsePredictions(conciseurl.openStream(), id);
+ job.setProtein(parsePredictions(conciseurl.openStream(), job.getJobID()));
} catch (IOException e) {
e.printStackTrace();
}
} else {
// The job can still be running or may have failed...
++countNoData;
- alignment = new ArrayList<FastaSequence>();
- predictions = new ArrayList<FastaSequence>();
}
// LOG answered with 2xx: keep its text for the status checks below.
if (199 < httpConnection_logurl.getResponseCode() && httpConnection_logurl.getResponseCode() < 300) {
LogfileExists = true;
- log = parseLogFile(logurl.openStream());
+ job.setLog(parseLogFile(logurl.openStream()));
} else {
// The job has not been started at all...
- execstatus = "FAIL";
- finalstatus = "STOPPED";
+ job.setExecutionStatus("FAIL");
+ job.setFinalStatus("STOPPED");
running = false;
}
// NOTE(review): when the LOG file is missing, setLog is never called, so the
// new code relies on JpredJob.getLog() returning a non-null default (the
// removed code started from an empty string); confirm against JpredJob.
- if (log.matches("(.*)TIMEOUT\\syour\\sjob\\stimed\\sout(.*)")) {
+ if (job.getLog().matches("(.*)TIMEOUT\\syour\\sjob\\stimed\\sout(.*)")) {
// blast job was too long (more than 3600 secs by default)...
- execstatus = "FAIL";
- finalstatus = "TIMEDOUT";
+ job.setExecutionStatus("FAIL");
+ job.setFinalStatus("TIMEDOUT");
running = false;
- } else if (log.matches("(.*)Jpred\\serror:\\sDied\\sat(.*)")) {
+ } else if (job.getLog().matches("(.*)Jpred\\serror:\\sDied\\sat(.*)")) {
// an internal Jpred error...
- execstatus = "FAIL";
- finalstatus = "JPREDERROR";
+ job.setExecutionStatus("FAIL");
+ job.setFinalStatus("JPREDERROR");
running = false;
- } else if ((currDate.getTime() - endtime.getTime()) / 1000 > 3601 && LogfileExists && !ConcisefileExists) {
+ } else if ((currDate.getTime() - job.getEndTime()) / 1000 > 3601 && LogfileExists && !ConcisefileExists) {
// the job was stopped with unknown reason...
// (more than 3601 seconds have passed since the recorded end time, a LOG
// exists, but no concise file was produced)
- execstatus = "FAIL";
- finalstatus = "STOPPED";
+ job.setExecutionStatus("FAIL");
+ job.setFinalStatus("STOPPED");
running = false;
}
httpConnection_conciseurl.disconnect();
httpConnection_logurl.disconnect();
- httpConnection_archiveurl.disconnect();
} catch (MalformedURLException e) {
e.printStackTrace();
}
// Only jobs that are no longer running (finished or failed) are written out.
if (!running) {
- long t = startdate.getTime();
- cw.FormQueryTables(t, job[0], job[1], ip, id, execstatus, finalstatus, protein, predictions);
- cw.ArchiveData(t, exectime, ip, id, execstatus, finalstatus, protein, predictions, alignment, log, archivefile);
+ job.setAlignment(alignment);
+ job.setPredictions(predictions);
+ cw.FormQueryTables(job);
+ // archiving the job
// archive.createJob is called for every finished job; only jobs whose final
// status is "OK" get their .tar.gz copied, all others are stored with the
// archive path "undefined".
+ if (archiving) {
+ ArchivedJob ajob = new ArchivedJob(job.getJobID());
+ String arlink = archive.createJob(job.getJobID());
+ if (job.getFinalStatus().equals("OK")) {
+ ajob.setArchivePath(arlink);
+ ajob.copyArchiveFromWeb(maindir + job.getJobID() + ".tar.gz");
+ cw.ArchiveData(job, arlink);
+ } else {
+ cw.ArchiveData(job, "undefined");
+ }
+ }
return 1;
- } else
- System.out.println("job " + id + " is running");
+ }
return 0;
}
- private void ParsingForDate(String input, String date) {
+ private void ParsingOneDay(String input, String date) {
int totalcount = 0;
int countinsertions = 0;
int countinserted = 0;
}
;
}
-}
+};