1 package compbio.cassandra;
3 import java.io.BufferedReader;
4 import java.io.DataInputStream;
5 import java.io.EOFException;
6 import java.io.FileNotFoundException;
7 import java.io.IOException;
8 import java.io.InputStream;
9 import java.io.InputStreamReader;
10 import java.net.HttpURLConnection;
11 import java.net.MalformedURLException;
13 import java.net.URLConnection;
14 import java.text.ParseException;
15 import java.text.SimpleDateFormat;
16 import java.util.ArrayList;
17 import java.util.Calendar;
18 import java.util.Date;
19 import java.util.List;
21 import compbio.cassandra.JpredParser;
// Fetches Jpred job results over HTTP from a results directory and stores the
// parsed data in Cassandra through CassandraWriter.
// NOTE(review): this source appears to be a line-numbered paste with many
// original lines (blank lines, closing braces, some statements) missing from
// view; code is left byte-identical, only comments are added.
23 public class JpredParserHTTP implements JpredParser {
// Writer used to persist parsed job data into Cassandra.
24 private CassandraWriter cw = new CassandraWriter();
// Base URL of the Jpred results area; job files live under dirprefix/<jobid>/.
25 private String dirprefix;
// Alignment sequences collected from the most recently parsed concise file.
26 private List<FastaSequence> alignment;
// Jnet prediction tracks (jnetpred, Lupas_*, JNETSOL*, ...) from the concise file.
27 private List<FastaSequence> predictions;
// Counts jobs with no *.concise.fasta available (still running or failed).
28 private int countNoData;
// Creates a parser reading results from the default Dundee compbio Jpred URL.
30 public JpredParserHTTP() {
31 dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
// Creates a parser reading results from the given base URL.
34 public JpredParserHTTP(String sourceurl) {
35 dirprefix = sourceurl;
// Replaces the base URL used to locate job result files.
38 public void setSource(String newsourceprefix) {
39 dirprefix = newsourceprefix;
// Parses the job list at 'source' for each of the last nDays calendar days
// (from nDays-1 days ago up to today), one ParsingForDate call per date.
42 public void Parsing(String source, int nDays) throws IOException {
43 Calendar cal = Calendar.getInstance();
// Rewind nDays, then step forward one day per loop iteration.
44 cal.add(Calendar.DATE, -nDays);
45 for (int i = 0; i < nDays; ++i) {
46 cal.add(Calendar.DATE, 1);
// Calendar.MONTH is 0-based, hence the +1.
47 int month = cal.get(Calendar.MONTH) + 1;
48 int year = cal.get(Calendar.YEAR);
49 int day = cal.get(Calendar.DATE);
// NOTE(review): month/day are not zero-padded (e.g. "2013/1/5"); confirm the
// job-list lines use the same unpadded format, since ParsingForDate matches on it.
50 String date = year + "/" + month + "/" + day;
51 ParsingForDate(source, date);
56 * The method parses the Jpred output concise file in the FASTA format. If
57 * there is a record with ID = QUERY or jobid, this is a "one protein" job;
58 * otherwise this is an alignment job.
// Splits concise-file records into prediction tracks ('predictions') and
// alignment sequences ('alignment'). Presumably returns the query protein
// sequence — the return statements are not visible in this view; confirm.
60 private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException {
61 final FastaReader fr = new FastaReader(stream);
63 alignment = new ArrayList<FastaSequence>();
64 predictions = new ArrayList<FastaSequence>();
65 while (fr.hasNext()) {
66 final FastaSequence fs = fr.next();
67 String seqid = fs.getId();
// Concise files wrap sequences over multiple lines; strip embedded newlines.
68 String seq = fs.getSequence().replaceAll("\n", "");
// Known Jnet prediction track IDs go into 'predictions'.
// NOTE(review): "JNETCONF" is tested twice in this condition; the second
// occurrence is redundant and may have been intended as a different track id
// (e.g. JNETPROPB/JNETPROPH) — confirm against the concise-file format.
69 if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28")
70 || seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF")
71 || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM") || seqid.equals("JNETCONF")) {
// A record named QUERY or after the job id marks a single-protein job.
75 if (seqid.equals("QUERY") || seqid.equals(jobid))
// Reads the whole LOG file from the given stream and returns its text.
// NOTE(review): the reader is not visibly closed in this view — confirm the
// hidden remainder of the method (or the caller) closes the stream.
82 private String parseLogFile(final InputStream stream) throws IOException {
84 BufferedReader buffer = new BufferedReader(new InputStreamReader(stream));
86 while (null != (line = buffer.readLine())) {
// Reads the job archive (tar.gz) into a List<Byte>, one byte at a time;
// reading terminates when DataInputStream.readByte throws EOFException.
// NOTE(review): byte-at-a-time reads into boxed Bytes are very slow and
// memory-heavy for large archives; consider a buffered byte[] approach if
// this method is ever rewritten.
92 private List<Byte> parseArchiveFile(final InputStream stream) throws IOException {
93 DataInputStream data_in = new DataInputStream(stream);
94 List<Byte> out = new ArrayList<Byte>();
97 out.add(data_in.readByte());
98 } catch (EOFException eof) {
// Analyses a single job record (whitespace-split fields: job[0] = start
// timestamp "yyyy/MM/dd:H:m:s", job[1] = end timestamp, last field = job id),
// classifies its final status from the files present under dirprefix/<id>/,
// and writes the result to Cassandra.
// NOTE(review): the return statements are not visible in this view; the caller
// accumulates the return value, presumably 1 when a record was inserted and 0
// otherwise — confirm.
105 private int analyseJob(String[] job) throws IOException {
106 boolean running = true;
107 boolean ConcisefileExists = false;
108 boolean LogfileExists = false;
109 String id = job[job.length - 1];
// Date portion before the ':' of the start timestamp.
110 String startdatestring = job[0].substring(0, job[0].indexOf(":"));
111 Date startdate = new Date(0);
112 Date starttime = new Date(0);
113 Date endtime = new Date(0);
114 Date currDate = new Date();
116 String execstatus = "OK";
117 String finalstatus = "OK";
// URLs of the per-job artefacts under the results directory.
121 String maindir = dirprefix + "/" + id + "/";
122 String concisefile = dirprefix + "/" + id + "/" + id + ".concise.fasta";
123 String archivefile = dirprefix + "/" + id + "/" + id + ".tar.gz";
124 String logfile = dirprefix + "/" + id + "/LOG";
// NOTE(review): SimpleDateFormat is not thread-safe; safe only if this method
// is never called concurrently. java.time would be the modern replacement.
125 SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
126 SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
128 startdate = dateformatter.parse(startdatestring);
129 starttime = timeformatter.parse(job[0]);
130 endtime = timeformatter.parse(job[1]);
// Execution time in whole seconds.
131 exectime = (endtime.getTime() - starttime.getTime()) / 1000;
132 } catch (ParseException e) {
137 URL dirurl = new URL(maindir);
138 HttpURLConnection httpConnection_dirurl = (HttpURLConnection) dirurl.openConnection();
// Anything outside the 2xx range: the job directory does not exist.
// NOTE(review): getResponseCode() is called twice; HttpURLConnection caches
// the code after the first call, but a local variable would be clearer.
139 if (httpConnection_dirurl.getResponseCode() < 199 || 300 <= httpConnection_dirurl.getResponseCode()) {
142 URL conciseurl = new URL(concisefile);
143 URL archiveurl = new URL(archivefile);
144 URL logurl = new URL(logfile);
145 HttpURLConnection httpConnection_conciseurl = (HttpURLConnection) conciseurl.openConnection();
146 HttpURLConnection httpConnection_logurl = (HttpURLConnection) logurl.openConnection();
147 HttpURLConnection httpConnection_archiveurl = (HttpURLConnection) archiveurl.openConnection();
// 2xx on the concise file: the job finished and produced predictions.
148 if (199 < httpConnection_conciseurl.getResponseCode() && httpConnection_conciseurl.getResponseCode() < 300) {
149 ConcisefileExists = true;
152 protein = parsePredictions(conciseurl.openStream(), id);
153 } catch (IOException e) {
157 // No concise file: the job may still be running, or it may have failed...
159 alignment = new ArrayList<FastaSequence>();
160 predictions = new ArrayList<FastaSequence>();
162 if (199 < httpConnection_logurl.getResponseCode() && httpConnection_logurl.getResponseCode() < 300) {
163 LogfileExists = true;
164 log = parseLogFile(logurl.openStream());
166 // No LOG file either: the job has not been started at all...
168 finalstatus = "STOPPED";
// Classify failures from the LOG text.
171 if (log.matches("(.*)TIMEOUT\\syour\\sjob\\stimed\\sout(.*)")) {
172 // blast job was too long (more than 3600 secs by default)...
174 finalstatus = "TIMEDOUT";
176 } else if (log.matches("(.*)Jpred\\serror:\\sDied\\sat(.*)")) {
177 // an internal Jpred error...
179 finalstatus = "JPREDERROR";
// More than ~1 hour past the recorded end time with a LOG but no concise
// file: assume the job was stopped for an unknown reason.
181 } else if ((currDate.getTime() - endtime.getTime()) / 1000 > 3601 && LogfileExists && !ConcisefileExists) {
182 // the job was stopped with unknown reason...
184 finalstatus = "STOPPED";
188 httpConnection_conciseurl.disconnect();
189 httpConnection_logurl.disconnect();
190 httpConnection_archiveurl.disconnect();
191 } catch (MalformedURLException e) {
// Persist the job in both the query tables and the long-term archive.
196 long t = startdate.getTime();
197 cw.FormQueryTables(t, job[0], job[1], ip, id, execstatus, finalstatus, protein, predictions);
198 cw.ArchiveData(t, exectime, ip, id, execstatus, finalstatus, protein, predictions, alignment, log, archivefile);
201 System.out.println("job " + id + " is running");
// Downloads the job list at 'input', filters lines for the given date
// ("yyyy/M/d" prefix followed by a jp_<id> job reference), inserts any jobs
// not already stored in Cassandra, and prints per-date statistics.
206 private void ParsingForDate(String input, String date) {
208 int countinsertions = 0;
209 int countinserted = 0;
210 int countNotanalyzed = 0;
213 System.out.println("Inserting jobs for " + date);
215 URL url = new URL(input);
216 URLConnection conn = url.openConnection();
217 BufferedReader alljobs = new BufferedReader(new InputStreamReader(conn.getInputStream()));
220 while ((line = alljobs.readLine()) != null) {
// Only lines for the requested date that end in a jp_... job id.
221 if (line.matches(date + ":(.*)jp_[^\\s]+")) {
223 String[] job = line.split("\\s+");
224 String jobid = job[job.length - 1];
// Skip jobs already stored; analyseJob presumably returns the number of
// records inserted (0 or 1) — confirm, its return is not visible here.
// NOTE(review): "JobisNotInsterted" is a misspelling in CassandraWriter's
// public API; it cannot be fixed from this file.
225 if (cw.JobisNotInsterted(jobid)) {
226 countinsertions += analyseJob(job);
235 System.out.println("Total number of jobs = " + totalcount);
236 System.out.println(" " + countinserted + " jobs inserted already");
237 System.out.println(" " + countNotanalyzed + " not analysed jobs");
238 System.out.println(" " + countNoData + " jobs without *.concise.fasta file (RUNNING or FAILED)");
239 System.out.println(" " + countinsertions + " new job insertions\n");
240 } catch (MalformedURLException e) {
242 } catch (IOException e) {