1 package compbio.cassandra;
3 import java.io.BufferedReader;
4 import java.io.FileNotFoundException;
5 import java.io.IOException;
6 import java.io.InputStream;
7 import java.io.InputStreamReader;
8 import java.net.HttpURLConnection;
9 import java.net.MalformedURLException;
11 import java.net.URLConnection;
12 import java.util.ArrayList;
13 import java.util.Calendar;
14 import java.util.Date;
15 import java.util.List;
17 import compbio.cassandra.JpredParser;
18 import compbio.engine.JpredJob;
20 public class JpredParserHTTP implements JpredParser {
// Writer used by analyseJob() to store each processed job (FormQueryTables / ArchiveData)
// and by ParsingForDate() to test whether a job id still has to be inserted.
21 private CassandraWriter cw = new CassandraWriter();
// URL prefix of the results tree; analyseJob() builds a job's directory as
// dirprefix + "/" + jobid + "/".
22 private String dirprefix;
// Both lists are replaced with new empty lists at the start of every parsePredictions()
// call and are attached to the job in analyseJob().
23 private List<FastaSequence> alignment;
24 private List<FastaSequence> predictions;
// Printed by ParsingForDate() as the number of jobs without a *.concise.fasta file.
// NOTE(review): no line visible in this listing updates it - confirm where it is incremented.
25 private int countNoData;
/**
 * Creates a parser whose source prefix is the Jpred results directory at
 * www.compbio.dundee.ac.uk.
 */
public JpredParserHTTP() {
	dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
}
/**
 * Creates a parser that reads job results below the given URL prefix.
 *
 * @param sourceurl
 *            URL prefix of the results tree; a job directory is addressed as
 *            {@code sourceurl + "/" + jobid + "/"}
 */
public JpredParserHTTP(String sourceurl) {
	dirprefix = sourceurl;
}
35 public void setSource(String newsourceprefix) {
36 dirprefix = newsourceprefix;
39 public void Parsing(String source, int nDays) throws IOException {
40 Calendar cal = Calendar.getInstance();
41 cal.add(Calendar.DATE, -nDays);
42 for (int i = 0; i < nDays; ++i) {
43 cal.add(Calendar.DATE, 1);
44 String date = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DATE);
45 ParsingForDate(source, date);
/**
 * The method parses the Jpred output concise file in the FASTA format. If
 * there is a record with ID = QUERY or jobid, this is a "one protein" job;
 * otherwise this is an alignment job.
 */
// Reads FASTA records from the given stream through a FastaReader and, as a side
// effect, replaces the alignment and predictions fields with new empty lists.
// jobid is compared against each record ID below.
// NOTE(review): several statements of this method, including what is added to the
// lists and what is returned, are not visible in this listing.
54 private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException {
55 final FastaReader fr = new FastaReader(stream);
// Results of a previous call are discarded here.
57 alignment = new ArrayList<FastaSequence>();
58 predictions = new ArrayList<FastaSequence>();
59 while (fr.hasNext()) {
60 final FastaSequence fs = fr.next();
61 String seqid = fs.getId();
// Remove every newline character from the record's sequence text.
62 String seq = fs.getSequence().replaceAll("\n", "");
// Records whose ID is one of these fixed names are handled separately from all
// other records (the handling itself is not visible here).
// NOTE(review): "JNETCONF" is tested twice, so the last test is redundant; possibly
// a different ID was intended - confirm against the Jpred concise file format.
63 if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28")
64 || seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF")
65 || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM") || seqid.equals("JNETCONF")) {
// A record whose ID is "QUERY" or equals the job id is singled out; the statement
// guarded by this test is not visible here.
69 if (seqid.equals("QUERY") || seqid.equals(jobid))
// Reads the given stream line by line through a BufferedReader.
// NOTE(review): the declaration of `line`, the loop body and the return statement are
// not visible in this listing, so how the lines are combined cannot be stated here.
76 private String parseLogFile(final InputStream stream) throws IOException {
// No charset is passed to InputStreamReader, so the platform default charset is used.
78 BufferedReader buffer = new BufferedReader(new InputStreamReader(stream));
// readLine() returns null at end of stream, which ends the loop.
80 while (null != (line = buffer.readLine())) {
// Inspects the result directory of one Jpred job over HTTP, fills a JpredJob from the
// job's concise FASTA file and LOG file when they are reachable, sets failure statuses
// based on the log text, and hands the job to the CassandraWriter.
//
// jobinfo: whitespace-split fields of one job-list line (see ParsingForDate). The last
// element is the job id; elements 0 and 1 are passed as the 2nd and 3rd JpredJob
// constructor arguments (presumably start/end timestamps - confirm against JpredJob);
// element 2 is the IP.
// NOTE(review): many lines of this method (try/else branches, the return statements)
// are not visible in this listing.
86 private int analyseJob(String[] jobinfo) throws IOException {
// NOTE(review): `running` is not read in any line visible here.
87 boolean running = true;
88 boolean ConcisefileExists = false;
89 boolean LogfileExists = false;
90 JpredJob job = new JpredJob (jobinfo[jobinfo.length - 1], jobinfo[0], jobinfo[1]);
91 job.setIP(jobinfo[2]);
92 Date currDate = new Date();
// Directory that holds all result files of this job.
93 String maindir = dirprefix + "/" + job.getJobID() + "/";
95 //System.out.println("analyzing job " + job.getJobID());
// NOTE(review): URL is used below, but no `import java.net.URL` appears among the
// imports visible in this listing - confirm it is present in the real file.
97 URL dirurl = new URL(maindir);
98 HttpURLConnection httpConnection_dirurl = (HttpURLConnection) dirurl.openConnection();
// The job directory is treated as a special case when the response code is below 199
// or at/above 300 (the body of this branch is not visible here).
// NOTE(review): the file checks below accept only codes 200-299, so code 199 is
// classified differently by the two kinds of test - confirm whether "< 200" was meant.
99 if (httpConnection_dirurl.getResponseCode() < 199 || 300 <= httpConnection_dirurl.getResponseCode()) {
102 URL conciseurl = new URL(maindir + job.getJobID() + ".concise.fasta");
103 URL archiveurl = new URL(maindir + job.getJobID() + ".tar.gz");
104 URL logurl = new URL(maindir + "LOG");
105 HttpURLConnection httpConnection_conciseurl = (HttpURLConnection) conciseurl.openConnection();
106 HttpURLConnection httpConnection_logurl = (HttpURLConnection) logurl.openConnection();
// In the visible lines the archive connection is only opened and later disconnected;
// its response code is never read.
107 HttpURLConnection httpConnection_archiveurl = (HttpURLConnection) archiveurl.openConnection();
// A 2xx response means the concise FASTA file exists.
108 if (199 < httpConnection_conciseurl.getResponseCode() && httpConnection_conciseurl.getResponseCode() < 300) {
109 ConcisefileExists = true;
// URL.openStream() opens a new connection to the same URL instead of reusing
// httpConnection_conciseurl. NOTE(review): no visible line closes the returned stream.
112 job.setProtein(parsePredictions(conciseurl.openStream(), job.getJobID()));
113 } catch (IOException e) {
117 // The job still can be running or failed...
// A 2xx response means the LOG file exists; its parsed text is stored on the job.
120 if (199 < httpConnection_logurl.getResponseCode() && httpConnection_logurl.getResponseCode() < 300) {
121 LogfileExists = true;
122 job.setLog(parseLogFile(logurl.openStream()));
124 // The job has not been started at all...
125 job.setExecutionStatus("FAIL");
126 job.setFinalStatus("STOPPED");
// String.matches() must match the WHOLE log text and '.' does not match line
// terminators by default, so these tests only succeed if the stored log contains no
// line breaks. NOTE(review): that depends on how parseLogFile joins lines - confirm.
129 if (job.getLog().matches("(.*)TIMEOUT\\syour\\sjob\\stimed\\sout(.*)")) {
130 // blast job was too long (more than 3600 secs by default)...
131 job.setExecutionStatus("FAIL");
132 job.setFinalStatus("TIMEDOUT");
134 } else if (job.getLog().matches("(.*)Jpred\\serror:\\sDied\\sat(.*)")) {
135 // an internal Jpred error...
136 job.setExecutionStatus("FAIL");
137 job.setFinalStatus("JPREDERROR");
// More than 3601 seconds between now and job.getEndTime(), with a LOG file but no
// concise file. NOTE(review): assumes getEndTime() is in milliseconds - confirm.
139 } else if ((currDate.getTime() - job.getEndTime()) / 1000 > 3601 && LogfileExists && !ConcisefileExists) {
140 // the job was stopped with unknown reason...
141 job.setExecutionStatus("FAIL");
142 job.setFinalStatus("STOPPED");
146 httpConnection_conciseurl.disconnect();
147 httpConnection_logurl.disconnect();
148 httpConnection_archiveurl.disconnect();
149 } catch (MalformedURLException e) {
// Attach whatever the last parsePredictions() call left in the two list fields, then
// store the job. NOTE(review): if parsePredictions() was not called for this job, the
// fields still hold the previous job's lists (or null on the first job) - confirm intent.
154 job.setAlignment(alignment);
155 job.setPredictions(predictions);
156 cw.FormQueryTables(job);
// NOTE(review): the meaning of the "undefined" argument is not visible here.
157 cw.ArchiveData(job, "undefined");
164 private void ParsingForDate(String input, String date) {
166 int countinsertions = 0;
167 int countinserted = 0;
168 int countNotanalyzed = 0;
171 System.out.println("Inserting jobs for " + date);
173 URL url = new URL(input);
174 URLConnection conn = url.openConnection();
175 BufferedReader alljobs = new BufferedReader(new InputStreamReader(conn.getInputStream()));
178 while ((line = alljobs.readLine()) != null) {
179 if (line.matches(date + ":(.*)jp_[^\\s]+")) {
181 String[] job = line.split("\\s+");
182 String jobid = job[job.length - 1];
183 if (cw.JobisNotInsterted(jobid)) {
184 countinsertions += analyseJob(job);
193 System.out.println("Total number of jobs = " + totalcount);
194 System.out.println(" " + countinserted + " jobs inserted already");
195 System.out.println(" " + countNotanalyzed + " not analysed jobs");
196 System.out.println(" " + countNoData + " jobs without *.concise.fasta file (RUNNING or FAILED)");
197 System.out.println(" " + countinsertions + " new job insertions\n");
198 } catch (MalformedURLException e) {
200 } catch (IOException e) {