1 package compbio.cassandra;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;

import compbio.cassandra.JpredParser;
import compbio.data.sequence.FastaReader;
import compbio.data.sequence.FastaSequence;
import compbio.engine.JpredJob;
import compbio.engine.ProteoCachePropertyHelperManager;
import compbio.engine.archive.Archive;
import compbio.engine.archive.ArchivedJob;
import compbio.util.PropertyHelper;
import compbio.util.Util;
/**
 * Retrieves finished/failed Jpred job results over HTTP from a Jpred results
 * web server and inserts them into Cassandra via {@link CassandraWriter}.
 */
public class JpredParserHTTP implements JpredParser {
	// Writes parsed jobs into the Cassandra store.
	private CassandraWriter cw = new CassandraWriter();
	// Archive handle created in Parsing(). NOTE(review): static state means
	// concurrent parser instances would share (and overwrite) this archive.
	private static Archive archive;
	// Base URL under which the per-job result directories live.
	private String dirprefix;
	// Alignment and prediction sequences collected for the job currently
	// being analysed; reset at the start of analyseJob().
	private List<FastaSequence> alignment;
	private List<FastaSequence> predictions;
	// Count of jobs with no *.concise.fasta file (still running or failed).
	private int countNoData;
	// Whether result archiving is enabled (read from "archive.enable").
	private static boolean archiving = false;
	// ProteoCache property file accessor, shared by all instances.
	private static final PropertyHelper ph = ProteoCachePropertyHelperManager.getPropertyHelper();
37 public JpredParserHTTP() {
38 dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
41 public JpredParserHTTP(String sourceurl) {
42 dirprefix = sourceurl;
45 public void setSource(String newsourceprefix) {
46 dirprefix = newsourceprefix;
49 private boolean initBooleanValue(String key) {
51 String status = ph.getProperty(key);
52 if (Util.isEmpty(status)) {
55 return new Boolean(status.trim()).booleanValue();
	/**
	 * Parses the Jpred jobs of the last nDays days, one day at a time.
	 *
	 * NOTE(review): lines are missing from this view between the archive
	 * creation and the loop (possibly an "if (archiving)" guard) and the
	 * method's closing braces — confirm against the full source.
	 *
	 * @param source URL of the list of all jobs
	 * @param nDays  number of days (back from today) to process
	 */
	public void Parsing(String source, int nDays) throws IOException {
		Calendar cal = Calendar.getInstance();
		// Step back nDays, then advance one day per loop iteration below.
		cal.add(Calendar.DATE, -nDays);
		archiving = initBooleanValue("archive.enable");
		archive = new Archive();
		for (int i = 0; i < nDays; ++i) {
			cal.add(Calendar.DATE, 1);
			// "yyyy/M/d": Calendar.MONTH is zero-based, hence the +1. This
			// string must match the date prefix of lines in the job list.
			String date = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DATE);
			ParsingOneDay(source, date);
	/**
	 * Parses the Jpred output concise file in the FASTA format. If there is a
	 * record with ID = QUERY or the job id, this is a "one protein" job;
	 * otherwise this is an alignment job.
	 *
	 * NOTE(review): the branch bodies and the return statement are missing
	 * from this view of the file.
	 */
	private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException {
		final FastaReader fr = new FastaReader(stream);
		while (fr.hasNext()) {
			final FastaSequence fs = fr.next();
			String seqid = fs.getId();
			// Sequences come with embedded newlines; flatten to one line.
			String seq = fs.getSequence().replaceAll("\n", "");
			// Known prediction-track ids; everything else is either the query
			// protein or an alignment sequence.
			// NOTE(review): "JNETCONF" is tested twice — the duplicate is
			// probably a copy-paste slip for another track id; confirm.
			if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28")
					|| seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF")
					|| seqid.equals("JNETHMM") || seqid.equals("JNETPSSM") || seqid.equals("JNETCONF")) {
			if (seqid.equals("QUERY") || seqid.equals(jobid))
	/**
	 * Reads the job's *.seq FASTA file and extracts the query protein
	 * sequence with newlines stripped.
	 *
	 * NOTE(review): the "protein" declaration, the surrounding loop/branching
	 * and the return statement are missing from this view of the file.
	 */
	private String parseSeqFile(final InputStream stream, String jobid) throws FileNotFoundException {
		final FastaReader fr = new FastaReader(stream);
		final FastaSequence fs = fr.next();
		protein = fs.getSequence().replaceAll("\n", "");
		// this is an alignment job...
	/**
	 * Reads the whole job LOG file into a single String.
	 *
	 * NOTE(review): the "line" declaration, the accumulation statement inside
	 * the loop and the return are missing from this view — so whether line
	 * terminators are preserved cannot be determined here; the regex checks
	 * in analyseJob() suggest they are dropped. Confirm against full source.
	 */
	private String parseLogFile(final InputStream stream) throws IOException {
		BufferedReader buffer = new BufferedReader(new InputStreamReader(stream));
		while (null != (line = buffer.readLine())) {
	/**
	 * Fetches one job's result files over HTTP, determines its final status
	 * and writes it to Cassandra. jobinfo holds the whitespace-split fields
	 * of one job-list line: start time, end time, IP, ..., job id (last).
	 *
	 * NOTE(review): this view of the file is missing many lines — the
	 * try/catch scaffolding, counter updates, early returns and the final
	 * return value — so the comments below describe only the visible
	 * statements.
	 */
	private int analyseJob(String[] jobinfo) throws IOException {
		// Fresh per-job containers (instance fields, so NOT thread-safe).
		alignment = new ArrayList<FastaSequence>();
		predictions = new ArrayList<FastaSequence>();
		boolean running = true;
		boolean ConcisefileExists = false;
		boolean LogfileExists = false;
		// Last field is the job id; fields 0 and 1 are the start/end times.
		JpredJob job = new JpredJob(jobinfo[jobinfo.length - 1], jobinfo[0], jobinfo[1]);
		job.setIP(jobinfo[2]);
		Date currDate = new Date();
		String maindir = dirprefix + "/" + job.getJobID() + "/";
		URL dirurl = new URL(maindir);
		HttpURLConnection httpConnection_dirurl = (HttpURLConnection) dirurl.openConnection();
		// Non-2xx means the job directory is unreachable.
		// NOTE(review): "< 199" accepts status 199, unlike the "199 < code"
		// checks below — looks like an off-by-one; confirm intent.
		if (httpConnection_dirurl.getResponseCode() < 199 || 300 <= httpConnection_dirurl.getResponseCode()) {
		URL conciseurl = new URL(maindir + job.getJobID() + ".concise.fasta");
		URL logurl = new URL(maindir + "LOG");
		HttpURLConnection httpConnection_conciseurl = (HttpURLConnection) conciseurl.openConnection();
		HttpURLConnection httpConnection_logurl = (HttpURLConnection) logurl.openConnection();
		// A 2xx response means the concise file exists, i.e. the job finished.
		if (199 < httpConnection_conciseurl.getResponseCode() && httpConnection_conciseurl.getResponseCode() < 300) {
			ConcisefileExists = true;
			job.setProtein(parsePredictions(conciseurl.openStream(), job.getJobID()));
		} catch (IOException e) {
		// The job still can be running or failed...
		if (199 < httpConnection_logurl.getResponseCode() && httpConnection_logurl.getResponseCode() < 300) {
			LogfileExists = true;
			job.setLog(parseLogFile(logurl.openStream()));
			// The job has not been started at all...
			job.setExecutionStatus("FAIL");
			job.setFinalStatus("STOPPED");
		// Classify failures from the LOG contents.
		if (job.getLog().matches("(.*)TIMEOUT\\syour\\sjob\\stimed\\sout(.*)")) {
			// blast job was too long (more than 3600 secs by default)...
			job.setExecutionStatus("FAIL");
			job.setFinalStatus("TIMEDOUT");
		} else if (job.getLog().matches("(.*)Jpred\\serror:\\sDied\\sat(.*)")) {
			// an internal Jpred error...
			job.setExecutionStatus("FAIL");
			job.setFinalStatus("JPREDERROR");
		} else if ((currDate.getTime() - job.getEndTime()) / 1000 > 3601 && LogfileExists && !ConcisefileExists) {
			// the job was stopped with unknown reason (older than the
			// default 3600s timeout, has a LOG but no concise output)...
			job.setExecutionStatus("FAIL");
			job.setFinalStatus("STOPPED");
		httpConnection_conciseurl.disconnect();
		httpConnection_logurl.disconnect();
		} catch (MalformedURLException e) {
		job.setAlignment(alignment);
		job.setPredictions(predictions);
		// A failed job has no concise file; fall back to the raw *.seq input
		// so the query protein is still recorded.
		if (job.getExecutionStatus().equals("FAIL")) {
			URL sequrl = new URL(maindir + job.getJobID() + ".seq");
			HttpURLConnection httpConnection_sequrl = (HttpURLConnection) sequrl.openConnection();
			if (199 < httpConnection_sequrl.getResponseCode() && httpConnection_sequrl.getResponseCode() < 300) {
				job.setProtein(parseSeqFile(sequrl.openStream(), job.getJobID()));
			} catch (IOException e) {
		// Insert the job into the Cassandra query tables.
		cw.FormQueryTables(job);
		// Archive the result tarball for successful jobs; record "undefined"
		// as the archive path otherwise.
		ArchivedJob ajob = new ArchivedJob(job.getJobID());
		String arlink = archive.createJob(job.getJobID());
		if (job.getFinalStatus().equals("OK")) {
			ajob.setArchivePath(arlink);
			// Pull the job's tarball from the web server into the archive.
			ajob.copyArchiveFromWeb(maindir + job.getJobID() + ".tar.gz");
			cw.ArchiveData(job, arlink);
			cw.ArchiveData(job, "undefined");
	/**
	 * Downloads the global job list, filters the lines belonging to the given
	 * day and analyses/inserts every job not already in Cassandra, printing a
	 * per-day summary to stdout.
	 *
	 * NOTE(review): try/catch scaffolding, counter updates ("totalcount",
	 * "line" declarations) and the tail of this method (past the last visible
	 * line) are missing from this view of the file.
	 *
	 * @param input URL of the list of all jobs
	 * @param date  day to process, formatted "yyyy/M/d" (see Parsing())
	 */
	private void ParsingOneDay(String input, String date) {
		int countinsertions = 0;
		int countinserted = 0;
		int countNotanalyzed = 0;
		System.out.println("Inserting jobs for " + date);
		URL url = new URL(input);
		URLConnection conn = url.openConnection();
		BufferedReader alljobs = new BufferedReader(new InputStreamReader(conn.getInputStream()));
		while ((line = alljobs.readLine()) != null) {
			// A job line starts with the date prefix and ends with "jp_<id>".
			if (line.matches(date + ":(.*)jp_[^\\s]+")) {
				String[] job = line.split("\\s+");
				String jobid = job[job.length - 1];
				// NOTE(review): "JobisNotInsterted" is the (misspelled) name
				// of the project API — do not rename here.
				if (cw.JobisNotInsterted(jobid)) {
					countinsertions += analyseJob(job);
		// Per-day summary.
		System.out.println("Total number of jobs = " + totalcount);
		System.out.println("  " + countinserted + " jobs inserted already");
		System.out.println("  " + countNotanalyzed + " not analysed jobs");
		System.out.println("  " + countNoData + " jobs without *.concise.fasta file (RUNNING or FAILED)");
		System.out.println("  " + countinsertions + " new job insertions\n");
		} catch (MalformedURLException e) {
		} catch (IOException e) {