1 package compbio.cassandra;
3 import java.io.BufferedReader;
4 import java.io.FileNotFoundException;
5 import java.io.IOException;
6 import java.io.InputStream;
7 import java.io.InputStreamReader;
8 import java.net.Authenticator;
9 import java.net.HttpURLConnection;
10 import java.net.MalformedURLException;
11 import java.net.PasswordAuthentication;
13 import java.net.URLConnection;
14 import java.text.ParseException;
15 import java.text.SimpleDateFormat;
16 import java.util.ArrayList;
17 import java.util.Calendar;
18 import java.util.Date;
19 import java.util.List;
20 import java.util.regex.Matcher;
21 import java.util.regex.Pattern;
23 import compbio.cassandra.JpredParser;
24 import compbio.data.sequence.FastaReader;
25 import compbio.data.sequence.FastaSequence;
26 import compbio.engine.JpredJob;
27 import compbio.engine.ProteoCachePropertyHelperManager;
28 import compbio.engine.archive.Archive;
29 import compbio.engine.archive.ArchivedJob;
30 import compbio.util.PropertyHelper;
31 import compbio.util.Util;
/**
 * Fetches Jpred job results over HTTP, parses them and stores the extracted
 * job data in Cassandra via {@link CassandraWriter}.
 *
 * NOTE(review): this is a partial view of the file; some lines (braces,
 * else-branches, catch bodies) are elided between the visible statements.
 */
public class JpredParserHTTP implements JpredParser {
// Persists parsed jobs into Cassandra.
private CassandraWriter cw = new CassandraWriter();
// Archive for job result tarballs; created in Parsing() (visibly assigned at the
// start of each run — presumably guarded by the archiving flag, confirm on the elided line).
private static Archive archive;
// URL prefix of the directory tree holding per-job result folders.
private String dirprefix;
// Alignment sequences collected while analysing the current job.
private List<FastaSequence> alignment;
// Prediction-track sequences collected while analysing the current job.
private List<FastaSequence> predictions;
// Counter of jobs lacking a *.concise.fasta file (still running or failed).
private int countNoData;
// Whether result archiving is enabled; read from the "archive.enable" property.
private static boolean archiving = false;
private static final PropertyHelper ph = ProteoCachePropertyHelperManager.getPropertyHelper();
// NOTE(review): SimpleDateFormat is not thread-safe; sharing this static instance
// across threads would corrupt parses. Safe only if parsing is single-threaded —
// confirm, or switch to java.time.DateTimeFormatter.
static SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
/**
 * Creates a parser pointed at the default Dundee Jpred results URL and
 * installs the JVM-wide proxy authenticator.
 */
public JpredParserHTTP() {
dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
launchAuthenticator();
/**
 * Installs a default {@link Authenticator} and sets JVM-wide HTTP proxy
 * system properties so all connections from this process go through the proxy.
 *
 * SECURITY(review): proxy username and password are hard-coded in source and
 * committed to the repository. They must be moved to external configuration /
 * secret storage and the exposed credentials rotated.
 * NOTE(review): port "800" is unusual for a proxy (8080 is typical) — confirm.
 */
public void launchAuthenticator() {
final String authUser = "as373024";
final String authPassword = "Zx1--L12";
final String authHost = "gskproxy.gsk.com";
final String authPort = "800";
Authenticator.setDefault(new Authenticator() {
public PasswordAuthentication getPasswordAuthentication() {
return new PasswordAuthentication(authUser, authPassword.toCharArray());
// Process-wide proxy settings; affects every HTTP connection in this JVM.
System.setProperty("proxySet", "true");
System.setProperty("http.proxyUser", authUser);
System.setProperty("http.proxyPassword", authPassword);
System.setProperty("http.proxyHost", authHost);
System.setProperty("http.proxyPort", authPort);
/**
 * Creates a parser reading results from the given URL prefix.
 *
 * @param sourceurl URL prefix of the directory holding Jpred result folders
 */
public JpredParserHTTP(String sourceurl) {
dirprefix = sourceurl;
launchAuthenticator();
/**
 * Replaces the results URL prefix used for subsequent parsing runs.
 *
 * @param newsourceprefix new URL prefix for job result folders
 */
public void setSource(String newsourceprefix) {
dirprefix = newsourceprefix;
/**
 * Reads a boolean configuration property by key.
 * The empty-value branch's body is elided in this view (presumably returns false).
 *
 * @param key property name, e.g. "archive.enable"
 */
private boolean initBooleanValue(String key) {
String status = ph.getProperty(key);
if (Util.isEmpty(status)) {
// NOTE(review): the Boolean(String) constructor is deprecated;
// Boolean.parseBoolean(status.trim()) is the equivalent modern call.
return new Boolean(status.trim()).booleanValue();
/**
 * Parses the job lists for the last {@code nDays} days, one day at a time.
 *
 * @param source URL of the file listing all jobs
 * @param nDays  number of past days to process
 * @throws IOException on network failure
 */
public void Parsing(String source, int nDays) throws IOException {
Calendar cal = Calendar.getInstance();
// Step back nDays, then walk forward one day per loop iteration.
cal.add(Calendar.DATE, -nDays);
archiving = initBooleanValue("archive.enable");
// NOTE(review): an elided line likely guards this with `if (archiving)` — confirm.
archive = new Archive();
for (int i = 0; i < nDays; ++i) {
cal.add(Calendar.DATE, 1);
// Date as "yyyy/M/d"; Calendar.MONTH is 0-based, hence the +1.
String date = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DATE);
ParsingOneDay(source, date);
* The method parses the Jpred output concise file in the FASTA format. If
* there is a record with ID = QUERY or jobid, this is a "one protein" job;
* otherwise this is an alignment job.
private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException {
final FastaReader fr = new FastaReader(stream);
while (fr.hasNext()) {
final FastaSequence fs = fr.next();
String seqid = fs.getId();
// Flatten the sequence to a single line.
String seq = fs.getSequence().replaceAll("\n", "");
// Records whose ID is a known prediction track are collected as predictions.
// NOTE(review): "JNETCONF" is tested twice in this condition; one duplicate is
// probably meant to be a different track id — confirm against the Jpred output spec.
if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28")
|| seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF")
|| seqid.equals("JNETHMM") || seqid.equals("JNETPSSM") || seqid.equals("JNETCONF")) {
// QUERY (or an ID equal to the job id) marks the submitted protein itself.
if (seqid.equals("QUERY") || seqid.equals(jobid))
/**
 * Reads the job's .seq FASTA file; the visible path takes the first record's
 * sequence with newlines stripped. Surrounding control flow (loop/branching)
 * is elided in this view.
 *
 * @param stream open stream over the .seq file
 * @param jobid  job identifier, used to distinguish single-protein jobs
 */
private String parseSeqFile(final InputStream stream, String jobid) throws FileNotFoundException {
final FastaReader fr = new FastaReader(stream);
final FastaSequence fs = fr.next();
protein = fs.getSequence().replaceAll("\n", "");
// this is an alignment job...
/**
 * Reads the job's LOG file, extracting the Jpred program version from the
 * first line when present, and accumulates the remaining lines (accumulation
 * target is on elided lines in this view).
 *
 * @param stream open stream over the LOG file
 * @param job    job whose program version is updated in place
 * @throws IOException on read failure
 */
private String parseLogFile(final InputStream stream, JpredJob job) throws IOException {
BufferedReader buffer = new BufferedReader(new InputStreamReader(stream));
// First LOG line is expected to carry the program version string.
if (null != (out = buffer.readLine()) && (out.contains("version"))) {
// Extract a dotted version number, e.g. "3.0.1".
Matcher matcher = Pattern.compile("((\\d|\\.)+)").matcher(out);
// NOTE(review): group(0) without a visible matcher.find() call — the find()
// may sit on an elided line; confirm before touching this.
job.setProgramVersion(matcher.group(0));
while (null != (line = buffer.readLine())) {
/**
 * Fetches one job's result directory over HTTP, determines its final status
 * (OK / TIMEDOUT / JPREDERROR / STOPPED), and inserts it into Cassandra,
 * archiving the result tarball when enabled.
 *
 * @param jobinfo fields of one job-list line; visibly used as
 *                [0]=start time, [1]=finish time, [2]=IP, last=job id
 * @return number of insertions performed (0 when skipped); several return
 *         statements sit on elided lines in this view
 * @throws IOException on network failure
 */
private int analyseJob(String[] jobinfo) throws IOException {
alignment = new ArrayList<FastaSequence>();
predictions = new ArrayList<FastaSequence>();
boolean running = true;
boolean ConcisefileExists = false;
boolean LogfileExists = false;
JpredJob job = new JpredJob(jobinfo[jobinfo.length - 1], jobinfo[0], jobinfo[1]);
job.setIP(jobinfo[2]);
job.setProgramName("Jpred");
// Default version; overwritten by parseLogFile when the LOG states one.
job.setProgramVersion("3.0.1");
Date currDate = new Date();
String maindir = dirprefix + "/" + job.getJobID() + "/";
// Grace period: skip jobs finished <120s ago, whose files may still be being written.
Date finishTime = timeformatter.parse(jobinfo[1]);
long delay = currDate.getTime() / 1000 - finishTime.getTime() / 1000;
if (delay < 120) return 0;
} catch (ParseException e) {
// Probe the job directory itself first.
URL dirurl = new URL(maindir);
HttpURLConnection httpConnection_dirurl = (HttpURLConnection) dirurl.openConnection();
// NOTE(review): this boundary treats code 199 as success (< 199), while the
// checks below use `199 < code` (i.e. >= 200) — inconsistent; should likely be <= 199.
if (httpConnection_dirurl.getResponseCode() < 199 || 300 <= httpConnection_dirurl.getResponseCode()) {
URL conciseurl = new URL(maindir + job.getJobID() + ".concise.fasta");
URL logurl = new URL(maindir + "LOG");
HttpURLConnection httpConnection_conciseurl = (HttpURLConnection) conciseurl.openConnection();
HttpURLConnection httpConnection_logurl = (HttpURLConnection) logurl.openConnection();
// 2xx on the concise file means the job produced predictions.
if (199 < httpConnection_conciseurl.getResponseCode() && httpConnection_conciseurl.getResponseCode() < 300) {
ConcisefileExists = true;
job.setProtein(parsePredictions(conciseurl.openStream(), job.getJobID()));
} catch (IOException e) {
// The job still can be running or failed...
if (199 < httpConnection_logurl.getResponseCode() && httpConnection_logurl.getResponseCode() < 300) {
LogfileExists = true;
job.setLog(parseLogFile(logurl.openStream(), job));
// The job has not been started at all...
System.out.println ("WARNING! Job " + job.getJobID() + " has status FAIL/STOPPED");
job.setExecutionStatus("FAIL");
job.setFinalStatus("STOPPED");
// Classify the failure from LOG contents.
if (job.getLog().matches("(.*)TIMEOUT\\syour\\sjob\\stimed\\sout(.*)")) {
// blast job was too long (more than 3600 secs by default)...
job.setExecutionStatus("FAIL");
job.setFinalStatus("TIMEDOUT");
} else if (job.getLog().matches("(.*)Jpred\\serror:\\sDied\\sat(.*)")) {
// an internal Jpred error...
job.setExecutionStatus("FAIL");
job.setFinalStatus("JPREDERROR");
} else if ((currDate.getTime() - job.getEndTime()) / 1000 > 3601 && LogfileExists && !ConcisefileExists) {
// the job was stopped with unknown reason...
job.setExecutionStatus("FAIL");
job.setFinalStatus("STOPPED");
httpConnection_conciseurl.disconnect();
httpConnection_logurl.disconnect();
} catch (MalformedURLException e) {
job.setAlignment(alignment);
job.setPredictions(predictions);
// Failed jobs have no concise file; recover the protein from the .seq file instead.
if (job.getExecutionStatus().equals("FAIL")) {
URL sequrl = new URL(maindir + job.getJobID() + ".seq");
HttpURLConnection httpConnection_sequrl = (HttpURLConnection) sequrl.openConnection();
if (199 < httpConnection_sequrl.getResponseCode() && httpConnection_sequrl.getResponseCode() < 300) {
job.setProtein(parseSeqFile(sequrl.openStream(), job.getJobID()));
} catch (IOException e) {
// Persist to Cassandra, then archive the result tarball for successful jobs.
cw.FormQueryTables(job);
ArchivedJob ajob = new ArchivedJob(job.getJobID());
String arlink = archive.createJob(job.getJobID());
if (job.getFinalStatus().equals("OK")) {
ajob.setArchivePath(arlink);
ajob.copyArchiveFromWeb(maindir + job.getJobID() + ".tar.gz");
cw.ArchiveData(job, arlink);
cw.ArchiveData(job, "undefined");
/**
 * Downloads the full job list, filters the lines belonging to one day, and
 * analyses/inserts each job not already present in Cassandra. The method's
 * tail (catch bodies) continues past this view.
 *
 * @param input URL of the file listing all jobs
 * @param date  day filter in "yyyy/M/d" form, matched as a line prefix
 */
private void ParsingOneDay(String input, String date) {
int countinsertions = 0;
int countinserted = 0;
int countNotanalyzed = 0;
System.out.println("Inserting jobs for " + date);
URL url = new URL(input);
URLConnection conn = url.openConnection();
// NOTE(review): InputStreamReader uses the platform default charset here;
// specify the list file's charset (likely UTF-8 or ASCII) explicitly — confirm.
BufferedReader alljobs = new BufferedReader(new InputStreamReader(conn.getInputStream()));
while ((line = alljobs.readLine()) != null) {
// Keep only this day's lines that end in a job id of the form jp_XXXX.
if (line.matches(date + ":(.*)jp_[^\\s]+")) {
String[] job = line.split("\\s+");
String jobid = job[job.length - 1];
// Skip jobs already inserted in a previous run.
if (cw.JobisNotInsterted(jobid)) {
countinsertions += analyseJob(job);
// Per-day summary of what was inserted, skipped or incomplete.
System.out.println("Total number of jobs = " + totalcount);
System.out.println(" " + countinserted + " jobs inserted already");
System.out.println(" " + countNotanalyzed + " not analysed jobs");
System.out.println(" " + countNoData + " jobs without *.concise.fasta file (RUNNING or FAILED)");
System.out.println(" " + countinsertions + " new job insertions\n");
} catch (MalformedURLException e) {
} catch (IOException e) {