1d888e45404c8e9e5d5eb3286c9a751c01e9cfd4
[proteocache.git] / datadb / compbio / cassandra / JpredParserHTTP.java
1 package compbio.cassandra;
2
3 import java.io.BufferedReader;
4 import java.io.FileNotFoundException;
5 import java.io.IOException;
6 import java.io.InputStream;
7 import java.io.InputStreamReader;
8 import java.net.HttpURLConnection;
9 import java.net.MalformedURLException;
10 import java.net.URL;
11 import java.net.URLConnection;
12 import java.util.ArrayList;
13 import java.util.Calendar;
14 import java.util.Date;
15 import java.util.List;
16 import java.util.regex.Matcher;
17 import java.util.regex.Pattern;
18
19 import compbio.cassandra.JpredParser;
20 import compbio.data.sequence.FastaReader;
21 import compbio.data.sequence.FastaSequence;
22 import compbio.engine.JpredJob;
23 import compbio.engine.ProteoCachePropertyHelperManager;
24 import compbio.engine.archive.Archive;
25 import compbio.engine.archive.ArchivedJob;
26 import compbio.util.PropertyHelper;
27 import compbio.util.Util;
28
29 public class JpredParserHTTP implements JpredParser {
30         private CassandraWriter cw = new CassandraWriter();
31         private static Archive archive;
32         private String dirprefix;
33         private List<FastaSequence> alignment;
34         private List<FastaSequence> predictions;
35         private int countNoData;
36         private static boolean archiving = false;
37         private static final PropertyHelper ph = ProteoCachePropertyHelperManager.getPropertyHelper();
38
39         public JpredParserHTTP() {
40                 dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
41         }
42
43         public JpredParserHTTP(String sourceurl) {
44                 dirprefix = sourceurl;
45         }
46
47         public void setSource(String newsourceprefix) {
48                 dirprefix = newsourceprefix;
49         }
50
51         private boolean initBooleanValue(String key) {
52                 assert key != null;
53                 String status = ph.getProperty(key);
54                 if (Util.isEmpty(status)) {
55                         return false;
56                 }
57                 return new Boolean(status.trim()).booleanValue();
58         }
59
60         public void Parsing(String source, int nDays) throws IOException {
61                 Calendar cal = Calendar.getInstance();
62                 cal.add(Calendar.DATE, -nDays);
63                 archiving = initBooleanValue("archive.enable");
64                 if (archiving) {
65                         archive = new Archive();
66                 }
67                 for (int i = 0; i < nDays; ++i) {
68                         cal.add(Calendar.DATE, 1);
69                         String date = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DATE);
70                         ParsingOneDay(source, date);
71                 }
72         }
73
74         /*
75          * The method parses the Jpred output concise file in the FASTA format If
76          * there is a record with ID = QUERY or jobid, this a "one protein" job
77          * otherwise this is an alignment job
78          */
79         private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException {
80                 final FastaReader fr = new FastaReader(stream);
81                 String protein = "";
82                 while (fr.hasNext()) {
83                         final FastaSequence fs = fr.next();
84                         String seqid = fs.getId();
85                         String seq = fs.getSequence().replaceAll("\n", "");
86                         if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28")
87                                         || seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF")
88                                         || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM") || seqid.equals("JNETCONF")) {
89                                 predictions.add(fs);
90                         } else {
91                                 alignment.add(fs);
92                                 if (seqid.equals("QUERY") || seqid.equals(jobid))
93                                         protein = seq;
94                         }
95                 }
96                 return protein;
97         }
98
99         private String parseSeqFile(final InputStream stream, String jobid) throws FileNotFoundException {
100                 final FastaReader fr = new FastaReader(stream);
101                 String protein = "";
102                 final FastaSequence fs = fr.next();
103                 protein = fs.getSequence().replaceAll("\n", "");
104                 if (fr.hasNext()) {
105                         // this is an aligment job...
106                         return "alignment";
107                 }
108                 return protein;
109         }
110
111         private String parseLogFile(final InputStream stream, JpredJob job) throws IOException {
112                 String out = "";
113                 BufferedReader buffer = new BufferedReader(new InputStreamReader(stream));
114                 String line;
115                 if (null != (out = buffer.readLine()) && (out.contains("version"))) {
116                         Matcher matcher = Pattern.compile("((\\d|\\.)+)").matcher(out);
117                         if (matcher.find())
118                                 job.setVersion(matcher.group(0));
119                 }
120                 while (null != (line = buffer.readLine())) {
121                         out += line;            
122                 }
123                 return out;
124         }
125
126         private int analyseJob(String[] jobinfo) throws IOException {
127                 alignment = new ArrayList<FastaSequence>();
128                 predictions = new ArrayList<FastaSequence>();
129                 boolean running = true;
130                 boolean ConcisefileExists = false;
131                 boolean LogfileExists = false;
132                 JpredJob job = new JpredJob(jobinfo[jobinfo.length - 1], jobinfo[0], jobinfo[1]);
133                 job.setIP(jobinfo[2]);
134                 Date currDate = new Date();
135                 String maindir = dirprefix + "/" + job.getJobID() + "/";
136
137                 try {
138                         URL dirurl = new URL(maindir);
139                         HttpURLConnection httpConnection_dirurl = (HttpURLConnection) dirurl.openConnection();
140                         if (httpConnection_dirurl.getResponseCode() < 199 || 300 <= httpConnection_dirurl.getResponseCode()) {
141                                 return 0;
142                         }
143                         URL conciseurl = new URL(maindir + job.getJobID() + ".concise.fasta");
144                         URL logurl = new URL(maindir + "LOG");
145                         HttpURLConnection httpConnection_conciseurl = (HttpURLConnection) conciseurl.openConnection();
146                         HttpURLConnection httpConnection_logurl = (HttpURLConnection) logurl.openConnection();
147                         if (199 < httpConnection_conciseurl.getResponseCode() && httpConnection_conciseurl.getResponseCode() < 300) {
148                                 ConcisefileExists = true;
149                                 running = false;
150                                 try {                           
151                                         job.setProtein(parsePredictions(conciseurl.openStream(), job.getJobID()));
152                                 } catch (IOException e) {
153                                         e.printStackTrace();
154                                 }
155                         } else {
156                                 // The job still can be running of failed...
157                                 ++countNoData;
158                         }
159                         if (199 < httpConnection_logurl.getResponseCode() && httpConnection_logurl.getResponseCode() < 300) {
160                                 LogfileExists = true;
161                                 job.setProgrammeName("Jpred");
162                                 job.setLog(parseLogFile(logurl.openStream(), job));
163                         } else {
164                                 // The job has not been started at all...
165                                 job.setExecutionStatus("FAIL");
166                                 job.setFinalStatus("STOPPED");
167                                 running = false;
168                         }
169                         if (job.getLog().matches("(.*)TIMEOUT\\syour\\sjob\\stimed\\sout(.*)")) {
170                                 // blast job was too long (more than 3600 secs by default)...
171                                 job.setExecutionStatus("FAIL");
172                                 job.setFinalStatus("TIMEDOUT");
173                                 running = false;
174                         } else if (job.getLog().matches("(.*)Jpred\\serror:\\sDied\\sat(.*)")) {
175                                 // an internal Jpred error...
176                                 job.setExecutionStatus("FAIL");
177                                 job.setFinalStatus("JPREDERROR");
178                                 running = false;
179                         } else if ((currDate.getTime() - job.getEndTime()) / 1000 > 3601 && LogfileExists && !ConcisefileExists) {
180                                 // the job was stopped with unknown reason...
181                                 job.setExecutionStatus("FAIL");
182                                 job.setFinalStatus("STOPPED");
183                                 running = false;
184                         }
185
186                         httpConnection_conciseurl.disconnect();
187                         httpConnection_logurl.disconnect();
188                 } catch (MalformedURLException e) {
189                         e.printStackTrace();
190                 }
191
192                 if (!running) {
193                         // logging the job
194                         job.setAlignment(alignment);
195                         job.setPredictions(predictions);
196                         if (job.getExecutionStatus().equals("FAIL")) {
197                                 URL sequrl = new URL(maindir + job.getJobID() + ".seq");
198                                 HttpURLConnection httpConnection_sequrl = (HttpURLConnection) sequrl.openConnection();
199                                 if (199 < httpConnection_sequrl.getResponseCode() && httpConnection_sequrl.getResponseCode() < 300) {
200                                         try {
201                                                 job.setProtein(parseSeqFile(sequrl.openStream(), job.getJobID()));
202                                         } catch (IOException e) {
203                                                 e.printStackTrace();
204                                         }
205                                 }
206                         }
207                         cw.FormQueryTables(job);
208
209                         // archiving the job
210                         if (archiving) {
211                                 ArchivedJob ajob = new ArchivedJob(job.getJobID());
212                                 String arlink = archive.createJob(job.getJobID());
213                                 if (job.getFinalStatus().equals("OK")) {
214                                         ajob.setArchivePath(arlink);
215                                         ajob.copyArchiveFromWeb(maindir + job.getJobID() + ".tar.gz");
216                                         cw.ArchiveData(job, arlink);
217                                 } else {
218                                         cw.ArchiveData(job, "undefined");
219                                 }
220                         }
221                         return 1;
222                 }
223
224                 return 0;
225         }
226
227         private void ParsingOneDay(String input, String date) {
228                 int totalcount = 0;
229                 int countinsertions = 0;
230                 int countinserted = 0;
231                 int countNotanalyzed = 0;
232                 countNoData = 0;
233
234                 System.out.println("Inserting jobs for " + date);
235                 try {
236                         URL url = new URL(input);
237                         URLConnection conn = url.openConnection();
238                         BufferedReader alljobs = new BufferedReader(new InputStreamReader(conn.getInputStream()));
239                         String line;
240
241                         while ((line = alljobs.readLine()) != null) {
242                                 if (line.matches(date + ":(.*)jp_[^\\s]+")) {
243                                         totalcount++;
244                                         String[] job = line.split("\\s+");
245                                         String jobid = job[job.length - 1];
246                                         if (cw.JobisNotInsterted(jobid)) {
247                                                 countinsertions += analyseJob(job);
248                                         } else {
249                                                 ++countinserted;
250                                         }
251                                 } else {
252                                         ++countNotanalyzed;
253                                 }
254                         }
255                         alljobs.close();
256                         System.out.println("Total number of jobs = " + totalcount);
257                         System.out.println("   " + countinserted + " jobs inserted already");
258                         System.out.println("   " + countNotanalyzed + " not analysed jobs");
259                         System.out.println("   " + countNoData + " jobs without *.concise.fasta file (RUNNING or FAILED)");
260                         System.out.println("   " + countinsertions + " new job insertions\n");
261                 } catch (MalformedURLException e) {
262                         e.printStackTrace();
263                 } catch (IOException e) {
264                         e.printStackTrace();
265                 }
266                 ;
267         }
268 };