453f01f73e9728d8691a01e8a5e87bb429c31226
[proteocache.git] / datadb / compbio / cassandra / JpredParserHTTP.java
1 package compbio.cassandra;
2
3 import java.io.BufferedReader;
4 import java.io.FileNotFoundException;
5 import java.io.IOException;
6 import java.io.InputStream;
7 import java.io.InputStreamReader;
8 import java.net.HttpURLConnection;
9 import java.net.MalformedURLException;
10 import java.net.URL;
11 import java.net.URLConnection;
12 import java.util.ArrayList;
13 import java.util.Calendar;
14 import java.util.Date;
15 import java.util.List;
16
17 import compbio.cassandra.JpredParser;
18 import compbio.data.sequence.FastaReader;
19 import compbio.data.sequence.FastaSequence;
20 import compbio.engine.JpredJob;
21 import compbio.engine.ProteoCachePropertyHelperManager;
22 import compbio.engine.archive.Archive;
23 import compbio.engine.archive.ArchivedJob;
24 import compbio.util.PropertyHelper;
25 import compbio.util.Util;
26
27 public class JpredParserHTTP implements JpredParser {
28         private CassandraWriter cw = new CassandraWriter();
29         private static Archive archive;
30         private String dirprefix;
31         private List<FastaSequence> alignment;
32         private List<FastaSequence> predictions;
33         private int countNoData;
34         private static boolean archiving = false;
35         private static final PropertyHelper ph = ProteoCachePropertyHelperManager.getPropertyHelper();
36
37         public JpredParserHTTP() {
38                 dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
39         }
40
41         public JpredParserHTTP(String sourceurl) {
42                 dirprefix = sourceurl;
43         }
44
45         public void setSource(String newsourceprefix) {
46                 dirprefix = newsourceprefix;
47         }
48
49         private boolean initBooleanValue(String key) {
50                 assert key != null;
51                 String status = ph.getProperty(key);
52                 if (Util.isEmpty(status)) {
53                         return false;
54                 }
55                 return new Boolean(status.trim()).booleanValue();
56         }
57
58         public void Parsing(String source, int nDays) throws IOException {
59                 Calendar cal = Calendar.getInstance();
60                 cal.add(Calendar.DATE, -nDays);
61                 archiving = initBooleanValue("archive.enable");
62                 if (archiving) {
63                         archive = new Archive();
64                 }
65                 for (int i = 0; i < nDays; ++i) {
66                         cal.add(Calendar.DATE, 1);
67                         String date = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DATE);
68                         ParsingOneDay(source, date);
69                 }
70         }
71
72         /*
73          * The method parses the Jpred output concise file in the FASTA format If
74          * there is a record with ID = QUERY or jobid, this a "one protein" job
75          * otherwise this is an alignment job
76          */
77         private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException {
78                 final FastaReader fr = new FastaReader(stream);
79                 String protein = "";
80                 while (fr.hasNext()) {
81                         final FastaSequence fs = fr.next();
82                         String seqid = fs.getId();
83                         String seq = fs.getSequence().replaceAll("\n", "");
84                         if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28")
85                                         || seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF")
86                                         || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM") || seqid.equals("JNETCONF")) {
87                                 predictions.add(fs);
88                         } else {
89                                 alignment.add(fs);
90                                 if (seqid.equals("QUERY") || seqid.equals(jobid))
91                                         protein = seq;
92                         }
93                 }
94                 return protein;
95         }
96         private String parseSeqFile(final InputStream stream, String jobid) throws FileNotFoundException {
97                 final FastaReader fr = new FastaReader(stream);
98                 String protein = "";
99                 final FastaSequence fs = fr.next();
100                 protein = fs.getSequence().replaceAll("\n", "");
101                 if (fr.hasNext()) {
102                         // this is an aligment job...
103                         return "alignment";
104                 }
105                 return protein;
106         }
107
108         private String parseLogFile(final InputStream stream) throws IOException {
109                 String out = "";
110                 BufferedReader buffer = new BufferedReader(new InputStreamReader(stream));
111                 String line;
112                 while (null != (line = buffer.readLine())) {
113                         out += line;
114                 }
115                 return out;
116         }
117
118         private int analyseJob(String[] jobinfo) throws IOException {
119                 alignment = new ArrayList<FastaSequence>();
120                 predictions = new ArrayList<FastaSequence>();
121                 boolean running = true;
122                 boolean ConcisefileExists = false;
123                 boolean LogfileExists = false;
124                 JpredJob job = new JpredJob(jobinfo[jobinfo.length - 1], jobinfo[0], jobinfo[1]);
125                 job.setIP(jobinfo[2]);
126                 Date currDate = new Date();
127                 String maindir = dirprefix + "/" + job.getJobID() + "/";
128
129                 try {
130                         URL dirurl = new URL(maindir);
131                         HttpURLConnection httpConnection_dirurl = (HttpURLConnection) dirurl.openConnection();
132                         if (httpConnection_dirurl.getResponseCode() < 199 || 300 <= httpConnection_dirurl.getResponseCode()) {
133                                 return 0;
134                         }
135                         URL conciseurl = new URL(maindir + job.getJobID() + ".concise.fasta");
136                         URL logurl = new URL(maindir + "LOG");
137                         HttpURLConnection httpConnection_conciseurl = (HttpURLConnection) conciseurl.openConnection();
138                         HttpURLConnection httpConnection_logurl = (HttpURLConnection) logurl.openConnection();
139                         if (199 < httpConnection_conciseurl.getResponseCode() && httpConnection_conciseurl.getResponseCode() < 300) {
140                                 ConcisefileExists = true;
141                                 running = false;
142                                 try {
143                                         job.setProtein(parsePredictions(conciseurl.openStream(), job.getJobID()));
144                                 } catch (IOException e) {
145                                         e.printStackTrace();
146                                 }
147                         } else {
148                                 // The job still can be running of failed...
149                                 ++countNoData;
150                         }
151                         if (199 < httpConnection_logurl.getResponseCode() && httpConnection_logurl.getResponseCode() < 300) {
152                                 LogfileExists = true;
153                                 job.setLog(parseLogFile(logurl.openStream()));
154                         } else {
155                                 // The job has not been started at all...
156                                 job.setExecutionStatus("FAIL");
157                                 job.setFinalStatus("STOPPED");
158                                 running = false;
159                         }
160                         if (job.getLog().matches("(.*)TIMEOUT\\syour\\sjob\\stimed\\sout(.*)")) {
161                                 // blast job was too long (more than 3600 secs by default)...
162                                 job.setExecutionStatus("FAIL");
163                                 job.setFinalStatus("TIMEDOUT");
164                                 running = false;
165                         } else if (job.getLog().matches("(.*)Jpred\\serror:\\sDied\\sat(.*)")) {
166                                 // an internal Jpred error...
167                                 job.setExecutionStatus("FAIL");
168                                 job.setFinalStatus("JPREDERROR");
169                                 running = false;
170                         } else if ((currDate.getTime() - job.getEndTime()) / 1000 > 3601 && LogfileExists && !ConcisefileExists) {
171                                 // the job was stopped with unknown reason...
172                                 job.setExecutionStatus("FAIL");
173                                 job.setFinalStatus("STOPPED");
174                                 running = false;
175                         }
176
177                         httpConnection_conciseurl.disconnect();
178                         httpConnection_logurl.disconnect();
179                 } catch (MalformedURLException e) {
180                         e.printStackTrace();
181                 }
182
183                 if (!running) {
184                         // logging the job
185                         job.setAlignment(alignment);
186                         job.setPredictions(predictions);
187                         if (job.getExecutionStatus().equals("FAIL")) {
188                                 URL sequrl = new URL(maindir + job.getJobID() + ".seq");
189                                 HttpURLConnection httpConnection_sequrl = (HttpURLConnection) sequrl.openConnection();
190                                 if (199 < httpConnection_sequrl.getResponseCode() && httpConnection_sequrl.getResponseCode() < 300) {
191                                         try {
192                                                 job.setProtein(parseSeqFile(sequrl.openStream(), job.getJobID()));
193                                         } catch (IOException e) {
194                                                 e.printStackTrace();
195                                         }
196                                 }
197                         }
198                         cw.FormQueryTables(job);
199
200                         // archiving the job
201                         if (archiving) {
202                                 ArchivedJob ajob = new ArchivedJob(job.getJobID());
203                                 String arlink = archive.createJob(job.getJobID());
204                                 if (job.getFinalStatus().equals("OK")) {
205                                         ajob.setArchivePath(arlink);
206                                         ajob.copyArchiveFromWeb(maindir + job.getJobID() + ".tar.gz");
207                                         cw.ArchiveData(job, arlink);
208                                 } else {
209                                         cw.ArchiveData(job, "undefined");
210                                 }
211                         }
212                         return 1;
213                 }
214
215                 return 0;
216         }
217
218         private void ParsingOneDay(String input, String date) {
219                 int totalcount = 0;
220                 int countinsertions = 0;
221                 int countinserted = 0;
222                 int countNotanalyzed = 0;
223                 countNoData = 0;
224
225                 System.out.println("Inserting jobs for " + date);
226                 try {
227                         URL url = new URL(input);
228                         URLConnection conn = url.openConnection();
229                         BufferedReader alljobs = new BufferedReader(new InputStreamReader(conn.getInputStream()));
230                         String line;
231
232                         while ((line = alljobs.readLine()) != null) {
233                                 if (line.matches(date + ":(.*)jp_[^\\s]+")) {
234                                         totalcount++;
235                                         String[] job = line.split("\\s+");
236                                         String jobid = job[job.length - 1];
237                                         if (cw.JobisNotInsterted(jobid)) {
238                                                 countinsertions += analyseJob(job);
239                                         } else {
240                                                 ++countinserted;
241                                         }
242                                 } else {
243                                         ++countNotanalyzed;
244                                 }
245                         }
246                         alljobs.close();
247                         System.out.println("Total number of jobs = " + totalcount);
248                         System.out.println("   " + countinserted + " jobs inserted already");
249                         System.out.println("   " + countNotanalyzed + " not analysed jobs");
250                         System.out.println("   " + countNoData + " jobs without *.concise.fasta file (RUNNING or FAILED)");
251                         System.out.println("   " + countinsertions + " new job insertions\n");
252                 } catch (MalformedURLException e) {
253                         e.printStackTrace();
254                 } catch (IOException e) {
255                         e.printStackTrace();
256                 }
257                 ;
258         }
259 };