Temporary fix for the problem with the wrong Jpred version
[proteocache.git] / datadb / compbio / cassandra / JpredParserHTTP.java
1 package compbio.cassandra;
2
3 import java.io.BufferedReader;
4 import java.io.FileNotFoundException;
5 import java.io.IOException;
6 import java.io.InputStream;
7 import java.io.InputStreamReader;
8 import java.net.HttpURLConnection;
9 import java.net.MalformedURLException;
10 import java.net.URL;
11 import java.net.URLConnection;
12 import java.text.ParseException;
13 import java.text.SimpleDateFormat;
14 import java.util.ArrayList;
15 import java.util.Calendar;
16 import java.util.Date;
17 import java.util.List;
18 import java.util.regex.Matcher;
19 import java.util.regex.Pattern;
20
21 import compbio.cassandra.JpredParser;
22 import compbio.data.sequence.FastaReader;
23 import compbio.data.sequence.FastaSequence;
24 import compbio.engine.JpredJob;
25 import compbio.engine.ProteoCachePropertyHelperManager;
26 import compbio.engine.archive.Archive;
27 import compbio.engine.archive.ArchivedJob;
28 import compbio.util.PropertyHelper;
29 import compbio.util.Util;
30
31 public class JpredParserHTTP implements JpredParser {
32         private CassandraWriter cw = new CassandraWriter();
33         private static Archive archive;
34         private String dirprefix;
35         private List<FastaSequence> alignment;
36         private List<FastaSequence> predictions;
37         private int countNoData;
38         private static boolean archiving = false;
39         private static final PropertyHelper ph = ProteoCachePropertyHelperManager.getPropertyHelper();
40         static SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
41
/**
 * Creates a parser that looks up job results under the default Jpred results URL.
 */
public JpredParserHTTP() {
    this("http://www.compbio.dundee.ac.uk/www-jpred/results");
}

/**
 * Creates a parser that looks up job results under the given URL prefix.
 *
 * @param sourceurl
 *            prefix to which "/" + job id + "/" is appended to locate a job
 */
public JpredParserHTTP(String sourceurl) {
    this.dirprefix = sourceurl;
}
49
50         public void setSource(String newsourceprefix) {
51                 dirprefix = newsourceprefix;
52         }
53
54         private boolean initBooleanValue(String key) {
55                 assert key != null;
56                 String status = ph.getProperty(key);
57                 if (Util.isEmpty(status)) {
58                         return false;
59                 }
60                 return new Boolean(status.trim()).booleanValue();
61         }
62
63         public void Parsing(String source, int nDays) throws IOException {
64                 Calendar cal = Calendar.getInstance();
65                 cal.add(Calendar.DATE, -nDays);
66                 archiving = initBooleanValue("archive.enable");
67                 if (archiving) {
68                         archive = new Archive();
69                 }
70                 for (int i = 0; i < nDays; ++i) {
71                         cal.add(Calendar.DATE, 1);
72                         String date = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DATE);
73                         ParsingOneDay(source, date);
74                 }
75         }
76
77         /*
78          * The method parses the Jpred output concise file in the FASTA format If
79          * there is a record with ID = QUERY or jobid, this a "one protein" job
80          * otherwise this is an alignment job
81          */
82         private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException {
83                 final FastaReader fr = new FastaReader(stream);
84                 String protein = "";
85                 while (fr.hasNext()) {
86                         final FastaSequence fs = fr.next();
87                         String seqid = fs.getId();
88                         String seq = fs.getSequence().replaceAll("\n", "");
89                         if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28")
90                                         || seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF")
91                                         || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM") || seqid.equals("JNETCONF")) {
92                                 predictions.add(fs);
93                         } else {
94                                 alignment.add(fs);
95                                 if (seqid.equals("QUERY") || seqid.equals(jobid))
96                                         protein = seq;
97                         }
98                 }
99                 return protein;
100         }
101
102         private String parseSeqFile(final InputStream stream, String jobid) throws FileNotFoundException {
103                 final FastaReader fr = new FastaReader(stream);
104                 String protein = "";
105                 final FastaSequence fs = fr.next();
106                 protein = fs.getSequence().replaceAll("\n", "");
107                 if (fr.hasNext()) {
108                         // this is an aligment job...
109                         return "alignment";
110                 }
111                 return protein;
112         }
113
114         private String parseLogFile(final InputStream stream, JpredJob job) throws IOException {
115                 String out = "";
116                 BufferedReader buffer = new BufferedReader(new InputStreamReader(stream));
117                 String line;
118                 if (null != (out = buffer.readLine()) && (out.contains("version"))) {
119                         Matcher matcher = Pattern.compile("((\\d|\\.)+)").matcher(out);
120                         if (matcher.find())
121                                 job.setProgramVersion(matcher.group(0));
122                 }
123                 while (null != (line = buffer.readLine())) {
124                         out += line;            
125                 }
126                 return out;
127         }
128
129         private int analyseJob(String[] jobinfo) throws IOException {
130                 alignment = new ArrayList<FastaSequence>();
131                 predictions = new ArrayList<FastaSequence>();
132                 boolean running = true;
133                 boolean ConcisefileExists = false;
134                 boolean LogfileExists = false;
135                 JpredJob job = new JpredJob(jobinfo[jobinfo.length - 1], jobinfo[0], jobinfo[1]);
136                 job.setIP(jobinfo[2]);
137                 job.setProgramName("Jpred");
138                 job.setProgramVersion("3.0.1");
139                 Date currDate = new Date();
140                 String maindir = dirprefix + "/" + job.getJobID() + "/";
141
142                 try {
143                         Date finishTime = timeformatter.parse(jobinfo[1]);
144                         long delay = currDate.getTime() / 1000 - finishTime.getTime() / 1000;
145                         if (delay < 120) return 0;
146                 } catch (ParseException e) {
147                         e.printStackTrace();
148                 }
149
150                 try {
151                         URL dirurl = new URL(maindir);
152                         HttpURLConnection httpConnection_dirurl = (HttpURLConnection) dirurl.openConnection();
153                         if (httpConnection_dirurl.getResponseCode() < 199 || 300 <= httpConnection_dirurl.getResponseCode()) {
154                                 return 0;
155                         }
156                         URL conciseurl = new URL(maindir + job.getJobID() + ".concise.fasta");
157                         URL logurl = new URL(maindir + "LOG");
158                         HttpURLConnection httpConnection_conciseurl = (HttpURLConnection) conciseurl.openConnection();
159                         HttpURLConnection httpConnection_logurl = (HttpURLConnection) logurl.openConnection();
160                         if (199 < httpConnection_conciseurl.getResponseCode() && httpConnection_conciseurl.getResponseCode() < 300) {
161                                 ConcisefileExists = true;
162                                 running = false;
163                                 try {                           
164                                         job.setProtein(parsePredictions(conciseurl.openStream(), job.getJobID()));
165                                 } catch (IOException e) {
166                                         e.printStackTrace();
167                                 }
168                         } else {
169                                 // The job still can be running of failed...
170                                 ++countNoData;
171                         }
172                         if (199 < httpConnection_logurl.getResponseCode() && httpConnection_logurl.getResponseCode() < 300) {
173                                 LogfileExists = true;
174                                 job.setLog(parseLogFile(logurl.openStream(), job));
175                         } else {
176                                 // The job has not been started at all...
177                                 System.out.println ("WARNING! Job " + job.getJobID() + " has status FAIL/STOPPED");
178                                 job.setExecutionStatus("FAIL");
179                                 job.setFinalStatus("STOPPED");
180                                 running = false;
181                         }
182                         if (job.getLog().matches("(.*)TIMEOUT\\syour\\sjob\\stimed\\sout(.*)")) {
183                                 // blast job was too long (more than 3600 secs by default)...
184                                 job.setExecutionStatus("FAIL");
185                                 job.setFinalStatus("TIMEDOUT");
186                                 running = false;
187                         } else if (job.getLog().matches("(.*)Jpred\\serror:\\sDied\\sat(.*)")) {
188                                 // an internal Jpred error...
189                                 job.setExecutionStatus("FAIL");
190                                 job.setFinalStatus("JPREDERROR");
191                                 running = false;
192                         } else if ((currDate.getTime() - job.getEndTime()) / 1000 > 3601 && LogfileExists && !ConcisefileExists) {
193                                 // the job was stopped with unknown reason...
194                                 job.setExecutionStatus("FAIL");
195                                 job.setFinalStatus("STOPPED");
196                                 running = false;
197                         }
198
199                         httpConnection_conciseurl.disconnect();
200                         httpConnection_logurl.disconnect();
201                 } catch (MalformedURLException e) {
202                         e.printStackTrace();
203                 }
204
205                 if (!running) {
206                         // logging the job
207                         job.setAlignment(alignment);
208                         job.setPredictions(predictions);
209                         if (job.getExecutionStatus().equals("FAIL")) {
210                                 URL sequrl = new URL(maindir + job.getJobID() + ".seq");
211                                 HttpURLConnection httpConnection_sequrl = (HttpURLConnection) sequrl.openConnection();
212                                 if (199 < httpConnection_sequrl.getResponseCode() && httpConnection_sequrl.getResponseCode() < 300) {
213                                         try {
214                                                 job.setProtein(parseSeqFile(sequrl.openStream(), job.getJobID()));
215                                         } catch (IOException e) {
216                                                 e.printStackTrace();
217                                         }
218                                 }
219                         }
220                         cw.FormQueryTables(job);
221
222                         // archiving the job
223                         if (archiving) {
224                                 ArchivedJob ajob = new ArchivedJob(job.getJobID());
225                                 String arlink = archive.createJob(job.getJobID());
226                                 if (job.getFinalStatus().equals("OK")) {
227                                         ajob.setArchivePath(arlink);
228                                         ajob.copyArchiveFromWeb(maindir + job.getJobID() + ".tar.gz");
229                                         cw.ArchiveData(job, arlink);
230                                 } else {
231                                         cw.ArchiveData(job, "undefined");
232                                 }
233                         }
234                         return 1;
235                 }
236
237                 return 0;
238         }
239
240         private void ParsingOneDay(String input, String date) {
241                 int totalcount = 0;
242                 int countinsertions = 0;
243                 int countinserted = 0;
244                 int countNotanalyzed = 0;
245                 countNoData = 0;
246
247                 System.out.println("Inserting jobs for " + date);
248                 try {
249                         URL url = new URL(input);
250                         URLConnection conn = url.openConnection();
251                         BufferedReader alljobs = new BufferedReader(new InputStreamReader(conn.getInputStream()));
252                         String line;
253
254                         while ((line = alljobs.readLine()) != null) {
255                                 if (line.matches(date + ":(.*)jp_[^\\s]+")) {
256                                         totalcount++;
257                                         String[] job = line.split("\\s+");
258                                         String jobid = job[job.length - 1];
259                                         if (cw.JobisNotInsterted(jobid)) {
260                                                 countinsertions += analyseJob(job);
261                                         } else {
262                                                 ++countinserted;
263                                         }
264                                 } else {
265                                         ++countNotanalyzed;
266                                 }
267                         }
268                         alljobs.close();
269                         System.out.println("Total number of jobs = " + totalcount);
270                         System.out.println("   " + countinserted + " jobs inserted already");
271                         System.out.println("   " + countNotanalyzed + " not analysed jobs");
272                         System.out.println("   " + countNoData + " jobs without *.concise.fasta file (RUNNING or FAILED)");
273                         System.out.println("   " + countinsertions + " new job insertions\n");
274                 } catch (MalformedURLException e) {
275                         e.printStackTrace();
276                 } catch (IOException e) {
277                         e.printStackTrace();
278                 }
279                 ;
280         }
281 };