fix html errors
[proteocache.git] / datadb / compbio / cassandra / JpredParserHTTP.java
1 package compbio.cassandra;
2
3 import java.io.BufferedReader;
4 import java.io.FileNotFoundException;
5 import java.io.IOException;
6 import java.io.InputStream;
7 import java.io.InputStreamReader;
8 import java.net.Authenticator;
9 import java.net.HttpURLConnection;
10 import java.net.MalformedURLException;
11 import java.net.PasswordAuthentication;
12 import java.net.URL;
13 import java.net.URLConnection;
14 import java.text.ParseException;
15 import java.text.SimpleDateFormat;
16 import java.util.ArrayList;
17 import java.util.Calendar;
18 import java.util.Date;
19 import java.util.List;
20 import java.util.regex.Matcher;
21 import java.util.regex.Pattern;
22
23 import compbio.cassandra.JpredParser;
24 import compbio.data.sequence.FastaReader;
25 import compbio.data.sequence.FastaSequence;
26 import compbio.engine.JpredJob;
27 import compbio.engine.ProteoCachePropertyHelperManager;
28 import compbio.engine.archive.Archive;
29 import compbio.engine.archive.ArchivedJob;
30 import compbio.util.PropertyHelper;
31 import compbio.util.Util;
32
33 public class JpredParserHTTP implements JpredParser {
34         private CassandraWriter cw = new CassandraWriter();
35         private static Archive archive;
36         private String dirprefix;
37         private List<FastaSequence> alignment;
38         private List<FastaSequence> predictions;
39         private int countNoData;
40         private static boolean archiving = false;
41         private static final PropertyHelper ph = ProteoCachePropertyHelperManager.getPropertyHelper();
42         static SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
43
44         public JpredParserHTTP() {
45                 dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
46                 launchAuthenticator();
47         }
48
49         public void launchAuthenticator() {
50                 final String authUser = "as373024";
51             final String authPassword = "Zx1--L12";
52             final String authHost = "gskproxy.gsk.com";
53             final String authPort = "800";
54               Authenticator.setDefault(new Authenticator() {
55                  public PasswordAuthentication getPasswordAuthentication() {
56                     return new PasswordAuthentication(authUser, authPassword.toCharArray());
57                  }
58               });
59               System.setProperty("proxySet", "true");
60               System.setProperty("http.proxyUser", authUser);
61               System.setProperty("http.proxyPassword", authPassword);
62               System.setProperty("http.proxyHost", authHost);
63               System.setProperty("http.proxyPort", authPort);
64         }
65         
66         public JpredParserHTTP(String sourceurl) {
67               dirprefix = sourceurl;
68               launchAuthenticator();
69         }
70
71         public void setSource(String newsourceprefix) {
72                 dirprefix = newsourceprefix;
73         }
74
75         private boolean initBooleanValue(String key) {
76                 assert key != null;
77                 String status = ph.getProperty(key);
78                 if (Util.isEmpty(status)) {
79                         return false;
80                 }
81                 return new Boolean(status.trim()).booleanValue();
82         }
83
84         public void Parsing(String source, int nDays) throws IOException {
85                 Calendar cal = Calendar.getInstance();
86                 cal.add(Calendar.DATE, -nDays);
87                 archiving = initBooleanValue("archive.enable");
88                 if (archiving) {
89                         archive = new Archive();
90                 }
91                 for (int i = 0; i < nDays; ++i) {
92                         cal.add(Calendar.DATE, 1);
93                         String date = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DATE);
94                         ParsingOneDay(source, date);
95                 }
96         }
97
98         /*
99          * The method parses the Jpred output concise file in the FASTA format If
100          * there is a record with ID = QUERY or jobid, this a "one protein" job
101          * otherwise this is an alignment job
102          */
103         private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException {
104                 final FastaReader fr = new FastaReader(stream);
105                 String protein = "";
106                 while (fr.hasNext()) {
107                         final FastaSequence fs = fr.next();
108                         String seqid = fs.getId();
109                         String seq = fs.getSequence().replaceAll("\n", "");
110                         if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28")
111                                         || seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF")
112                                         || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM") || seqid.equals("JNETCONF")) {
113                                 predictions.add(fs);
114                         } else {
115                                 alignment.add(fs);
116                                 if (seqid.equals("QUERY") || seqid.equals(jobid))
117                                         protein = seq;
118                         }
119                 }
120                 return protein;
121         }
122
123         private String parseSeqFile(final InputStream stream, String jobid) throws FileNotFoundException {
124                 final FastaReader fr = new FastaReader(stream);
125                 String protein = "";
126                 final FastaSequence fs = fr.next();
127                 protein = fs.getSequence().replaceAll("\n", "");
128                 if (fr.hasNext()) {
129                         // this is an aligment job...
130                         return "alignment";
131                 }
132                 return protein;
133         }
134
135         private String parseLogFile(final InputStream stream, JpredJob job) throws IOException {
136                 String out = "";
137                 BufferedReader buffer = new BufferedReader(new InputStreamReader(stream));
138                 String line;
139                 if (null != (out = buffer.readLine()) && (out.contains("version"))) {
140                         Matcher matcher = Pattern.compile("((\\d|\\.)+)").matcher(out);
141                         if (matcher.find())
142                                 job.setProgramVersion(matcher.group(0));
143                 }
144                 while (null != (line = buffer.readLine())) {
145                         out += line;            
146                 }
147                 return out;
148         }
149
150         private int analyseJob(String[] jobinfo) throws IOException {
151                 alignment = new ArrayList<FastaSequence>();
152                 predictions = new ArrayList<FastaSequence>();
153                 boolean running = true;
154                 boolean ConcisefileExists = false;
155                 boolean LogfileExists = false;
156                 JpredJob job = new JpredJob(jobinfo[jobinfo.length - 1], jobinfo[0], jobinfo[1]);
157                 job.setIP(jobinfo[2]);
158                 job.setProgramName("Jpred");
159                 job.setProgramVersion("3.0.1");
160                 Date currDate = new Date();
161                 String maindir = dirprefix + "/" + job.getJobID() + "/";
162
163                 try {
164                         Date finishTime = timeformatter.parse(jobinfo[1]);
165                         long delay = currDate.getTime() / 1000 - finishTime.getTime() / 1000;
166                         if (delay < 120) return 0;
167                 } catch (ParseException e) {
168                         e.printStackTrace();
169                 }
170
171                 try {
172                         URL dirurl = new URL(maindir);
173                         HttpURLConnection httpConnection_dirurl = (HttpURLConnection) dirurl.openConnection();
174                         if (httpConnection_dirurl.getResponseCode() < 199 || 300 <= httpConnection_dirurl.getResponseCode()) {
175                                 return 0;
176                         }
177                         URL conciseurl = new URL(maindir + job.getJobID() + ".concise.fasta");
178                         URL logurl = new URL(maindir + "LOG");
179                         HttpURLConnection httpConnection_conciseurl = (HttpURLConnection) conciseurl.openConnection();
180                         HttpURLConnection httpConnection_logurl = (HttpURLConnection) logurl.openConnection();
181                         if (199 < httpConnection_conciseurl.getResponseCode() && httpConnection_conciseurl.getResponseCode() < 300) {
182                                 ConcisefileExists = true;
183                                 running = false;
184                                 try {                           
185                                         job.setProtein(parsePredictions(conciseurl.openStream(), job.getJobID()));
186                                 } catch (IOException e) {
187                                         e.printStackTrace();
188                                 }
189                         } else {
190                                 // The job still can be running of failed...
191                                 ++countNoData;
192                         }
193                         if (199 < httpConnection_logurl.getResponseCode() && httpConnection_logurl.getResponseCode() < 300) {
194                                 LogfileExists = true;
195                                 job.setLog(parseLogFile(logurl.openStream(), job));
196                         } else {
197                                 // The job has not been started at all...
198                                 System.out.println ("WARNING! Job " + job.getJobID() + " has status FAIL/STOPPED");
199                                 job.setExecutionStatus("FAIL");
200                                 job.setFinalStatus("STOPPED");
201                                 running = false;
202                         }
203                         if (job.getLog().matches("(.*)TIMEOUT\\syour\\sjob\\stimed\\sout(.*)")) {
204                                 // blast job was too long (more than 3600 secs by default)...
205                                 job.setExecutionStatus("FAIL");
206                                 job.setFinalStatus("TIMEDOUT");
207                                 running = false;
208                         } else if (job.getLog().matches("(.*)Jpred\\serror:\\sDied\\sat(.*)")) {
209                                 // an internal Jpred error...
210                                 job.setExecutionStatus("FAIL");
211                                 job.setFinalStatus("JPREDERROR");
212                                 running = false;
213                         } else if ((currDate.getTime() - job.getEndTime()) / 1000 > 3601 && LogfileExists && !ConcisefileExists) {
214                                 // the job was stopped with unknown reason...
215                                 job.setExecutionStatus("FAIL");
216                                 job.setFinalStatus("STOPPED");
217                                 running = false;
218                         }
219
220                         httpConnection_conciseurl.disconnect();
221                         httpConnection_logurl.disconnect();
222                 } catch (MalformedURLException e) {
223                         e.printStackTrace();
224                 }
225
226                 if (!running) {
227                         // logging the job
228                         job.setAlignment(alignment);
229                         job.setPredictions(predictions);
230                         if (job.getExecutionStatus().equals("FAIL")) {
231                                 URL sequrl = new URL(maindir + job.getJobID() + ".seq");
232                                 HttpURLConnection httpConnection_sequrl = (HttpURLConnection) sequrl.openConnection();
233                                 if (199 < httpConnection_sequrl.getResponseCode() && httpConnection_sequrl.getResponseCode() < 300) {
234                                         try {
235                                                 job.setProtein(parseSeqFile(sequrl.openStream(), job.getJobID()));
236                                         } catch (IOException e) {
237                                                 e.printStackTrace();
238                                         }
239                                 }
240                         }
241                         cw.FormQueryTables(job);
242
243                         // archiving the job
244                         if (archiving) {
245                                 ArchivedJob ajob = new ArchivedJob(job.getJobID());
246                                 String arlink = archive.createJob(job.getJobID());
247                                 if (job.getFinalStatus().equals("OK")) {
248                                         ajob.setArchivePath(arlink);
249                                         ajob.copyArchiveFromWeb(maindir + job.getJobID() + ".tar.gz");
250                                         cw.ArchiveData(job, arlink);
251                                 } else {
252                                         cw.ArchiveData(job, "undefined");
253                                 }
254                         }
255                         return 1;
256                 }
257
258                 return 0;
259         }
260
261         private void ParsingOneDay(String input, String date) {
262                 int totalcount = 0;
263                 int countinsertions = 0;
264                 int countinserted = 0;
265                 int countNotanalyzed = 0;
266                 countNoData = 0;
267
268                 System.out.println("Inserting jobs for " + date);
269                 try {
270                         URL url = new URL(input);
271                         URLConnection conn = url.openConnection();
272                         BufferedReader alljobs = new BufferedReader(new InputStreamReader(conn.getInputStream()));
273                         String line;
274
275                         while ((line = alljobs.readLine()) != null) {
276                                 if (line.matches(date + ":(.*)jp_[^\\s]+")) {
277                                         totalcount++;
278                                         String[] job = line.split("\\s+");
279                                         String jobid = job[job.length - 1];
280                                         if (cw.JobisNotInsterted(jobid)) {
281                                                 countinsertions += analyseJob(job);
282                                         } else {
283                                                 ++countinserted;
284                                         }
285                                 } else {
286                                         ++countNotanalyzed;
287                                 }
288                         }
289                         alljobs.close();
290                         System.out.println("Total number of jobs = " + totalcount);
291                         System.out.println("   " + countinserted + " jobs inserted already");
292                         System.out.println("   " + countNotanalyzed + " not analysed jobs");
293                         System.out.println("   " + countNoData + " jobs without *.concise.fasta file (RUNNING or FAILED)");
294                         System.out.println("   " + countinsertions + " new job insertions\n");
295                 } catch (MalformedURLException e) {
296                         e.printStackTrace();
297                 } catch (IOException e) {
298                         e.printStackTrace();
299                 }
300                 ;
301         }
302 };