Merge branch 'servlets'
[proteocache.git] / datadb / compbio / cassandra / JpredParserHTTP.java
1 package compbio.cassandra;
2
3 import java.io.BufferedReader;
4 import java.io.FileNotFoundException;
5 import java.io.IOException;
6 import java.io.InputStream;
7 import java.io.InputStreamReader;
8 import java.net.HttpURLConnection;
9 import java.net.MalformedURLException;
10 import java.net.URL;
11 import java.net.URLConnection;
12 import java.util.ArrayList;
13 import java.util.Calendar;
14 import java.util.Date;
15 import java.util.List;
16
17 import compbio.cassandra.JpredParser;
18 import compbio.data.sequence.FastaReader;
19 import compbio.data.sequence.FastaSequence;
20 import compbio.engine.JpredJob;
21
22 public class JpredParserHTTP implements JpredParser {
23         private CassandraWriter cw = new CassandraWriter();
24         private String dirprefix;
25         private List<FastaSequence> alignment;
26         private List<FastaSequence> predictions;
27         private int countNoData;
28
29         public JpredParserHTTP() {
30                 dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
31         }
32
33         public JpredParserHTTP(String sourceurl) {
34                 dirprefix = sourceurl;
35         }
36
37         public void setSource(String newsourceprefix) {
38                 dirprefix = newsourceprefix;
39         }
40
41         public void Parsing(String source, int nDays) throws IOException {
42                 Calendar cal = Calendar.getInstance();
43                 cal.add(Calendar.DATE, -nDays);
44                 for (int i = 0; i < nDays; ++i) {
45                         cal.add(Calendar.DATE, 1);
46                         String date = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DATE);
47                         ParsingForDate(source, date);
48                 }
49         }
50
51         /*
52          * The method parses the Jpred output concise file in the FASTA format If
53          * there is a record with ID = QUERY or jobid, this a "one protein" job
54          * otherwise this is an alignment job
55          */
56         private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException {
57                 final FastaReader fr = new FastaReader(stream);
58                 String protein = "";
59 //              alignment = new ArrayList<FastaSequence>();
60 //              predictions = new ArrayList<FastaSequence>();
61                 while (fr.hasNext()) {
62                         final FastaSequence fs = fr.next();
63                         String seqid = fs.getId();
64                         String seq = fs.getSequence().replaceAll("\n", "");
65                         if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28")
66                                         || seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF")
67                                         || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM") || seqid.equals("JNETCONF")) {
68                                 predictions.add(fs);
69                         } else {
70                                 alignment.add(fs);
71                                 if (seqid.equals("QUERY") || seqid.equals(jobid))
72                                         protein = seq;
73                         }
74                 }
75                 return protein;
76         }
77
78         private String parseLogFile(final InputStream stream) throws IOException {
79                 String out = "";
80                 BufferedReader buffer = new BufferedReader(new InputStreamReader(stream));
81                 String line;
82                 while (null != (line = buffer.readLine())) {
83                         out += line;
84                 }
85                 return out;
86         }
87
88         private int analyseJob(String[] jobinfo) throws IOException {
89                 alignment = new ArrayList<FastaSequence>();
90                 predictions = new ArrayList<FastaSequence>();
91                 boolean running = true;
92                 boolean ConcisefileExists = false;
93                 boolean LogfileExists = false;
94                 JpredJob job = new JpredJob (jobinfo[jobinfo.length - 1], jobinfo[0], jobinfo[1]);
95                 job.setIP(jobinfo[2]);
96                 Date currDate = new Date();
97                 String maindir = dirprefix + "/" + job.getJobID() + "/";
98
99                 //System.out.println("analyzing job " + job.getJobID());
100                 try {
101                         URL dirurl = new URL(maindir);
102                         HttpURLConnection httpConnection_dirurl = (HttpURLConnection) dirurl.openConnection();
103                         if (httpConnection_dirurl.getResponseCode() < 199 || 300 <= httpConnection_dirurl.getResponseCode()) {
104                                 return 0;
105                         }
106                         URL conciseurl = new URL(maindir + job.getJobID() + ".concise.fasta");
107                         URL archiveurl = new URL(maindir + job.getJobID() + ".tar.gz");
108                         URL logurl = new URL(maindir + "LOG");
109                         HttpURLConnection httpConnection_conciseurl = (HttpURLConnection) conciseurl.openConnection();
110                         HttpURLConnection httpConnection_logurl = (HttpURLConnection) logurl.openConnection();
111                         HttpURLConnection httpConnection_archiveurl = (HttpURLConnection) archiveurl.openConnection();
112                         if (199 < httpConnection_conciseurl.getResponseCode() && httpConnection_conciseurl.getResponseCode() < 300) {
113                                 ConcisefileExists = true;
114                                 running = false;
115                                 try {
116                                         job.setProtein(parsePredictions(conciseurl.openStream(), job.getJobID()));
117                                 } catch (IOException e) {
118                                         e.printStackTrace();
119                                 }
120                         } else {
121                                 // The job still can be running of failed...
122                                 ++countNoData;
123                         }
124                         if (199 < httpConnection_logurl.getResponseCode() && httpConnection_logurl.getResponseCode() < 300) {
125                                 LogfileExists = true;
126                                 job.setLog(parseLogFile(logurl.openStream()));
127                         } else {
128                                 // The job has not been started at all...
129                                 job.setExecutionStatus("FAIL");
130                                 job.setFinalStatus("STOPPED");
131                                 running = false;
132                         }
133                         if (job.getLog().matches("(.*)TIMEOUT\\syour\\sjob\\stimed\\sout(.*)")) {
134                                 // blast job was too long (more than 3600 secs by default)...
135                                 job.setExecutionStatus("FAIL");
136                                 job.setFinalStatus("TIMEDOUT");
137                                 running = false;
138                         } else if (job.getLog().matches("(.*)Jpred\\serror:\\sDied\\sat(.*)")) {
139                                 // an internal Jpred error...
140                                 job.setExecutionStatus("FAIL");
141                                 job.setFinalStatus("JPREDERROR");
142                                 running = false;
143                         } else if ((currDate.getTime() - job.getEndTime()) / 1000 > 3601 && LogfileExists && !ConcisefileExists) {
144                                 // the job was stopped with unknown reason...
145                                 job.setExecutionStatus("FAIL");
146                                 job.setFinalStatus("STOPPED");
147                                 running = false;
148                         }
149
150                         httpConnection_conciseurl.disconnect();
151                         httpConnection_logurl.disconnect();
152                         httpConnection_archiveurl.disconnect();
153                 } catch (MalformedURLException e) {
154                         e.printStackTrace();
155                 }
156
157                 if (!running) {
158                         job.setAlignment(alignment);
159                         job.setPredictions(predictions);
160                         cw.FormQueryTables(job);
161                         cw.ArchiveData(job, "undefined");
162                         return 1;
163                 }
164
165                 return 0;
166         }
167
168         private void ParsingForDate(String input, String date) {
169                 int totalcount = 0;
170                 int countinsertions = 0;
171                 int countinserted = 0;
172                 int countNotanalyzed = 0;
173                 countNoData = 0;
174
175                 System.out.println("Inserting jobs for " + date);
176                 try {
177                         URL url = new URL(input);
178                         URLConnection conn = url.openConnection();
179                         BufferedReader alljobs = new BufferedReader(new InputStreamReader(conn.getInputStream()));
180                         String line;
181
182                         while ((line = alljobs.readLine()) != null) {
183                                 if (line.matches(date + ":(.*)jp_[^\\s]+")) {
184                                         totalcount++;
185                                         String[] job = line.split("\\s+");
186                                         String jobid = job[job.length - 1];
187                                         if (cw.JobisNotInsterted(jobid)) {
188                                                 countinsertions += analyseJob(job);
189                                         } else {
190                                                 ++countinserted;
191                                         }
192                                 } else {
193                                         ++countNotanalyzed;
194                                 }
195                         }
196                         alljobs.close();
197                         System.out.println("Total number of jobs = " + totalcount);
198                         System.out.println("   " + countinserted + " jobs inserted already");
199                         System.out.println("   " + countNotanalyzed + " not analysed jobs");
200                         System.out.println("   " + countNoData + " jobs without *.concise.fasta file (RUNNING or FAILED)");
201                         System.out.println("   " + countinsertions + " new job insertions\n");
202                 } catch (MalformedURLException e) {
203                         e.printStackTrace();
204                 } catch (IOException e) {
205                         e.printStackTrace();
206                 }
207                 ;
208         }
209 }