1 package compbio.cassandra;
3 import java.io.BufferedReader;
4 import java.io.DataInputStream;
5 import java.io.EOFException;
6 import java.io.FileNotFoundException;
7 import java.io.IOException;
8 import java.io.InputStream;
9 import java.io.InputStreamReader;
10 import java.net.HttpURLConnection;
11 import java.net.MalformedURLException;
13 import java.net.URLConnection;
14 import java.text.ParseException;
15 import java.text.SimpleDateFormat;
16 import java.util.ArrayList;
17 import java.util.Calendar;
18 import java.util.Date;
19 import java.util.List;
21 import compbio.cassandra.JpredParser;
// JpredParser implementation that reads a job list and per-job result files over HTTP and
// hands the parsed data to a CassandraNativeConnector.
// NOTE(review): every line of this listing is prefixed with its original line number, and some
// original lines (closing braces, blank lines, a few statements) are absent; comments below
// describe only what the visible lines show.
23 public class JpredParserHTTP implements JpredParser {
// Connector the visible code calls JobisNotInsterted, FormQueryTables and ArchiveData on.
24 private CassandraNativeConnector cc = new CassandraNativeConnector();
// URL prefix of the results area; per-job URLs are built as dirprefix + "/" + jobid + "/" + file.
25 private String dirprefix;
// Both lists are replaced with new empty lists at the start of every parsePredictions call and
// are later passed to the connector by ParsingForDate.
26 private List<FastaSequence> alignment;
27 private List<FastaSequence> predictions;
// NOTE(review): no visible line reads or assigns this field; presumably it holds the sequence
// of the "jnetpred" record — confirm against the full file.
28 private String jnetpred;
// NOTE(review): this assignment is not enclosed by any visible declaration; it is presumably
// the body of a no-argument constructor whose header line is missing from this listing — confirm.
// Points dirprefix at the hard-coded results URL below.
31 dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
/**
 * Creates a parser that builds per-job URLs from the given prefix.
 * The constructor has no access modifier, so it is package-private.
 *
 * @param sourceurl value stored in dirprefix as-is; it is not validated here
 */
34 JpredParserHTTP(String sourceurl) {
35 dirprefix = sourceurl;
/**
 * Replaces the URL prefix used to build per-job URLs.
 *
 * @param newsourceprefix new value for dirprefix; stored as-is without validation
 */
38 public void setSource(String newsourceprefix) {
39 dirprefix = newsourceprefix;
/**
 * Calls ParsingForDate once per calendar day for the last nDays days, the last one being today.
 *
 * @param source passed unchanged to ParsingForDate on every iteration
 * @param nDays number of days to process; the loop does not run when nDays is zero or negative
 * @throws IOException declared in the signature; no visible statement in this method throws it
 *         directly and the visible ParsingForDate declaration has no throws clause
 */
42 public void Parsing(String source, int nDays) throws IOException {
// Step back nDays from now; each iteration advances one day BEFORE using the date, so the
// days visited run from (today - nDays + 1) through today.
43 Calendar cal = Calendar.getInstance();
44 cal.add(Calendar.DATE, -nDays);
45 for (int i = 0; i < nDays; ++i) {
46 cal.add(Calendar.DATE, 1);
// Calendar.MONTH is zero-based, hence the + 1.
47 int month = cal.get(Calendar.MONTH) + 1;
48 int year = cal.get(Calendar.YEAR);
49 int day = cal.get(Calendar.DATE);
// NOTE(review): month and day are not zero-padded (e.g. "2013/1/5"). ParsingForDate uses this
// string as the start of a regex matched against whole log lines, so it only finds records
// if the job list writes dates the same way — confirm against real log data.
50 String date = year + "/" + month + "/" + day;
51 ParsingForDate(source, date);
/**
 * Reads FASTA records from the stream and classifies each record by its id.
 * The alignment and predictions fields are replaced with new empty lists before reading.
 *
 * NOTE(review): the statements inside the branches and the return statement are missing from
 * this listing. The caller stores the result in a variable named "protein" and counts an empty
 * string as an "unclear FASTA protein id", so the method presumably returns the query
 * sequence — confirm against the full file.
 *
 * @param stream source of the FASTA data; no close of it is visible in this listing
 * @param jobid job identifier; a record whose id equals it takes the same branch as "QUERY"
 * @return see the note above
 * @throws FileNotFoundException declared in the signature
 */
55 private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException {
56 final FastaReader fr = new FastaReader(stream);
58 alignment = new ArrayList<FastaSequence>();
59 predictions = new ArrayList<FastaSequence>();
60 while (fr.hasNext()) {
61 final FastaSequence fs = fr.next();
62 String seqid = fs.getId();
// Sequence text with embedded line breaks removed.
63 String seq = fs.getSequence().replaceAll("\n", "");
// Branch 1: the record is the query itself (id "QUERY" or the job id).
64 if (seqid.equals("QUERY") || seqid.equals(jobid)) {
// Branch 2: the record id is one of the listed prediction/annotation names.
67 } else if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28")
68 || seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF")
69 || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM")) {
// The "jnetpred" record gets extra handling; the statement it guards is not visible.
71 if (seqid.equals("jnetpred"))
/**
 * Reads the stream line by line until end of input.
 *
 * NOTE(review): the declaration of "line", the loop body and the return statement are missing
 * from this listing; the caller assigns the result to a variable named "log", so the method
 * presumably returns the accumulated text — confirm.
 *
 * @param stream source of the log text; no close of it is visible in this listing
 * @return see the note above
 * @throws IOException if reading a line fails
 */
80 private String parseLogFile(final InputStream stream) throws IOException {
// NOTE(review): InputStreamReader without a charset argument decodes with the platform
// default charset, so the result can differ between machines.
82 BufferedReader buffer = new BufferedReader(new InputStreamReader(stream));
84 while (null != (line = buffer.readLine())) {
/**
 * Reads bytes from the stream into a list of boxed Byte values.
 *
 * NOTE(review): the try/loop lines around the read and the return statement are missing from
 * this listing; the EOFException handler suggests the read repeats until end of stream —
 * confirm. No caller of this method is visible in this listing.
 *
 * @param stream source of the archive bytes
 * @return presumably the list of bytes read — confirm
 * @throws IOException declared in the signature
 */
90 private List<Byte> parseArchiveFile(final InputStream stream) throws IOException {
91 DataInputStream data_in = new DataInputStream(stream);
92 List<Byte> out = new ArrayList<Byte>();
// Each byte is read individually and autoboxed into a Byte before being appended.
95 out.add(data_in.readByte());
// DataInputStream.readByte signals end of stream by throwing EOFException.
96 } catch (EOFException eof) {
/**
 * Downloads the job list from the given URL, selects the records of one day and, for each job
 * the connector reports as not yet inserted, fetches the job's result files and passes the
 * parsed data to the connector. Prints a summary of counters at the end.
 *
 * NOTE(review): this method continues past the last line of the listing, and several interior
 * lines are missing (among them the declarations of totalcount, countNoData, line and log, the
 * bodies of the catch blocks and every closing brace). Comments describe visible lines only.
 *
 * @param input URL of the job list, opened with URL.openConnection()
 * @param date day to process; concatenated into the regular expressions used to select lines
 */
103 private void ParsingForDate(String input, String date) {
106 int countUnclearFASTAid = 0;
107 int countinsertions = 0;
108 int countinserted = 0;
109 int counAlignments = 0;
110 int countStrange = 0;
112 System.out.println("Inserting jobs for " + date);
// NOTE(review): java.net.URL is used here but its import is not among the visible imports
// (original line 12 is absent from the listing) — confirm it exists in the full file.
114 URL url = new URL(input);
115 URLConnection conn = url.openConnection();
// NOTE(review): no close() of this reader is visible in the listing; the default-charset
// InputStreamReader constructor is used.
116 BufferedReader alljobs = new BufferedReader(new InputStreamReader(conn.getInputStream()));
119 while ((line = alljobs.readLine()) != null) {
// String.matches tests the WHOLE line: it must start with date + ":" and end with "jp_"
// followed by non-whitespace characters. The pattern is recompiled for every line.
120 if (line.matches(date + ":(.*)jp_[^\\s]+")) {
121 String[] table = line.split("\\s+");
122 // Format of a record:
123 // starttime endtime ip email jobid (directory)
124 // 2013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172
125 // unknown_email jp_J9HBCBT
// The job id is the last whitespace-separated field of the record.
126 String id = table[table.length - 1];
// Only jobs the connector reports as not inserted are fetched.
128 if (cc.JobisNotInsterted(id)) {
129 URL dataurl = new URL(dirprefix + "/" + id + "/" + id + ".concise.fasta");
130 URL archiveurl = new URL(dirprefix + "/" + id + "/" + id + ".tar.gz");
131 URL logurl = new URL(dirprefix + "/" + id + "/LOG");
132 HttpURLConnection httpConnection1 = (HttpURLConnection) dataurl.openConnection();
133 HttpURLConnection httpConnection2 = (HttpURLConnection) logurl.openConnection();
// NOTE(review): in the visible lines httpConnection3 is only disconnected, never queried.
134 HttpURLConnection httpConnection3 = (HttpURLConnection) archiveurl.openConnection();
135 int response1 = httpConnection1.getResponseCode();
136 int response2 = httpConnection2.getResponseCode();
// Proceed only when the *.concise.fasta request returned a 2xx status.
137 if (199 < response1 && response1 < 300) {
// NOTE(review): URL.openStream() opens a new connection, so the file is requested a second
// time instead of being read from httpConnection1; no close of the stream is visible.
139 String protein = parsePredictions(dataurl.openStream(), id);
// An empty result is counted as an unclear FASTA protein id.
140 if (protein.equals("")) {
141 countUnclearFASTAid++;
// NOTE(review): both formatters are created anew for every job.
143 SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
144 SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
// Date part of the start time: everything before the first ':' of the first field.
145 String startdatestring = table[0].substring(0, table[0].indexOf(":"));
147 Date startdate = dateformatter.parse(startdatestring);
148 Date starttime = timeformatter.parse(table[0]);
149 Date endtime = timeformatter.parse(table[1]);
150 String ip = table[2];
151 String execstatus = "OK";
152 String finalstatus = "OK";
// The connector's return value is added to the count of new insertions.
153 countinsertions += cc.FormQueryTables(startdate.getTime(), table[0], table[1], ip, id, execstatus,
154 finalstatus, protein, predictions);
// Execution time in whole seconds (millisecond difference divided by 1000).
156 long exectime = (endtime.getTime() - starttime.getTime()) / 1000;
// The LOG file is parsed only when its request returned a 2xx status.
158 if (199 < response2 && response2 < 300) {
159 log = parseLogFile(logurl.openStream());
161 cc.ArchiveData(startdate.getTime(), exectime, ip, id, execstatus, finalstatus, protein,
162 predictions, alignment, log, archiveurl.toString());
// NOTE(review): the bodies of both handlers below are missing from this listing.
163 } catch (ParseException e) {
167 } catch (IOException e) {
173 httpConnection1.disconnect();
174 httpConnection2.disconnect();
175 httpConnection3.disconnect();
// Second record type: lines for this date containing "Sequence0/". The guarded statements are
// not visible; presumably these are the jobs counted in counAlignments — confirm.
180 if (line.matches(date + "(.*)Sequence0/(.*)")) {
// Summary of the counters for this date.
188 System.out.println("Total number of jobs = " + totalcount);
189 System.out.println(" " + countinserted + " jobs inserted already");
190 System.out.println(" " + counAlignments + " jalview jobs");
191 System.out.println(" " + countStrange + " not analysed jobs");
192 System.out.println(" " + countNoData + " jobs without *.concise.fasta file");
193 System.out.println(" " + countUnclearFASTAid + " jobs with unclear FASTA protein id in *.concise.fasta");
194 System.out.println(" " + countinsertions + " new job insertions\n");
// NOTE(review): the bodies of the outer handlers are not visible in this listing.
195 } catch (MalformedURLException e) {
197 } catch (IOException e) {