Merge branch 'servlets'
[proteocache.git] / datadb / compbio / cassandra / JpredParserHTTP.java
index e308a25..5687a83 100644 (file)
@@ -1,7 +1,11 @@
 package compbio.cassandra;
 
 import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.EOFException;
+import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.net.HttpURLConnection;
 import java.net.MalformedURLException;
@@ -17,14 +21,17 @@ import java.util.List;
 import compbio.cassandra.JpredParser;
 
 public class JpredParserHTTP implements JpredParser {
-       private CassandraCreate cc = new CassandraCreate();
+       private CassandraWriter cw = new CassandraWriter();
        private String dirprefix;
+       private List<FastaSequence> alignment;
+       private List<FastaSequence> predictions;
+       private String jnetpred;
 
-       JpredParserHTTP() {
+       public JpredParserHTTP() {
                dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
        }
 
-       JpredParserHTTP(String sourceurl) {
+       public JpredParserHTTP(String sourceurl) {
                dirprefix = sourceurl;
        }
 
@@ -32,7 +39,7 @@ public class JpredParserHTTP implements JpredParser {
                dirprefix = newsourceprefix;
        }
 
-       public void Parsing(String source, int nDays) {
+       public void Parsing(String source, int nDays) throws IOException {
                Calendar cal = Calendar.getInstance();
                cal.add(Calendar.DATE, -nDays);
                for (int i = 0; i < nDays; ++i) {
@@ -45,6 +52,54 @@ public class JpredParserHTTP implements JpredParser {
                }
        }
 
+       private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException {
+               final FastaReader fr = new FastaReader(stream);
+               String query = "";
+               alignment = new ArrayList<FastaSequence>();
+               predictions = new ArrayList<FastaSequence>();
+               while (fr.hasNext()) {
+                       final FastaSequence fs = fr.next();
+                       String seqid = fs.getId();
+                       String seq = fs.getSequence().replaceAll("\n", "");
+                       if (seqid.equals("QUERY") || seqid.equals(jobid)) {
+                               query = seq;
+                               alignment.add(fs);
+                       } else if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28")
+                                       || seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF")
+                                       || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM")) {
+                               predictions.add(fs);
+                               if (seqid.equals("jnetpred"))
+                                       jnetpred = seq;
+                       } else {
+                               alignment.add(fs);
+                       }
+               }
+               return query;
+       }
+
+       private String parseLogFile(final InputStream stream) throws IOException {
+               String out = "";
+               BufferedReader buffer = new BufferedReader(new InputStreamReader(stream));
+               String line;
+               while (null != (line = buffer.readLine())) {
+                       out += line;
+               }
+               return out;
+       }
+
+       private List<Byte> parseArchiveFile(final InputStream stream) throws IOException {
+               DataInputStream data_in = new DataInputStream(stream);
+               List<Byte> out = new ArrayList<Byte>();
+               while (true) {
+                       try {
+                               out.add(data_in.readByte());
+                       } catch (EOFException eof) {
+                               break;
+                       }
+               }
+               return out;
+       }
+
        private void ParsingForDate(String input, String date) {
                int totalcount = 0;
                int countNoData = 0;
@@ -62,48 +117,52 @@ public class JpredParserHTTP implements JpredParser {
                        String line;
 
                        while ((line = alljobs.readLine()) != null) {
-                               if (line.matches(date + "(.*)jp_[^\\s]+")) {
+                               if (line.matches(date + ":(.*)jp_[^\\s]+")) {
                                        String[] table = line.split("\\s+");
                                        // Format of a record:
                                        // starttime endtime ip email jobid (directory)
-                                       // 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172 unknown_email jp_J9HBCBT
+                                       // 2013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172
+                                       // unknown_email jp_J9HBCBT
                                        String id = table[table.length - 1];
                                        totalcount++;
-                                       if (!cc.CheckID(id)) {
-                                               String datalink = dirprefix + "/" + id + "/" + id + ".concise.fasta";
-                                               URL urltable = new URL(datalink);
-                                               HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection();
-                                               int responsecode = httpConnection.getResponseCode();
-                                               if (199 < responsecode && responsecode < 300) {
+                                       if (cw.JobisNotInsterted(id)) {
+                                               URL dataurl = new URL(dirprefix + "/" + id + "/" + id + ".concise.fasta");
+                                               URL archiveurl = new URL(dirprefix + "/" + id + "/" + id + ".tar.gz");
+                                               URL logurl = new URL(dirprefix + "/" + id + "/LOG");
+                                               HttpURLConnection httpConnection1 = (HttpURLConnection) dataurl.openConnection();
+                                               HttpURLConnection httpConnection2 = (HttpURLConnection) logurl.openConnection();
+                                               HttpURLConnection httpConnection3 = (HttpURLConnection) archiveurl.openConnection();
+                                               int response1 = httpConnection1.getResponseCode();
+                                               int response2 = httpConnection2.getResponseCode();
+                                               if (199 < response1 && response1 < 300) {
                                                        try {
-                                                               final FastaReader fr = new FastaReader(urltable.openStream());
-                                                               final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
-                                                               String newprotein = "";
-                                                               while (fr.hasNext()) {
-                                                                       final FastaSequence fs = fr.next();
-                                                                       if (fs.getId().equals("QUERY") || fs.getId().equals(id))
-                                                                               newprotein = fs.getSequence().replaceAll("\n", "");
-                                                                       else
-                                                                               seqs.add(fs);
-                                                               }
-                                                               if (newprotein.equals("")) {
+                                                               String protein = parsePredictions(dataurl.openStream(), id);
+                                                               if (protein.equals("")) {
                                                                        countUnclearFASTAid++;
                                                                } else {
-                                                                       SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
-                                                                       String dateInString1 = table[0].substring(0, table[0].indexOf(":"));
-                                                                       long dateWork1 = 0;
+                                                                       SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
+                                                                       SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
+                                                                       String startdatestring = table[0].substring(0, table[0].indexOf(":"));
                                                                        try {
-                                                                               Date dat1 = formatter.parse(dateInString1);
-                                                                               dateWork1 = dat1.getTime();
+                                                                               Date startdate = dateformatter.parse(startdatestring);
+                                                                               Date starttime = timeformatter.parse(table[0]);
+                                                                               Date endtime = timeformatter.parse(table[1]);
+                                                                               String ip = table[2];
+                                                                               String execstatus = "OK";
+                                                                               String finalstatus = "OK";
+                                                                               countinsertions += cw.FormQueryTables(startdate.getTime(), table[0], table[1], ip, id, execstatus,
+                                                                                               finalstatus, protein, predictions);
+
+                                                                               long exectime = (endtime.getTime() - starttime.getTime()) / 1000;
+                                                                               String log = "";
+                                                                               if (199 < response2 && response2 < 300) {
+                                                                                       log = parseLogFile(logurl.openStream());
+                                                                               }
+                                                                               cw.ArchiveData(startdate.getTime(), exectime, ip, id, execstatus, finalstatus, protein,
+                                                                                               predictions, alignment, log, archiveurl.toString());
                                                                        } catch (ParseException e) {
                                                                                e.printStackTrace();
                                                                        }
-                                                                       cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs);
-                                                                       ++countinsertions;
-                                                                       // flush every 100 insertions
-                                                                       if (0 == countinsertions % 100) {
-                                                                               cc.flushData();
-                                                                       }
                                                                }
                                                        } catch (IOException e) {
                                                                e.printStackTrace();
@@ -111,6 +170,9 @@ public class JpredParserHTTP implements JpredParser {
                                                } else {
                                                        countNoData++;
                                                }
+                                               httpConnection1.disconnect();
+                                               httpConnection2.disconnect();
+                                               httpConnection3.disconnect();
                                        } else {
                                                ++countinserted;
                                        }
@@ -135,5 +197,6 @@ public class JpredParserHTTP implements JpredParser {
                } catch (IOException e) {
                        e.printStackTrace();
                }
+               ;
        }
 }