X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=datadb%2Fcompbio%2Fcassandra%2FJpredParserHTTP.java;h=5687a83ddd425e39d753f5571ecd53d0f1a31a88;hb=d4eea7f5a8fea637ab742ce69c84a0f835266567;hp=361681855f54f482993241b7bee3bed12ccb13fe;hpb=6a81b75c020845f9bb94c307a66347e4362da85f;p=proteocache.git diff --git a/datadb/compbio/cassandra/JpredParserHTTP.java b/datadb/compbio/cassandra/JpredParserHTTP.java index 3616818..5687a83 100644 --- a/datadb/compbio/cassandra/JpredParserHTTP.java +++ b/datadb/compbio/cassandra/JpredParserHTTP.java @@ -1,7 +1,11 @@ package compbio.cassandra; import java.io.BufferedReader; +import java.io.DataInputStream; +import java.io.EOFException; +import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InputStream; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.MalformedURLException; @@ -17,22 +21,25 @@ import java.util.List; import compbio.cassandra.JpredParser; public class JpredParserHTTP implements JpredParser { - private CassandraCreate cc = new CassandraCreate(); + private CassandraWriter cw = new CassandraWriter(); private String dirprefix; - - JpredParserHTTP() { - this.dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results"; + private List alignment; + private List predictions; + private String jnetpred; + + public JpredParserHTTP() { + dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results"; } - - JpredParserHTTP(String sourceurl) { - this.dirprefix = sourceurl; + + public JpredParserHTTP(String sourceurl) { + dirprefix = sourceurl; } - public void setSource (String newsourceprefix) { - this.dirprefix = newsourceprefix; + public void setSource(String newsourceprefix) { + dirprefix = newsourceprefix; } - public void Parsing(String source, int nDays) { + public void Parsing(String source, int nDays) throws IOException { Calendar cal = Calendar.getInstance(); cal.add(Calendar.DATE, -nDays); for (int i = 0; i < nDays; ++i) { @@ -45,6 +52,54 @@ public class JpredParserHTTP implements JpredParser { } } + private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException { + final FastaReader fr = new FastaReader(stream); + String query = ""; + alignment = new ArrayList(); + predictions = new ArrayList(); + while (fr.hasNext()) { + final FastaSequence fs = fr.next(); + String seqid = fs.getId(); + String seq = fs.getSequence().replaceAll("\n", ""); + if (seqid.equals("QUERY") || seqid.equals(jobid)) { + query = seq; + alignment.add(fs); + } else if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28") + || seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF") + || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM")) { + predictions.add(fs); + if (seqid.equals("jnetpred")) + jnetpred = seq; + } else { + alignment.add(fs); + } + } + return query; + } + + private String parseLogFile(final InputStream stream) throws IOException { + String out = ""; + BufferedReader buffer = new BufferedReader(new InputStreamReader(stream)); + String line; + while (null != (line = buffer.readLine())) { + out += line; + } + return out; + } + + private List parseArchiveFile(final InputStream stream) throws IOException { + DataInputStream data_in = new DataInputStream(stream); + List out = new ArrayList(); + while (true) { + try { + out.add(data_in.readByte()); + } catch (EOFException eof) { + break; + } + } + return out; + } + private void ParsingForDate(String input, String date) { int totalcount = 0; int countNoData = 0; @@ -62,44 +117,52 @@ public class JpredParserHTTP implements JpredParser { String line; while ((line = alljobs.readLine()) != null) { - if (line.matches(date + "(.*)jp_[^\\s]+")) { + if (line.matches(date + ":(.*)jp_[^\\s]+")) { String[] table = line.split("\\s+"); + // Format of a record: + // starttime endtime ip email jobid (directory) + // 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172 + // unknown_email jp_J9HBCBT String id = table[table.length - 1]; totalcount++; - if (!cc.CheckID(id)) { - URL urltable = new URL(dirprefix + "/" + id + "/" + id + ".concise.fasta"); - HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection(); - int responsecode = httpConnection.getResponseCode(); - if (199 < responsecode && responsecode < 300) { + if (cw.JobisNotInsterted(id)) { + URL dataurl = new URL(dirprefix + "/" + id + "/" + id + ".concise.fasta"); + URL archiveurl = new URL(dirprefix + "/" + id + "/" + id + ".tar.gz"); + URL logurl = new URL(dirprefix + "/" + id + "/LOG"); + HttpURLConnection httpConnection1 = (HttpURLConnection) dataurl.openConnection(); + HttpURLConnection httpConnection2 = (HttpURLConnection) logurl.openConnection(); + HttpURLConnection httpConnection3 = (HttpURLConnection) archiveurl.openConnection(); + int response1 = httpConnection1.getResponseCode(); + int response2 = httpConnection2.getResponseCode(); + if (199 < response1 && response1 < 300) { try { - final FastaReader fr = new FastaReader(urltable.openStream()); - final List seqs = new ArrayList(); - String newprotein = ""; - while (fr.hasNext()) { - final FastaSequence fs = fr.next(); - if (fs.getId().equals("QUERY") || fs.getId().equals(id)) - newprotein = fs.getSequence().replaceAll("\n", ""); - else - seqs.add(fs); - } - if (newprotein.equals("")) { + String protein = parsePredictions(dataurl.openStream(), id); + if (protein.equals("")) { countUnclearFASTAid++; } else { - SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); - String dateInString1 = table[0].substring(0, table[0].indexOf(":")); - long dateWork1 = 0; + SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd"); + SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s"); + String startdatestring = table[0].substring(0, table[0].indexOf(":")); try { - Date dat1 = formatter.parse(dateInString1); - dateWork1 = dat1.getTime(); + Date startdate = dateformatter.parse(startdatestring); + Date starttime = timeformatter.parse(table[0]); + Date endtime = timeformatter.parse(table[1]); + String ip = table[2]; + String execstatus = "OK"; + String finalstatus = "OK"; + countinsertions += cw.FormQueryTables(startdate.getTime(), table[0], table[1], ip, id, execstatus, + finalstatus, protein, predictions); + + long exectime = (endtime.getTime() - starttime.getTime()) / 1000; + String log = ""; + if (199 < response2 && response2 < 300) { + log = parseLogFile(logurl.openStream()); + } + cw.ArchiveData(startdate.getTime(), exectime, ip, id, execstatus, finalstatus, protein, + predictions, alignment, log, archiveurl.toString()); } catch (ParseException e) { e.printStackTrace(); } - cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs); - ++countinsertions; - // flush every 100 insertions - if (0 == countinsertions % 100) { - cc.flushData(); - } } } catch (IOException e) { e.printStackTrace(); @@ -107,6 +170,9 @@ public class JpredParserHTTP implements JpredParser { } else { countNoData++; } + httpConnection1.disconnect(); + httpConnection2.disconnect(); + httpConnection3.disconnect(); } else { ++countinserted; } @@ -131,5 +197,6 @@ public class JpredParserHTTP implements JpredParser { } catch (IOException e) { e.printStackTrace(); } + ; } }