X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=datadb%2Fcompbio%2Fcassandra%2FJpredParserHTTP.java;h=5687a83ddd425e39d753f5571ecd53d0f1a31a88;hb=c4aca39f2d4a49b34e7e0b564450d100f442f3bf;hp=d03ac7999384cc0fec99f299625d266602a7fc1c;hpb=3887e427255a9ed5e9e43f4a52a377935331e5d3;p=proteocache.git diff --git a/datadb/compbio/cassandra/JpredParserHTTP.java b/datadb/compbio/cassandra/JpredParserHTTP.java index d03ac79..5687a83 100644 --- a/datadb/compbio/cassandra/JpredParserHTTP.java +++ b/datadb/compbio/cassandra/JpredParserHTTP.java @@ -1,8 +1,11 @@ package compbio.cassandra; import java.io.BufferedReader; +import java.io.DataInputStream; +import java.io.EOFException; import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InputStream; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.MalformedURLException; @@ -18,14 +21,17 @@ import java.util.List; import compbio.cassandra.JpredParser; public class JpredParserHTTP implements JpredParser { - private CassandraNativeConnector cc = new CassandraNativeConnector(); + private CassandraWriter cw = new CassandraWriter(); private String dirprefix; + private List alignment; + private List predictions; + private String jnetpred; - JpredParserHTTP() { + public JpredParserHTTP() { dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results"; } - JpredParserHTTP(String sourceurl) { + public JpredParserHTTP(String sourceurl) { dirprefix = sourceurl; } @@ -46,7 +52,55 @@ public class JpredParserHTTP implements JpredParser { } } - private int ParsingForDate(String input, String date) { + private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException { + final FastaReader fr = new FastaReader(stream); + String query = ""; + alignment = new ArrayList(); + predictions = new ArrayList(); + while (fr.hasNext()) { + final FastaSequence fs = fr.next(); + String seqid = fs.getId(); + String seq = fs.getSequence().replaceAll("\n", ""); + if (seqid.equals("QUERY") || seqid.equals(jobid)) { + query = seq; + alignment.add(fs); + } else if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28") + || seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF") + || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM")) { + predictions.add(fs); + if (seqid.equals("jnetpred")) + jnetpred = seq; + } else { + alignment.add(fs); + } + } + return query; + } + + private String parseLogFile(final InputStream stream) throws IOException { + String out = ""; + BufferedReader buffer = new BufferedReader(new InputStreamReader(stream)); + String line; + while (null != (line = buffer.readLine())) { + out += line; + } + return out; + } + + private List parseArchiveFile(final InputStream stream) throws IOException { + DataInputStream data_in = new DataInputStream(stream); + List out = new ArrayList(); + while (true) { + try { + out.add(data_in.readByte()); + } catch (EOFException eof) { + break; + } + } + return out; + } + + private void ParsingForDate(String input, String date) { int totalcount = 0; int countNoData = 0; int countUnclearFASTAid = 0; @@ -54,7 +108,6 @@ public class JpredParserHTTP implements JpredParser { int countinserted = 0; int counAlignments = 0; int countStrange = 0; - int njobs = 0; System.out.println("Inserting jobs for " + date); try { @@ -64,51 +117,52 @@ public class JpredParserHTTP implements JpredParser { String line; while ((line = alljobs.readLine()) != null) { - if (line.matches(date + "(.*)jp_[^\\s]+")) { + if (line.matches(date + ":(.*)jp_[^\\s]+")) { String[] table = line.split("\\s+"); // Format of a record: // starttime endtime ip email jobid (directory) - // 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172 unknown_email jp_J9HBCBT + // 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172 + // unknown_email jp_J9HBCBT String id = table[table.length - 1]; totalcount++; - if (!cc.CheckID(id)) { - String datalink = dirprefix + "/" + id + "/" + id + ".concise.fasta"; - URL urltable = new URL(datalink); - HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection(); - int responsecode = httpConnection.getResponseCode(); - if (199 < responsecode && responsecode < 300) { + if (cw.JobisNotInsterted(id)) { + URL dataurl = new URL(dirprefix + "/" + id + "/" + id + ".concise.fasta"); + URL archiveurl = new URL(dirprefix + "/" + id + "/" + id + ".tar.gz"); + URL logurl = new URL(dirprefix + "/" + id + "/LOG"); + HttpURLConnection httpConnection1 = (HttpURLConnection) dataurl.openConnection(); + HttpURLConnection httpConnection2 = (HttpURLConnection) logurl.openConnection(); + HttpURLConnection httpConnection3 = (HttpURLConnection) archiveurl.openConnection(); + int response1 = httpConnection1.getResponseCode(); + int response2 = httpConnection2.getResponseCode(); + if (199 < response1 && response1 < 300) { try { - final FastaReader fr = new FastaReader(urltable.openStream()); - final List seqs = new ArrayList(); - String newprotein = ""; - while (fr.hasNext()) { - final FastaSequence fs = fr.next(); - if (fs.getId().equals("QUERY") || fs.getId().equals(id)) - newprotein = fs.getSequence().replaceAll("\n", ""); - else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) { - seqs.add(fs); - } - } - if (newprotein.equals("")) { + String protein = parsePredictions(dataurl.openStream(), id); + if (protein.equals("")) { countUnclearFASTAid++; } else { - SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); - String dateInString1 = table[0].substring(0, table[0].indexOf(":")); - long dateWork1 = 0; + SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd"); + SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s"); + String startdatestring = table[0].substring(0, table[0].indexOf(":")); try { - Date dat1 = formatter.parse(dateInString1); - dateWork1 = dat1.getTime(); + Date startdate = dateformatter.parse(startdatestring); + Date starttime = timeformatter.parse(table[0]); + Date endtime = timeformatter.parse(table[1]); + String ip = table[2]; + String execstatus = "OK"; + String finalstatus = "OK"; + countinsertions += cw.FormQueryTables(startdate.getTime(), table[0], table[1], ip, id, execstatus, + finalstatus, protein, predictions); + + long exectime = (endtime.getTime() - starttime.getTime()) / 1000; + String log = ""; + if (199 < response2 && response2 < 300) { + log = parseLogFile(logurl.openStream()); + } + cw.ArchiveData(startdate.getTime(), exectime, ip, id, execstatus, finalstatus, protein, + predictions, alignment, log, archiveurl.toString()); } catch (ParseException e) { e.printStackTrace(); } - cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs); - ++countinsertions; - ++njobs; - // flush every 50 insertions - //if (0 == countinsertions % 50) { - // cc.flushData(); - // njobs -= 50; - //} } } catch (IOException e) { e.printStackTrace(); @@ -116,6 +170,9 @@ public class JpredParserHTTP implements JpredParser { } else { countNoData++; } + httpConnection1.disconnect(); + httpConnection2.disconnect(); + httpConnection3.disconnect(); } else { ++countinserted; } @@ -140,6 +197,6 @@ public class JpredParserHTTP implements JpredParser { } catch (IOException e) { e.printStackTrace(); } - return njobs; + ; } }