package compbio.cassandra; import java.io.BufferedReader; import java.io.DataInputStream; import java.io.EOFException; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; import java.util.Date; import java.util.List; import compbio.cassandra.JpredParser; public class JpredParserHTTP implements JpredParser { private CassandraWriter cw = new CassandraWriter(); private String dirprefix; private List alignment; private List predictions; private String jnetpred; public JpredParserHTTP() { dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results"; } public JpredParserHTTP(String sourceurl) { dirprefix = sourceurl; } public void setSource(String newsourceprefix) { dirprefix = newsourceprefix; } public void Parsing(String source, int nDays) throws IOException { Calendar cal = Calendar.getInstance(); cal.add(Calendar.DATE, -nDays); for (int i = 0; i < nDays; ++i) { cal.add(Calendar.DATE, 1); int month = cal.get(Calendar.MONTH) + 1; int year = cal.get(Calendar.YEAR); int day = cal.get(Calendar.DATE); String date = year + "/" + month + "/" + day; ParsingForDate(source, date); } } private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException { final FastaReader fr = new FastaReader(stream); String query = ""; alignment = new ArrayList(); predictions = new ArrayList(); while (fr.hasNext()) { final FastaSequence fs = fr.next(); String seqid = fs.getId(); String seq = fs.getSequence().replaceAll("\n", ""); if (seqid.equals("QUERY") || seqid.equals(jobid)) { query = seq; alignment.add(fs); } else if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28") || seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF") || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM")) { predictions.add(fs); if (seqid.equals("jnetpred")) jnetpred = seq; } else { alignment.add(fs); } } return query; } private String parseLogFile(final InputStream stream) throws IOException { String out = ""; BufferedReader buffer = new BufferedReader(new InputStreamReader(stream)); String line; while (null != (line = buffer.readLine())) { out += line; } return out; } private List parseArchiveFile(final InputStream stream) throws IOException { DataInputStream data_in = new DataInputStream(stream); List out = new ArrayList(); while (true) { try { out.add(data_in.readByte()); } catch (EOFException eof) { break; } } return out; } private void ParsingForDate(String input, String date) { int totalcount = 0; int countNoData = 0; int countUnclearFASTAid = 0; int countinsertions = 0; int countinserted = 0; int counAlignments = 0; int countStrange = 0; System.out.println("Inserting jobs for " + date); try { URL url = new URL(input); URLConnection conn = url.openConnection(); BufferedReader alljobs = new BufferedReader(new InputStreamReader(conn.getInputStream())); String line; while ((line = alljobs.readLine()) != null) { if (line.matches(date + ":(.*)jp_[^\\s]+")) { String[] table = line.split("\\s+"); // Format of a record: // starttime endtime ip email jobid (directory) // 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172 // unknown_email jp_J9HBCBT String id = table[table.length - 1]; totalcount++; if (cw.JobisNotInsterted(id)) { URL dataurl = new URL(dirprefix + "/" + id + "/" + id + ".concise.fasta"); URL archiveurl = new URL(dirprefix + "/" + id + "/" + id + ".tar.gz"); URL logurl = new URL(dirprefix + "/" + id + "/LOG"); HttpURLConnection httpConnection1 = (HttpURLConnection) dataurl.openConnection(); HttpURLConnection httpConnection2 = (HttpURLConnection) logurl.openConnection(); HttpURLConnection httpConnection3 = (HttpURLConnection) archiveurl.openConnection(); int response1 = httpConnection1.getResponseCode(); int response2 = httpConnection2.getResponseCode(); if (199 < response1 && response1 < 300) { try { String protein = parsePredictions(dataurl.openStream(), id); if (protein.equals("")) { countUnclearFASTAid++; } else { SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd"); SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s"); String startdatestring = table[0].substring(0, table[0].indexOf(":")); try { Date startdate = dateformatter.parse(startdatestring); Date starttime = timeformatter.parse(table[0]); Date endtime = timeformatter.parse(table[1]); String ip = table[2]; String execstatus = "OK"; String finalstatus = "OK"; countinsertions += cw.FormQueryTables(startdate.getTime(), table[0], table[1], ip, id, execstatus, finalstatus, protein, predictions); long exectime = (endtime.getTime() - starttime.getTime()) / 1000; String log = ""; if (199 < response2 && response2 < 300) { log = parseLogFile(logurl.openStream()); } cw.ArchiveData(startdate.getTime(), exectime, ip, id, execstatus, finalstatus, protein, predictions, alignment, log, archiveurl.toString()); } catch (ParseException e) { e.printStackTrace(); } } } catch (IOException e) { e.printStackTrace(); } } else { countNoData++; } httpConnection1.disconnect(); httpConnection2.disconnect(); httpConnection3.disconnect(); } else { ++countinserted; } } else { if (line.matches(date + "(.*)Sequence0/(.*)")) { ++counAlignments; } else { ++countStrange; } } } alljobs.close(); System.out.println("Total number of jobs = " + totalcount); System.out.println(" " + countinserted + " jobs inserted already"); System.out.println(" " + counAlignments + " jalview jobs"); System.out.println(" " + countStrange + " not analysed jobs"); System.out.println(" " + countNoData + " jobs without *.concise.fasta file"); System.out.println(" " + countUnclearFASTAid + " jobs with unclear FASTA protein id in *.concise.fasta"); System.out.println(" " + countinsertions + " new job insertions\n"); } catch (MalformedURLException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } ; } }