package compbio.cassandra;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import compbio.cassandra.JpredParser;
public class JpredParserHTTP implements JpredParser {
- private CassandraNativeConnector cc = new CassandraNativeConnector();
+ private CassandraWriter cw = new CassandraWriter();
private String dirprefix;
+ private List<FastaSequence> alignment;
+ private List<FastaSequence> predictions;
+ private String jnetpred;
- JpredParserHTTP() {
+ public JpredParserHTTP() {
dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
}
- JpredParserHTTP(String sourceurl) {
+ public JpredParserHTTP(String sourceurl) {
dirprefix = sourceurl;
}
}
}
- private int ParsingForDate(String input, String date) {
+ private String parsePredictions(final InputStream stream, String jobid) throws FileNotFoundException {
+ final FastaReader fr = new FastaReader(stream);
+ String query = "";
+ alignment = new ArrayList<FastaSequence>();
+ predictions = new ArrayList<FastaSequence>();
+ while (fr.hasNext()) {
+ final FastaSequence fs = fr.next();
+ String seqid = fs.getId();
+ String seq = fs.getSequence().replaceAll("\n", "");
+ if (seqid.equals("QUERY") || seqid.equals(jobid)) {
+ query = seq;
+ alignment.add(fs);
+ } else if (seqid.equals("jnetpred") || seqid.equals("Lupas_21") || seqid.equals("Lupas_14") || seqid.equals("Lupas_28")
+ || seqid.equals("JNETSOL25") || seqid.equals("JNETSOL5") || seqid.equals("JNETSOL0") || seqid.equals("JNETCONF")
+ || seqid.equals("JNETHMM") || seqid.equals("JNETPSSM")) {
+ predictions.add(fs);
+ if (seqid.equals("jnetpred"))
+ jnetpred = seq;
+ } else {
+ alignment.add(fs);
+ }
+ }
+ return query;
+ }
+
+ private String parseLogFile(final InputStream stream) throws IOException {
+ String out = "";
+ BufferedReader buffer = new BufferedReader(new InputStreamReader(stream));
+ String line;
+ while (null != (line = buffer.readLine())) {
+ out += line;
+ }
+ return out;
+ }
+
+ private List<Byte> parseArchiveFile(final InputStream stream) throws IOException {
+ DataInputStream data_in = new DataInputStream(stream);
+ List<Byte> out = new ArrayList<Byte>();
+ while (true) {
+ try {
+ out.add(data_in.readByte());
+ } catch (EOFException eof) {
+ break;
+ }
+ }
+ return out;
+ }
+
+ private void ParsingForDate(String input, String date) {
int totalcount = 0;
int countNoData = 0;
int countUnclearFASTAid = 0;
int countinserted = 0;
int counAlignments = 0;
int countStrange = 0;
- int njobs = 0;
System.out.println("Inserting jobs for " + date);
try {
String line;
while ((line = alljobs.readLine()) != null) {
- if (line.matches(date + "(.*)jp_[^\\s]+")) {
+ if (line.matches(date + ":(.*)jp_[^\\s]+")) {
String[] table = line.split("\\s+");
// Format of a record:
// starttime endtime ip email jobid (directory)
- // 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172 unknown_email jp_J9HBCBT
+ // 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172
+ // unknown_email jp_J9HBCBT
String id = table[table.length - 1];
totalcount++;
- if (!cc.CheckID(id)) {
- String datalink = dirprefix + "/" + id + "/" + id + ".concise.fasta";
- URL urltable = new URL(datalink);
- HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection();
- int responsecode = httpConnection.getResponseCode();
- if (199 < responsecode && responsecode < 300) {
+ if (cw.JobisNotInsterted(id)) {
+ URL dataurl = new URL(dirprefix + "/" + id + "/" + id + ".concise.fasta");
+ URL archiveurl = new URL(dirprefix + "/" + id + "/" + id + ".tar.gz");
+ URL logurl = new URL(dirprefix + "/" + id + "/LOG");
+ HttpURLConnection httpConnection1 = (HttpURLConnection) dataurl.openConnection();
+ HttpURLConnection httpConnection2 = (HttpURLConnection) logurl.openConnection();
+ HttpURLConnection httpConnection3 = (HttpURLConnection) archiveurl.openConnection();
+ int response1 = httpConnection1.getResponseCode();
+ int response2 = httpConnection2.getResponseCode();
+ if (199 < response1 && response1 < 300) {
try {
- final FastaReader fr = new FastaReader(urltable.openStream());
- final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
- String newprotein = "";
- while (fr.hasNext()) {
- final FastaSequence fs = fr.next();
- if (fs.getId().equals("QUERY") || fs.getId().equals(id))
- newprotein = fs.getSequence().replaceAll("\n", "");
- else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) {
- seqs.add(fs);
- }
- }
- if (newprotein.equals("")) {
+ String protein = parsePredictions(dataurl.openStream(), id);
+ if (protein.equals("")) {
countUnclearFASTAid++;
} else {
- SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
- String dateInString1 = table[0].substring(0, table[0].indexOf(":"));
- long dateWork1 = 0;
+ SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
+ SimpleDateFormat timeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
+ String startdatestring = table[0].substring(0, table[0].indexOf(":"));
try {
- Date dat1 = formatter.parse(dateInString1);
- dateWork1 = dat1.getTime();
+ Date startdate = dateformatter.parse(startdatestring);
+ Date starttime = timeformatter.parse(table[0]);
+ Date endtime = timeformatter.parse(table[1]);
+ String ip = table[2];
+ String execstatus = "OK";
+ String finalstatus = "OK";
+ countinsertions += cw.FormQueryTables(startdate.getTime(), table[0], table[1], ip, id, execstatus,
+ finalstatus, protein, predictions);
+
+ long exectime = (endtime.getTime() - starttime.getTime()) / 1000;
+ String log = "";
+ if (199 < response2 && response2 < 300) {
+ log = parseLogFile(logurl.openStream());
+ }
+ cw.ArchiveData(startdate.getTime(), exectime, ip, id, execstatus, finalstatus, protein,
+ predictions, alignment, log, archiveurl.toString());
} catch (ParseException e) {
e.printStackTrace();
}
- cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs);
- ++countinsertions;
- ++njobs;
- // flush every 50 insertions
- //if (0 == countinsertions % 50) {
- // cc.flushData();
- // njobs -= 50;
- //}
}
} catch (IOException e) {
e.printStackTrace();
} else {
countNoData++;
}
+ httpConnection1.disconnect();
+ httpConnection2.disconnect();
+ httpConnection3.disconnect();
} else {
++countinserted;
}
} catch (IOException e) {
e.printStackTrace();
}
- return njobs;
+ ;
}
}