package compbio.cassandra;
import java.io.BufferedReader;
+import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import compbio.cassandra.JpredParser;
public class JpredParserHTTP implements JpredParser {
- private CassandraCreate cc = new CassandraCreate();
+ private CassandraNativeConnector cc = new CassandraNativeConnector();
private String dirprefix;
/**
 * Creates a parser whose {@code dirprefix} (the base used when building
 * per-job data links) is taken from {@code newsourceprefix}.
 */
JpredParserHTTP() {
this.dirprefix = newsourceprefix;
}
/**
 * Top-level parsing entry point. The visible code obtains the current
 * calendar, moves it back by {@code nDays} days and then iterates
 * {@code nDays} times. In this change the signature additionally declares
 * {@link IOException}.
 *
 * @param source not referenced by any statement visible in this method
 * @param nDays  number of days to step back from the current date; also the loop bound
 * @throws IOException declared by the updated signature; the throwing call is not visible here
 */
- public void Parsing(String source, int nDays) {
+ public void Parsing(String source, int nDays) throws IOException {
Calendar cal = Calendar.getInstance();
// Start nDays days before the current date.
cal.add(Calendar.DATE, -nDays);
// NOTE(review): the loop body is not shown here - presumably one ParsingForDate call per day; confirm.
for (int i = 0; i < nDays; ++i) {
}
}
/**
 * Processes the job records for one date: for each job id it downloads
 * {@code <dirprefix>/<id>/<id>.concise.fasta} over HTTP, extracts the query
 * sequence and the prediction records, and stores them through {@code cc}.
 * <p>
 * Changes visible in this diff: the return type becomes {@code int}, the
 * {@code cc.CheckID(id)} guard around the download is removed, only records
 * whose id is {@code jnetpred}/{@code JNETPRED} are kept in {@code seqs},
 * and the periodic {@code cc.flushData()} call is commented out.
 *
 * @param input not referenced by any statement visible in this method
 * @param date  date string; printed in the progress message and used as the
 *              prefix of the line-matching regular expression
 * @return {@code njobs}, which is incremented once per {@code cc.InsertData} call
 */
- private void ParsingForDate(String input, String date) {
+ private int ParsingForDate(String input, String date) {
int totalcount = 0;
int countNoData = 0;
int countUnclearFASTAid = 0;
int countinserted = 0;
int counAlignments = 0;
int countStrange = 0;
// Number of successful insertions in this call; this is the value returned.
+ int njobs = 0;
System.out.println("Inserting jobs for " + date);
try {
// Example input line; the last field is taken as the job id:
// 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172 unknown_email jp_J9HBCBT
String id = table[table.length - 1];
totalcount++;
- if (!cc.CheckID(id)) {
- String datalink = dirprefix + "/" + id + "/" + id + ".concise.fasta";
- URL urltable = new URL(datalink);
- HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection();
- int responsecode = httpConnection.getResponseCode();
- if (199 < responsecode && responsecode < 300) {
- try {
- final FastaReader fr = new FastaReader(urltable.openStream());
- final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
- String newprotein = "";
- while (fr.hasNext()) {
- final FastaSequence fs = fr.next();
- if (fs.getId().equals("QUERY") || fs.getId().equals(id))
- newprotein = fs.getSequence().replaceAll("\n", "");
- else
- seqs.add(fs);
// New version: the download is attempted for every id (no CheckID guard).
+ String datalink = dirprefix + "/" + id + "/" + id + ".concise.fasta";
+ URL urltable = new URL(datalink);
+ HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection();
+ int responsecode = httpConnection.getResponseCode();
// Only 2xx responses are treated as "data available".
+ if (199 < responsecode && responsecode < 300) {
+ try {
// NOTE(review): openStream() is called on the URL rather than reusing httpConnection,
// and no close of fr is visible here - confirm whether a second request / leak results.
+ final FastaReader fr = new FastaReader(urltable.openStream());
+ final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
+ String newprotein = "";
+ while (fr.hasNext()) {
+ final FastaSequence fs = fr.next();
// The record named "QUERY" or named after the job id supplies the sequence (newlines stripped).
+ if (fs.getId().equals("QUERY") || fs.getId().equals(id))
+ newprotein = fs.getSequence().replaceAll("\n", "");
// Of the remaining records only the jnetpred ones are collected; others are dropped.
+ else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) {
+ seqs.add(fs);
}
- if (newprotein.equals("")) {
- countUnclearFASTAid++;
- } else {
- SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
- String dateInString1 = table[0].substring(0, table[0].indexOf(":"));
- long dateWork1 = 0;
- try {
- Date dat1 = formatter.parse(dateInString1);
- dateWork1 = dat1.getTime();
- } catch (ParseException e) {
- e.printStackTrace();
- }
- cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs);
- ++countinsertions;
- // flush every 100 insertions
- if (0 == countinsertions % 100) {
- cc.flushData();
- }
+ }
// No record matched "QUERY" or the job id: count the job and skip insertion.
+ if (newprotein.equals("")) {
+ countUnclearFASTAid++;
+ } else {
+ SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
// The date part is the text of the first field up to its first ':'.
+ String dateInString1 = table[0].substring(0, table[0].indexOf(":"));
+ long dateWork1 = 0;
+ try {
+ Date dat1 = formatter.parse(dateInString1);
+ dateWork1 = dat1.getTime();
+ } catch (ParseException e) {
// On a parse failure dateWork1 stays 0 and the insertion below still happens.
+ e.printStackTrace();
}
- } catch (IOException e) {
- e.printStackTrace();
+ cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs);
// NOTE(review): countinsertions is not declared in this method - presumably a field; confirm.
+ ++countinsertions;
+ ++njobs;
+ // flush every 50 insertions
+ // if (0 == countinsertions % 50) {
+ // cc.flushData();
+ // njobs -= 50;
+ // }
}
- } else {
- countNoData++;
+ } catch (IOException e) {
+ e.printStackTrace();
}
} else {
- ++countinserted;
// Non-2xx response: no data file for this job.
+ countNoData++;
}
} else {
if (line.matches(date + "(.*)Sequence0/(.*)")) {
} catch (IOException e) {
e.printStackTrace();
}
+ return njobs;
}
}