X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=datadb%2Fcompbio%2Fcassandra%2FJpredParserHTTP.java;h=b780ddf926f2217dd3a7b3b02cf35ffba4ec5139;hb=7fafa396b8b56bb5cc7249dc7c12fd37edf5724b;hp=e308a253a0ad2aba5cc582f29e46a903eef71818;hpb=2260ca3b3c0382ef6bfeea7341a692508058a2a6;p=proteocache.git diff --git a/datadb/compbio/cassandra/JpredParserHTTP.java b/datadb/compbio/cassandra/JpredParserHTTP.java index e308a25..b780ddf 100644 --- a/datadb/compbio/cassandra/JpredParserHTTP.java +++ b/datadb/compbio/cassandra/JpredParserHTTP.java @@ -1,6 +1,7 @@ package compbio.cassandra; import java.io.BufferedReader; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.net.HttpURLConnection; @@ -17,7 +18,7 @@ import java.util.List; import compbio.cassandra.JpredParser; public class JpredParserHTTP implements JpredParser { - private CassandraCreate cc = new CassandraCreate(); + private CassandraNativeConnector cc = new CassandraNativeConnector(); private String dirprefix; JpredParserHTTP() { @@ -32,7 +33,7 @@ public class JpredParserHTTP implements JpredParser { dirprefix = newsourceprefix; } - public void Parsing(String source, int nDays) { + public void Parsing(String source, int nDays) throws IOException { Calendar cal = Calendar.getInstance(); cal.add(Calendar.DATE, -nDays); for (int i = 0; i < nDays; ++i) { @@ -45,7 +46,7 @@ public class JpredParserHTTP implements JpredParser { } } - private void ParsingForDate(String input, String date) { + private int ParsingForDate(String input, String date) { int totalcount = 0; int countNoData = 0; int countUnclearFASTAid = 0; @@ -53,6 +54,7 @@ public class JpredParserHTTP implements JpredParser { int countinserted = 0; int counAlignments = 0; int countStrange = 0; + int njobs = 0; System.out.println("Inserting jobs for " + date); try { @@ -69,50 +71,49 @@ public class JpredParserHTTP implements JpredParser { // 013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172 unknown_email jp_J9HBCBT String id = table[table.length - 1]; totalcount++; - if (!cc.CheckID(id)) { - String datalink = dirprefix + "/" + id + "/" + id + ".concise.fasta"; - URL urltable = new URL(datalink); - HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection(); - int responsecode = httpConnection.getResponseCode(); - if (199 < responsecode && responsecode < 300) { - try { - final FastaReader fr = new FastaReader(urltable.openStream()); - final List seqs = new ArrayList(); - String newprotein = ""; - while (fr.hasNext()) { - final FastaSequence fs = fr.next(); - if (fs.getId().equals("QUERY") || fs.getId().equals(id)) - newprotein = fs.getSequence().replaceAll("\n", ""); - else - seqs.add(fs); + String datalink = dirprefix + "/" + id + "/" + id + ".concise.fasta"; + URL urltable = new URL(datalink); + HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection(); + int responsecode = httpConnection.getResponseCode(); + if (199 < responsecode && responsecode < 300) { + try { + final FastaReader fr = new FastaReader(urltable.openStream()); + final List seqs = new ArrayList(); + String newprotein = ""; + while (fr.hasNext()) { + final FastaSequence fs = fr.next(); + if (fs.getId().equals("QUERY") || fs.getId().equals(id)) + newprotein = fs.getSequence().replaceAll("\n", ""); + else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) { + seqs.add(fs); } - if (newprotein.equals("")) { - countUnclearFASTAid++; - } else { - SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); - String dateInString1 = table[0].substring(0, table[0].indexOf(":")); - long dateWork1 = 0; - try { - Date dat1 = formatter.parse(dateInString1); - dateWork1 = dat1.getTime(); - } catch (ParseException e) { - e.printStackTrace(); - } - cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs); - ++countinsertions; - // flush every 100 insertions - if (0 == countinsertions % 100) { - cc.flushData(); - } + } + if (newprotein.equals("")) { + countUnclearFASTAid++; + } else { + SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); + String dateInString1 = table[0].substring(0, table[0].indexOf(":")); + long dateWork1 = 0; + try { + Date dat1 = formatter.parse(dateInString1); + dateWork1 = dat1.getTime(); + } catch (ParseException e) { + e.printStackTrace(); } - } catch (IOException e) { - e.printStackTrace(); + cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs); + ++countinsertions; + ++njobs; + // flush every 50 insertions + // if (0 == countinsertions % 50) { + // cc.flushData(); + // njobs -= 50; + // } } - } else { - countNoData++; + } catch (IOException e) { + e.printStackTrace(); } } else { - ++countinserted; + countNoData++; } } else { if (line.matches(date + "(.*)Sequence0/(.*)")) { @@ -135,5 +136,6 @@ public class JpredParserHTTP implements JpredParser { } catch (IOException e) { e.printStackTrace(); } + return njobs; } }