First working "writing" code for Cassandra 2.0
[proteocache.git] / datadb / compbio / cassandra / JpredParserHTTP.java
index 052ff6a..b780ddf 100644 (file)
@@ -1,6 +1,7 @@
 package compbio.cassandra;
 
 import java.io.BufferedReader;
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.net.HttpURLConnection;
@@ -17,7 +18,7 @@ import java.util.List;
 import compbio.cassandra.JpredParser;
 
 public class JpredParserHTTP implements JpredParser {
-       private CassandraCreate cc = new CassandraCreate();
+       private CassandraNativeConnector cc = new CassandraNativeConnector();
        private String dirprefix;
 
        JpredParserHTTP() {
@@ -32,7 +33,7 @@ public class JpredParserHTTP implements JpredParser {
                dirprefix = newsourceprefix;
        }
 
-       public void Parsing(String source, int nDays) {
+       public void Parsing(String source, int nDays) throws IOException {
                Calendar cal = Calendar.getInstance();
                cal.add(Calendar.DATE, -nDays);
                for (int i = 0; i < nDays; ++i) {
@@ -41,9 +42,7 @@ public class JpredParserHTTP implements JpredParser {
                        int year = cal.get(Calendar.YEAR);
                        int day = cal.get(Calendar.DATE);
                        String date = year + "/" + month + "/" + day;
-                       if (0 < ParsingForDate(source, date)) {
-                               cc.flushData();
-                       }
+                       ParsingForDate(source, date);
                }
        }
 
@@ -72,52 +71,49 @@ public class JpredParserHTTP implements JpredParser {
                                        // 2013/10/25:21:55:7 2013/10/25:21:59:13 201.239.98.172 unknown_email jp_J9HBCBT
                                        String id = table[table.length - 1];
                                        totalcount++;
-                                       if (!cc.CheckID(id)) {
-                                               String datalink = dirprefix + "/" + id + "/" + id + ".concise.fasta";
-                                               URL urltable = new URL(datalink);
-                                               HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection();
-                                               int responsecode = httpConnection.getResponseCode();
-                                               if (199 < responsecode && responsecode < 300) {
-                                                       try {
-                                                               final FastaReader fr = new FastaReader(urltable.openStream());
-                                                               final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
-                                                               String newprotein = "";
-                                                               while (fr.hasNext()) {
-                                                                       final FastaSequence fs = fr.next();
-                                                                       if (fs.getId().equals("QUERY") || fs.getId().equals(id))
-                                                                               newprotein = fs.getSequence().replaceAll("\n", "");
-                                                                       else
-                                                                               seqs.add(fs);
+                                       String datalink = dirprefix + "/" + id + "/" + id + ".concise.fasta";
+                                       URL urltable = new URL(datalink);
+                                       HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection();
+                                       int responsecode = httpConnection.getResponseCode();
+                                       if (199 < responsecode && responsecode < 300) {
+                                               try {
+                                                       final FastaReader fr = new FastaReader(urltable.openStream());
+                                                       final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
+                                                       String newprotein = "";
+                                                       while (fr.hasNext()) {
+                                                               final FastaSequence fs = fr.next();
+                                                               if (fs.getId().equals("QUERY") || fs.getId().equals(id))
+                                                                       newprotein = fs.getSequence().replaceAll("\n", "");
+                                                               else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) {
+                                                                       seqs.add(fs);
                                                                }
-                                                               if (newprotein.equals("")) {
-                                                                       countUnclearFASTAid++;
-                                                               } else {
-                                                                       SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
-                                                                       String dateInString1 = table[0].substring(0, table[0].indexOf(":"));
-                                                                       long dateWork1 = 0;
-                                                                       try {
-                                                                               Date dat1 = formatter.parse(dateInString1);
-                                                                               dateWork1 = dat1.getTime();
-                                                                       } catch (ParseException e) {
-                                                                               e.printStackTrace();
-                                                                       }
-                                                                       cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs);
-                                                                       ++countinsertions;
-                                                                       ++njobs;
-                                                                       // flush every 50 insertions
-                                                                       if (0 == countinsertions % 50) {
-                                                                               cc.flushData();
-                                                                               njobs -= 50;
-                                                                       }
+                                                       }
+                                                       if (newprotein.equals("")) {
+                                                               countUnclearFASTAid++;
+                                                       } else {
+                                                               SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
+                                                               String dateInString1 = table[0].substring(0, table[0].indexOf(":"));
+                                                               long dateWork1 = 0;
+                                                               try {
+                                                                       Date dat1 = formatter.parse(dateInString1);
+                                                                       dateWork1 = dat1.getTime();
+                                                               } catch (ParseException e) {
+                                                                       e.printStackTrace();
                                                                }
-                                                       } catch (IOException e) {
-                                                               e.printStackTrace();
+                                                               cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs);
+                                                               ++countinsertions;
+                                                               ++njobs;
+                                                               // flush every 50 insertions
+                                                               // if (0 == countinsertions % 50) {
+                                                               // cc.flushData();
+                                                               // njobs -= 50;
+                                                               // }
                                                        }
-                                               } else {
-                                                       countNoData++;
+                                               } catch (IOException e) {
+                                                       e.printStackTrace();
                                                }
                                        } else {
-                                               ++countinserted;
+                                               countNoData++;
                                        }
                                } else {
                                        if (line.matches(date + "(.*)Sequence0/(.*)")) {