From e9b30dea3b174d340d695f122a2b57a02165e161 Mon Sep 17 00:00:00 2001 From: Sasha Sherstnev Date: Tue, 29 Oct 2013 10:59:56 +0000 Subject: [PATCH] Fix problem with too many open files and problem with not-flushed info --- datadb/compbio/cassandra/CassandraCreate.java | 11 ++++++----- datadb/compbio/cassandra/JpredParserHTTP.java | 14 ++++++++++---- datadb/compbio/cassandra/JpredParserLocalFile.java | 19 +++++++++++++------ 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/datadb/compbio/cassandra/CassandraCreate.java b/datadb/compbio/cassandra/CassandraCreate.java index 55ff2a1..b147499 100644 --- a/datadb/compbio/cassandra/CassandraCreate.java +++ b/datadb/compbio/cassandra/CassandraCreate.java @@ -80,7 +80,8 @@ public class CassandraCreate { */ mutatorLong = HFactory.createMutator(ksp, ls); - if (source.equals("http")) { + if (true) { + //if (source.equals("http")) { // get data from real Jpred production server System.out.println("Parsing web data source......"); String datasrc = "http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat"; @@ -88,16 +89,16 @@ public class CassandraCreate { JpredParserHTTP parser = new JpredParserHTTP(prefix); parser.Parsing(datasrc, 4); flushData(); - } else if (source.equals("file")) { + } + if (true) { + //if (source.equals("file")) { // get irtifical data generated for the DB stress tests System.out.println("Parsing local file data source......"); String datasrc = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/data.dat"; String prefix = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/Jpreddata"; JpredParserLocalFile parser = new JpredParserLocalFile(prefix); - parser.Parsing(datasrc, 365); + parser.Parsing(datasrc, 190); flushData(); - } else { - System.out.println("Unknown data source......"); } } diff --git a/datadb/compbio/cassandra/JpredParserHTTP.java b/datadb/compbio/cassandra/JpredParserHTTP.java index e308a25..052ff6a 100644 --- a/datadb/compbio/cassandra/JpredParserHTTP.java +++ b/datadb/compbio/cassandra/JpredParserHTTP.java @@ -41,11 +41,13 @@ public class JpredParserHTTP implements JpredParser { int year = cal.get(Calendar.YEAR); int day = cal.get(Calendar.DATE); String date = year + "/" + month + "/" + day; - ParsingForDate(source, date); + if (0 < ParsingForDate(source, date)) { + cc.flushData(); + } } } - private void ParsingForDate(String input, String date) { + private int ParsingForDate(String input, String date) { int totalcount = 0; int countNoData = 0; int countUnclearFASTAid = 0; @@ -53,6 +55,7 @@ public class JpredParserHTTP implements JpredParser { int countinserted = 0; int counAlignments = 0; int countStrange = 0; + int njobs = 0; System.out.println("Inserting jobs for " + date); try { @@ -100,9 +103,11 @@ public class JpredParserHTTP implements JpredParser { } cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs); ++countinsertions; - // flush every 100 insertions - if (0 == countinsertions % 100) { + ++njobs; + // flush every 50 insertions + if (0 == countinsertions % 50) { cc.flushData(); + njobs -= 50; } } } catch (IOException e) { @@ -135,5 +140,6 @@ public class JpredParserHTTP implements JpredParser { } catch (IOException e) { e.printStackTrace(); } + return njobs; } } diff --git a/datadb/compbio/cassandra/JpredParserLocalFile.java b/datadb/compbio/cassandra/JpredParserLocalFile.java index c37ec7a..a3e1520 100644 --- a/datadb/compbio/cassandra/JpredParserLocalFile.java +++ b/datadb/compbio/cassandra/JpredParserLocalFile.java @@ -14,10 +14,10 @@ import java.util.Calendar; import java.util.Date; import java.util.List; -public class JpredParserLocalFile { +public class JpredParserLocalFile implements JpredParser { private CassandraCreate cc = new CassandraCreate(); private String dirprefix; - + public void setSource (String newsourceprefix) { this.dirprefix = newsourceprefix; } @@ -39,11 +39,13 @@ public class JpredParserLocalFile { int year = cal.get(Calendar.YEAR); int day = cal.get(Calendar.DATE); String date = year + "/" + month + "/" + day; - ParsingForDate(source, date); + if (0 < ParsingForDate(source, date)) { + cc.flushData(); + } } } - private void ParsingForDate(String input, String date) { + private int ParsingForDate(String input, String date) { int totalcount = 0; int countNoData = 0; int countUnclearFASTAid = 0; @@ -51,6 +53,7 @@ public class JpredParserLocalFile { int countinserted = 0; int counAlignments = 0; int countStrange = 0; + int njobs = 0; System.out.println("Inserting jobs for " + date); try { @@ -92,11 +95,14 @@ public class JpredParserLocalFile { } cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs); ++countinsertions; - // flush every 100 insertions - if (0 == countinsertions % 100) { + ++njobs; + // flush every 50 insertions + if (0 == countinsertions % 50) { cc.flushData(); + njobs -= 50; } } + fr.close(); } catch (IOException e) { e.printStackTrace(); } @@ -127,5 +133,6 @@ public class JpredParserLocalFile { } catch (IOException e) { e.printStackTrace(); } + return njobs; } } -- 1.7.10.2