X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=datadb%2Fcompbio%2Fcassandra%2FJpredParserLocalFile.java;h=f48178c9f862bd961270425e3c248258335098ed;hb=0e1f92db0e4043bfa93417481274b6461cdb09cd;hp=27d4252c19d5e9594de6742f1dda011d7901e7d7;hpb=3887e427255a9ed5e9e43f4a52a377935331e5d3;p=proteocache.git diff --git a/datadb/compbio/cassandra/JpredParserLocalFile.java b/datadb/compbio/cassandra/JpredParserLocalFile.java index 27d4252..f48178c 100644 --- a/datadb/compbio/cassandra/JpredParserLocalFile.java +++ b/datadb/compbio/cassandra/JpredParserLocalFile.java @@ -15,19 +15,22 @@ import java.util.Calendar; import java.util.Date; import java.util.List; +import compbio.data.sequence.FastaReader; +import compbio.data.sequence.FastaSequence; + public class JpredParserLocalFile implements JpredParser { - private CassandraNativeConnector cc = new CassandraNativeConnector(); + private CassandraWriter cw = new CassandraWriter(); private String dirprefix; public void setSource(String newsourceprefix) { this.dirprefix = newsourceprefix; } - JpredParserLocalFile() { + public JpredParserLocalFile() { this.dirprefix = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/data.dat"; } - JpredParserLocalFile(String sourceurl) { + public JpredParserLocalFile(String sourceurl) { this.dirprefix = sourceurl; } @@ -58,7 +61,7 @@ public class JpredParserLocalFile implements JpredParser { System.out.println("Execution Time = " + execTime + " ms"); } - private int ParsingForDate(List input, String date) { + private void ParsingForDate(List input, String date) { int totalcount = 0; int countNoData = 0; int countUnclearFASTAid = 0; @@ -66,64 +69,51 @@ public class JpredParserLocalFile implements JpredParser { int countinserted = 0; int counAlignments = 0; int countStrange = 0; - int njobs = 0; System.out.println("Inserting jobs for " + date); for (String in : input) { - if (in.matches(date + "(.*)jp_[^\\s]+")) { + if (in.matches(date + ":(.*)jp_[^\\s]+")) { String[] table = in.split("\\s+"); String starttime = table[0]; String finishtime = table[1]; String ip = table[2]; String id = table[table.length - 1]; totalcount++; - //if (!cc.CheckID(id)) { - if (true) { - String confilename = dirprefix + "/" + id + "/" + id + ".concise"; - File confile = new File(confilename); - if (confile.exists()) { - try { - final FastaReader fr = new FastaReader(confilename); - final List seqs = new ArrayList(); - String newprotein = ""; - while (fr.hasNext()) { - final FastaSequence fs = fr.next(); - if (fs.getId().equals("QUERY") || fs.getId().equals(id)) - newprotein = fs.getSequence().replaceAll("\n", ""); - else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) { - seqs.add(fs); - } + String confilename = dirprefix + "/" + id + "/" + id + ".concise"; + File confile = new File(confilename); + if (confile.exists()) { + try { + final FastaReader fr = new FastaReader(confilename); + final List seqs = new ArrayList(); + String newprotein = ""; + while (fr.hasNext()) { + final FastaSequence fs = fr.next(); + if (fs.getId().equals("QUERY") || fs.getId().equals(id)) + newprotein = fs.getSequence().replaceAll("\n", ""); + else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) { + seqs.add(fs); } - if (newprotein.equals("")) { - countUnclearFASTAid++; - } else { - SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); - String dateInString1 = starttime.substring(0, starttime.indexOf(":")); - long dateWork1 = 0; - try { - Date dat = formatter.parse(dateInString1); - dateWork1 = dat.getTime(); - } catch (ParseException e) { - e.printStackTrace(); - } - cc.InsertData(dateWork1, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs); - ++countinsertions; - ++njobs; - // flush every 50 insertions - //if (0 == countinsertions % 50) { - // cc.flushData(); - // njobs -= 50; - //} + } + if (newprotein.equals("")) { + countUnclearFASTAid++; + } else { + SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); + String dateInString1 = starttime.substring(0, starttime.indexOf(":")); + long insertdate = 0; + try { + Date dat = formatter.parse(dateInString1); + insertdate = dat.getTime(); + } catch (ParseException e) { + e.printStackTrace(); } - fr.close(); - } catch (IOException e) { - e.printStackTrace(); + //countinsertions += cw.FormQueryTables(insertdate, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs); } - } else { - countNoData++; + fr.close(); + } catch (IOException e) { + e.printStackTrace(); } } else { - ++countinserted; + countNoData++; } } else { if (in.matches(date + "(.*)Sequence0/(.*)")) { @@ -142,7 +132,6 @@ public class JpredParserLocalFile implements JpredParser { System.out.println(" " + countUnclearFASTAid + " jobs with unclear FASTA protein id in *.concise.fasta"); System.out.println(" " + countinsertions + " new job insertions\n"); } - return njobs; } }