X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=datadb%2Fcompbio%2Fcassandra%2FJpredParserLocalFile.java;h=c89fe5999c6e1e8869d4e465fea0e7837ca4549e;hb=75431c815ea42cf3ffa18f485721146636575265;hp=c37ec7aff75f5aecf6b6149c77b8004b8f29db14;hpb=6a81b75c020845f9bb94c307a66347e4362da85f;p=proteocache.git diff --git a/datadb/compbio/cassandra/JpredParserLocalFile.java b/datadb/compbio/cassandra/JpredParserLocalFile.java index c37ec7a..c89fe59 100644 --- a/datadb/compbio/cassandra/JpredParserLocalFile.java +++ b/datadb/compbio/cassandra/JpredParserLocalFile.java @@ -4,8 +4,6 @@ import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.io.InputStreamReader; -import java.net.HttpURLConnection; -import java.net.MalformedURLException; import java.io.FileInputStream; import java.text.ParseException; import java.text.SimpleDateFormat; @@ -14,36 +12,52 @@ import java.util.Calendar; import java.util.Date; import java.util.List; -public class JpredParserLocalFile { - private CassandraCreate cc = new CassandraCreate(); +import compbio.data.sequence.FastaReader; +import compbio.data.sequence.FastaSequence; + +public class JpredParserLocalFile implements JpredParser { private String dirprefix; - - public void setSource (String newsourceprefix) { + + public void setSource(String newsourceprefix) { this.dirprefix = newsourceprefix; } - JpredParserLocalFile() { + public JpredParserLocalFile() { this.dirprefix = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/data.dat"; } - - JpredParserLocalFile(String sourceurl) { + + public JpredParserLocalFile(String sourceurl) { this.dirprefix = sourceurl; } - public void Parsing(String source, int nDays) { + public void Parsing(String source, int nDays) throws IOException { Calendar cal = Calendar.getInstance(); cal.add(Calendar.DATE, -nDays); + List alljobs = new ArrayList(); + File file = new File(source); + BufferedReader alljobsfile = new BufferedReader(new InputStreamReader(new FileInputStream(file))); + String line; + + while ((line = alljobsfile.readLine()) != null) { + alljobs.add(line); + } + alljobsfile.close(); + + System.out.println("Inserting jobs for " + nDays + " days, " + alljobs.size() + " jobs in total"); + final long startTime = System.currentTimeMillis(); for (int i = 0; i < nDays; ++i) { cal.add(Calendar.DATE, 1); int month = cal.get(Calendar.MONTH) + 1; int year = cal.get(Calendar.YEAR); int day = cal.get(Calendar.DATE); String date = year + "/" + month + "/" + day; - ParsingForDate(source, date); + ParsingForDate(alljobs, date); } + final long execTime = System.currentTimeMillis() - startTime; + System.out.println("Execution Time = " + execTime + " ms"); } - private void ParsingForDate(String input, String date) { + private void ParsingForDate(List input, String date) { int totalcount = 0; int countNoData = 0; int countUnclearFASTAid = 0; @@ -53,68 +67,57 @@ public class JpredParserLocalFile { int countStrange = 0; System.out.println("Inserting jobs for " + date); - try { - File file = new File(input); - BufferedReader alljobs = new BufferedReader(new InputStreamReader(new FileInputStream(file))); - String line; - - while ((line = alljobs.readLine()) != null) { - if (line.matches(date + "(.*)jp_[^\\s]+")) { - String[] table = line.split("\\s+"); - String id = table[table.length - 1]; - totalcount++; - if (!cc.CheckID(id)) { - String confilename = dirprefix + "/" + id + "/" + id + ".concise"; - File confile = new File(confilename); - if (confile.exists()) { + for (String in : input) { + if (in.matches(date + ":(.*)jp_[^\\s]+")) { + String[] table = in.split("\\s+"); + String starttime = table[0]; + String id = table[table.length - 1]; + totalcount++; + String confilename = dirprefix + "/" + id + "/" + id + ".concise"; + File confile = new File(confilename); + if (confile.exists()) { + try { + final FastaReader fr = new FastaReader(confilename); + final List seqs = new ArrayList(); + String newprotein = ""; + while (fr.hasNext()) { + final FastaSequence fs = fr.next(); + if (fs.getId().equals("QUERY") || fs.getId().equals(id)) + newprotein = fs.getSequence().replaceAll("\n", ""); + else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) { + seqs.add(fs); + } + } + if (newprotein.equals("")) { + countUnclearFASTAid++; + } else { + SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); + String dateInString1 = starttime.substring(0, starttime.indexOf(":")); try { - final FastaReader fr = new FastaReader(confilename); - final List seqs = new ArrayList(); - String newprotein = ""; - while (fr.hasNext()) { - final FastaSequence fs = fr.next(); - if (fs.getId().equals("QUERY") || fs.getId().equals(id)) - newprotein = fs.getSequence().replaceAll("\n", ""); - else - seqs.add(fs); - } - if (newprotein.equals("")) { - countUnclearFASTAid++; - } else { - SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); - String dateInString1 = table[0].substring(0, table[0].indexOf(":")); - long dateWork1 = 0; - try { - Date dat1 = formatter.parse(dateInString1); - dateWork1 = dat1.getTime(); - } catch (ParseException e) { - e.printStackTrace(); - } - cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs); - ++countinsertions; - // flush every 100 insertions - if (0 == countinsertions % 100) { - cc.flushData(); - } - } - } catch (IOException e) { + Date dat = formatter.parse(dateInString1); + } catch (ParseException e) { e.printStackTrace(); } - } else { - countNoData++; + // countinsertions += cw.FormQueryTables(insertdate, + // starttime, finishtime, ip, id, "OK", "OK", + // newprotein, seqs); } - } else { - ++countinserted; + fr.close(); + } catch (IOException e) { + e.printStackTrace(); } } else { - if (line.matches(date + "(.*)Sequence0/(.*)")) { - ++counAlignments; - } else { - ++countStrange; - } + countNoData++; + } + } else { + if (in.matches(date + "(.*)Sequence0/(.*)")) { + ++counAlignments; + } else { + ++countStrange; } } - alljobs.close(); + } + if (true) { System.out.println("Total number of jobs = " + totalcount); System.out.println(" " + countinserted + " jobs inserted already"); System.out.println(" " + counAlignments + " jalview jobs"); @@ -122,10 +125,7 @@ public class JpredParserLocalFile { System.out.println(" " + countNoData + " jobs without *.concise.fasta file"); System.out.println(" " + countUnclearFASTAid + " jobs with unclear FASTA protein id in *.concise.fasta"); System.out.println(" " + countinsertions + " new job insertions\n"); - } catch (MalformedURLException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); } } + }