import java.util.Date;
import java.util.List;
+import compbio.data.sequence.FastaReader;
+import compbio.data.sequence.FastaSequence;
+
public class JpredParserLocalFile implements JpredParser {
- private CassandraNativeConnector cc = new CassandraNativeConnector();
+ private CassandraWriter cw = new CassandraWriter();
private String dirprefix;
/**
 * Replaces the source prefix stored in {@code dirprefix}.
 *
 * @param newsourceprefix the new value to store; it is assigned as-is, with no validation
 */
public void setSource(String newsourceprefix) {
dirprefix = newsourceprefix;
}
/**
 * No-argument constructor: initialises {@code dirprefix} with a hard-coded default path.
 * In this patch the constructor changes from package-private to public.
 *
 * NOTE(review): the default is an absolute path inside a specific user's home directory
 * ("/home/asherstnev/...") and will not exist on other machines; presumably meant for local
 * stress testing only. Confirm, and prefer the one-argument constructor or setSource elsewhere.
 */
- JpredParserLocalFile() {
+ public JpredParserLocalFile() {
this.dirprefix = "/home/asherstnev/Projects/Java.projects/proteocache/data_stress_test/data.dat";
}
/**
 * Constructor that initialises {@code dirprefix} from the caller-supplied value.
 * In this patch the constructor changes from package-private to public.
 *
 * @param sourceurl value stored in {@code dirprefix} as-is (no validation is performed here)
 */
- JpredParserLocalFile(String sourceurl) {
+ public JpredParserLocalFile(String sourceurl) {
this.dirprefix = sourceurl;
}
System.out.println("Execution Time = " + execTime + " ms");
}
- private int ParsingForDate(List<String> input, String date) {
+ private void ParsingForDate(List<String> input, String date) {
int totalcount = 0;
int countNoData = 0;
int countUnclearFASTAid = 0;
int countinserted = 0;
int counAlignments = 0;
int countStrange = 0;
- int njobs = 0;
System.out.println("Inserting jobs for " + date);
for (String in : input) {
- if (in.matches(date + "(.*)jp_[^\\s]+")) {
+ if (in.matches(date + ":(.*)jp_[^\\s]+")) {
String[] table = in.split("\\s+");
String starttime = table[0];
String finishtime = table[1];
String ip = table[2];
String id = table[table.length - 1];
totalcount++;
- //if (!cc.CheckID(id)) {
- if (true) {
- String confilename = dirprefix + "/" + id + "/" + id + ".concise";
- File confile = new File(confilename);
- if (confile.exists()) {
- try {
- final FastaReader fr = new FastaReader(confilename);
- final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
- String newprotein = "";
- while (fr.hasNext()) {
- final FastaSequence fs = fr.next();
- if (fs.getId().equals("QUERY") || fs.getId().equals(id))
- newprotein = fs.getSequence().replaceAll("\n", "");
- else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) {
- seqs.add(fs);
- }
+ String confilename = dirprefix + "/" + id + "/" + id + ".concise";
+ File confile = new File(confilename);
+ if (confile.exists()) {
+ try {
+ final FastaReader fr = new FastaReader(confilename);
+ final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
+ String newprotein = "";
+ while (fr.hasNext()) {
+ final FastaSequence fs = fr.next();
+ if (fs.getId().equals("QUERY") || fs.getId().equals(id))
+ newprotein = fs.getSequence().replaceAll("\n", "");
+ else if (fs.getId().equals("jnetpred") || fs.getId().equals("JNETPRED")) {
+ seqs.add(fs);
}
- if (newprotein.equals("")) {
- countUnclearFASTAid++;
- } else {
- SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
- String dateInString1 = starttime.substring(0, starttime.indexOf(":"));
- long dateWork1 = 0;
- try {
- Date dat = formatter.parse(dateInString1);
- dateWork1 = dat.getTime();
- } catch (ParseException e) {
- e.printStackTrace();
- }
- cc.InsertData(dateWork1, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs);
- ++countinsertions;
- ++njobs;
- // flush every 50 insertions
- //if (0 == countinsertions % 50) {
- // cc.flushData();
- // njobs -= 50;
- //}
+ }
+ if (newprotein.equals("")) {
+ countUnclearFASTAid++;
+ } else {
+ SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
+ String dateInString1 = starttime.substring(0, starttime.indexOf(":"));
+ long insertdate = 0;
+ try {
+ Date dat = formatter.parse(dateInString1);
+ insertdate = dat.getTime();
+ } catch (ParseException e) {
+ e.printStackTrace();
}
- fr.close();
- } catch (IOException e) {
- e.printStackTrace();
+ //countinsertions += cw.FormQueryTables(insertdate, starttime, finishtime, ip, id, "OK", "OK", newprotein, seqs);
}
- } else {
- countNoData++;
+ fr.close();
+ } catch (IOException e) {
+ e.printStackTrace();
}
} else {
- ++countinserted;
+ countNoData++;
}
} else {
if (in.matches(date + "(.*)Sequence0/(.*)")) {
System.out.println(" " + countUnclearFASTAid + " jobs with unclear FASTA protein id in *.concise.fasta");
System.out.println(" " + countinsertions + " new job insertions\n");
}
- return njobs;
}
}