package compbio.statistic;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import compbio.cassandra.CassandraNativeConnector;
import compbio.cassandra.CassandraReader;
import compbio.cassandra.DataBase;
import compbio.cassandra.Pair;
import compbio.cassandra.StructureJobLog;
import compbio.cassandra.StructureProteinPrediction;

/**
 * Runs the statistics queries against the Cassandra reader and wraps the
 * results into {@link DataBase} beans.
 *
 * NOTE(review): the generic type arguments in this class (e.g.
 * {@code Pair<String, String>}, {@code List<Long>}, {@code Map<String, Integer>})
 * were reconstructed from how the values are used here; confirm them against
 * the signatures of CassandraReader, Pair and DataBase.
 *
 * NOTE(review): the two shared SimpleDateFormat constants are mutable and
 * SimpleDateFormat is not thread-safe; concurrent use of one instance of this
 * class (or of several instances) is therefore not safe.
 */
public class CassandraRequester {
    /** Number of execution-time bins reported by {@link #extractExecutionTime}. */
    private static final int EXECUTION_TIME_BINS = 5;

    private CassandraReader db = new CassandraReader();
    private static long currentDate = 0;
    private static long earlestDate = 0;
    private final static SimpleDateFormat formatYYMMDD = new SimpleDateFormat("yyyy/MM/dd");
    private final static SimpleDateFormat formatDDMMYY = new SimpleDateFormat("dd/MM/yyyy");

    /**
     * Query: execution time histogram for the period from date1 till date2.
     *
     * For every day of the (clamped) period one bean is produced holding the
     * number of jobs per execution-time bin (up to 30 s, 60 s, 120 s, 600 s,
     * longer). A final bean holds the per-bin totals over the whole period.
     *
     * @param date1 first day as "yyyy/MM/dd"; null means "1970/1/1"
     * @param date2 last day as "yyyy/MM/dd"; null means "2100/1/1"
     * @return the per-day beans followed by the totals bean, or null when a
     *         date is invalid, the period does not overlap the stored data, or
     *         the reader returns no job table
     */
    public List<DataBase> extractExecutionTime(String date1, String date2) {
        long[] range = resolveDateRange(date1, date2);
        if (range == null) {
            return null;
        }
        long dateStart = range[0];
        long dateEnd = range[1];

        List<Pair<String, String>> jobs = db.ReadProteinDataTable();
        if (jobs == null) {
            return null;
        }

        // Formatters are local to this call, so they can be reused for every row.
        SimpleDateFormat dayParser = new SimpleDateFormat("yyyy/MM/dd");
        SimpleDateFormat dateTimeParser = new SimpleDateFormat("yyyy/MM/dd:H:m:s");

        // (day of job start, job duration in seconds) for all jobs inside the period
        List<Pair<Date, Long>> durations = new ArrayList<Pair<Date, Long>>();
        for (Pair<String, String> entry : jobs) {
            try {
                Date jobStartDay = dayParser.parse(entry.getElement0());
                long day = jobStartDay.getTime();
                if (dateStart <= day && day <= dateEnd) {
                    Date jobStartTime = dateTimeParser.parse(entry.getElement0());
                    Date jobEndTime = dateTimeParser.parse(entry.getElement1());
                    long seconds = (jobEndTime.getTime() - jobStartTime.getTime()) / 1000;
                    durations.add(new Pair<Date, Long>(jobStartDay, Long.valueOf(seconds)));
                }
            } catch (ParseException e) {
                // a malformed row is reported and skipped; the remaining rows are still counted
                e.printStackTrace();
            }
        }

        List<DataBase> result = new ArrayList<DataBase>();
        int[] totals = new int[EXECUTION_TIME_BINS];

        Calendar cursor = Calendar.getInstance();
        cursor.setTime(new Date(dateStart));
        Calendar end = Calendar.getInstance();
        end.setTime(new Date(dateEnd));

        for (; !cursor.after(end); cursor.add(Calendar.DATE, 1)) {
            Date date = cursor.getTime();
            int[] perDay = new int[EXECUTION_TIME_BINS];
            for (Pair<Date, Long> p : durations) {
                if (date.equals(p.getElement0())) {
                    perDay[executionTimeBin(p.getElement1().longValue())]++;
                }
            }
            for (int i = 0; i < EXECUTION_TIME_BINS; i++) {
                totals[i] += perDay[i];
            }
            DataBase bean = new DataBase();
            bean.setTimeRez(toList(perDay));
            bean.setDate(DateFormat(date.getTime()));
            result.add(bean);
        }

        DataBase totalBean = new DataBase();
        totalBean.setTimeTotalExec(toList(totals));
        result.add(totalBean);
        System.out.println("StatisticsProt.readLength: total number of dates = " + result.size());
        return result;
    }

    /**
     * Query: total number of jobs for the period from date1 till date2.
     *
     * For every day of the (clamped) period the reader's date table is
     * consulted; days for which it returns null are skipped. Elements 0..4 of
     * the returned list are stored as total, OK, stopped, error and timeout
     * counters of the bean.
     *
     * @param date1 first day as "yyyy/MM/dd"; null means "1970/1/1"
     * @param date2 last day as "yyyy/MM/dd"; null means "2100/1/1"
     * @return one bean per day with data, or null when a date is invalid or
     *         the period does not overlap the stored data
     */
    public List<DataBase> countJobs(String date1, String date2) {
        long[] range = resolveDateRange(date1, date2);
        if (range == null) {
            return null;
        }

        Calendar cursor = Calendar.getInstance();
        cursor.setTime(new Date(range[0]));
        Calendar end = Calendar.getInstance();
        end.setTime(new Date(range[1]));

        List<DataBase> result = new ArrayList<DataBase>();
        for (; !cursor.after(end); cursor.add(Calendar.DATE, 1)) {
            Date date = cursor.getTime();
            List<Long> counters = db.ReadDateTable(date.getTime());
            if (counters == null) {
                continue;
            }
            DataBase bean = new DataBase();
            bean.setTotal((int) (long) counters.get(0));
            bean.setTotalOK((int) (long) counters.get(1));
            bean.setTotalStopped((int) (long) counters.get(2));
            bean.setTotalError((int) (long) counters.get(3));
            bean.setTotalTimeOut((int) (long) counters.get(4));
            bean.setDate(DateFormat(date.getTime()));
            result.add(bean);
        }
        System.out.println("StatisticsProt.readLength: total number of dates = " + result.size());
        return result;
    }

    /**
     * Query: jobs and sequences at a given date.
     *
     * @param date the day as "dd/MM/yyyy"
     * @return one bean per (job id, sequence) pair, or null when the date is
     *         null, invalid, outside the stored period, or the reader returns
     *         null
     */
    public List<DataBase> readJobByDay(String date) {
        System.out.println(date);
        if (null == date) {
            return null;
        }
        if (!isThisDateValid(date, formatDDMMYY)) {
            System.out.println("Wrong date: point 3");
            return null;
        }
        SetDateRange();
        long day = DateParsing(date, formatDDMMYY);
        System.out.println(day);
        if (day < earlestDate || day > currentDate) {
            return null;
        }
        List<Pair<String, String>> rows = db.ReadProteinData(day);
        if (rows == null) {
            return null;
        }
        List<DataBase> result = new ArrayList<DataBase>();
        for (Pair<String, String> entry : rows) {
            DataBase bean = new DataBase();
            bean.setDate(date);
            bean.setId(entry.getElement0());
            bean.setProt(entry.getElement1());
            result.add(bean);
        }
        System.out.println("StatisticsProt.readLength: total number of dates = " + result.size());
        return result;
    }

    /**
     * Query: protein sequence.
     *
     * @param protIn the sequence (or part of a sequence) to look for
     * @param flag   "whole" searches for the whole sequence; any other value
     *               (including null) searches for sequences containing protIn;
     *               with "part" each bean additionally gets the sequence split
     *               around the occurrences of protIn
     * @return the matching beans, or null when protIn is null, when a partial
     *         search is requested with an empty protIn, or when the reader
     *         returns null
     */
    public List<DataBase> readProteins(String protIn, String flag) {
        if (protIn == null) {
            return null;
        }
        System.out.println(protIn.length());
        // constant-first comparisons so that a null flag does not throw
        boolean whole = "whole".equals(flag);
        boolean part = "part".equals(flag);

        List<StructureProteinPrediction> predictions;
        if (whole) {
            predictions = db.ReadWholeSequence(protIn);
        } else {
            predictions = (protIn.length() > 0) ? db.ReadPartOfSequence(protIn) : null;
        }
        if (predictions == null) {
            return null;
        }
        List<DataBase> result = new ArrayList<DataBase>();
        for (StructureProteinPrediction entry : predictions) {
            DataBase bean = new DataBase();
            bean.setProt(entry.getSequence());
            bean.setId(entry.getJobid());
            bean.setJpred(entry.getJnetpred());
            if (part) {
                bean.setSubProt(CreateSubprot(entry.getSequence(), protIn));
            }
            result.add(bean);
        }
        return result;
    }

    /**
     * Query: protein sequences with their number of jobs.
     *
     * @param minimalcounter only sequences with strictly more jobs than this
     *                       are reported
     * @return one bean per non-empty sequence whose counter exceeds
     *         minimalcounter, or null when the reader returns null
     */
    public List<DataBase> readProteinByCounter(int minimalcounter) {
        Map<String, Integer> counters = db.ReadProteinSequenceByCounter();
        if (counters == null) {
            return null;
        }
        List<DataBase> result = new ArrayList<DataBase>();
        for (Map.Entry<String, Integer> entry : counters.entrySet()) {
            if (entry.getValue() > minimalcounter && entry.getKey().length() > 0) {
                DataBase bean = new DataBase();
                bean.setTotalId(entry.getValue());
                bean.setProt(entry.getKey());
                result.add(bean);
            }
        }
        return result;
    }

    /**
     * Query: job log info.
     *
     * @param jobid the job identifier
     * @return a bean carrying the log info, or null when jobid is null or the
     *         reader has no log for it
     */
    public DataBase readJobLog(String jobid) {
        if (jobid == null) {
            return null;
        }
        StructureJobLog log = db.ReadJobLog(jobid);
        if (log == null) {
            return null;
        }
        DataBase bean = new DataBase();
        bean.setLogInfo(log);
        return bean;
    }

    /**
     * Validates the two period boundaries, refreshes the known date range and
     * clamps the period to [earliest stored date, current date].
     *
     * @return {start, end} in milliseconds since the epoch, or null when a
     *         date is invalid or the period lies outside the stored data
     */
    private long[] resolveDateRange(String date1, String date2) {
        String from = (null == date1) ? "1970/1/1" : date1;
        String to = (null == date2) ? "2100/1/1" : date2;
        if (!isThisDateValid(from, formatYYMMDD) || !isThisDateValid(to, formatYYMMDD)) {
            System.out.println("Wrong date: point 3");
            return null;
        }
        SetDateRange();
        long dateStart = DateParsing(from, formatYYMMDD);
        long dateEnd = DateParsing(to, formatYYMMDD);
        if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd) {
            return null;
        }
        // clamp each boundary on its own side of the period
        return new long[] { Math.max(dateStart, earlestDate), Math.min(dateEnd, currentDate) };
    }

    /** Maps a job duration in seconds to its histogram bin index (0..4). */
    private static int executionTimeBin(long seconds) {
        if (seconds <= 30) {
            return 0;
        }
        if (seconds <= 60) {
            return 1;
        }
        if (seconds <= 120) {
            return 2;
        }
        if (seconds <= 600) {
            return 3;
        }
        return 4;
    }

    /** Copies an int array into a new list of Integer. */
    private static List<Integer> toList(int[] values) {
        List<Integer> list = new ArrayList<Integer>(values.length);
        for (int value : values) {
            list.add(value);
        }
        return list;
    }

    /**
     * Splits a protein sequence around every occurrence of subprot. The
     * returned pieces, concatenated in order, give the original sequence;
     * every occurrence of subprot is a separate element and empty pieces are
     * omitted.
     *
     * @param protein the full sequence
     * @param subprot the sub-sequence to split around
     * @return the list of pieces; empty for a null or empty protein; the whole
     *         protein as a single piece when subprot is null or empty
     */
    private static List<String> CreateSubprot(String protein, String subprot) {
        List<String> pieces = new ArrayList<String>();
        if (protein == null || protein.length() == 0) {
            return pieces;
        }
        if (subprot == null || subprot.length() == 0) {
            // an empty pattern matches everywhere and would never advance the cursor
            pieces.add(protein);
            return pieces;
        }
        int from = 0;
        int hit;
        while ((hit = protein.indexOf(subprot, from)) >= 0) {
            if (hit > from) {
                pieces.add(protein.substring(from, hit));
            }
            pieces.add(subprot);
            from = hit + subprot.length();
        }
        if (from < protein.length()) {
            pieces.add(protein.substring(from));
        }
        return pieces;
    }

    /**
     * Converts a String date into a long date (milliseconds since the epoch
     * start).
     *
     * @return the parsed time, or 0 when the input is null or cannot be parsed
     */
    private static long DateParsing(String datInput, SimpleDateFormat formatter) {
        if (datInput == null) {
            return 0;
        }
        long dateWorkSt = 0;
        try {
            dateWorkSt = formatter.parse(datInput).getTime();
        } catch (ParseException e) {
            e.printStackTrace();
        }
        return dateWorkSt;
    }

    /** Converts a long date (milliseconds since the epoch) to a "dd/MM/yyyy" string. */
    private static String DateFormat(long inDate) {
        SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
        return datformat.format(new Date(inDate));
    }

    /**
     * Sets the earliest and the current dates. earlestDate is static and is
     * only fetched while it is still 0; currentDate is re-calculated on every
     * call as today's date at the start of the day.
     */
    private static void SetDateRange() {
        Calendar cal = Calendar.getInstance();
        currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/"
                + cal.get(Calendar.DAY_OF_MONTH), formatYYMMDD);
        if (0 == earlestDate) {
            // NOTE(review): a fresh requester (and with it a fresh CassandraReader) is
            // created only to call earliestDate(); kept as is because the effect of
            // constructing the reader is not visible from this file.
            CassandraRequester cr = new CassandraRequester();
            earlestDate = cr.earliestDate();
            System.out.println("Set earlest Date = " + earlestDate);
        }
    }

    /**
     * Checks that a date string can be parsed strictly with the given format.
     * Note that the formatter is switched to non-lenient mode and stays so.
     *
     * @param dateToValidate the date string to check
     * @param sdf            the format the string must follow
     * @return true when the string parses; false when it is null, empty or
     *         does not parse
     */
    public boolean isThisDateValid(String dateToValidate, SimpleDateFormat sdf) {
        if (dateToValidate == null || dateToValidate.equals("")) {
            System.out.println("Undefined date");
            return false;
        }
        try {
            // if not valid, this will throw ParseException
            sdf.setLenient(false);
            sdf.parse(dateToValidate);
        } catch (ParseException e) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /**
     * Finds the earliest date in the database and caches it in the static
     * earlestDate field.
     *
     * @return the value reported by the native connector
     */
    public long earliestDate() {
        earlestDate = CassandraNativeConnector.getEarliestDateInDB();
        return earlestDate;
    }
}