X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=server%2Fcompbio%2Fstatistic%2FCassandraRequester.java;fp=server%2Fcompbio%2Fstatistic%2FCassandraRequester.java;h=2da38e76f54cccc193061a232e8801c4c959b3b8;hb=2983c86c8b92d323768ea4af98a50c5bf4b4d3ab;hp=0000000000000000000000000000000000000000;hpb=b054de5bfb1b14bbb3fa43b232d4e28f468f9bc4;p=proteocache.git diff --git a/server/compbio/statistic/CassandraRequester.java b/server/compbio/statistic/CassandraRequester.java new file mode 100755 index 0000000..2da38e7 --- /dev/null +++ b/server/compbio/statistic/CassandraRequester.java @@ -0,0 +1,287 @@ +package compbio.statistic; + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import compbio.cassandra.CassandraNativeConnector; +import compbio.cassandra.CassandraReader; +import compbio.cassandra.DataBase; +import compbio.cassandra.Pair; +import compbio.cassandra.StructureJobLog; +import compbio.cassandra.StructureProteinPrediction; + +public class CassandraRequester { + private CassandraReader db = new CassandraReader(); + private ArrayList query; + private static long currentDate = 0; + private static long earlestDate = 0; + + /* + * query: execution time for the period from date1 till date2 + */ + public List extractExecutionTime(String date1, String date2) { + if (null == date1) { + date1 = "1970/1/1"; + } + if (null == date2) { + date1 = "2100/1/1"; + } + if (!isThisDateValid(date1) || !isThisDateValid(date2)) { + System.out.println("Wrong date: point 3"); + return null; + } + SetDateRange(); + int nbins = 5; + long dateStart = DateParsing(date1); + long dateEnd = DateParsing(date2); + if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd) + return null; + if (dateStart < earlestDate) + dateStart = earlestDate; + if (dateEnd > currentDate) + dateStart = currentDate; + + Calendar start = Calendar.getInstance(); + start.setTime(new Date(dateStart)); + Calendar end = Calendar.getInstance(); + end.setTime(new Date(dateEnd)); + query = new ArrayList(); + List totalTime = new ArrayList(); + for (int i = 0; i < nbins; i++) + totalTime.add(i, 0); + List> res = db.ReadProteinDataTable(); + List> numres = new ArrayList>(); + + for (Pair entry : res) { + SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd"); + try { + Date jobstartdate = dateformatter.parse(entry.getElement0()); + long date = jobstartdate.getTime(); + if (dateStart <= date && date <= dateEnd) { + SimpleDateFormat datetimeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s"); + Date jobstarttime = datetimeformatter.parse(entry.getElement0()); + Date jobendtime = datetimeformatter.parse(entry.getElement1()); + long diff = (jobendtime.getTime() - jobstarttime.getTime()) / 1000; + Pair pair = new Pair(jobstartdate, Long.valueOf(diff)); + numres.add(pair); + } + } catch (ParseException e) { + e.printStackTrace(); + } + } + + for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) { + List timeResult = new ArrayList(); + for (int i = 0; i < nbins; i++) + timeResult.add(i, 0); + for (Pair p : numres) { + if (date.equals(p.getElement0())) { + long lenResult = p.getElement1().longValue(); + if (lenResult <= 30) + timeResult.set(0, timeResult.get(0) + 1); + else if (lenResult > 30 && lenResult <= 60) + timeResult.set(1, timeResult.get(1) + 1); + else if (lenResult > 60 && lenResult <= 120) + timeResult.set(2, timeResult.get(2) + 1); + else if (lenResult > 120 && lenResult <= 600) + timeResult.set(3, timeResult.get(3) + 1); + else { + timeResult.set(4, timeResult.get(4) + 1); + } + } + } + for (int i = 0; i < nbins; i++) + totalTime.set(i, totalTime.get(i) + timeResult.get(i)); + DataBase db = new DataBase(); + db.setTimeRez(timeResult); + db.setDate(DateFormat(date.getTime())); + query.add(db); + } + + DataBase db = new DataBase(); + db.setTimeTotalExec(totalTime); + query.add(db); + System.out.println("StatisticsProt.readLength: total number of dates = " + query.size()); + return query; + } + + /* + * query: total number of jobs for the period from date1 till date2 + */ + public List countJobs(String date1, String date2) { + if (null == date1) { + date1 = "1970/1/1"; + } + if (null == date2) { + date1 = "2100/1/1"; + } + if (!isThisDateValid(date1) || !isThisDateValid(date2)) { + System.out.println("Wrong date: point 3"); + return null; + } + SetDateRange(); + long dateStart = DateParsing(date1); + long dateEnd = DateParsing(date2); + if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd) + return null; + if (dateStart < earlestDate) + dateStart = earlestDate; + if (dateEnd > currentDate) + dateStart = currentDate; + + Calendar start = Calendar.getInstance(); + start.setTime(new Date(dateStart)); + Calendar end = Calendar.getInstance(); + end.setTime(new Date(dateEnd)); + query = new ArrayList(); + for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) { + long res = db.ReadDateTable(date.getTime()); + DataBase db = new DataBase(); + db.setTotal((int)res); + db.setDate(DateFormat(date.getTime())); + query.add(db); + } + System.out.println("StatisticsProt.readLength: total number of dates = " + query.size()); + return query; + } + /* + * query: protein sequence + * */ + public List readProteins(String protIn, String flag) { + query = new ArrayList(); + List res; + if (flag.equals("whole")) + res = db.ReadWholeSequence(protIn); + else + res = db.ReadPartOfSequence(protIn); + for (StructureProteinPrediction entry : res) { + Map pred = entry.getPrediction(); + Iterator it = pred.entrySet().iterator(); + while (it.hasNext()) { + DataBase db = new DataBase(); + db.setProt(entry.getSequence()); + Map.Entry pairs = (Map.Entry)it.next(); + db.setId(entry.getJobid()); + db.setJpred(pairs.getValue().toString()); + if (flag.equals("part")) + db.setSubProt(CreateSubprot (entry.getSequence(), protIn)); + query.add(db); + } + } + return query; + } + + /* + * query protein sequences with number of jobs + */ + public List readProteinByCounter(int minimalcounter) { + query = new ArrayList(); + Map map = db.ReadProteinSequenceByCounter(); + for (Map.Entry entry : map.entrySet()) { + if (entry.getValue() > minimalcounter) { + DataBase db = new DataBase(); + db.setTotalId(entry.getValue()); + db.setProt(entry.getKey()); + query.add(db); + } + } + return query; + } + + /* + * query jobs log info + */ + public DataBase readJobLog(String jobid) { + // query = new ArrayList(); + StructureJobLog res = db.ReadJobLog(jobid); + DataBase query = new DataBase(); + query.setLogInfo(res); + // query.setres); + return query; + } + /* + * create list of parts of protein sequence; + */ + private static List CreateSubprot (String protein, String subprot) { + List sub = new ArrayList(); + String subStr = protein; + while (subStr.length() > 0 && subStr.contains(subprot)) { + String first = subStr.substring(0, subStr.indexOf(subprot)); + if (first.length() > 0) + sub.add(first); + sub.add(subprot); + subStr = subStr.substring(subStr.indexOf(subprot) + subprot.length(), subStr.length()); + } + if (subStr.length() > 0) + sub.add(subStr); + return sub; + } + /* + * convert String date into long date (miliseconds since the epoch start) + */ + private static long DateParsing(String datInput) { + if (datInput == null) { + return 0; + } + long dateWorkSt = 0; + SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); + try { + dateWorkSt = formatter.parse(datInput).getTime(); + } catch (ParseException e) { + e.printStackTrace(); + } + return dateWorkSt; + } + + // convert long to date in string format + private static String DateFormat(long inDate) { + SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy"); + return datformat.format(new Date(inDate)); + } + + /* + * set earlest date and current dates. earlestDate is static and should be + * set at the 1st call currentDate should be re-calculated every time + */ + private static void SetDateRange() { + Calendar cal = Calendar.getInstance(); + currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH)); + if (0 == earlestDate) { + CassandraRequester cr = new CassandraRequester(); + earlestDate = cr.earliestDate(); + System.out.println("Set earlest Date = " + earlestDate); + } + } + + public boolean isThisDateValid(String dateToValidate) { + if (dateToValidate == null || dateToValidate.equals("")) { + System.out.println("Undefined date"); + return false; + } + SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd"); + try { + // if not valid, this will throw ParseException + sdf.setLenient(false); + Date date = sdf.parse(dateToValidate); + } catch (ParseException e) { + e.printStackTrace(); + return false; + } + return true; + } + + /* + * find the earliest date in the database + */ + public long earliestDate() { + earlestDate = CassandraNativeConnector.getEarliestDateInDB(); + return earlestDate; + } + +}