X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=server%2Fcompbio%2Fstatistic%2FCassandraRequester.java;h=0ce458baebe317b018a5ff65fcbc86716129bf7b;hb=c441a23b5f24da06199b9b5b830010e5a5833ab8;hp=1906c97e081ad88adf3f30d9792ee330d0117da2;hpb=87b9ef699282c308e5f259fb576b08eb25a1a25f;p=proteocache.git diff --git a/server/compbio/statistic/CassandraRequester.java b/server/compbio/statistic/CassandraRequester.java index 1906c97..0ce458b 100755 --- a/server/compbio/statistic/CassandraRequester.java +++ b/server/compbio/statistic/CassandraRequester.java @@ -4,42 +4,57 @@ import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; -import java.util.Collections; import java.util.Date; -import java.util.Iterator; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import compbio.cassandra.DateBean; +import compbio.cassandra.ProteinBean; import compbio.cassandra.CassandraNativeConnector; +import compbio.cassandra.CassandraReaderOld; import compbio.cassandra.DataBase; +import compbio.cassandra.Pair; +import compbio.cassandra.JobBean; +import compbio.cassandra.Total; +import compbio.cassandra.TotalByCounterBean; +import compbio.cassandra.TotalJobsStatisticBean; +import compbio.cassandra.UserBean; +import compbio.engine.JobStatus; public class CassandraRequester { - private CassandraNativeConnector DBInstance = new CassandraNativeConnector(); + private CassandraReaderOld db = new CassandraReaderOld(); private ArrayList query; private static long currentDate = 0; private static long earlestDate = 0; - + private final static SimpleDateFormat formatYYMMDD = new SimpleDateFormat("yyyy/MM/dd"); + private final static SimpleDateFormat formatDDMMYY = new SimpleDateFormat("dd/MM/yyyy"); /* * query: execution time for the period from date1 till date2 - * */ + */ public List extractExecutionTime(String date1, String date2) { - if (!isThisDateValid(date1) || !isThisDateValid(date2)) { - System.out.println("Wrong date: point 3"); + if (null == date1) { + date1 = "1970/1/1"; + } + if (null == date2) { + date1 = "2100/1/1"; + } + if (!isThisDateValid(date1, formatYYMMDD) || !isThisDateValid(date2, formatYYMMDD)) { + System.out.println("CassandraRequester.extractExecutionTime: wrong format for date1 " + date1 + "or date2 " + date2); return null; } SetDateRange(); int nbins = 5; - long dateStart = DateParsing(date1); - long dateEnd = DateParsing(date2); - if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate) || dateStart > dateEnd) + long dateStart = DateParsing(date1, formatYYMMDD); + long dateEnd = DateParsing(date2, formatYYMMDD); + if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd) return null; if (dateStart < earlestDate) dateStart = earlestDate; if (dateEnd > currentDate) dateStart = currentDate; - System.out.println("CassandraRequester.extractExecutionTime: earlestDate = " + earlestDate + ", currentDate = " + currentDate); - Calendar start = Calendar.getInstance(); start.setTime(new Date(dateStart)); Calendar end = Calendar.getInstance(); @@ -48,24 +63,34 @@ public class CassandraRequester { List totalTime = new ArrayList(); for (int i = 0; i < nbins; i++) totalTime.add(i, 0); - /* + List> res = db.ReadProteinDataTable(); + List> numres = new ArrayList>(); + + for (Pair entry : res) { + SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd"); + try { + Date jobstartdate = dateformatter.parse(entry.getElement0()); + long date = jobstartdate.getTime(); + if (dateStart <= date && date <= dateEnd) { + SimpleDateFormat datetimeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s"); + Date jobstarttime = datetimeformatter.parse(entry.getElement0()); + Date jobendtime = datetimeformatter.parse(entry.getElement1()); + long diff = (jobendtime.getTime() - jobstarttime.getTime()) / 1000; + Pair pair = new Pair(jobstartdate, Long.valueOf(diff)); + numres.add(pair); + } + } catch (ParseException e) { + e.printStackTrace(); + } + } + for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) { List timeResult = new ArrayList(); - SliceQuery result = HFactory.createSliceQuery(DBInstance.GetKeyspace(), LongSerializer.get(), - StringSerializer.get(), StringSerializer.get()); - result.setColumnFamily("ProteinData"); - result.setKey(date.getTime()); - result.setRange(null, null, false, Integer.MAX_VALUE); - QueryResult> columnSlice = result.execute(); - List> col = columnSlice.get().getColumns(); - if (!col.isEmpty()) { - Iterator> itCol = col.iterator(); - for (int i = 0; i < nbins; i++) - timeResult.add(i, 0); - // split all jobs into nbins bins - while (itCol.hasNext()) { - String id = itCol.next().getName(); - long lenResult = CountID(id); + for (int i = 0; i < nbins; i++) + timeResult.add(i, 0); + for (Pair p : numres) { + if (date.equals(p.getElement0())) { + long lenResult = p.getElement1().longValue(); if (lenResult <= 30) timeResult.set(0, timeResult.get(0) + 1); else if (lenResult > 30 && lenResult <= 60) @@ -78,112 +103,241 @@ public class CassandraRequester { timeResult.set(4, timeResult.get(4) + 1); } } - for (int i = 0; i < nbins; i++) - totalTime.set(i, totalTime.get(i) + timeResult.get(i)); - DataBase db = new DataBase(); - db.setTimeRez(timeResult); - db.setDate(DateFormat(date.getTime())); - query.add(db); } + for (int i = 0; i < nbins; i++) + totalTime.set(i, totalTime.get(i) + timeResult.get(i)); + DataBase db = new DataBase(); + db.setTimeRez(timeResult); + db.setDate(DateFormat(date.getTime())); + query.add(db); } - */ + DataBase db = new DataBase(); db.setTimeTotalExec(totalTime); query.add(db); - System.out.println("StatisticsProt.readLength: total number of dates = " + query.size()); return query; } - /* - * convert String date into long date (miliseconds since the epoch start) + /* + * query: total number of jobs for the period from date1 till date2 */ - private static long DateParsing(String datInput) { - if (datInput == null) { - return 0; + public TotalJobsStatisticBean countJobs(String date1, String date2) { + /* + * if (null == date1) { date1 = "1970/1/1"; } if (null == date2) { date1 + * = "2100/1/1"; } if (!isThisDateValid(date1, formatYYMMDD) || + * !isThisDateValid(date2, formatYYMMDD)) { System.out.println( + * "CassandraRequester.countJobs: wrong format for date1 " + date1 + + * "or date2 " + date2); return null; } + */ + SetDateRange(); + long dateStart = DateParsing(date1, formatYYMMDD); + long dateEnd = DateParsing(date2, formatYYMMDD); + /* + * if (dateEnd < earlestDate || dateStart > currentDate || dateStart > + * dateEnd) return null; if (dateStart < earlestDate) dateStart = + * earlestDate; if (dateEnd > currentDate) dateStart = currentDate; + */ + Calendar start = Calendar.getInstance(); + start.setTime(new Date(dateStart)); + Calendar end = Calendar.getInstance(); + end.setTime(new Date(dateEnd)); + TotalJobsStatisticBean query = new TotalJobsStatisticBean(); + Total wholeTotal = new Total(0, 0, 0, 0, 0); + for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) { + Total res = db.ReadDateTable(date.getTime()); + if (res == null) + continue; + query.setDateTotal(DateFormat(date.getTime()), res); + wholeTotal.setTotal(res.getTotal() + wholeTotal.getTotal()); + wholeTotal.setTotalOK(res.getTotalOK() + wholeTotal.getTotalOK()); + wholeTotal.setTotalStopped(res.getTotalStopped() + wholeTotal.getTotalStopped()); + wholeTotal.setTotalError(res.getTotalError() + wholeTotal.getTotalError()); + wholeTotal.setTotalTimeOut(res.getTotalTimeOut() + wholeTotal.getTotalTimeOut()); } - long dateWorkSt = 0; - SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); - try { - dateWorkSt = formatter.parse(datInput).getTime(); - } catch (ParseException e) { - e.printStackTrace(); + query.setWholeTotal(wholeTotal); + return query; + } + + /* + * query: jobs and sequence at date + */ + public DateBean readJobByDay(String date, JobStatus status) { + if (!isThisDateValid(date, formatDDMMYY)) { + System.out.println("CassandraRequester.readJobByDay: Wrong date format for " + date); + return null; } - return dateWorkSt; + SetDateRange(); + long day = DateParsing(date, formatDDMMYY); + if (day < earlestDate || day > currentDate) + return null; + + if (status == JobStatus.OK) { + return db.ReadProteinData(day, date); + } + return db.ReadFailedJobs(day, date, status); } /* - * convert String date:time into long date:time (miliseconds since the epoch start) + * query: protein sequence */ - private static long TimeConvert(String datInput) { - long dateWorkSt = 0; - if (datInput == null) { - return dateWorkSt; + public List readProteins(String protIn, String searchtype) { + List result; + if (searchtype.equals("whole")) + result = db.ReadWholeSequence(protIn); + else + result = db.ReadPartOfSequence(protIn); + if (result == null) + return null; + + if (searchtype.equals("partial")) { + for (ProteinBean entry : result) { + entry.setSubProt(CreateSubprot(entry.getSequence(), protIn)); + } } - SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss"); - try { - dateWorkSt = formatter.parse(datInput).getTime(); - } catch (ParseException e) { - e.printStackTrace(); + return result; + } + + /* + * query: protein feature + */ + public Map readProteinsPrediction(String feature, int percent) { + Map result = db.ReadProtein(); + ; + if (result == null) + return null; + Map query = new HashMap(); + for (Map.Entry entry : result.entrySet()) { + String pred = entry.getValue(); + if (pred.replaceAll("[^" + feature + "]", "").length() > pred.length() * percent / 100 && (!entry.getKey().equals(""))) { + // if (!entry.getKey().equals("")) + query.put(entry.getKey(), pred); + } } - return dateWorkSt; + return query; } - // convert long to date in string format - private static String DateFormat(long inDate) { - SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy"); - String dateString = datformat.format(new Date(inDate)); - return dateString; + /* + * query protein sequences with number of jobs + */ + public List readProteinByCounter(int minimalcounter) { + List query = new ArrayList(); + Map map = db.ReadProteinSequenceByCounter(); + if (map == null) + return null; + for (Map.Entry entry : map.entrySet()) { + if (entry.getValue() > minimalcounter) { + TotalByCounterBean bean = new TotalByCounterBean(); + bean.setTotaljobs(entry.getValue()); + bean.setName(entry.getKey()); + query.add(bean); + } + } + return query; + } + + /* + * query ip with number of jobs + */ + public List readIpByCounter(Integer minimalcounter) { + List query = new ArrayList(); + Map map = db.ReadIpByCounter(); + if (minimalcounter == null) + minimalcounter = 0; + if (map == null) + return null; + for (Map.Entry entry : map.entrySet()) { + if (entry.getValue() > minimalcounter) { + TotalByCounterBean bean = new TotalByCounterBean(); + bean.setTotaljobs(entry.getValue()); + bean.setName(entry.getKey()); + query.add(bean); + } + } + return query; } /* - * convert ??? + * query jobs log info */ - public static String DateFormatYYMMDD(long indate) { - SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd"); - String dateString = datformat.format(new Date(indate)); - return dateString; + public JobBean readJobLog(String jobid) { + if (jobid == null) + return null; + return db.ReadJobLog(jobid); } /* - * ??? + * query jobs by ipStructureJobLog */ - public long CountID(String id) { - /* - SliceQuery sliceQuery = HFactory.createSliceQuery(DBInstance.GetKeyspace(), StringSerializer.get(), - StringSerializer.get(), StringSerializer.get()); - sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100); - QueryResult> result = sliceQuery.execute(); - String datBegin = result.get().getColumnByName("DataBegin").getValue(); - String datEnd = result.get().getColumnByName("DataEnd").getValue(); + public UserBean readIp(String ip) { + if (ip == null) + return null; + Map res = db.ReadIpWithJobs(ip); + if (res == null) + return null; + UserBean query = new UserBean(ip); + query.setMainInfo(res); + return query; + } + + /* + * create list of parts of protein sequence; + */ + private static List CreateSubprot(String protein, String subprot) { + List sub = new ArrayList(); + String subStr = protein; + while (subStr.length() > 0 && subStr.contains(subprot)) { + String first = subStr.substring(0, subStr.indexOf(subprot)); + if (first.length() > 0) + sub.add(first); + sub.add(subprot); + subStr = subStr.substring(subStr.indexOf(subprot) + subprot.length(), subStr.length()); + } + if (subStr.length() > 0) + sub.add(subStr); + return sub; + } + + /* + * convert String date into long date (miliseconds since the epoch start) + */ + private static long DateParsing(String datInput, SimpleDateFormat formatter) { + if (datInput == null) { + return 0; + } + long dateWorkSt = 0; + + try { + dateWorkSt = formatter.parse(datInput).getTime(); + } catch (ParseException e) { + e.printStackTrace(); + } + return dateWorkSt; + } - long datBeginLong = TimeConvert(datBegin); - long datEndLong = TimeConvert(datEnd); - return (datEndLong - datBeginLong) / 1000; - */ - return 0; + // convert long to date in string format + private static String DateFormat(long inDate) { + SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy"); + return datformat.format(new Date(inDate)); } /* - * set earlest date and current dates. - * earlestDate is static and should be set at the 1st call - * currentDate should be re-calculated every time + * set earlest date and current dates. earlestDate is static and should be + * set at the 1st call currentDate should be re-calculated every time */ private static void SetDateRange() { + Calendar cal = Calendar.getInstance(); + currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH), + formatYYMMDD); if (0 == earlestDate) { - StatisticsProt sp = new StatisticsProt(); - earlestDate = sp.earliestDate(); - System.out.println("Set earlest Date = " + earlestDate); + CassandraRequester cr = new CassandraRequester(); + earlestDate = cr.earliestDate(); } - Calendar cal = Calendar.getInstance(); - currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH)); } - public boolean isThisDateValid(String dateToValidate) { + public boolean isThisDateValid(String dateToValidate, SimpleDateFormat sdf) { if (dateToValidate == null || dateToValidate.equals("")) { - System.out.println("Undefined date"); return false; } - SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd"); try { // if not valid, this will throw ParseException sdf.setLenient(false); @@ -199,31 +353,26 @@ public class CassandraRequester { * find the earliest date in the database */ public long earliestDate() { - /* - ArrayList dateSort = new ArrayList(); - int row_count = 10000; - RangeSlicesQuery result = HFactory.createRangeSlicesQuery(DBInstance.GetKeyspace(), LongSerializer.get(), - StringSerializer.get(), StringSerializer.get()); - result.setColumnFamily("ProteinData"); - result.setRange(null, null, false, Integer.MAX_VALUE); - result.setRowCount(row_count); - Long last_key = null; - while (true) { - result.setKeys(last_key, null); - QueryResult> columnSlice = result.execute(); - OrderedRows rows = columnSlice.get(); - Iterator> rowsIterator = rows.iterator(); - while (rowsIterator.hasNext()) { - Row row = rowsIterator.next(); - last_key = row.getKey(); - dateSort.add(last_key); - } - if (rows.getCount() < row_count) - break; + earlestDate = CassandraNativeConnector.getEarliestDateInDB(); + return earlestDate; + } + + /** + * prepares an example of either job id or IP for the DB + * + * @param exampletype + * defines which example you need (an existing job from the DB - + * jobid, an IP - "ip") + * @return a string representation of the requested example, if the example + * type is not known empty string is returned + */ + public String getExample(String exampletype) { + if (exampletype.equals("jobid")) { + return "jp_NzBOJKo"; + } else if (exampletype.equals("ip")) { + return "127.0.0.1"; } - Collections.sort(dateSort); - return dateSort.get(0); - */ - return 0; + return ""; } + }