1 package compbio.statistic;
3 import java.text.ParseException;
4 import java.text.SimpleDateFormat;
5 import java.util.ArrayList;
6 import java.util.Calendar;
8 import java.util.Iterator;
12 import compbio.cassandra.ProteinBean;
13 import compbio.cassandra.CassandraNativeConnector;
14 import compbio.cassandra.CassandraReader;
15 import compbio.cassandra.DataBase;
16 import compbio.cassandra.Pair;
17 import compbio.cassandra.StructureJobLog;
19 public class CassandraRequester {
// Reader through which the query methods of this class fetch their data.
20 private CassandraReader db = new CassandraReader();
// Result list; most list-returning query methods assign a fresh ArrayList to it on each call.
21 private ArrayList<DataBase> query;
// Date bounds used for range checks by the date-based queries. Both start at 0
// and are assigned in SetDateRange() / earliestDate().
22 private static long currentDate = 0;
23 private static long earlestDate = 0;
// Shared formatters for the two date layouts accepted by the public methods.
// NOTE(review): SimpleDateFormat is documented as not synchronized, and
// isThisDateValid() calls setLenient(false) on the formatter it is handed, so
// these static instances are mutated at runtime - confirm access is single-threaded.
24 private final static SimpleDateFormat formatYYMMDD = new SimpleDateFormat("yyyy/MM/dd");
25 private final static SimpleDateFormat formatDDMMYY = new SimpleDateFormat("dd/MM/yyyy");
28 * query: execution time for the period from date1 till date2
30 public List<DataBase> extractExecutionTime(String date1, String date2) {
// Builds, for every day between date1 and date2 (both in yyyy/MM/dd form), a
// DataBase bean holding that day's job counts per execution-time bin, plus one
// bean holding the per-bin totals over the whole period.
// The number of bins (nbins) is declared on a line not shown in this view; the
// branches below address bin indexes 0 through 4.
37 if (!isThisDateValid(date1,formatYYMMDD) || !isThisDateValid(date2,formatYYMMDD)) {
38 System.out.println("CassandraRequester.extractExecutionTime: wrong format for date1 " + date1 + " or date2 " + date2);
43 long dateStart = DateParsing(date1, formatYYMMDD);
44 long dateEnd = DateParsing(date2, formatYYMMDD);
// Reject periods that lie entirely outside [earlestDate, currentDate] or are reversed.
45 if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd)
// Clamp the requested period to the range for which data can exist.
47 if (dateStart < earlestDate)
48 dateStart = earlestDate;
// The end of the period is what gets capped at today. Assigning dateStart here
// would leave dateEnd in the future and move the start of the period to today.
49 if (dateEnd > currentDate)
50 dateEnd = currentDate;
52 Calendar start = Calendar.getInstance();
53 start.setTime(new Date(dateStart));
54 Calendar end = Calendar.getInstance();
55 end.setTime(new Date(dateEnd));
56 query = new ArrayList<DataBase>();
// Per-bin totals accumulated over all days of the period.
57 List<Integer> totalTime = new ArrayList<Integer>();
58 for (int i = 0; i < nbins; i++)
// Each entry pairs a job's start timestamp (element0) with its end timestamp (element1).
60 List<Pair<String, String>> res = db.ReadProteinDataTable();
// Collected as (start day, duration in seconds) for the jobs inside the period.
61 List<Pair<Date, Long>> numres = new ArrayList<Pair<Date, Long>>();
63 for (Pair<String, String> entry : res) {
// Parsing the start timestamp with the day-only pattern yields the job's start day.
64 SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
66 Date jobstartdate = dateformatter.parse(entry.getElement0());
67 long date = jobstartdate.getTime();
68 if (dateStart <= date && date <= dateEnd) {
69 SimpleDateFormat datetimeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
70 Date jobstarttime = datetimeformatter.parse(entry.getElement0());
71 Date jobendtime = datetimeformatter.parse(entry.getElement1());
// getTime() is in milliseconds, so dividing by 1000 gives whole seconds.
72 long diff = (jobendtime.getTime() - jobstarttime.getTime()) / 1000;
73 Pair<Date, Long> pair = new Pair<Date, Long>(jobstartdate, Long.valueOf(diff));
76 } catch (ParseException e) {
// Walk the period one calendar day at a time.
81 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
82 List<Integer> timeResult = new ArrayList<Integer>();
83 for (int i = 0; i < nbins; i++)
// A job is counted for the day whose Date equals its parsed start day.
85 for (Pair<Date, Long> p : numres) {
86 if (date.equals(p.getElement0())) {
87 long lenResult = p.getElement1().longValue();
// Duration bins in seconds: 1 = (30, 60], 2 = (60, 120], 3 = (120, 600].
// Bins 0 and 4 take the remaining cases; their conditions are on lines not shown in this view.
89 timeResult.set(0, timeResult.get(0) + 1);
90 else if (lenResult > 30 && lenResult <= 60)
91 timeResult.set(1, timeResult.get(1) + 1);
92 else if (lenResult > 60 && lenResult <= 120)
93 timeResult.set(2, timeResult.get(2) + 1);
94 else if (lenResult > 120 && lenResult <= 600)
95 timeResult.set(3, timeResult.get(3) + 1);
97 timeResult.set(4, timeResult.get(4) + 1);
// Fold this day's counts into the period totals.
101 for (int i = 0; i < nbins; i++)
102 totalTime.set(i, totalTime.get(i) + timeResult.get(i));
// Note: this local named db hides the CassandraReader field db within its scope.
103 DataBase db = new DataBase();
104 db.setTimeRez(timeResult);
105 db.setDate(DateFormat(date.getTime()));
// Bean carrying the totals for the whole period.
109 DataBase db = new DataBase();
110 db.setTimeTotalExec(totalTime);
116 * query: total number of jobs for the period from date1 till date2
118 public List<DataBase> countJobs(String date1, String date2) {
// Builds one DataBase bean per day between date1 and date2 (both in yyyy/MM/dd
// form), filled with that day's job counters as read through db.ReadDateTable.
125 if (!isThisDateValid(date1, formatYYMMDD) || !isThisDateValid(date2, formatYYMMDD)) {
126 System.out.println("CassandraRequester.countJobs: wrong format for date1 " + date1 + " or date2 " + date2);
130 long dateStart = DateParsing(date1, formatYYMMDD);
131 long dateEnd = DateParsing(date2, formatYYMMDD);
// Reject periods that lie entirely outside [earlestDate, currentDate] or are reversed.
132 if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd)
// Clamp the requested period to the range for which data can exist.
134 if (dateStart < earlestDate)
135 dateStart = earlestDate;
// The end of the period is what gets capped at today. Assigning dateStart here
// would leave dateEnd in the future and move the start of the period to today.
136 if (dateEnd > currentDate)
137 dateEnd = currentDate;
139 Calendar start = Calendar.getInstance();
140 start.setTime(new Date(dateStart));
141 Calendar end = Calendar.getInstance();
142 end.setTime(new Date(dateEnd));
143 query = new ArrayList<DataBase>();
// Walk the period one calendar day at a time.
144 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
145 List<Long> res = db.ReadDateTable(date.getTime());
// Positions 0..4 of the result feed total, OK, stopped, error and timed-out
// counts respectively; each Long is unboxed and then narrowed to int.
148 DataBase bean = new DataBase();
149 bean.setTotal((int)(long)res.get(0));
150 bean.setTotalOK((int)(long)res.get(1));
151 bean.setTotalStopped((int)(long)res.get(2));
152 bean.setTotalError((int)(long)res.get(3));
153 bean.setTotalTimeOut((int)(long)res.get(4));
154 bean.setDate(DateFormat(date.getTime()));
161 * query: jobs and sequence at date
163 public List<DataBase> readJobByDay (String date) {
// Lists the jobs recorded for a single day. Unlike the period queries, the date
// is expected in dd/MM/yyyy form (formatDDMMYY).
167 if (!isThisDateValid(date, formatDDMMYY)) {
168 System.out.println("CassandraRequester.readJobByDay: Wrong date format for " + date);
172 long day = DateParsing(date, formatDDMMYY);
// Days outside [earlestDate, currentDate] are handled by a statement not shown in this view.
173 if (day < earlestDate || day > currentDate)
175 List<Pair<String, String>> res = db.ReadProteinData(day);
178 query = new ArrayList<DataBase>();
// Each pair becomes one bean: element0 is stored as the id, element1 as the protein.
179 for (Pair<String, String> entry : res) {
180 DataBase bean = new DataBase();
182 bean.setId(entry.getElement0());
183 bean.setProt(entry.getElement1());
191 * query: protein sequence
193 public List<ProteinBean> readProteins(String protIn, String flag) {
// Looks up protein records for protIn. The flag selects the lookup:
// "whole" uses db.ReadWholeSequence, and db.ReadPartOfSequence is used otherwise
// (presumably via an else on a line not shown in this view).
// NOTE(review): flag.equals(...) throws NullPointerException for a null flag.
194 List<ProteinBean> result;
195 if (flag.equals("whole"))
196 result = db.ReadWholeSequence(protIn);
198 result = db.ReadPartOfSequence(protIn);
// For partial matches, attach to each bean the fragment list produced by
// CreateSubprot from the stored sequence and the searched fragment.
202 if (flag.equals("part")) {
203 for (ProteinBean entry : result) {
204 entry.setSubProt(CreateSubprot(entry.getSequence(), protIn));
212 * query protein sequences with number of jobs
214 public List<DataBase> readProteinByCounter(int minimalcounter) {
// Builds beans for the protein sequences whose counter, as returned by
// db.ReadProteinSequenceByCounter, is strictly greater than minimalcounter.
215 query = new ArrayList<DataBase>();
216 Map<String, Integer> map = db.ReadProteinSequenceByCounter();
217 for (Map.Entry<String, Integer> entry : map.entrySet()) {
// Entries with an empty sequence key are skipped as well.
218 if (entry.getValue() > minimalcounter && entry.getKey().length() > 0) {
219 DataBase bean = new DataBase();
220 bean.setTotalId(entry.getValue());
221 bean.setProt(entry.getKey());
229 * query ip with number of jobs
231 public List<DataBase> readIpByCounter(Integer minimalcounter) {
// Builds beans for the IPs whose counter, as returned by db.ReadIpByCounter,
// is strictly greater than minimalcounter.
232 query = new ArrayList<DataBase>();
233 Map<String, Integer> map = db.ReadIpByCounter();
// A null threshold is handled by a statement not shown in this view; the
// comparison below unboxes minimalcounter and so relies on it being non-null.
234 if (minimalcounter == null)
238 for (Map.Entry<String, Integer> entry : map.entrySet()) {
239 if (entry.getValue() > minimalcounter) {
240 DataBase bean = new DataBase();
241 bean.setTotalId(entry.getValue());
242 bean.setIp(entry.getKey());
249 * query jobs log info
251 public DataBase readJobLog(String jobid) {
// Fetches the log record of one job through db.ReadJobLog and wraps it in a DataBase bean.
254 StructureJobLog res = db.ReadJobLog(jobid);
// Note: this local named query hides the List<DataBase> field query within this method.
257 DataBase query = new DataBase();
258 query.setLogInfo(res);
264 * query jobs by ip
266 public List<DataBase> readIp(String ip) {
// Lists the jobs that db.ReadIpWithJobs returns for the given IP.
269 List<Pair<String, String>> res = db.ReadIpWithJobs(ip);
272 query = new ArrayList<DataBase>();
// Each pair becomes one bean: element0 is stored as the id, element1 as the protein.
273 for (Pair<String, String> entry : res) {
274 DataBase bean = new DataBase();
276 bean.setId(entry.getElement0());
277 bean.setProt(entry.getElement1());
284 * create list of parts of protein sequence;
286 private static List<String> CreateSubprot (String protein, String subprot) {
// Scans protein from left to right, cutting it at each occurrence of subprot.
// What exactly is appended to the result list happens on lines not shown in this view.
// NOTE(review): with an empty subprot, contains("") is true and indexOf("") is 0,
// so the substring step below does not shorten subStr - confirm callers never
// pass an empty fragment, otherwise this loop may not terminate.
287 List<String> sub = new ArrayList<String>();
288 String subStr = protein;
289 while (subStr.length() > 0 && subStr.contains(subprot)) {
// Text in front of the next occurrence of subprot.
290 String first = subStr.substring(0, subStr.indexOf(subprot));
291 if (first.length() > 0)
// Continue with whatever follows that occurrence.
294 subStr = subStr.substring(subStr.indexOf(subprot) + subprot.length(), subStr.length());
// Remainder after the last occurrence, if any.
296 if (subStr.length() > 0)
301 * convert String date into long date (milliseconds since the epoch start)
303 private static long DateParsing(String datInput, SimpleDateFormat formatter) {
// Parses datInput with the supplied formatter and keeps Date.getTime() of the result.
// The handling of a null input and of a ParseException, and the declaration of
// dateWorkSt, are on lines not shown in this view.
304 if (datInput == null) {
310 dateWorkSt = formatter.parse(datInput).getTime();
311 } catch (ParseException e) {
317 // convert long to date in string format
// The input is passed to new Date(long), and the output uses the dd/MM/yyyy
// pattern with a formatter created on each call.
318 private static String DateFormat(long inDate) {
319 SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
320 return datformat.format(new Date(inDate));
324 * set the earliest and current dates. earlestDate is static and should be
325 * set at the 1st call; currentDate should be re-calculated every time
327 private static void SetDateRange() {
// Refreshes currentDate on every call and looks up earlestDate only while it is still 0.
328 Calendar cal = Calendar.getInstance();
// Today is rendered as year/month/day and parsed back with formatYYMMDD, so the
// stored value carries no time-of-day part. Calendar.MONTH is zero-based, hence the + 1.
329 currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH), formatYYMMDD);
330 if (0 == earlestDate) {
// A new CassandraRequester (and with it a new CassandraReader) is created only
// to call earliestDate(), which itself assigns the static earlestDate field.
331 CassandraRequester cr = new CassandraRequester();
332 earlestDate = cr.earliestDate();
336 public boolean isThisDateValid(String dateToValidate, SimpleDateFormat sdf) {
// Checks whether dateToValidate can be parsed strictly by sdf. The values returned
// on each path are on lines not shown in this view.
337 if (dateToValidate == null || dateToValidate.equals("")) {
341 // if not valid, this will throw ParseException
// Side effect: leniency is switched off on the caller's formatter and stays off
// afterwards; the callers in this class pass the shared static formatters.
342 sdf.setLenient(false);
// The parsed Date is not used on the visible lines; the call is made for its ParseException.
343 Date date = sdf.parse(dateToValidate);
344 } catch (ParseException e) {
352 * find the earliest date in the database
354 public long earliestDate() {
// Stores the value returned by CassandraNativeConnector.getEarliestDateInDB() in the
// static earlestDate field; the return statement is on a line not shown in this view.
355 earlestDate = CassandraNativeConnector.getEarliestDateInDB();