1 package compbio.statistic;
3 import java.text.ParseException;
4 import java.text.SimpleDateFormat;
5 import java.util.ArrayList;
6 import java.util.Calendar;
8 import java.util.Iterator;
12 import compbio.cassandra.AnnotatedProteinSequenceBean;
13 import compbio.cassandra.CassandraNativeConnector;
14 import compbio.cassandra.CassandraReader;
15 import compbio.cassandra.DataBase;
16 import compbio.cassandra.Pair;
17 import compbio.cassandra.StructureJobLog;
19 public class CassandraRequester {
// Reader through which the query methods of this class fetch their data.
20 private CassandraReader db = new CassandraReader();
// Result holder: the list-returning query methods assign a fresh ArrayList to it before filling it.
21 private ArrayList<DataBase> query;
// Date bounds as long values (DateParsing yields milliseconds since the epoch), shared by all
// instances. Both start at 0; SetDateRange() assigns them, and earliestDate() also assigns earlestDate.
22 private static long currentDate = 0;
23 private static long earlestDate = 0;
// Formatters for the two textual date layouts accepted by the query methods.
// NOTE(review): SimpleDateFormat is documented as not synchronized, and these instances are static
// and handed to isThisDateValid()/DateParsing(); concurrent callers would need external synchronization.
24 private final static SimpleDateFormat formatYYMMDD = new SimpleDateFormat("yyyy/MM/dd");
25 private final static SimpleDateFormat formatDDMMYY = new SimpleDateFormat("dd/MM/yyyy");
28 * query: execution time for the period from date1 till date2
30 public List<DataBase> extractExecutionTime(String date1, String date2) {
37 if (!isThisDateValid(date1,formatYYMMDD) || !isThisDateValid(date2,formatYYMMDD)) {
38 System.out.println("CassandraRequester.extractExecutionTime: wrong format for date1 " + date1 + "or date2 " + date2);
43 long dateStart = DateParsing(date1, formatYYMMDD);
44 long dateEnd = DateParsing(date2, formatYYMMDD);
45 if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd)
47 if (dateStart < earlestDate)
48 dateStart = earlestDate;
49 if (dateEnd > currentDate)
50 dateStart = currentDate;
52 Calendar start = Calendar.getInstance();
53 start.setTime(new Date(dateStart));
54 Calendar end = Calendar.getInstance();
55 end.setTime(new Date(dateEnd));
56 query = new ArrayList<DataBase>();
57 List<Integer> totalTime = new ArrayList<Integer>();
58 for (int i = 0; i < nbins; i++)
60 List<Pair<String, String>> res = db.ReadProteinDataTable();
61 List<Pair<Date, Long>> numres = new ArrayList<Pair<Date, Long>>();
63 for (Pair<String, String> entry : res) {
64 SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
66 Date jobstartdate = dateformatter.parse(entry.getElement0());
67 long date = jobstartdate.getTime();
68 if (dateStart <= date && date <= dateEnd) {
69 SimpleDateFormat datetimeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
70 Date jobstarttime = datetimeformatter.parse(entry.getElement0());
71 Date jobendtime = datetimeformatter.parse(entry.getElement1());
72 long diff = (jobendtime.getTime() - jobstarttime.getTime()) / 1000;
73 Pair<Date, Long> pair = new Pair<Date, Long>(jobstartdate, Long.valueOf(diff));
76 } catch (ParseException e) {
81 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
82 List<Integer> timeResult = new ArrayList<Integer>();
83 for (int i = 0; i < nbins; i++)
85 for (Pair<Date, Long> p : numres) {
86 if (date.equals(p.getElement0())) {
87 long lenResult = p.getElement1().longValue();
89 timeResult.set(0, timeResult.get(0) + 1);
90 else if (lenResult > 30 && lenResult <= 60)
91 timeResult.set(1, timeResult.get(1) + 1);
92 else if (lenResult > 60 && lenResult <= 120)
93 timeResult.set(2, timeResult.get(2) + 1);
94 else if (lenResult > 120 && lenResult <= 600)
95 timeResult.set(3, timeResult.get(3) + 1);
97 timeResult.set(4, timeResult.get(4) + 1);
101 for (int i = 0; i < nbins; i++)
102 totalTime.set(i, totalTime.get(i) + timeResult.get(i));
103 DataBase db = new DataBase();
104 db.setTimeRez(timeResult);
105 db.setDate(DateFormat(date.getTime()));
109 DataBase db = new DataBase();
110 db.setTimeTotalExec(totalTime);
116 * query: total number of jobs for the period from date1 till date2
118 public List<DataBase> countJobs(String date1, String date2) {
125 if (!isThisDateValid(date1, formatYYMMDD) || !isThisDateValid(date2, formatYYMMDD)) {
126 System.out.println("CassandraRequester.countJobs: wrong format for date1 " + date1 + "or date2 " + date2);
130 long dateStart = DateParsing(date1, formatYYMMDD);
131 long dateEnd = DateParsing(date2, formatYYMMDD);
132 if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd)
134 if (dateStart < earlestDate)
135 dateStart = earlestDate;
136 if (dateEnd > currentDate)
137 dateStart = currentDate;
139 Calendar start = Calendar.getInstance();
140 start.setTime(new Date(dateStart));
141 Calendar end = Calendar.getInstance();
142 end.setTime(new Date(dateEnd));
143 query = new ArrayList<DataBase>();
144 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
145 List<Long> res = db.ReadDateTable(date.getTime());
148 DataBase bean = new DataBase();
149 bean.setTotal((int)(long)res.get(0));
150 bean.setTotalOK((int)(long)res.get(1));
151 bean.setTotalStopped((int)(long)res.get(2));
152 bean.setTotalError((int)(long)res.get(3));
153 bean.setTotalTimeOut((int)(long)res.get(4));
154 bean.setDate(DateFormat(date.getTime()));
161 * query: jobs and sequence at date
163 public List<DataBase> readJobByDay (String date) {
// Looks up the records stored for one day and turns every returned pair into a DataBase bean.
// date is expected in the dd/MM/yyyy layout (it is validated and parsed with formatDDMMYY).
// NOTE(review): this listing omits some lines of the method (the statements taken when a check
// fails, and the ones that complete the loop and return) - confirm them in the full file.
167 if (!isThisDateValid(date, formatDDMMYY)) {
168 System.out.println("CassandraRequester.readJobByDay: Wrong date format for " + date);
// The day as a long value (DateParsing converts the string into milliseconds since the epoch).
172 long day = DateParsing(date, formatDDMMYY);
// Days outside [earlestDate, currentDate] are singled out here; the branch body is not visible in this listing.
173 if (day < earlestDate || day > currentDate)
175 List<Pair<String, String>> res = db.ReadProteinData(day);
178 query = new ArrayList<DataBase>();
179 for (Pair<String, String> entry : res) {
180 DataBase bean = new DataBase();
// First pair element is stored as the bean id, second as the protein sequence.
182 bean.setId(entry.getElement0());
183 bean.setProt(entry.getElement1());
191 * query: protein sequence
193 public List<DataBase> readProteins(String protIn, String flag) {
// Searches stored sequences for protIn and wraps every hit in a DataBase bean
// (sequence, the annotated entry itself as predictions, and the job id).
// flag "whole" selects db.ReadWholeSequence(); the other visible call is db.ReadPartOfSequence()
// (presumably the else branch - that line is not visible in this listing).
// NOTE(review): flag.equals(...) throws NullPointerException when flag is null.
194 query = new ArrayList<DataBase>();
195 List<AnnotatedProteinSequenceBean> res;
196 if (flag.equals("whole"))
197 res = db.ReadWholeSequence(protIn);
199 res = db.ReadPartOfSequence(protIn);
202 for (AnnotatedProteinSequenceBean entry : res) {
// Note: this local variable shadows the CassandraReader field "db" for the rest of the loop body.
203 DataBase db = new DataBase();
204 db.setProt(entry.getSequence());
205 db.setPredictions(entry);
206 db.setId(entry.getJobid());
// For flag "part" the sequence is additionally split around the occurrences of protIn.
207 if (flag.equals("part")) {
208 db.setSubProt(CreateSubprot (entry.getSequence(), protIn));
217 * query protein sequences with number of jobs
219 public List<DataBase> readProteinByCounter(int minimalcounter) {
// Builds one bean per protein sequence whose counter is strictly greater than minimalcounter.
// NOTE(review): the statements that store the bean and return the result are not visible in this listing.
220 query = new ArrayList<DataBase>();
// Map of sequence -> counter as delivered by the reader.
221 Map<String, Integer> map = db.ReadProteinSequenceByCounter();
222 for (Map.Entry<String, Integer> entry : map.entrySet()) {
// Entries with an empty sequence key are ignored.
223 if (entry.getValue() > minimalcounter && entry.getKey().length() > 0) {
224 DataBase bean = new DataBase();
225 bean.setTotalId(entry.getValue());
226 bean.setProt(entry.getKey());
234 * query ip with number of jobs
236 public List<DataBase> readIpByCounter(Integer minimalcounter) {
// Builds one bean per IP whose counter is strictly greater than minimalcounter.
// NOTE(review): the handling of a null minimalcounter and the statements that store the bean and
// return the result are not visible in this listing.
237 query = new ArrayList<DataBase>();
// Map of ip -> counter as delivered by the reader.
238 Map<String, Integer> map = db.ReadIpByCounter();
239 if (minimalcounter == null)
243 for (Map.Entry<String, Integer> entry : map.entrySet()) {
// The comparison unboxes minimalcounter, so it must be non-null by this point.
244 if (entry.getValue() > minimalcounter) {
245 DataBase bean = new DataBase();
246 bean.setTotalId(entry.getValue());
247 bean.setIp(entry.getKey());
254 * query jobs log info
256 public DataBase readJobLog(String jobid) {
// Fetches the log structure of one job from the reader and wraps it in a DataBase bean.
// NOTE(review): lines between the visible statements (and the return) are missing from this listing.
259 StructureJobLog res = db.ReadJobLog(jobid);
// Local variable; it shadows the list field "query" of the class inside this method.
262 DataBase query = new DataBase();
263 query.setLogInfo(res);
269 * query jobs by ip
271 public List<DataBase> readIp(String ip) {
// Fetches the pairs stored for the given ip and turns each of them into a DataBase bean.
// NOTE(review): some lines of this method (between the visible statements and at its end) are
// missing from this listing.
274 List<Pair<String, String>> res = db.ReadIpWithJobs(ip);
277 query = new ArrayList<DataBase>();
278 for (Pair<String, String> entry : res) {
279 DataBase bean = new DataBase();
// First pair element is stored as the bean id, second as the protein sequence.
281 bean.setId(entry.getElement0());
282 bean.setProt(entry.getElement1());
289 * create list of parts of protein sequence;
291 private static List<String> CreateSubprot (String protein, String subprot) {
292 List<String> sub = new ArrayList<String>();
293 String subStr = protein;
294 while (subStr.length() > 0 && subStr.contains(subprot)) {
295 String first = subStr.substring(0, subStr.indexOf(subprot));
296 if (first.length() > 0)
299 subStr = subStr.substring(subStr.indexOf(subprot) + subprot.length(), subStr.length());
301 if (subStr.length() > 0)
306 * convert String date into long date (milliseconds since the epoch start)
308 private static long DateParsing(String datInput, SimpleDateFormat formatter) {
// Parses datInput with the given formatter and takes the resulting Date's getTime() value.
// NOTE(review): the value returned for a null input, the declaration of dateWorkSt, the body of
// the catch block and the final return are not visible in this listing.
309 if (datInput == null) {
315 dateWorkSt = formatter.parse(datInput).getTime();
// A string the formatter cannot parse ends up here.
316 } catch (ParseException e) {
322 // convert long to date in string format
323 private static String DateFormat(long inDate) {
// Renders inDate (passed to the Date(long) constructor, i.e. milliseconds since the epoch) as dd/MM/yyyy.
// A new formatter is created on every call; no time zone is set on it, so SimpleDateFormat's default applies.
324 SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
325 return datformat.format(new Date(inDate));
329 * set the earliest and the current dates. earlestDate is static and is
330 * set on the first call only; currentDate is re-calculated on every call
332 private static void SetDateRange() {
// Refreshes the static date bounds used by the query methods.
333 Calendar cal = Calendar.getInstance();
// Today's year/month/day is written as "y/M/d" and parsed back with formatYYMMDD, which drops the
// time-of-day part. Calendar.MONTH is zero-based, hence the + 1.
334 currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH), formatYYMMDD);
// earlestDate is only looked up while it still holds its initial value 0.
335 if (0 == earlestDate) {
// A temporary instance is created just to call the instance method earliestDate().
336 CassandraRequester cr = new CassandraRequester();
337 earlestDate = cr.earliestDate();
341 public boolean isThisDateValid(String dateToValidate, SimpleDateFormat sdf) {
342 if (dateToValidate == null || dateToValidate.equals("")) {
346 // if not valid, this will throw ParseException
347 sdf.setLenient(false);
348 Date date = sdf.parse(dateToValidate);
349 } catch (ParseException e) {
357 * find the earliest date in the database
359 public long earliestDate() {
// Stores the value obtained from CassandraNativeConnector.getEarliestDateInDB() in the static field earlestDate.
// NOTE(review): the return statement is not visible in this listing.
360 earlestDate = CassandraNativeConnector.getEarliestDateInDB();