1 package compbio.statistic;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import compbio.cassandra.CassandraNativeConnector;
import compbio.cassandra.CassandraReader;
import compbio.cassandra.DataBase;
import compbio.cassandra.Pair;
import compbio.cassandra.ProteinBean;
import compbio.cassandra.StructureJobLog;
19 public class CassandraRequester {
20 private CassandraReader db = new CassandraReader();
21 private ArrayList<DataBase> query;
22 private static long currentDate = 0;
23 private static long earlestDate = 0;
24 private final static SimpleDateFormat formatYYMMDD = new SimpleDateFormat("yyyy/MM/dd");
25 private final static SimpleDateFormat formatDDMMYY = new SimpleDateFormat("dd/MM/yyyy");
28 * query: execution time for the period from date1 till date2
30 public List<DataBase> extractExecutionTime(String date1, String date2) {
37 if (!isThisDateValid(date1,formatYYMMDD) || !isThisDateValid(date2,formatYYMMDD)) {
38 System.out.println("Wrong date: point 3");
43 long dateStart = DateParsing(date1, formatYYMMDD);
44 long dateEnd = DateParsing(date2, formatYYMMDD);
45 if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd)
47 if (dateStart < earlestDate)
48 dateStart = earlestDate;
49 if (dateEnd > currentDate)
50 dateStart = currentDate;
52 Calendar start = Calendar.getInstance();
53 start.setTime(new Date(dateStart));
54 Calendar end = Calendar.getInstance();
55 end.setTime(new Date(dateEnd));
56 query = new ArrayList<DataBase>();
57 List<Integer> totalTime = new ArrayList<Integer>();
58 for (int i = 0; i < nbins; i++)
60 List<Pair<String, String>> res = db.ReadProteinDataTable();
61 List<Pair<Date, Long>> numres = new ArrayList<Pair<Date, Long>>();
63 for (Pair<String, String> entry : res) {
64 SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
66 Date jobstartdate = dateformatter.parse(entry.getElement0());
67 long date = jobstartdate.getTime();
68 if (dateStart <= date && date <= dateEnd) {
69 SimpleDateFormat datetimeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
70 Date jobstarttime = datetimeformatter.parse(entry.getElement0());
71 Date jobendtime = datetimeformatter.parse(entry.getElement1());
72 long diff = (jobendtime.getTime() - jobstarttime.getTime()) / 1000;
73 Pair<Date, Long> pair = new Pair<Date, Long>(jobstartdate, Long.valueOf(diff));
76 } catch (ParseException e) {
81 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
82 List<Integer> timeResult = new ArrayList<Integer>();
83 for (int i = 0; i < nbins; i++)
85 for (Pair<Date, Long> p : numres) {
86 if (date.equals(p.getElement0())) {
87 long lenResult = p.getElement1().longValue();
89 timeResult.set(0, timeResult.get(0) + 1);
90 else if (lenResult > 30 && lenResult <= 60)
91 timeResult.set(1, timeResult.get(1) + 1);
92 else if (lenResult > 60 && lenResult <= 120)
93 timeResult.set(2, timeResult.get(2) + 1);
94 else if (lenResult > 120 && lenResult <= 600)
95 timeResult.set(3, timeResult.get(3) + 1);
97 timeResult.set(4, timeResult.get(4) + 1);
101 for (int i = 0; i < nbins; i++)
102 totalTime.set(i, totalTime.get(i) + timeResult.get(i));
103 DataBase db = new DataBase();
104 db.setTimeRez(timeResult);
105 db.setDate(DateFormat(date.getTime()));
109 DataBase db = new DataBase();
110 db.setTimeTotalExec(totalTime);
112 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
117 * query: total number of jobs for the period from date1 till date2
119 public List<DataBase> countJobs(String date1, String date2) {
126 if (!isThisDateValid(date1, formatYYMMDD) || !isThisDateValid(date2, formatYYMMDD)) {
127 System.out.println("Wrong date: point 3");
131 long dateStart = DateParsing(date1, formatYYMMDD);
132 long dateEnd = DateParsing(date2, formatYYMMDD);
133 if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd)
135 if (dateStart < earlestDate)
136 dateStart = earlestDate;
137 if (dateEnd > currentDate)
138 dateStart = currentDate;
140 Calendar start = Calendar.getInstance();
141 start.setTime(new Date(dateStart));
142 Calendar end = Calendar.getInstance();
143 end.setTime(new Date(dateEnd));
144 query = new ArrayList<DataBase>();
145 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
146 List<Long> res = db.ReadDateTable(date.getTime());
149 DataBase bean = new DataBase();
150 bean.setTotal((int)(long)res.get(0));
151 bean.setTotalOK((int)(long)res.get(1));
152 bean.setTotalStopped((int)(long)res.get(2));
153 bean.setTotalError((int)(long)res.get(3));
154 bean.setTotalTimeOut((int)(long)res.get(4));
155 bean.setDate(DateFormat(date.getTime()));
158 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
163 * query: jobs and sequence at date
165 public List<DataBase> readJobByDay (String date) {
166 System.out.println(date);
170 if (!isThisDateValid(date, formatDDMMYY)) {
171 System.out.println("Wrong date: point 3");
175 long day = DateParsing(date, formatDDMMYY);
176 System.out.println(day);
177 if (day < earlestDate || day > currentDate)
179 List<Pair<String, String>> res = db.ReadProteinData(day);
182 query = new ArrayList<DataBase>();
183 for (Pair<String, String> entry : res) {
184 DataBase bean = new DataBase();
186 bean.setId(entry.getElement0());
187 bean.setProt(entry.getElement1());
190 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
196 * query: protein sequence
198 public List<ProteinBean> readProteins(String protIn, String flag) {
199 List<ProteinBean> result;
200 if (flag.equals("whole"))
201 result = db.ReadWholeSequence(protIn);
203 result = db.ReadPartOfSequence(protIn);
207 if (flag.equals("part")) {
208 for (ProteinBean entry : result) {
209 entry.setSubProt(CreateSubprot(entry.getSequence(), protIn));
217 * query protein sequences with number of jobs
219 public List<DataBase> readProteinByCounter(int minimalcounter) {
220 query = new ArrayList<DataBase>();
221 Map<String, Integer> map = db.ReadProteinSequenceByCounter();
222 for (Map.Entry<String, Integer> entry : map.entrySet()) {
223 if (entry.getValue() > minimalcounter && entry.getKey().length() > 0) {
224 DataBase bean = new DataBase();
225 bean.setTotalId(entry.getValue());
226 bean.setProt(entry.getKey());
234 * query ip with number of jobs
236 public List<DataBase> readIpByCounter(Integer minimalcounter) {
237 query = new ArrayList<DataBase>();
238 Map<String, Integer> map = db.ReadIpByCounter();
239 if (minimalcounter == null)
243 for (Map.Entry<String, Integer> entry : map.entrySet()) {
244 if (entry.getValue() > minimalcounter) {
245 DataBase bean = new DataBase();
246 bean.setTotalId(entry.getValue());
247 bean.setIp(entry.getKey());
254 * query jobs log info
256 public DataBase readJobLog(String jobid) {
259 StructureJobLog res = db.ReadJobLog(jobid);
262 DataBase query = new DataBase();
263 query.setLogInfo(res);
269 * query jobs by ipStructureJobLog
271 public List<DataBase> readIp(String ip) {
274 List<Pair<String, String>> res = db.ReadIpWithJobs(ip);
275 // System.out.println(res.size());
278 query = new ArrayList<DataBase>();
279 for (Pair<String, String> entry : res) {
280 System.out.println("ip " + ip);
281 System.out.println("id " + entry.getElement0());
282 DataBase bean = new DataBase();
284 bean.setId(entry.getElement0());
285 bean.setProt(entry.getElement1());
288 System.out.println(query.size());
293 * create list of parts of protein sequence;
295 private static List<String> CreateSubprot (String protein, String subprot) {
296 List<String> sub = new ArrayList<String>();
297 String subStr = protein;
298 while (subStr.length() > 0 && subStr.contains(subprot)) {
299 String first = subStr.substring(0, subStr.indexOf(subprot));
300 if (first.length() > 0)
303 subStr = subStr.substring(subStr.indexOf(subprot) + subprot.length(), subStr.length());
305 if (subStr.length() > 0)
310 * convert String date into long date (miliseconds since the epoch start)
312 private static long DateParsing(String datInput, SimpleDateFormat formatter) {
313 if (datInput == null) {
319 dateWorkSt = formatter.parse(datInput).getTime();
320 } catch (ParseException e) {
326 // convert long to date in string format
327 private static String DateFormat(long inDate) {
328 SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
329 return datformat.format(new Date(inDate));
333 * set earlest date and current dates. earlestDate is static and should be
334 * set at the 1st call currentDate should be re-calculated every time
336 private static void SetDateRange() {
337 Calendar cal = Calendar.getInstance();
338 currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH), formatYYMMDD);
339 if (0 == earlestDate) {
340 CassandraRequester cr = new CassandraRequester();
341 earlestDate = cr.earliestDate();
342 System.out.println("Set earlest Date = " + earlestDate);
346 public boolean isThisDateValid(String dateToValidate, SimpleDateFormat sdf) {
347 if (dateToValidate == null || dateToValidate.equals("")) {
348 System.out.println("Undefined date");
352 // if not valid, this will throw ParseException
353 sdf.setLenient(false);
354 Date date = sdf.parse(dateToValidate);
355 } catch (ParseException e) {
363 * find the earliest date in the database
365 public long earliestDate() {
366 earlestDate = CassandraNativeConnector.getEarliestDateInDB();