f4c4c18fa37105fc947a86d0a3e9580efc866784
[proteocache.git] / server / compbio / statistic / CassandraRequester.java
1 package compbio.statistic;
2
3 import java.text.ParseException;
4 import java.text.SimpleDateFormat;
5 import java.util.ArrayList;
6 import java.util.Calendar;
7 import java.util.Date;
8 import java.util.Iterator;
9 import java.util.List;
10 import java.util.Map;
11
12 import compbio.cassandra.AnnotatedProteinSequenceBean;
13 import compbio.cassandra.CassandraNativeConnector;
14 import compbio.cassandra.CassandraReader;
15 import compbio.cassandra.DataBase;
16 import compbio.cassandra.Pair;
17 import compbio.cassandra.StructureJobLog;
18
19 public class CassandraRequester {
20         private CassandraReader db = new CassandraReader();
21         private ArrayList<DataBase> query;
22         private static long currentDate = 0;
23         private static long earlestDate = 0;
24         private final static SimpleDateFormat formatYYMMDD = new SimpleDateFormat("yyyy/MM/dd");
25         private final static SimpleDateFormat formatDDMMYY = new SimpleDateFormat("dd/MM/yyyy");
26
27         /*
28          * query: execution time for the period from date1 till date2
29          */
30         public List<DataBase> extractExecutionTime(String date1, String date2) {
31                 if (null == date1) {
32                         date1 = "1970/1/1";
33                 }
34                 if (null == date2) {
35                         date1 = "2100/1/1";
36                 }
37                 if (!isThisDateValid(date1,formatYYMMDD) || !isThisDateValid(date2,formatYYMMDD)) {
38                         System.out.println("Wrong date: point 3");
39                         return null;
40                 }
41                 SetDateRange();
42                 int nbins = 5;
43                 long dateStart = DateParsing(date1, formatYYMMDD);
44                 long dateEnd = DateParsing(date2, formatYYMMDD);
45                 if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd)
46                         return null;
47                 if (dateStart < earlestDate)
48                         dateStart = earlestDate;
49                 if (dateEnd > currentDate)
50                         dateStart = currentDate;
51
52                 Calendar start = Calendar.getInstance();
53                 start.setTime(new Date(dateStart));
54                 Calendar end = Calendar.getInstance();
55                 end.setTime(new Date(dateEnd));
56                 query = new ArrayList<DataBase>();
57                 List<Integer> totalTime = new ArrayList<Integer>();
58                 for (int i = 0; i < nbins; i++)
59                         totalTime.add(i, 0);
60                 List<Pair<String, String>> res = db.ReadProteinDataTable();
61                 List<Pair<Date, Long>> numres = new ArrayList<Pair<Date, Long>>();
62
63                 for (Pair<String, String> entry : res) {
64                         SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
65                         try {
66                                 Date jobstartdate = dateformatter.parse(entry.getElement0());
67                                 long date = jobstartdate.getTime();
68                                 if (dateStart <= date && date <= dateEnd) {
69                                         SimpleDateFormat datetimeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
70                                         Date jobstarttime = datetimeformatter.parse(entry.getElement0());
71                                         Date jobendtime = datetimeformatter.parse(entry.getElement1());
72                                         long diff = (jobendtime.getTime() - jobstarttime.getTime()) / 1000;
73                                         Pair<Date, Long> pair = new Pair<Date, Long>(jobstartdate, Long.valueOf(diff));
74                                         numres.add(pair);
75                                 }
76                         } catch (ParseException e) {
77                                 e.printStackTrace();
78                         }
79                 }
80
81                 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
82                         List<Integer> timeResult = new ArrayList<Integer>();
83                         for (int i = 0; i < nbins; i++)
84                                 timeResult.add(i, 0);
85                         for (Pair<Date, Long> p : numres) {
86                                 if (date.equals(p.getElement0())) {
87                                         long lenResult = p.getElement1().longValue();
88                                         if (lenResult <= 30)
89                                                 timeResult.set(0, timeResult.get(0) + 1);
90                                         else if (lenResult > 30 && lenResult <= 60)
91                                                 timeResult.set(1, timeResult.get(1) + 1);
92                                         else if (lenResult > 60 && lenResult <= 120)
93                                                 timeResult.set(2, timeResult.get(2) + 1);
94                                         else if (lenResult > 120 && lenResult <= 600)
95                                                 timeResult.set(3, timeResult.get(3) + 1);
96                                         else {
97                                                 timeResult.set(4, timeResult.get(4) + 1);
98                                         }
99                                 }
100                         }
101                         for (int i = 0; i < nbins; i++)
102                                 totalTime.set(i, totalTime.get(i) + timeResult.get(i));
103                         DataBase db = new DataBase();
104                         db.setTimeRez(timeResult);
105                         db.setDate(DateFormat(date.getTime()));
106                         query.add(db);
107                 }
108
109                 DataBase db = new DataBase();
110                 db.setTimeTotalExec(totalTime);
111                 query.add(db);
112                 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
113                 return query;
114         }
115         
116         /*
117          * query: total number of jobs  for the period from date1 till date2
118          */
119         public List<DataBase> countJobs(String date1, String date2) {
120                 if (null == date1) {
121                         date1 = "1970/1/1";
122                 }
123                 if (null == date2) {
124                         date1 = "2100/1/1";
125                 }
126                 if (!isThisDateValid(date1, formatYYMMDD) || !isThisDateValid(date2, formatYYMMDD)) {
127                         System.out.println("Wrong date: point 3");
128                         return null;
129                 }
130                 SetDateRange();
131                 long dateStart = DateParsing(date1, formatYYMMDD);
132                 long dateEnd = DateParsing(date2, formatYYMMDD);
133                 if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd)
134                         return null;
135                 if (dateStart < earlestDate)
136                         dateStart = earlestDate;
137                 if (dateEnd > currentDate)
138                         dateStart = currentDate;
139
140                 Calendar start = Calendar.getInstance();
141                 start.setTime(new Date(dateStart));
142                 Calendar end = Calendar.getInstance();
143                 end.setTime(new Date(dateEnd));
144                 query = new ArrayList<DataBase>();
145                 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
146                         List<Long> res = db.ReadDateTable(date.getTime());
147                         if (res == null)
148                                 continue;
149                         DataBase bean = new DataBase();
150                         bean.setTotal((int)(long)res.get(0));
151                         bean.setTotalOK((int)(long)res.get(1));
152                         bean.setTotalStopped((int)(long)res.get(2));
153                         bean.setTotalError((int)(long)res.get(3));
154                         bean.setTotalTimeOut((int)(long)res.get(4));
155                         bean.setDate(DateFormat(date.getTime()));
156                         query.add(bean);
157                 }
158                 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
159                 return query;
160         }
161
162         /*
163          * query: jobs and sequence at date
164          */
165         public List<DataBase> readJobByDay (String date) {
166                 System.out.println(date);
167                 if (null == date) {
168                         return null;
169                 }
170                 if (!isThisDateValid(date, formatDDMMYY)) {
171                         System.out.println("Wrong date: point 3");
172                         return null;
173                 }
174                 SetDateRange();
175                 long day = DateParsing(date, formatDDMMYY);
176                 System.out.println(day);
177                 if (day < earlestDate || day > currentDate)
178                         return null;    
179                 List<Pair<String, String>>      res = db.ReadProteinData(day);
180                 if (res == null)
181                         return null;
182                 query = new ArrayList<DataBase>();
183                 for (Pair<String, String> entry : res) {                
184                         DataBase bean = new DataBase();
185                         bean.setDate(date);
186                         bean.setId(entry.getElement0());
187                         bean.setProt(entry.getElement1());
188                         query.add(bean);
189                 }
190                 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
191                 return query;
192         }
193
194
195         /* 
196          * query: protein sequence
197          * */
198         public List<DataBase> readProteins(String protIn, String flag) {
199                 query = new ArrayList<DataBase>();
200                 List<AnnotatedProteinSequenceBean> res;
201                 if (flag.equals("whole")) 
202                         res = db.ReadWholeSequence(protIn);
203                  else 
204                         res = db.ReadPartOfSequence(protIn);
205                 if (res == null)
206                         return null;
207                 for (AnnotatedProteinSequenceBean entry : res) {
208                         DataBase db = new DataBase();
209                         db.setProt(entry.getSequence());
210                         db.setPredictions(entry);
211                         db.setId(entry.getJobid());
212                         if (flag.equals("part")) {
213                                 db.setSubProt(CreateSubprot (entry.getSequence(), protIn));
214                         }
215                         query.add(db);
216                 }
217                 return query;
218         }
219         
220
221         /* 
222          * query protein sequences with number of jobs
223          */
224         public List<DataBase> readProteinByCounter(int minimalcounter) {
225                 query = new ArrayList<DataBase>();
226                 Map<String, Integer> map = db.ReadProteinSequenceByCounter();
227                 for (Map.Entry<String, Integer> entry : map.entrySet()) {
228                         if (entry.getValue() > minimalcounter && entry.getKey().length() > 0) {
229                                 DataBase bean = new DataBase();
230                                 bean.setTotalId(entry.getValue());
231                                 bean.setProt(entry.getKey());
232                                 query.add(bean);
233                         }
234                 }
235                 return query;
236         }
237         
238         /* 
239          * query ip with number of jobs
240          */
241         public List<DataBase> readIpByCounter(Integer minimalcounter) {
242                 query = new ArrayList<DataBase>();
243                 Map<String, Integer> map = db.ReadIpByCounter();
244                 if (minimalcounter == null)
245                         minimalcounter = 0;
246                 if (map == null)
247                         return null;
248                 for (Map.Entry<String, Integer> entry : map.entrySet()) {
249                         if (entry.getValue() > minimalcounter) {
250                                 DataBase bean = new DataBase();
251                                 bean.setTotalId(entry.getValue());
252                                 bean.setIp(entry.getKey());
253                                 query.add(bean);
254                         }
255                 }
256                 return query;
257         }
258         /*
259          * query jobs log info
260          */
261         public DataBase readJobLog(String jobid) {
262                 if (jobid == null)
263                         return null;
264                 StructureJobLog res = db.ReadJobLog(jobid);
265                 if (res == null)
266                         return null;
267                 DataBase query = new DataBase();
268                 query.setLogInfo(res);
269                 return query;
270         }
271         
272         
273         /*
274          * query jobs by ipStructureJobLog
275          */
276         public List<DataBase> readIp(String ip) {
277                 if (ip == null)
278                         return null;
279                 List<Pair<String, String>> res = db.ReadIpWithJobs(ip);
280         //      System.out.println(res.size());
281                 if (res == null) 
282                         return null;
283                 query = new ArrayList<DataBase>();
284                 for (Pair<String, String> entry : res) {        
285                         System.out.println("ip " + ip);
286                         System.out.println("id " + entry.getElement0());
287                         DataBase bean = new DataBase();
288                         bean.setIp(ip);
289                         bean.setId(entry.getElement0());
290                         bean.setProt(entry.getElement1());
291                         query.add(bean);
292                 }
293                 System.out.println(query.size());
294                 return query;
295         }
296         
297         /*
298          * create list of parts of protein sequence;
299          */
300         private static List<String> CreateSubprot (String protein, String subprot) {
301                 List<String> sub = new ArrayList<String>();
302                 String subStr = protein;
303                 while (subStr.length() > 0 && subStr.contains(subprot)) {
304                         String first = subStr.substring(0, subStr.indexOf(subprot));
305                         if (first.length() > 0)
306                                 sub.add(first);
307                         sub.add(subprot);
308                         subStr = subStr.substring(subStr.indexOf(subprot) + subprot.length(), subStr.length());
309                 }
310                 if (subStr.length() > 0)
311                         sub.add(subStr);
312                 return sub;
313         }
314         /*
315          * convert String date into long date (miliseconds since the epoch start)
316          */
317         private static long DateParsing(String datInput, SimpleDateFormat formatter) {
318                 if (datInput == null) {
319                         return 0;
320                 }
321                 long dateWorkSt = 0;
322                 
323                 try {
324                         dateWorkSt = formatter.parse(datInput).getTime();
325                 } catch (ParseException e) {
326                         e.printStackTrace();
327                 }
328                 return dateWorkSt;
329         }
330
331         // convert long to date in string format
332         private static String DateFormat(long inDate) {
333                 SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
334                 return datformat.format(new Date(inDate));
335         }
336
337         /*
338          * set earlest date and current dates. earlestDate is static and should be
339          * set at the 1st call currentDate should be re-calculated every time
340          */
341         private static void SetDateRange() {
342                 Calendar cal = Calendar.getInstance();
343                 currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH), formatYYMMDD);
344                 if (0 == earlestDate) {
345                         CassandraRequester cr = new CassandraRequester();
346                         earlestDate = cr.earliestDate();
347                         System.out.println("Set earlest Date = " + earlestDate);
348                 }
349         }
350
351         public boolean isThisDateValid(String dateToValidate, SimpleDateFormat sdf) {
352                 if (dateToValidate == null || dateToValidate.equals("")) {
353                         System.out.println("Undefined date");
354                         return false;
355                 }
356                 try {
357                         // if not valid, this will throw ParseException
358                         sdf.setLenient(false);
359                         Date date = sdf.parse(dateToValidate);
360                 } catch (ParseException e) {
361                         e.printStackTrace();
362                         return false;
363                 }
364                 return true;
365         }
366
367         /*
368          * find the earliest date in the database
369          */
370         public long earliestDate() {
371                 earlestDate = CassandraNativeConnector.getEarliestDateInDB();
372                 return earlestDate;
373         }
374         
375 }