PROT-7 fixed report by dates
[proteocache.git] / server / compbio / statistic / CassandraRequester.java
1 package compbio.statistic;
2
3 import java.text.ParseException;
4 import java.text.SimpleDateFormat;
5 import java.util.ArrayList;
6 import java.util.Calendar;
7 import java.util.Date;
8 import java.util.Iterator;
9 import java.util.List;
10 import java.util.Map;
11
12 import compbio.cassandra.CassandraNativeConnector;
13 import compbio.cassandra.CassandraReader;
14 import compbio.cassandra.DataBase;
15 import compbio.cassandra.Pair;
16 import compbio.cassandra.StructureJobLog;
17 import compbio.cassandra.StructureProteinPrediction;
18
19 public class CassandraRequester {
20         private CassandraReader db = new CassandraReader();
21         private ArrayList<DataBase> query;
22         private static long currentDate = 0;
23         private static long earlestDate = 0;
24
25         /*
26          * query: execution time for the period from date1 till date2
27          */
28         public List<DataBase> extractExecutionTime(String date1, String date2) {
29                 if (null == date1) {
30                         date1 = "1970/1/1";
31                 }
32                 if (null == date2) {
33                         date1 = "2100/1/1";
34                 }
35                 if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
36                         System.out.println("Wrong date: point 3");
37                         return null;
38                 }
39                 SetDateRange();
40                 int nbins = 5;
41                 long dateStart = DateParsing(date1);
42                 long dateEnd = DateParsing(date2);
43                 if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd)
44                         return null;
45                 if (dateStart < earlestDate)
46                         dateStart = earlestDate;
47                 if (dateEnd > currentDate)
48                         dateStart = currentDate;
49
50                 Calendar start = Calendar.getInstance();
51                 start.setTime(new Date(dateStart));
52                 Calendar end = Calendar.getInstance();
53                 end.setTime(new Date(dateEnd));
54                 query = new ArrayList<DataBase>();
55                 List<Integer> totalTime = new ArrayList<Integer>();
56                 for (int i = 0; i < nbins; i++)
57                         totalTime.add(i, 0);
58                 List<Pair<String, String>> res = db.ReadProteinDataTable();
59                 List<Pair<Date, Long>> numres = new ArrayList<Pair<Date, Long>>();
60
61                 for (Pair<String, String> entry : res) {
62                         SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
63                         try {
64                                 Date jobstartdate = dateformatter.parse(entry.getElement0());
65                                 long date = jobstartdate.getTime();
66                                 if (dateStart <= date && date <= dateEnd) {
67                                         SimpleDateFormat datetimeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
68                                         Date jobstarttime = datetimeformatter.parse(entry.getElement0());
69                                         Date jobendtime = datetimeformatter.parse(entry.getElement1());
70                                         long diff = (jobendtime.getTime() - jobstarttime.getTime()) / 1000;
71                                         Pair<Date, Long> pair = new Pair<Date, Long>(jobstartdate, Long.valueOf(diff));
72                                         numres.add(pair);
73                                 }
74                         } catch (ParseException e) {
75                                 e.printStackTrace();
76                         }
77                 }
78
79                 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
80                         List<Integer> timeResult = new ArrayList<Integer>();
81                         for (int i = 0; i < nbins; i++)
82                                 timeResult.add(i, 0);
83                         for (Pair<Date, Long> p : numres) {
84                                 if (date.equals(p.getElement0())) {
85                                         long lenResult = p.getElement1().longValue();
86                                         if (lenResult <= 30)
87                                                 timeResult.set(0, timeResult.get(0) + 1);
88                                         else if (lenResult > 30 && lenResult <= 60)
89                                                 timeResult.set(1, timeResult.get(1) + 1);
90                                         else if (lenResult > 60 && lenResult <= 120)
91                                                 timeResult.set(2, timeResult.get(2) + 1);
92                                         else if (lenResult > 120 && lenResult <= 600)
93                                                 timeResult.set(3, timeResult.get(3) + 1);
94                                         else {
95                                                 timeResult.set(4, timeResult.get(4) + 1);
96                                         }
97                                 }
98                         }
99                         for (int i = 0; i < nbins; i++)
100                                 totalTime.set(i, totalTime.get(i) + timeResult.get(i));
101                         DataBase db = new DataBase();
102                         db.setTimeRez(timeResult);
103                         db.setDate(DateFormat(date.getTime()));
104                         query.add(db);
105                 }
106
107                 DataBase db = new DataBase();
108                 db.setTimeTotalExec(totalTime);
109                 query.add(db);
110                 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
111                 return query;
112         }
113         
114         /*
115          * query: total number of jobs  for the period from date1 till date2
116          */
117         public List<DataBase> countJobs(String date1, String date2) {
118                 if (null == date1) {
119                         date1 = "1970/1/1";
120                 }
121                 if (null == date2) {
122                         date1 = "2100/1/1";
123                 }
124                 if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
125                         System.out.println("Wrong date: point 3");
126                         return null;
127                 }
128                 SetDateRange();
129                 long dateStart = DateParsing(date1);
130                 long dateEnd = DateParsing(date2);
131                 if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd)
132                         return null;
133                 if (dateStart < earlestDate)
134                         dateStart = earlestDate;
135                 if (dateEnd > currentDate)
136                         dateStart = currentDate;
137
138                 Calendar start = Calendar.getInstance();
139                 start.setTime(new Date(dateStart));
140                 Calendar end = Calendar.getInstance();
141                 end.setTime(new Date(dateEnd));
142                 query = new ArrayList<DataBase>();
143                 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
144                         List<Long> res = db.ReadDateTable(date.getTime());
145                         if (res == null)
146                                 continue;
147                         DataBase bean = new DataBase();
148                         bean.setTotal((int)(long)res.get(0));
149                         bean.setTotalOK((int)(long)res.get(1));
150                         bean.setTotalStopped((int)(long)res.get(2));
151                         bean.setTotalError((int)(long)res.get(3));
152                         bean.setTotalTimeOut((int)(long)res.get(4));
153                         bean.setDate(DateFormat(date.getTime()));
154                         query.add(bean);
155                 }
156                 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
157                 return query;
158         }
159         /* 
160          * query: protein sequence
161          * */
162         public List<DataBase> readProteins(String protIn, String flag) {
163                 query = new ArrayList<DataBase>();
164                 List<StructureProteinPrediction> res;
165                 if (flag.equals("whole")) 
166                         res = db.ReadWholeSequence(protIn);
167                  else 
168                         res = db.ReadPartOfSequence(protIn);
169                 if (res == null)
170                         return null;
171                 for (StructureProteinPrediction entry : res) {
172                         Map<String,String> pred = entry.getPrediction();
173                         Iterator it = pred.entrySet().iterator();
174                         while (it.hasNext()) {
175                                 DataBase db = new DataBase();
176                                 db.setProt(entry.getSequence());
177                                 Map.Entry pairs = (Map.Entry)it.next();
178                                 db.setId(entry.getJobid());
179                                 db.setJpred(pairs.getValue().toString());
180                                 if (flag.equals("part"))
181                                         db.setSubProt(CreateSubprot (entry.getSequence(), protIn));                             
182                                 query.add(db);
183                         }
184                 }
185                 return query;
186         }
187         
188         /* 
189          * query protein sequences with number of jobs
190          */
191         public List<DataBase> readProteinByCounter(int minimalcounter) {
192                 query = new ArrayList<DataBase>();
193                 Map<String, Integer> map = db.ReadProteinSequenceByCounter();
194                 for (Map.Entry<String, Integer> entry : map.entrySet()) {
195                         if (entry.getValue() > minimalcounter && entry.getKey().length() > 0) {
196                                 DataBase bean = new DataBase();
197                                 bean.setTotalId(entry.getValue());
198                                 bean.setProt(entry.getKey());
199                                 query.add(bean);
200                         }
201                 }
202                 return query;
203         }
204         
205         /*
206          * query jobs log info
207          */
208         public DataBase readJobLog(String jobid) {
209         //      query = new ArrayList<DataBase>();
210                 StructureJobLog res = db.ReadJobLog(jobid);
211                 DataBase query = new DataBase();
212                 query.setLogInfo(res);
213         //      query.setres);
214                 return query;
215         }
216         /*
217          * create list of parts of protein sequence;
218          */
219         private static List<String> CreateSubprot (String protein, String subprot) {
220                 List<String> sub = new ArrayList<String>();
221                 String subStr = protein;
222                 while (subStr.length() > 0 && subStr.contains(subprot)) {
223                         String first = subStr.substring(0, subStr.indexOf(subprot));
224                         if (first.length() > 0)
225                                 sub.add(first);
226                         sub.add(subprot);
227                         subStr = subStr.substring(subStr.indexOf(subprot) + subprot.length(), subStr.length());
228                 }
229                 if (subStr.length() > 0)
230                         sub.add(subStr);
231                 return sub;
232         }
233         /*
234          * convert String date into long date (miliseconds since the epoch start)
235          */
236         private static long DateParsing(String datInput) {
237                 if (datInput == null) {
238                         return 0;
239                 }
240                 long dateWorkSt = 0;
241                 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
242                 try {
243                         dateWorkSt = formatter.parse(datInput).getTime();
244                 } catch (ParseException e) {
245                         e.printStackTrace();
246                 }
247                 return dateWorkSt;
248         }
249
250         // convert long to date in string format
251         private static String DateFormat(long inDate) {
252                 SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
253                 return datformat.format(new Date(inDate));
254         }
255
256         /*
257          * set earlest date and current dates. earlestDate is static and should be
258          * set at the 1st call currentDate should be re-calculated every time
259          */
260         private static void SetDateRange() {
261                 Calendar cal = Calendar.getInstance();
262                 currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH));
263                 if (0 == earlestDate) {
264                         CassandraRequester cr = new CassandraRequester();
265                         earlestDate = cr.earliestDate();
266                         System.out.println("Set earlest Date = " + earlestDate);
267                 }
268         }
269
270         public boolean isThisDateValid(String dateToValidate) {
271                 if (dateToValidate == null || dateToValidate.equals("")) {
272                         System.out.println("Undefined date");
273                         return false;
274                 }
275                 SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd");
276                 try {
277                         // if not valid, this will throw ParseException
278                         sdf.setLenient(false);
279                         Date date = sdf.parse(dateToValidate);
280                 } catch (ParseException e) {
281                         e.printStackTrace();
282                         return false;
283                 }
284                 return true;
285         }
286
287         /*
288          * find the earliest date in the database
289          */
290         public long earliestDate() {
291                 earlestDate = CassandraNativeConnector.getEarliestDateInDB();
292                 return earlestDate;
293         }
294         
295 }