Add comments and make Time execution report more granular
[proteocache.git] / server / compbio / statistic / StatisticsProt.java
1 package compbio.statistic;
2
3 import java.text.ParseException;
4 import java.text.SimpleDateFormat;
5 import java.util.ArrayList;
6 import java.util.Calendar;
7 import java.util.Collections;
8 import java.util.Date;
9 import java.util.Iterator;
10 import java.util.List;
11
12 import me.prettyprint.cassandra.serializers.LongSerializer;
13 import me.prettyprint.cassandra.serializers.StringSerializer;
14 import me.prettyprint.hector.api.beans.ColumnSlice;
15 import me.prettyprint.hector.api.beans.HColumn;
16 import me.prettyprint.hector.api.beans.OrderedRows;
17 import me.prettyprint.hector.api.beans.Row;
18 import me.prettyprint.hector.api.factory.HFactory;
19 import me.prettyprint.hector.api.query.QueryResult;
20 import me.prettyprint.hector.api.query.RangeSlicesQuery;
21 import me.prettyprint.hector.api.query.SliceQuery;
22 import compbio.cassandra.CassandraCreate;
23 import compbio.cassandra.DataBase;
24
25 public class StatisticsProt {
26         private CassandraCreate cc = new CassandraCreate();
27         private ArrayList<DataBase> query;
28         private static long currentDate = 0;
29         private static long earlestDate = 0;
30
31         /* 
32          * query: the period from date1 till date2
33          * */
34         public List<DataBase> readDetails(String date1, String date2) {
35
36                 if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
37                         System.out.println("Wrong date: point 1");
38                         return null;
39                 }
40                 SetDateRange();
41                 long dateStart = DateParsing(date1);
42                 long dateEnd = DateParsing(date2);
43                 if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate) || dateStart > dateEnd)
44                         return null;
45                 if (dateStart < earlestDate)
46                         dateStart = earlestDate;
47                 if (dateEnd > currentDate)
48                         dateStart = currentDate;
49                 System.out.println("StatisticsProt.readDetails: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
50                 System.out.println("StatisticsProt.readDetails: Start date " + date1 + ": int representation = " + dateStart);
51                 System.out.println("StatisticsProt.readDetails: End date " + date2 + ": int representation = " + dateEnd);
52                 Calendar start = Calendar.getInstance();
53                 start.setTime(new Date(dateStart));
54                 Calendar end = Calendar.getInstance();
55                 end.setTime(new Date(dateEnd));
56                 query = new ArrayList<DataBase>();
57                 int day = 0;
58                 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
59                         SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
60                                         StringSerializer.get(), StringSerializer.get());
61                         result.setColumnFamily("ProteinData");
62                         result.setKey(date.getTime());
63                         result.setRange(null, null, false, Integer.MAX_VALUE);
64                         QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
65                         ++day;
66                         System.out.print("Day " + day + ": dataStart = " + date + ": ");
67                         if (!columnSlice.get().getColumns().isEmpty()) {
68                                 DataBase db = new DataBase(DateFormat(date.getTime()), columnSlice.get().getColumns().size());
69                                 query.add(db);
70                                 System.out.println("data exist");
71                         } else {
72                                 System.out.println("no data");
73                         }
74                 }
75                 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
76                 return query;
77         }
78
79         /*
80          * query: execution time for the period from date1 till date2
81          * */
82         public List<DataBase> readLength(String date1, String date2) {
83                 if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
84                         System.out.println("Wrong date: point 3");
85                         return null;
86                 }
87                 SetDateRange();
88                 int nbins = 5;
89                 long dateStart = DateParsing(date1);
90                 long dateEnd = DateParsing(date2);
91                 if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate) || dateStart > dateEnd)
92                         return null;
93                 if (dateStart < earlestDate)
94                         dateStart = earlestDate;
95                 if (dateEnd > currentDate)
96                         dateStart = currentDate;
97                 System.out.println("StatisticsProt.readLength: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
98                 System.out.println("StatisticsProt.readLength: Start date is " + date1 + ": int representation = " + dateStart);
99                 System.out.println("StatisticsProt.readLength: End date is " + date2 + ": int representation = " + dateEnd);
100                 Calendar start = Calendar.getInstance();
101                 start.setTime(new Date(dateStart));
102                 Calendar end = Calendar.getInstance();
103                 end.setTime(new Date(dateEnd));
104                 query = new ArrayList<DataBase>();
105                 List<Integer> totalTime = new ArrayList<Integer>();
106                 for (int i = 0; i < nbins; i++)
107                         totalTime.add(i, 0);
108                 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
109                         List<Integer> timeResult = new ArrayList<Integer>();
110                         SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
111                                         StringSerializer.get(), StringSerializer.get());
112                         result.setColumnFamily("ProteinData");
113                         result.setKey(date.getTime());
114                         result.setRange(null, null, false, Integer.MAX_VALUE);
115                         QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
116                         List<HColumn<String, String>> col = columnSlice.get().getColumns();
117                         if (!col.isEmpty()) {
118                                 Iterator<HColumn<String, String>> itCol = col.iterator();
119                                 for (int i = 0; i < nbins; i++)
120                                         timeResult.add(i, 0);
121                                 // split all jobs into nbins bins
122                                 while (itCol.hasNext()) {
123                                         String id = itCol.next().getName();
124                                         long lenResult = CountID(id);
125                                         if (lenResult <= 30)
126                                                 timeResult.set(0, timeResult.get(0) + 1);
127                                         else if (lenResult > 30 && lenResult <= 60)
128                                                 timeResult.set(1, timeResult.get(1) + 1);
129                                         else if (lenResult > 60 && lenResult <= 120)
130                                                 timeResult.set(2, timeResult.get(2) + 1);
131                                         else if (lenResult > 120 && lenResult <= 600)
132                                                 timeResult.set(3, timeResult.get(3) + 1);
133                                         else {
134                                                 timeResult.set(4, timeResult.get(4) + 1);
135                                         }
136                                 }
137                                 for (int i = 0; i < nbins; i++)
138                                         totalTime.set(i, totalTime.get(i) + timeResult.get(i));
139                                 DataBase db = new DataBase();
140                                 db.setTimeRez(timeResult);
141                                 db.setDate(DateFormat(date.getTime()));
142                                 query.add(db);
143                         }
144                 }
145                 DataBase db = new DataBase();
146                 db.setTimeTotalExec(totalTime);
147                 query.add(db);
148                 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
149                 return query;
150         }
151
152         /* 
153          * query: protein sequence
154          * */
155         public List<DataBase> readProteins(String protIn) {
156                 query = new ArrayList<DataBase>();
157                 SliceQuery<String, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
158                                 StringSerializer.get(), StringSerializer.get());
159                 result.setColumnFamily("ProteinRow");
160                 result.setKey(protIn);
161                 result.setRange(null, null, false, Integer.MAX_VALUE);
162                 QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
163                 Iterator<HColumn<String, String>> it = columnSlice.get().getColumns().iterator();
164                 while (it.hasNext()) {
165                         HColumn<String, String> col = it.next();
166                         String name = col.getName();
167                         if (name.matches("(.*)jnetpred")) {
168                                 DataBase db = new DataBase();
169                                 db.setProt(protIn);
170                                 db.setId(col.getName());
171                                 db.setJpred(col.getValue());
172                                 query.add(db);
173                         }
174                 }
175                 return query;
176         }
177
178         /* 
179          * query by a protein sequence
180          * */
181         public List<DataBase> readProtID(int counter) {
182                 query = new ArrayList<DataBase>();
183                 int row_count = 100000000;
184                 RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
185                                 StringSerializer.get(), StringSerializer.get());
186                 result.setColumnFamily("ProteinRow");
187                 result.setRange(null, null, false, Integer.MAX_VALUE);
188                 result.setRowCount(row_count);
189                 String last_key = null;
190                 while (true) {
191                         result.setKeys(last_key, null);
192                         QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
193                         OrderedRows<String, String, String> rows = columnSlice.get();
194                         Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
195                         while (rowsIterator.hasNext()) {
196                                 Row<String, String, String> row = rowsIterator.next();
197                                 last_key = row.getKey();
198                                 List<HColumn<String, String>> clms = row.getColumnSlice().getColumns();
199                                 int npred = 0;
200                                 for (HColumn<String, String> cln : clms) {
201                                         String name = cln.getName();
202                                         if (name.matches("(.*)jnetpred")) {
203                                                 ++npred;
204                                         }
205                                 }
206                                 if (npred > counter) {
207                                         DataBase db = new DataBase();
208                                         db.setProt(last_key);
209                                         db.setTotalId(npred);
210                                         query.add(db);
211                                 }
212                         }
213                         if (rows.getCount() < row_count)
214                                 break;
215                 }
216                 return query;
217         }
218
219         /* 
220          * query by a part of sequence
221          * */
222         public List<DataBase> readPart(String protIn) {
223                 int row_count = 10000;
224                 query = new ArrayList<DataBase>();
225                 RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
226                                 StringSerializer.get(), StringSerializer.get());
227                 result.setColumnFamily("ProteinRow");
228                 result.setRange(null, null, false, Integer.MAX_VALUE);
229                 result.setRowCount(row_count);
230                 String last_key = null;
231                 while (true) {
232                         result.setKeys(last_key, null);
233                         QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
234                         OrderedRows<String, String, String> rows = columnSlice.get();
235                         Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
236                         while (rowsIterator.hasNext()) {
237                                 Row<String, String, String> row = rowsIterator.next();
238                                 last_key = row.getKey();
239                                 if (last_key.matches("(.*)" + protIn + "(.*)")) {
240                                         Iterator<HColumn<String, String>> it = row.getColumnSlice().getColumns().iterator();
241                                         while (it.hasNext()) {
242                                                 HColumn<String, String> col = it.next();
243                                                 List<String> subProt = new ArrayList<String>();
244                                                 String subStr = last_key;
245                                                 while (subStr.length() > 0 && subStr.contains(protIn)) {
246                                                         String first = subStr.substring(0, subStr.indexOf(protIn));
247                                                         if (first.length() > 0)
248                                                                 subProt.add(first);
249                                                         subProt.add(protIn);
250                                                         subStr = subStr.substring(subStr.indexOf(protIn) + protIn.length(), subStr.length());
251                                                 }
252                                                 if (subStr.length() > 0)
253                                                         subProt.add(subStr);
254                                                 String name = col.getName();
255                                                 if (name.matches("(.*)jnetpred")) {
256                                                         DataBase db = new DataBase();
257                                                         db.setProt(last_key);
258                                                         db.setId(col.getName());
259                                                         db.setJpred(col.getValue());
260                                                         db.setSubProt(subProt);
261                                                         query.add(db);
262                                                 }
263                                         }
264                                 }
265                         }
266                         if (rows.getCount() < row_count)
267                                 break;
268                 }
269                 return query;
270         }
271
272         /* 
273          * convert String date into long date (miliseconds since the epoch start)
274          */
275         private static long DateParsing(String datInput) {
276                 if (datInput == null) {
277                         return 0;
278                 }
279                 long dateWorkSt = 0;
280                 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
281                 try {
282                         dateWorkSt = formatter.parse(datInput).getTime();
283                 } catch (ParseException e) {
284                         e.printStackTrace();
285                 }
286                 return dateWorkSt;
287         }
288
289         /*
290          * convert String date:time into long date:time (miliseconds since the epoch start)
291          */
292         private static long TimeConvert(String datInput) {
293                 long dateWorkSt = 0;
294                 if (datInput == null) {
295                         return dateWorkSt;
296                 }
297                 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss");
298                 try {
299                         dateWorkSt = formatter.parse(datInput).getTime();
300                 } catch (ParseException e) {
301                         e.printStackTrace();
302                 }
303                 return dateWorkSt;
304         }
305
306         // convert long to date in string format
307         private static String DateFormat(long inDate) {
308                 SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
309                 String dateString = datformat.format(new Date(inDate));
310                 return dateString;
311         }
312
313         /*
314          * convert ???
315          */
316         public static String DateFormatYYMMDD(long indate) {
317                 SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd");
318                 String dateString = datformat.format(new Date(indate));
319                 return dateString;
320         }
321
322         /*
323          * ???
324          */
325         public long CountID(String id) {
326                 SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
327                                 StringSerializer.get(), StringSerializer.get());
328                 sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
329                 QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
330                 String datBegin = result.get().getColumnByName("DataBegin").getValue();
331                 String datEnd = result.get().getColumnByName("DataEnd").getValue();
332
333                 long datBeginLong = TimeConvert(datBegin);
334                 long datEndLong = TimeConvert(datEnd);
335                 return (datEndLong - datBeginLong) / 1000;
336         }
337
338         /*
339          * set earlest date and current dates. 
340          * earlestDate is static and should be set at the 1st call
341          * currentDate should be re-calculated every time
342          */
343         private static void SetDateRange() {
344                 if (0 == earlestDate) {
345                         StatisticsProt sp = new StatisticsProt();
346                         earlestDate = sp.earliestDate();
347                         System.out.println("Set earlest Date = " + earlestDate);
348                 }
349                 Calendar cal = Calendar.getInstance();
350                 currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH));
351         }
352
353         public boolean isThisDateValid(String dateToValidate) {
354                 if (dateToValidate == null || dateToValidate.equals("")) {
355                         System.out.println("Undefined date");
356                         return false;
357                 }
358                 SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd");
359                 try {
360                         // if not valid, this will throw ParseException
361                         sdf.setLenient(false);
362                         Date date = sdf.parse(dateToValidate);
363                 } catch (ParseException e) {
364                         e.printStackTrace();
365                         return false;
366                 }
367                 return true;
368         }
369
370         /*
371          * find the earliest date in the database
372          */
373         public long earliestDate() {
374                 ArrayList<Long> dateSort = new ArrayList<Long>();
375                 int row_count = 10000;
376                 RangeSlicesQuery<Long, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), LongSerializer.get(),
377                                 StringSerializer.get(), StringSerializer.get());
378                 result.setColumnFamily("ProteinData");
379                 result.setRange(null, null, false, Integer.MAX_VALUE);
380                 result.setRowCount(row_count);
381                 Long last_key = null;
382                 while (true) {
383                         result.setKeys(last_key, null);
384                         QueryResult<OrderedRows<Long, String, String>> columnSlice = result.execute();
385                         OrderedRows<Long, String, String> rows = columnSlice.get();
386                         Iterator<Row<Long, String, String>> rowsIterator = rows.iterator();
387                         while (rowsIterator.hasNext()) {
388                                 Row<Long, String, String> row = rowsIterator.next();
389                                 last_key = row.getKey();
390                                 dateSort.add(last_key);
391                         }
392                         if (rows.getCount() < row_count)
393                                 break;
394                 }
395                 Collections.sort(dateSort);
396                 return dateSort.get(0);
397         }
398 }