Comment wrong code
[proteocache.git] / server / compbio / statistic / StatisticsProt.java
1 package compbio.statistic;
2
3 import java.text.ParseException;
4 import java.text.SimpleDateFormat;
5 import java.util.ArrayList;
6 import java.util.Calendar;
7 import java.util.Collections;
8 import java.util.Date;
9 import java.util.Iterator;
10 import java.util.List;
11
12 import compbio.cassandra.CassandraNativeConnector;
13 import compbio.cassandra.DataBase;
14
15 public class StatisticsProt {
16         private CassandraNativeConnector cc = new CassandraNativeConnector();
17         private ArrayList<DataBase> query;
18         private static long currentDate = 0;
19         private static long earlestDate = 0;
20
21         /* 
22          * query: the period from date1 till date2
23          * */
24         public List<DataBase> readDetails(String date1, String date2) {
25
26                 if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
27                         System.out.println("Wrong date: point 1");
28                         return null;
29                 }
30                 SetDateRange();
31                 long dateStart = DateParsing(date1);
32                 long dateEnd = DateParsing(date2);
33                 if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate) || dateStart > dateEnd)
34                         return null;
35                 if (dateStart < earlestDate)
36                         dateStart = earlestDate;
37                 if (dateEnd > currentDate)
38                         dateStart = currentDate;
39                 System.out.println("StatisticsProt.readDetails: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
40                 System.out.println("StatisticsProt.readDetails: Start date " + date1 + ": int representation = " + dateStart);
41                 System.out.println("StatisticsProt.readDetails: End date " + date2 + ": int representation = " + dateEnd);
42                 Calendar start = Calendar.getInstance();
43                 start.setTime(new Date(dateStart));
44                 Calendar end = Calendar.getInstance();
45                 end.setTime(new Date(dateEnd));
46                 query = new ArrayList<DataBase>();
47                 int day = 0;
48                 /*
49                 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
50                         SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
51                                         StringSerializer.get(), StringSerializer.get());
52                         result.setColumnFamily("ProteinData");
53                         result.setKey(date.getTime());
54                         result.setRange(null, null, false, Integer.MAX_VALUE);
55                         QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
56                         ++day;
57                         System.out.print("Day " + day + ": dataStart = " + date + ": ");
58                         if (!columnSlice.get().getColumns().isEmpty()) {
59                                 DataBase db = new DataBase(DateFormat(date.getTime()), columnSlice.get().getColumns().size());
60                                 query.add(db);
61                                 System.out.println("data exist");
62                         } else {
63                                 System.out.println("no data");
64                         }
65                 }
66                 */
67                 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
68                 return query;
69         }
70
71         /*
72          * query: execution time for the period from date1 till date2
73          * */
74         public List<DataBase> readLength(String date1, String date2) {
75                 if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
76                         System.out.println("Wrong date: point 3");
77                         return null;
78                 }
79                 SetDateRange();
80                 int nbins = 5;
81                 long dateStart = DateParsing(date1);
82                 long dateEnd = DateParsing(date2);
83                 if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate) || dateStart > dateEnd)
84                         return null;
85                 if (dateStart < earlestDate)
86                         dateStart = earlestDate;
87                 if (dateEnd > currentDate)
88                         dateStart = currentDate;
89                 System.out.println("StatisticsProt.readLength: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
90                 System.out.println("StatisticsProt.readLength: Start date is " + date1 + ": int representation = " + dateStart);
91                 System.out.println("StatisticsProt.readLength: End date is " + date2 + ": int representation = " + dateEnd);
92                 Calendar start = Calendar.getInstance();
93                 start.setTime(new Date(dateStart));
94                 Calendar end = Calendar.getInstance();
95                 end.setTime(new Date(dateEnd));
96                 query = new ArrayList<DataBase>();
97                 List<Integer> totalTime = new ArrayList<Integer>();
98                 for (int i = 0; i < nbins; i++)
99                         totalTime.add(i, 0);
100                 /*
101                 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
102                         List<Integer> timeResult = new ArrayList<Integer>();
103                         SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
104                                         StringSerializer.get(), StringSerializer.get());
105                         result.setColumnFamily("ProteinData");
106                         result.setKey(date.getTime());
107                         result.setRange(null, null, false, Integer.MAX_VALUE);
108                         QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
109                         List<HColumn<String, String>> col = columnSlice.get().getColumns();
110                         if (!col.isEmpty()) {
111                                 Iterator<HColumn<String, String>> itCol = col.iterator();
112                                 for (int i = 0; i < nbins; i++)
113                                         timeResult.add(i, 0);
114                                 // split all jobs into nbins bins
115                                 while (itCol.hasNext()) {
116                                         String id = itCol.next().getName();
117                                         long lenResult = CountID(id);
118                                         if (lenResult <= 30)
119                                                 timeResult.set(0, timeResult.get(0) + 1);
120                                         else if (lenResult > 30 && lenResult <= 60)
121                                                 timeResult.set(1, timeResult.get(1) + 1);
122                                         else if (lenResult > 60 && lenResult <= 120)
123                                                 timeResult.set(2, timeResult.get(2) + 1);
124                                         else if (lenResult > 120 && lenResult <= 600)
125                                                 timeResult.set(3, timeResult.get(3) + 1);
126                                         else {
127                                                 timeResult.set(4, timeResult.get(4) + 1);
128                                         }
129                                 }
130                                 for (int i = 0; i < nbins; i++)
131                                         totalTime.set(i, totalTime.get(i) + timeResult.get(i));
132                                 DataBase db = new DataBase();
133                                 db.setTimeRez(timeResult);
134                                 db.setDate(DateFormat(date.getTime()));
135                                 query.add(db);
136                         }
137                 }
138                 */
139                 DataBase db = new DataBase();
140                 db.setTimeTotalExec(totalTime);
141                 query.add(db);
142                 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
143                 return query;
144         }
145
146         /* 
147          * query: protein sequence
148          * */
149         public List<DataBase> readProteins(String protIn) {
150                 query = new ArrayList<DataBase>();
151                 /*
152                 SliceQuery<String, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
153                                 StringSerializer.get(), StringSerializer.get());
154                 result.setColumnFamily("ProteinRow");
155                 result.setKey(protIn);
156                 result.setRange(null, null, false, Integer.MAX_VALUE);
157                 QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
158                 Iterator<HColumn<String, String>> it = columnSlice.get().getColumns().iterator();
159                 while (it.hasNext()) {
160                         HColumn<String, String> col = it.next();
161                         String name = col.getName();
162                         if (name.matches("(.*)jnetpred")) {
163                                 DataBase db = new DataBase();
164                                 db.setProt(protIn);
165                                 db.setId(col.getName());
166                                 db.setJpred(col.getValue());
167                                 query.add(db);
168                         }
169                 }
170                 */
171                 return query;
172         }
173
174         /* 
175          * query by a protein sequence
176          * */
177         public List<DataBase> readProtID(int counter) {
178                 query = new ArrayList<DataBase>();
179                 int row_count = 100;
180                 /*
181                 RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
182                                 StringSerializer.get(), StringSerializer.get());
183                 result.setColumnFamily("ProteinRow");
184                 result.setRange(null, null, false, 100);
185                 result.setRowCount(row_count);
186                 String last_key = null;
187                 while (true) {
188                         result.setKeys(last_key, null);
189                         QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
190                         OrderedRows<String, String, String> rows = columnSlice.get();
191                         Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
192                         while (rowsIterator.hasNext()) {
193                                 Row<String, String, String> row = rowsIterator.next();
194                                 last_key = row.getKey();
195                                 List<HColumn<String, String>> clms = row.getColumnSlice().getColumns();
196                                 //int npred = 0;
197                                 //for (HColumn<String, String> cln : clms) {
198                                 //      String name = cln.getName();
199                                 //      if (name.matches("(.*)jnetpred")) {
200                                 //              ++npred;
201                                 //      }
202                                 //}
203                                 int npred = clms.size();
204                                 if (npred > counter) {
205                                         DataBase db = new DataBase();
206                                         db.setProt(last_key);
207                                         db.setTotalId(npred);
208                                         query.add(db);
209                                 }
210                         }
211                         if (rows.getCount() < row_count)
212                                 break;
213                 }*/
214                 return query;
215         }
216
217         /* 
218          * query by a part of sequence
219          * */
220         public List<DataBase> readPart(String protIn) {
221                 int row_count = 10000;
222                 query = new ArrayList<DataBase>();
223                 /*
224                 RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
225                                 StringSerializer.get(), StringSerializer.get());
226                 result.setColumnFamily("ProteinRow");
227                 result.setRange(null, null, false, Integer.MAX_VALUE);
228                 result.setRowCount(row_count);
229                 String last_key = null;
230                 while (true) {
231                         result.setKeys(last_key, null);
232                         QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
233                         OrderedRows<String, String, String> rows = columnSlice.get();
234                         Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
235                         while (rowsIterator.hasNext()) {
236                                 Row<String, String, String> row = rowsIterator.next();
237                                 last_key = row.getKey();
238                                 if (last_key.matches("(.*)" + protIn + "(.*)")) {
239                                         Iterator<HColumn<String, String>> it = row.getColumnSlice().getColumns().iterator();
240                                         while (it.hasNext()) {
241                                                 HColumn<String, String> col = it.next();
242                                                 List<String> subProt = new ArrayList<String>();
243                                                 String subStr = last_key;
244                                                 while (subStr.length() > 0 && subStr.contains(protIn)) {
245                                                         String first = subStr.substring(0, subStr.indexOf(protIn));
246                                                         if (first.length() > 0)
247                                                                 subProt.add(first);
248                                                         subProt.add(protIn);
249                                                         subStr = subStr.substring(subStr.indexOf(protIn) + protIn.length(), subStr.length());
250                                                 }
251                                                 if (subStr.length() > 0)
252                                                         subProt.add(subStr);
253                                                 String name = col.getName();
254                                                 if (name.matches("(.*)jnetpred")) {
255                                                         DataBase db = new DataBase();
256                                                         db.setProt(last_key);
257                                                         db.setId(col.getName());
258                                                         db.setJpred(col.getValue());
259                                                         db.setSubProt(subProt);
260                                                         query.add(db);
261                                                 }
262                                         }
263                                 }
264                         }
265                         if (rows.getCount() < row_count)
266                                 break;
267                 }
268                 */
269                 return query;
270         }
271
272         /* 
273          * convert String date into long date (miliseconds since the epoch start)
274          */
275         private static long DateParsing(String datInput) {
276                 if (datInput == null) {
277                         return 0;
278                 }
279                 long dateWorkSt = 0;
280                 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
281                 try {
282                         dateWorkSt = formatter.parse(datInput).getTime();
283                 } catch (ParseException e) {
284                         e.printStackTrace();
285                 }
286                 return dateWorkSt;
287         }
288
289         /*
290          * convert String date:time into long date:time (miliseconds since the epoch start)
291          */
292         private static long TimeConvert(String datInput) {
293                 long dateWorkSt = 0;
294                 if (datInput == null) {
295                         return dateWorkSt;
296                 }
297                 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss");
298                 try {
299                         dateWorkSt = formatter.parse(datInput).getTime();
300                 } catch (ParseException e) {
301                         e.printStackTrace();
302                 }
303                 return dateWorkSt;
304         }
305
306         // convert long to date in string format
307         private static String DateFormat(long inDate) {
308                 SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
309                 String dateString = datformat.format(new Date(inDate));
310                 return dateString;
311         }
312
313         /*
314          * convert ???
315          */
316         public static String DateFormatYYMMDD(long indate) {
317                 SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd");
318                 String dateString = datformat.format(new Date(indate));
319                 return dateString;
320         }
321
322         /*
323          * ???
324          */
325         public long CountID(String id) {
326                 /*
327                 SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
328                                 StringSerializer.get(), StringSerializer.get());
329                 sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
330                 QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
331                 String datBegin = result.get().getColumnByName("DataBegin").getValue();
332                 String datEnd = result.get().getColumnByName("DataEnd").getValue();
333
334                 long datBeginLong = TimeConvert(datBegin);
335                 long datEndLong = TimeConvert(datEnd);
336                 return (datEndLong - datBeginLong) / 1000;
337                 */
338                 return 0;
339         }
340
341         /*
342          * set earlest date and current dates. 
343          * earlestDate is static and should be set at the 1st call
344          * currentDate should be re-calculated every time
345          */
346         private static void SetDateRange() {
347                 if (0 == earlestDate) {
348                         StatisticsProt sp = new StatisticsProt();
349                         earlestDate = sp.earliestDate();
350                         System.out.println("Set earlest Date = " + earlestDate);
351                 }
352                 Calendar cal = Calendar.getInstance();
353                 currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH));
354         }
355
356         public boolean isThisDateValid(String dateToValidate) {
357                 if (dateToValidate == null || dateToValidate.equals("")) {
358                         System.out.println("Undefined date");
359                         return false;
360                 }
361                 SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd");
362                 try {
363                         // if not valid, this will throw ParseException
364                         sdf.setLenient(false);
365                         Date date = sdf.parse(dateToValidate);
366                 } catch (ParseException e) {
367                         e.printStackTrace();
368                         return false;
369                 }
370                 return true;
371         }
372
373         /*
374          * find the earliest date in the database
375          */
376         public long earliestDate() {
377                 ArrayList<Long> dateSort = new ArrayList<Long>();
378                 int row_count = 10000;
379                 /*
380                 RangeSlicesQuery<Long, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), LongSerializer.get(),
381                                 StringSerializer.get(), StringSerializer.get());
382                 result.setColumnFamily("ProteinData");
383                 result.setRange(null, null, false, Integer.MAX_VALUE);
384                 result.setRowCount(row_count);
385                 Long last_key = null;
386                 while (true) {
387                         result.setKeys(last_key, null);
388                         QueryResult<OrderedRows<Long, String, String>> columnSlice = result.execute();
389                         OrderedRows<Long, String, String> rows = columnSlice.get();
390                         Iterator<Row<Long, String, String>> rowsIterator = rows.iterator();
391                         while (rowsIterator.hasNext()) {
392                                 Row<Long, String, String> row = rowsIterator.next();
393                                 last_key = row.getKey();
394                                 dateSort.add(last_key);
395                         }
396                         if (rows.getCount() < row_count)
397                                 break;
398                 }*/
399                 Collections.sort(dateSort);
400                 return dateSort.get(0);
401         }
402 }