done time execution for queries, query by counter of job, total in column in query...
[proteocache.git] / server / compbio / statistic / StatisticsProt.java
1 package compbio.statistic;
2
3 import java.text.ParseException;
4 import java.text.SimpleDateFormat;
5 import java.util.ArrayList;
6 import java.util.Calendar;
7 import java.util.Collections;
8 import java.util.Date;
9 import java.util.Iterator;
10 import java.util.List;
11
12 import me.prettyprint.cassandra.serializers.LongSerializer;
13 import me.prettyprint.cassandra.serializers.StringSerializer;
14 import me.prettyprint.hector.api.beans.ColumnSlice;
15 import me.prettyprint.hector.api.beans.HColumn;
16 import me.prettyprint.hector.api.beans.OrderedRows;
17 import me.prettyprint.hector.api.beans.Row;
18 import me.prettyprint.hector.api.factory.HFactory;
19 import me.prettyprint.hector.api.query.QueryResult;
20 import me.prettyprint.hector.api.query.RangeSlicesQuery;
21 import me.prettyprint.hector.api.query.SliceQuery;
22 import compbio.cassandra.CassandraCreate;
23 import compbio.cassandra.DataBase;
24
25 public class StatisticsProt {
26         private final static long MILLISECONDS_PER_DAY = 1000L * 60 * 60 * 24;
27         private CassandraCreate cc = new CassandraCreate();
28         private ArrayList<DataBase> query;
29         private static long currentDate = 0;
30         private static long earlestDate = 0;
31
32         /* query: the period from date1 till date2 */
33         public List<DataBase> readDetails(String date1, String date2) {
34                 if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
35                         System.out.println("Wrong date: point 1");
36                         return null;
37                 }
38                 SetDateRange();
39
40                 long dateStart = DateParsing(date1);
41                 long dateEnd = DateParsing(date2);
42                 if (dateStart < earlestDate)
43                         dateStart = earlestDate;
44                 if (dateStart > currentDate)
45                         dateStart = currentDate - MILLISECONDS_PER_DAY;
46                 if (dateEnd < earlestDate)
47                         dateStart = earlestDate + MILLISECONDS_PER_DAY;
48                 if (dateEnd > currentDate)
49                         dateStart = currentDate;
50                 System.out.println("StatisticsProt.readDetails: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
51                 System.out.println("StatisticsProt.readDetails: Start date " + date1 + ": int representation = " + dateStart);
52                 System.out.println("StatisticsProt.readDetails: End date " + date2 + ": int representation = " + dateEnd);
53
54                 query = new ArrayList<DataBase>();
55                 int day = 0;
56                 while (dateStart <= dateEnd) {
57                         SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
58                                         StringSerializer.get(), StringSerializer.get());
59                         result.setColumnFamily("ProteinData");
60                         result.setKey(dateStart);
61                         result.setRange(null, null, false, Integer.MAX_VALUE);
62                         QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
63                         ++day;
64                         System.out.print("Day " + day + ": dataStart = " + dateStart + ": ");
65                         if (!columnSlice.get().getColumns().isEmpty()) {
66                                 DataBase db = new DataBase(DateFormat(dateStart), columnSlice.get().getColumns().size());
67                                 query.add(db);
68                                 System.out.println("data exist");
69                         } else {
70                                 System.out.println("no data");
71                         }
72                         dateStart += MILLISECONDS_PER_DAY;
73                 }
74                 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
75                 return query;
76         }
77
78         /*
79          * query: execution time for the period from date1 till date2
80          */
81         public List<DataBase> readLength(String date1, String date2) {
82                 if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
83                         System.out.println("Wrong date: point 3");
84                         return null;
85                 }
86                 SetDateRange();
87
88                 long dateStart = DateParsing(date1);
89                 long dateEnd = DateParsing(date2);
90                 if (dateStart < earlestDate)
91                         dateStart = earlestDate;
92                 if (dateStart > currentDate)
93                         dateStart = currentDate - MILLISECONDS_PER_DAY;
94                 if (dateEnd < earlestDate)
95                         dateStart = earlestDate + MILLISECONDS_PER_DAY;
96                 if (dateEnd > currentDate)
97                         dateStart = currentDate;
98                 System.out.println("StatisticsProt.readLength: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
99                 System.out.println("StatisticsProt.readLength: Start date is " + date1 + ": int representation = " + dateStart);
100                 System.out.println("StatisticsProt.readLength: End date is " + date2 + ": int representation = " + dateEnd);
101
102                 query = new ArrayList<DataBase>();
103                 List<Integer> totalTime = new ArrayList<Integer>();
104                 for (int i = 0; i < 4; i++)
105                         totalTime.add(i, 0);
106                 while (dateStart <= dateEnd) {
107                         List<Integer> timeResult = new ArrayList<Integer>();
108                         SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
109                                         StringSerializer.get(), StringSerializer.get());
110                         result.setColumnFamily("ProteinData");
111                         result.setKey(dateStart);
112                         result.setRange(null, null, false, Integer.MAX_VALUE);
113                         QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
114                         List<HColumn<String, String>> col = columnSlice.get().getColumns();
115                         if (!col.isEmpty()) {
116                                 Iterator<HColumn<String, String>> itCol = col.iterator();
117                                 for (int i = 0; i < 4; i++)
118                                         timeResult.add(i, 0);
119                                 while (itCol.hasNext()) {
120                                         String id = itCol.next().getName();
121                                         long lenResult = CountID(id);
122                                         if (lenResult <= 30) 
123                                                 timeResult.set(0, timeResult.get(0) + 1);
124                                         else if (lenResult > 30 && lenResult <= 60)
125                                                 timeResult.set(1, timeResult.get(1) + 1);
126                                         else if (lenResult > 60 && lenResult <= 120)
127                                                 timeResult.set(2, timeResult.get(2) + 1);
128                                         else {
129                                                 timeResult.set(3, timeResult.get(3) + 1);
130                                         }
131                                 }
132                                 for (int i = 0; i < 4; i++)
133                                         totalTime.set(i, totalTime.get(i) + timeResult.get(i));
134                                 DataBase db = new DataBase();
135                                 db.setTimeRez(timeResult);
136                                 db.setDate(DateFormat(dateStart));
137                                 query.add(db);
138                         }
139                         dateStart += MILLISECONDS_PER_DAY;
140                 }
141                 DataBase db = new DataBase();
142                 db.setTimeTotalExec(totalTime);
143                 query.add(db);
144                 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
145                 return query;
146         }
147
148         /* query: protein sequence */
149         public List<DataBase> readProteins(String protIn) {
150                 query = new ArrayList<DataBase>();
151                 SliceQuery<String, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
152                                 StringSerializer.get(), StringSerializer.get());
153                 result.setColumnFamily("ProteinRow");
154                 result.setKey(protIn);
155                 result.setRange(null, null, false, Integer.MAX_VALUE);
156                 QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
157                 Iterator<HColumn<String, String>> it = columnSlice.get().getColumns().iterator();
158                 while (it.hasNext()) {
159                         HColumn<String, String> col = it.next();
160                         String name = col.getName();
161                         if (name.matches("(.*)jnetpred")) {
162                                 DataBase db = new DataBase();
163                                 db.setProt(protIn);
164                                 db.setId(col.getName());
165                                 db.setJpred(col.getValue());
166                                 query.add(db);
167                         }
168                 }
169                 return query;
170         }
171
172         // query by a protein sequence
173         public List<DataBase> readProtID(int counter) {
174                 query = new ArrayList<DataBase>();
175                 int row_count = 100000000;
176                 RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
177                                 StringSerializer.get(), StringSerializer.get());
178                 result.setColumnFamily("ProteinRow");
179                 result.setRange(null, null, false, Integer.MAX_VALUE);
180                 result.setRowCount(row_count);
181                 String last_key = null;
182                 while (true) {
183                         result.setKeys(last_key, null);
184                         QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
185                         OrderedRows<String, String, String> rows = columnSlice.get();
186                         Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
187                         while (rowsIterator.hasNext()) {
188                                 Row<String, String, String> row = rowsIterator.next();
189                                 last_key = row.getKey();
190                                 List<HColumn<String, String>> clms = row.getColumnSlice().getColumns();
191                                 int npred = 0;
192                                 for (HColumn<String, String> cln : clms) {
193                                         String name = cln.getName();
194                                         if (name.matches("(.*)jnetpred")) {
195                                                 ++npred;
196                                         }
197                                 }
198                                 if (npred >= counter) {
199                                         DataBase db = new DataBase();
200                                         db.setProt(last_key);
201                                         db.setTotalId(npred);
202                                         query.add(db);
203                                 }
204                         }
205                         if (rows.getCount() < row_count)
206                                 break;
207                 }
208                 return query;
209         }
210
211         // query by a part of sequence
212         public List<DataBase> readPart(String protIn) {
213                 int row_count = 10000;
214                 query = new ArrayList<DataBase>();
215                 RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
216                                 StringSerializer.get(), StringSerializer.get());
217                 result.setColumnFamily("ProteinRow");
218                 result.setRange(null, null, false, Integer.MAX_VALUE);
219                 result.setRowCount(row_count);
220                 String last_key = null;
221                 while (true) {
222                         result.setKeys(last_key, null);
223                         QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
224                         OrderedRows<String, String, String> rows = columnSlice.get();
225                         Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
226                         while (rowsIterator.hasNext()) {
227                                 Row<String, String, String> row = rowsIterator.next();
228                                 last_key = row.getKey();
229                                 if (last_key.matches("(.*)" + protIn + "(.*)")) {
230                                         Iterator<HColumn<String, String>> it = row.getColumnSlice().getColumns().iterator();
231                                         while (it.hasNext()) {
232                                                 HColumn<String, String> col = it.next();
233                                                 List<String> subProt = new ArrayList<String>();
234                                                 String subStr = last_key;
235                                                 while (subStr.length() > 0 && subStr.contains(protIn)) {
236                                                         String first = subStr.substring(0, subStr.indexOf(protIn));
237                                                         if (first.length() > 0)
238                                                                 subProt.add(first);
239                                                         subProt.add(protIn);
240                                                         subStr = subStr.substring(subStr.indexOf(protIn) + protIn.length(), subStr.length());
241                                                 }
242                                                 if (subStr.length() > 0)
243                                                         subProt.add(subStr);
244                                                 String name = col.getName();
245                                                 if (name.matches("(.*)jnetpred")) {
246                                                         DataBase db = new DataBase();
247                                                         db.setProt(last_key);
248                                                         db.setId(col.getName());
249                                                         db.setJpred(col.getValue());
250                                                         db.setSubProt(subProt);
251                                                         query.add(db);
252                                                 }
253                                         }
254                                 }
255                         }
256                         if (rows.getCount() < row_count)
257                                 break;
258                 }
259                 return query;
260         }
261
262         // convert String to Date
263         private static long DateParsing(String datInput) {
264                 if (datInput == null) {
265                         return 0;
266                 }
267                 long dateWorkSt = 0;
268                 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
269                 try {
270                         dateWorkSt = formatter.parse(datInput).getTime();
271                 } catch (ParseException e) {
272                         e.printStackTrace();
273                 }
274                 return dateWorkSt;
275         }
276
277         // convert String to Date
278         private static long TimeConvert(String datInput) {
279                 long dateWorkSt = 0;
280                 if (datInput == null) {
281                         return dateWorkSt;
282                 }
283                 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss");
284                 try {
285                         dateWorkSt = formatter.parse(datInput).getTime();
286                 } catch (ParseException e) {
287                         e.printStackTrace();
288                 }
289                 return dateWorkSt;
290         }
291
292         // convert long to date in string format
293         private static String DateFormat(long inDate) {
294                 SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
295                 String dateString = datformat.format(new Date(inDate));
296                 return dateString;
297         }
298
299         /*
300          * private static String DateFormat1(long inDate) { SimpleDateFormat
301          * datformat = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss"); String
302          * dateString = datformat.format(new Date(inDate)); return dateString; }
303          */
304         public static String DateFormatYYMMDD(long indate) {
305                 SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd");
306                 String dateString = datformat.format(new Date(indate));
307                 return dateString;
308         }
309
310         public long CountID(String id) {
311                 SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
312                                 StringSerializer.get(), StringSerializer.get());
313                 sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
314                 QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
315                 String datBegin = result.get().getColumnByName("DataBegin").getValue();
316                 String datEnd = result.get().getColumnByName("DataEnd").getValue();
317
318                 long datBeginLong = TimeConvert(datBegin);
319                 long datEndLong = TimeConvert(datEnd);
320                 return (datEndLong - datBeginLong) / 1000;
321         }
322
323         private static void SetDateRange() {
324                 if (0 == earlestDate) {
325                         StatisticsProt sp = new StatisticsProt();
326                         earlestDate = sp.earliestDate();
327                         System.out.println("Set earlest Date = " + earlestDate);
328                 }
329                 Calendar cal = Calendar.getInstance();
330                 currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH));
331         }
332
333         public boolean isThisDateValid(String dateToValidate) {
334                 if (dateToValidate == null || dateToValidate.equals("")) {
335                         System.out.println("Undefined date");
336                         return false;
337                 }
338                 SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd");
339                 try {
340                         // if not valid, this will throw ParseException
341                         sdf.setLenient(false);
342                         Date date = sdf.parse(dateToValidate);
343                 } catch (ParseException e) {
344                         e.printStackTrace();
345                         return false;
346                 }
347                 return true;
348         }
349
350         // find the earliest date
351         public long earliestDate() {
352                 ArrayList<Long> dateSort = new ArrayList<Long>();
353                 int row_count = 10000;
354                 RangeSlicesQuery<Long, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), LongSerializer.get(),
355                                 StringSerializer.get(), StringSerializer.get());
356                 result.setColumnFamily("ProteinData");
357                 result.setRange(null, null, false, Integer.MAX_VALUE);
358                 result.setRowCount(row_count);
359                 Long last_key = null;
360                 while (true) {
361                         result.setKeys(last_key, null);
362                         QueryResult<OrderedRows<Long, String, String>> columnSlice = result.execute();
363                         OrderedRows<Long, String, String> rows = columnSlice.get();
364                         Iterator<Row<Long, String, String>> rowsIterator = rows.iterator();
365                         while (rowsIterator.hasNext()) {
366                                 Row<Long, String, String> row = rowsIterator.next();
367                                 last_key = row.getKey();
368                                 dateSort.add(last_key);
369                         }
370                         if (rows.getCount() < row_count)
371                                 break;
372                 }
373                 Collections.sort(dateSort);
374                 return dateSort.get(0);
375         }
376 }