4ee29172fc04a2fa133bc302e5a5c7ef0aa07ec2
[proteocache.git] / server / compbio / statistic / StatisticsProt.java
1 package compbio.statistic;
2
3 import java.text.ParseException;
4 import java.text.SimpleDateFormat;
5 import java.util.ArrayList;
6 import java.util.Calendar;
7 import java.util.Collections;
8 import java.util.Date;
9 import java.util.Iterator;
10 import java.util.List;
11
12 import me.prettyprint.cassandra.serializers.LongSerializer;
13 import me.prettyprint.cassandra.serializers.StringSerializer;
14 import me.prettyprint.hector.api.beans.ColumnSlice;
15 import me.prettyprint.hector.api.beans.HColumn;
16 import me.prettyprint.hector.api.beans.OrderedRows;
17 import me.prettyprint.hector.api.beans.Row;
18 import me.prettyprint.hector.api.factory.HFactory;
19 import me.prettyprint.hector.api.query.QueryResult;
20 import me.prettyprint.hector.api.query.RangeSlicesQuery;
21 import me.prettyprint.hector.api.query.SliceQuery;
22 import compbio.cassandra.CassandraCreate;
23 import compbio.cassandra.DataBase;
24
25 public class StatisticsProt {
26 //      private final static long MILLISECONDS_PER_DAY = 1000L * 60 * 60 * 24;
27         private CassandraCreate cc = new CassandraCreate();
28         private ArrayList<DataBase> query;
29         private static long currentDate = 0;
30         private static long earlestDate = 0;
31
32         /* query: the period from date1 till date2 */
33         public List<DataBase> readDetails(String date1, String date2) {
34         
35                 if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
36                         System.out.println("Wrong date: point 1");
37                         return null;
38                 }
39                 SetDateRange();
40                 long dateStart = DateParsing(date1);
41                 long dateEnd = DateParsing(date2);
42                 if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate)
43                                 || dateStart > dateEnd)
44                         return null;
45                 if (dateStart < earlestDate)
46                         dateStart = earlestDate;
47                 if (dateEnd > currentDate)
48                         dateStart = currentDate;
49                 System.out.println("StatisticsProt.readDetails: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
50                 System.out.println("StatisticsProt.readDetails: Start date " + date1 + ": int representation = " + dateStart);
51                 System.out.println("StatisticsProt.readDetails: End date " + date2 + ": int representation = " + dateEnd);
52                 Calendar start = Calendar.getInstance();
53                 start.setTime(new Date(dateStart));
54                 Calendar end = Calendar.getInstance();
55                 end.setTime(new Date(dateEnd));
56                 query = new ArrayList<DataBase>();
57                 int day = 0;
58                 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
59                         SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
60                                         StringSerializer.get(), StringSerializer.get());
61                         result.setColumnFamily("ProteinData");
62                         result.setKey(date.getTime());
63                         result.setRange(null, null, false, Integer.MAX_VALUE);
64                         QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
65                         ++day;
66                         System.out.print("Day " + day + ": dataStart = " + date + ": ");
67                         if (!columnSlice.get().getColumns().isEmpty()) {
68                                 DataBase db = new DataBase(DateFormat(date.getTime()), columnSlice.get().getColumns().size());
69                                 query.add(db);
70                                 System.out.println("data exist");
71                         } else {
72                                 System.out.println("no data");
73                         }
74                 //      dateStart += MILLISECONDS_PER_DAY;
75                 }
76                 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
77                 return query;
78         }
79
80         /*
81          * query: execution time for the period from date1 till date2
82          */
83         public List<DataBase> readLength(String date1, String date2) {
84                 if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
85                         System.out.println("Wrong date: point 3");
86                         return null;
87                 }
88                 SetDateRange();
89                 long dateStart = DateParsing(date1);
90                 long dateEnd = DateParsing(date2);
91                 if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate)
92                                 || dateStart > dateEnd)
93                         return null;
94                 if (dateStart < earlestDate)
95                         dateStart = earlestDate;
96                 if (dateEnd > currentDate)
97                         dateStart = currentDate;
98                 System.out.println("StatisticsProt.readLength: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
99                 System.out.println("StatisticsProt.readLength: Start date is " + date1 + ": int representation = " + dateStart);
100                 System.out.println("StatisticsProt.readLength: End date is " + date2 + ": int representation = " + dateEnd);
101                 Calendar start = Calendar.getInstance();
102                 start.setTime(new Date(dateStart));
103                 Calendar end = Calendar.getInstance();
104                 end.setTime(new Date(dateEnd));
105                 query = new ArrayList<DataBase>();
106                 List<Integer> totalTime = new ArrayList<Integer>();
107                 for (int i = 0; i < 4; i++)
108                         totalTime.add(i, 0);
109                 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
110                         List<Integer> timeResult = new ArrayList<Integer>();
111                         SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
112                                         StringSerializer.get(), StringSerializer.get());
113                         result.setColumnFamily("ProteinData");
114                         result.setKey(date.getTime());
115                         result.setRange(null, null, false, Integer.MAX_VALUE);
116                         QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
117                         List<HColumn<String, String>> col = columnSlice.get().getColumns();
118                         if (!col.isEmpty()) {
119                                 Iterator<HColumn<String, String>> itCol = col.iterator();
120                                 for (int i = 0; i < 4; i++)
121                                         timeResult.add(i, 0);
122                                 while (itCol.hasNext()) {
123                                         String id = itCol.next().getName();
124                                         long lenResult = CountID(id);
125                                         if (lenResult <= 30) 
126                                                 timeResult.set(0, timeResult.get(0) + 1);
127                                         else if (lenResult > 30 && lenResult <= 60)
128                                                 timeResult.set(1, timeResult.get(1) + 1);
129                                         else if (lenResult > 60 && lenResult <= 120)
130                                                 timeResult.set(2, timeResult.get(2) + 1);
131                                         else {
132                                                 timeResult.set(3, timeResult.get(3) + 1);
133                                         }
134                                 }
135                                 for (int i = 0; i < 4; i++)
136                                         totalTime.set(i, totalTime.get(i) + timeResult.get(i));
137                                 DataBase db = new DataBase();
138                                 db.setTimeRez(timeResult);
139                                 db.setDate(DateFormat(date.getTime()));
140                                 query.add(db);
141                         }
142         //              dateStart += MILLISECONDS_PER_DAY;
143                 }
144                 DataBase db = new DataBase();
145                 db.setTimeTotalExec(totalTime);
146                 query.add(db);
147                 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
148                 return query;
149         }
150
151         /* query: protein sequence */
152         public List<DataBase> readProteins(String protIn) {
153                 query = new ArrayList<DataBase>();
154                 SliceQuery<String, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
155                                 StringSerializer.get(), StringSerializer.get());
156                 result.setColumnFamily("ProteinRow");
157                 result.setKey(protIn);
158                 result.setRange(null, null, false, Integer.MAX_VALUE);
159                 QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
160                 Iterator<HColumn<String, String>> it = columnSlice.get().getColumns().iterator();
161                 while (it.hasNext()) {
162                         HColumn<String, String> col = it.next();
163                         String name = col.getName();
164                         if (name.matches("(.*)jnetpred")) {
165                                 DataBase db = new DataBase();
166                                 db.setProt(protIn);
167                                 db.setId(col.getName());
168                                 db.setJpred(col.getValue());
169                                 query.add(db);
170                         }
171                 }
172                 return query;
173         }
174
175         // query by a protein sequence
176         public List<DataBase> readProtID(int counter) {
177                 query = new ArrayList<DataBase>();
178                 int row_count = 100000000;
179                 RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
180                                 StringSerializer.get(), StringSerializer.get());
181                 result.setColumnFamily("ProteinRow");
182                 result.setRange(null, null, false, Integer.MAX_VALUE);
183                 result.setRowCount(row_count);
184                 String last_key = null;
185                 while (true) {
186                         result.setKeys(last_key, null);
187                         QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
188                         OrderedRows<String, String, String> rows = columnSlice.get();
189                         Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
190                         while (rowsIterator.hasNext()) {
191                                 Row<String, String, String> row = rowsIterator.next();
192                                 last_key = row.getKey();
193                                 List<HColumn<String, String>> clms = row.getColumnSlice().getColumns();
194                                 int npred = 0;
195                                 for (HColumn<String, String> cln : clms) {
196                                         String name = cln.getName();
197                                         if (name.matches("(.*)jnetpred")) {
198                                                 ++npred;
199                                         }
200                                 }
201                                 if (npred > counter) {
202                                         DataBase db = new DataBase();
203                                         db.setProt(last_key);
204                                         db.setTotalId(npred);
205                                         query.add(db);
206                                 }
207                         }
208                         if (rows.getCount() < row_count)
209                                 break;
210                 }
211                 return query;
212         }
213
214         // query by a part of sequence
215         public List<DataBase> readPart(String protIn) {
216                 int row_count = 10000;
217                 query = new ArrayList<DataBase>();
218                 RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
219                                 StringSerializer.get(), StringSerializer.get());
220                 result.setColumnFamily("ProteinRow");
221                 result.setRange(null, null, false, Integer.MAX_VALUE);
222                 result.setRowCount(row_count);
223                 String last_key = null;
224                 while (true) {
225                         result.setKeys(last_key, null);
226                         QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
227                         OrderedRows<String, String, String> rows = columnSlice.get();
228                         Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
229                         while (rowsIterator.hasNext()) {
230                                 Row<String, String, String> row = rowsIterator.next();
231                                 last_key = row.getKey();
232                                 if (last_key.matches("(.*)" + protIn + "(.*)")) {
233                                         Iterator<HColumn<String, String>> it = row.getColumnSlice().getColumns().iterator();
234                                         while (it.hasNext()) {
235                                                 HColumn<String, String> col = it.next();
236                                                 List<String> subProt = new ArrayList<String>();
237                                                 String subStr = last_key;
238                                                 while (subStr.length() > 0 && subStr.contains(protIn)) {
239                                                         String first = subStr.substring(0, subStr.indexOf(protIn));
240                                                         if (first.length() > 0)
241                                                                 subProt.add(first);
242                                                         subProt.add(protIn);
243                                                         subStr = subStr.substring(subStr.indexOf(protIn) + protIn.length(), subStr.length());
244                                                 }
245                                                 if (subStr.length() > 0)
246                                                         subProt.add(subStr);
247                                                 String name = col.getName();
248                                                 if (name.matches("(.*)jnetpred")) {
249                                                         DataBase db = new DataBase();
250                                                         db.setProt(last_key);
251                                                         db.setId(col.getName());
252                                                         db.setJpred(col.getValue());
253                                                         db.setSubProt(subProt);
254                                                         query.add(db);
255                                                 }
256                                         }
257                                 }
258                         }
259                         if (rows.getCount() < row_count)
260                                 break;
261                 }
262                 return query;
263         }
264
265         // convert String to Date
266         private static long DateParsing(String datInput) {
267                 if (datInput == null) {
268                         return 0;
269                 }
270                 long dateWorkSt = 0;
271                 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
272                 try {
273                         dateWorkSt = formatter.parse(datInput).getTime();
274                 } catch (ParseException e) {
275                         e.printStackTrace();
276                 }
277                 return dateWorkSt;
278         }
279
280         // convert String to Date
281         private static long TimeConvert(String datInput) {
282                 long dateWorkSt = 0;
283                 if (datInput == null) {
284                         return dateWorkSt;
285                 }
286                 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss");
287                 try {
288                         dateWorkSt = formatter.parse(datInput).getTime();
289                 } catch (ParseException e) {
290                         e.printStackTrace();
291                 }
292                 return dateWorkSt;
293         }
294
295         // convert long to date in string format
296         private static String DateFormat(long inDate) {
297                 SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
298                 String dateString = datformat.format(new Date(inDate));
299                 return dateString;
300         }
301
302         /*
303          * private static String DateFormat1(long inDate) { SimpleDateFormat
304          * datformat = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss"); String
305          * dateString = datformat.format(new Date(inDate)); return dateString; }
306          */
307         public static String DateFormatYYMMDD(long indate) {
308                 SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd");
309                 String dateString = datformat.format(new Date(indate));
310                 return dateString;
311         }
312
313         public long CountID(String id) {
314                 SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
315                                 StringSerializer.get(), StringSerializer.get());
316                 sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
317                 QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
318                 String datBegin = result.get().getColumnByName("DataBegin").getValue();
319                 String datEnd = result.get().getColumnByName("DataEnd").getValue();
320
321                 long datBeginLong = TimeConvert(datBegin);
322                 long datEndLong = TimeConvert(datEnd);
323                 return (datEndLong - datBeginLong) / 1000;
324         }
325
326         private static void SetDateRange() {
327                 if (0 == earlestDate) {
328                         StatisticsProt sp = new StatisticsProt();
329                         earlestDate = sp.earliestDate();
330                         System.out.println("Set earlest Date = " + earlestDate);
331                 }
332                 Calendar cal = Calendar.getInstance();
333                 currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH));
334         }
335
336         public boolean isThisDateValid(String dateToValidate) {
337                 if (dateToValidate == null || dateToValidate.equals("")) {
338                         System.out.println("Undefined date");
339                         return false;
340                 }
341                 SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd");
342                 try {
343                         // if not valid, this will throw ParseException
344                         sdf.setLenient(false);
345                         Date date = sdf.parse(dateToValidate);
346                 } catch (ParseException e) {
347                         e.printStackTrace();
348                         return false;
349                 }
350                 return true;
351         }
352
353         // find the earliest date
354         public long earliestDate() {
355                 ArrayList<Long> dateSort = new ArrayList<Long>();
356                 int row_count = 10000;
357                 RangeSlicesQuery<Long, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), LongSerializer.get(),
358                                 StringSerializer.get(), StringSerializer.get());
359                 result.setColumnFamily("ProteinData");
360                 result.setRange(null, null, false, Integer.MAX_VALUE);
361                 result.setRowCount(row_count);
362                 Long last_key = null;
363                 while (true) {
364                         result.setKeys(last_key, null);
365                         QueryResult<OrderedRows<Long, String, String>> columnSlice = result.execute();
366                         OrderedRows<Long, String, String> rows = columnSlice.get();
367                         Iterator<Row<Long, String, String>> rowsIterator = rows.iterator();
368                         while (rowsIterator.hasNext()) {
369                                 Row<Long, String, String> row = rowsIterator.next();
370                                 last_key = row.getKey();
371                                 dateSort.add(last_key);
372                         }
373                         if (rows.getCount() < row_count)
374                                 break;
375                 }
376                 Collections.sort(dateSort);
377                 return dateSort.get(0);
378         }
379 }