Optimize code
[proteocache.git] / server / compbio / statistic / StatisticsProt.java
1 package compbio.statistic;
2
3 import java.text.ParseException;
4 import java.text.SimpleDateFormat;
5 import java.util.ArrayList;
6 import java.util.Calendar;
7 import java.util.Collections;
8 import java.util.Date;
9 import java.util.Iterator;
10 import java.util.List;
11
12 import me.prettyprint.cassandra.serializers.LongSerializer;
13 import me.prettyprint.cassandra.serializers.StringSerializer;
14 import me.prettyprint.hector.api.beans.ColumnSlice;
15 import me.prettyprint.hector.api.beans.HColumn;
16 import me.prettyprint.hector.api.beans.OrderedRows;
17 import me.prettyprint.hector.api.beans.Row;
18 import me.prettyprint.hector.api.factory.HFactory;
19 import me.prettyprint.hector.api.query.QueryResult;
20 import me.prettyprint.hector.api.query.RangeSlicesQuery;
21 import me.prettyprint.hector.api.query.SliceQuery;
22 import compbio.cassandra.CassandraCreate;
23 import compbio.cassandra.DataBase;
24
25 public class StatisticsProt {
26         private final static long MILLISECONDS_PER_DAY = 1000L * 60 * 60 * 24;
27         private CassandraCreate cc = new CassandraCreate();
28         private ArrayList<DataBase> query;
29         private static long currentDate = 0;
30         private static long earlestDate = 0;
31
32         /* query: the period from date1 till date2 */
33         public List<DataBase> readDetails(String date1, String date2) {
34                 if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
35                         System.out.println("Wrong date: point 1");
36                         return null;
37                 }
38                 SetDateRange();
39
40                 long dateStart = DateParsing(date1);
41                 long dateEnd = DateParsing(date2);
42                 if (dateStart < earlestDate)
43                         dateStart = earlestDate;
44                 if (dateStart > currentDate)
45                         dateStart = currentDate - MILLISECONDS_PER_DAY;
46                 if (dateEnd < earlestDate)
47                         dateStart = earlestDate + MILLISECONDS_PER_DAY;
48                 if (dateEnd > currentDate)
49                         dateStart = currentDate;
50                 System.out.println("StatisticsProt.readDetails: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
51                 System.out.println("StatisticsProt.readDetails: Start date " + date1 + ": int representation = " + dateStart);
52                 System.out.println("StatisticsProt.readDetails: End date " + date2 + ": int representation = " + dateEnd);
53
54                 query = new ArrayList<DataBase>();
55                 int day = 0;
56                 while (dateStart <= dateEnd) {
57                         SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
58                                         StringSerializer.get(), StringSerializer.get());
59                         result.setColumnFamily("ProteinData");
60                         result.setKey(dateStart);
61                         result.setRange(null, null, false, Integer.MAX_VALUE);
62                         QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
63                         ++day;
64                         System.out.print("Day " + day + ": dataStart = " + dateStart + ": ");
65                         if (!columnSlice.get().getColumns().isEmpty()) {
66                                 DataBase db = new DataBase(DateFormat(dateStart), columnSlice.get().getColumns().size());
67                                 query.add(db);
68                                 System.out.println("data exist");
69                         } else {
70                                 System.out.println("no data");
71                         }
72                         dateStart += MILLISECONDS_PER_DAY;
73                 }
74                 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
75                 return query;
76         }
77
78         /*
79          * query: execution time for the period from date1 till date2
80          */
81         public List<DataBase> readLength(String date1, String date2) {
82                 if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
83                         System.out.println("Wrong date: point 3");
84                         return null;
85                 }
86                 SetDateRange();
87
88                 long dateStart = DateParsing(date1);
89                 long dateEnd = DateParsing(date2);
90                 if (dateStart < earlestDate)
91                         dateStart = earlestDate;
92                 if (dateStart > currentDate)
93                         dateStart = currentDate - MILLISECONDS_PER_DAY;
94                 if (dateEnd < earlestDate)
95                         dateStart = earlestDate + MILLISECONDS_PER_DAY;
96                 if (dateEnd > currentDate)
97                         dateStart = currentDate;
98                 System.out.println("StatisticsProt.readLength: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
99                 System.out.println("StatisticsProt.readLength: Start date is " + date1 + ": int representation = " + dateStart);
100                 System.out.println("StatisticsProt.readLength: End date is " + date2 + ": int representation = " + dateEnd);
101
102                 query = new ArrayList<DataBase>();
103                 List<Integer> totalTime = new ArrayList<Integer>();
104                 for (int i = 0; i < 4; i++)
105                         totalTime.add(i, 0);
106                 while (dateStart <= dateEnd) {
107                         List<Integer> timeResult = new ArrayList<Integer>();
108                         SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
109                                         StringSerializer.get(), StringSerializer.get());
110                         result.setColumnFamily("ProteinData");
111                         result.setKey(dateStart);
112                         result.setRange(null, null, false, Integer.MAX_VALUE);
113                         QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
114                         List<HColumn<String, String>> col = columnSlice.get().getColumns();
115                         if (!col.isEmpty()) {
116                                 Iterator<HColumn<String, String>> itCol = col.iterator();
117                                 for (int i = 0; i < 4; i++)
118                                         timeResult.add(i, 0);
119                                 while (itCol.hasNext()) {
120                                         String id = itCol.next().getName();
121                                         long lenResult = CountID(id);
122                                         if (lenResult <= 30)
123                                                 timeResult.set(0, timeResult.get(0) + 1);
124                                         else if (lenResult > 30 && lenResult <= 60)
125                                                 timeResult.set(1, timeResult.get(1) + 1);
126                                         else if (lenResult > 60 && lenResult <= 120)
127                                                 timeResult.set(2, timeResult.get(2) + 1);
128                                         else {
129                                                 timeResult.set(3, timeResult.get(3) + 1);
130                                         }
131                                 }
132                                 DataBase db = new DataBase();
133                                 db.setTimeRez(timeResult);
134                                 db.setDate(DateFormat(dateStart));
135                                 query.add(db);
136                         }
137                         dateStart += MILLISECONDS_PER_DAY;
138                 }
139                 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
140                 return query;
141         }
142
143         /* query: protein sequence */
144         public List<DataBase> readProteins(String protIn) {
145                 query = new ArrayList<DataBase>();
146                 SliceQuery<String, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
147                                 StringSerializer.get(), StringSerializer.get());
148                 result.setColumnFamily("ProteinRow");
149                 result.setKey(protIn);
150                 result.setRange(null, null, false, Integer.MAX_VALUE);
151                 QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
152                 Iterator<HColumn<String, String>> it = columnSlice.get().getColumns().iterator();
153                 while (it.hasNext()) {
154                         HColumn<String, String> col = it.next();
155                         String name = col.getName();
156                         if (name.matches("(.*)jnetpred")) {
157                                 DataBase db = new DataBase();
158                                 db.setProt(protIn);
159                                 db.setId(col.getName());
160                                 db.setJpred(col.getValue());
161                                 query.add(db);
162                         }
163                 }
164                 return query;
165         }
166
167         // query by a protein sequence
168         public List<DataBase> readProtID() {
169                 query = new ArrayList<DataBase>();
170                 int row_count = 100000000;
171                 RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
172                                 StringSerializer.get(), StringSerializer.get());
173                 result.setColumnFamily("ProteinRow");
174                 result.setRange(null, null, false, Integer.MAX_VALUE);
175                 result.setRowCount(row_count);
176                 String last_key = null;
177                 while (true) {
178                         result.setKeys(last_key, null);
179                         QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
180                         OrderedRows<String, String, String> rows = columnSlice.get();
181                         Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
182                         while (rowsIterator.hasNext()) {
183                                 Row<String, String, String> row = rowsIterator.next();
184                                 last_key = row.getKey();
185                                 List<HColumn<String, String>> clms = row.getColumnSlice().getColumns();
186                                 int npred = 0;
187                                 for (HColumn<String, String> cln : clms) {
188                                         String name = cln.getName();
189                                         if (name.matches("(.*)jnetpred")) {
190                                                 ++npred;
191                                         }
192                                 }
193                                 if (npred > 3) {
194                                         DataBase db = new DataBase();
195                                         db.setProt(last_key);
196                                         db.setTotalId(npred);
197                                         query.add(db);
198                                 }
199                         }
200                         if (rows.getCount() < row_count)
201                                 break;
202                 }
203                 return query;
204         }
205
206         // query by a part of sequence
207         public List<DataBase> readPart(String protIn) {
208                 int row_count = 10000;
209                 query = new ArrayList<DataBase>();
210                 RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
211                                 StringSerializer.get(), StringSerializer.get());
212                 result.setColumnFamily("ProteinRow");
213                 result.setRange(null, null, false, Integer.MAX_VALUE);
214                 result.setRowCount(row_count);
215                 String last_key = null;
216                 while (true) {
217                         result.setKeys(last_key, null);
218                         QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
219                         OrderedRows<String, String, String> rows = columnSlice.get();
220                         Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
221                         while (rowsIterator.hasNext()) {
222                                 Row<String, String, String> row = rowsIterator.next();
223                                 last_key = row.getKey();
224                                 if (last_key.matches("(.*)" + protIn + "(.*)")) {
225                                         Iterator<HColumn<String, String>> it = row.getColumnSlice().getColumns().iterator();
226                                         while (it.hasNext()) {
227                                                 HColumn<String, String> col = it.next();
228                                                 List<String> subProt = new ArrayList<String>();
229                                                 String subStr = last_key;
230                                                 while (subStr.length() > 0 && subStr.contains(protIn)) {
231                                                         String first = subStr.substring(0, subStr.indexOf(protIn));
232                                                         if (first.length() > 0)
233                                                                 subProt.add(first);
234                                                         subProt.add(protIn);
235                                                         subStr = subStr.substring(subStr.indexOf(protIn) + protIn.length(), subStr.length());
236                                                 }
237                                                 if (subStr.length() > 0)
238                                                         subProt.add(subStr);
239                                                 String name = col.getName();
240                                                 if (name.matches("(.*)jnetpred")) {
241                                                         DataBase db = new DataBase();
242                                                         db.setProt(last_key);
243                                                         db.setId(col.getName());
244                                                         db.setJpred(col.getValue());
245                                                         db.setSubProt(subProt);
246                                                         query.add(db);
247                                                 }
248                                         }
249                                 }
250                         }
251                         if (rows.getCount() < row_count)
252                                 break;
253                 }
254                 return query;
255         }
256
257         // convert String to Date
258         private static long DateParsing(String datInput) {
259                 if (datInput == null) {
260                         return 0;
261                 }
262                 long dateWorkSt = 0;
263                 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
264                 try {
265                         dateWorkSt = formatter.parse(datInput).getTime();
266                 } catch (ParseException e) {
267                         e.printStackTrace();
268                 }
269                 return dateWorkSt;
270         }
271
272         // convert String to Date
273         private static long TimeConvert(String datInput) {
274                 long dateWorkSt = 0;
275                 if (datInput == null) {
276                         return dateWorkSt;
277                 }
278                 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss");
279                 try {
280                         dateWorkSt = formatter.parse(datInput).getTime();
281                 } catch (ParseException e) {
282                         e.printStackTrace();
283                 }
284                 return dateWorkSt;
285         }
286
287         // convert long to date in string format
288         private static String DateFormat(long inDate) {
289                 SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
290                 String dateString = datformat.format(new Date(inDate));
291                 return dateString;
292         }
293
294         /*
295          * private static String DateFormat1(long inDate) { SimpleDateFormat
296          * datformat = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss"); String
297          * dateString = datformat.format(new Date(inDate)); return dateString; }
298          */
299         public static String DateFormatYYMMDD(long indate) {
300                 SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd");
301                 String dateString = datformat.format(new Date(indate));
302                 return dateString;
303         }
304
305         public long CountID(String id) {
306                 SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
307                                 StringSerializer.get(), StringSerializer.get());
308                 sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
309                 QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
310                 String datBegin = result.get().getColumnByName("DataBegin").getValue();
311                 String datEnd = result.get().getColumnByName("DataEnd").getValue();
312
313                 long datBeginLong = TimeConvert(datBegin);
314                 long datEndLong = TimeConvert(datEnd);
315                 return (datEndLong - datBeginLong) / 1000;
316         }
317
318         private static void SetDateRange() {
319                 if (0 == earlestDate) {
320                         StatisticsProt sp = new StatisticsProt();
321                         earlestDate = sp.earliestDate();
322                         System.out.println("Set earlest Date = " + earlestDate);
323                 }
324                 Calendar cal = Calendar.getInstance();
325                 currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH));
326         }
327
328         public boolean isThisDateValid(String dateToValidate) {
329                 if (dateToValidate == null || dateToValidate.equals("")) {
330                         System.out.println("Undefined date");
331                         return false;
332                 }
333                 SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd");
334                 try {
335                         // if not valid, this will throw ParseException
336                         sdf.setLenient(false);
337                         Date date = sdf.parse(dateToValidate);
338                 } catch (ParseException e) {
339                         e.printStackTrace();
340                         return false;
341                 }
342                 return true;
343         }
344
345         // find the earliest date
346         public long earliestDate() {
347                 ArrayList<Long> dateSort = new ArrayList<Long>();
348                 int row_count = 10000;
349                 RangeSlicesQuery<Long, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), LongSerializer.get(),
350                                 StringSerializer.get(), StringSerializer.get());
351                 result.setColumnFamily("ProteinData");
352                 result.setRange(null, null, false, Integer.MAX_VALUE);
353                 result.setRowCount(row_count);
354                 Long last_key = null;
355                 while (true) {
356                         result.setKeys(last_key, null);
357                         QueryResult<OrderedRows<Long, String, String>> columnSlice = result.execute();
358                         OrderedRows<Long, String, String> rows = columnSlice.get();
359                         Iterator<Row<Long, String, String>> rowsIterator = rows.iterator();
360                         while (rowsIterator.hasNext()) {
361                                 Row<Long, String, String> row = rowsIterator.next();
362                                 last_key = row.getKey();
363                                 dateSort.add(last_key);
364                         }
365                         if (rows.getCount() < row_count)
366                                 break;
367                 }
368                 Collections.sort(dateSort);
369                 return dateSort.get(0);
370         }
371 }