First working code
[proteocache.git] / server / compbio / statistic / StatisticsProt.java
1 package compbio.statistic;
2
3 import java.text.ParseException;
4 import java.text.SimpleDateFormat;
5 import java.util.ArrayList;
6 import java.util.Calendar;
7 import java.util.Collections;
8 import java.util.Date;
9 import java.util.Iterator;
10 import java.util.List;
11
12 import me.prettyprint.cassandra.serializers.LongSerializer;
13 import me.prettyprint.cassandra.serializers.StringSerializer;
14 import me.prettyprint.hector.api.beans.ColumnSlice;
15 import me.prettyprint.hector.api.beans.HColumn;
16 import me.prettyprint.hector.api.beans.OrderedRows;
17 import me.prettyprint.hector.api.beans.Row;
18 import me.prettyprint.hector.api.factory.HFactory;
19 import me.prettyprint.hector.api.query.QueryResult;
20 import me.prettyprint.hector.api.query.RangeSlicesQuery;
21 import me.prettyprint.hector.api.query.SliceQuery;
22 import compbio.cassandra.CassandraCreate;
23 import compbio.cassandra.DataBase;
24
25 public class StatisticsProt {
26         private final static long MILLISECONDS_PER_DAY = 1000L * 60 * 60 * 24;
27         private CassandraCreate cc = new CassandraCreate();
28         private ArrayList<DataBase> query;
29
30         // query for the period from date1 till date2
31         public List<DataBase> readDetail(String dateInStringSt, String dateInStringEnd) {
32                 if (!isThisDateValid(dateInStringSt))
33                         return null;
34                 long dateWorkSt = DateParsing(dateInStringSt);
35                 long dateWorkEnd = DateParsing(dateInStringEnd);
36                 if (CheckDate(dateWorkSt) && CheckDate(dateWorkEnd)) {
37                         query = new ArrayList<DataBase>();
38                         while (dateWorkSt <= dateWorkEnd) {
39                                 SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
40                                                 StringSerializer.get(), StringSerializer.get());
41                                 result.setColumnFamily("ProteinData");
42                                 result.setKey(dateWorkSt);
43                                 result.setRange(null, null, false, Integer.MAX_VALUE);
44                                 QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
45                                 if (!columnSlice.get().getColumns().isEmpty()) {
46                                         DataBase db = new DataBase(DateFormat(dateWorkSt), columnSlice.get().getColumns().size());
47                                         query.add(db);
48                                 }
49                                 dateWorkSt += MILLISECONDS_PER_DAY;
50                         }
51                 } else
52                         System.out.println("Wrong date");
53                 return query;
54         }
55
56         // find the earliest date
57         public long earliestDate() {
58                 ArrayList<Long> dateSort = new ArrayList<Long>();
59                 int row_count = 10000;
60                 RangeSlicesQuery<Long, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), LongSerializer.get(),
61                                 StringSerializer.get(), StringSerializer.get());
62                 result.setColumnFamily("ProteinData");
63                 result.setRange(null, null, false, Integer.MAX_VALUE);
64                 result.setRowCount(row_count);
65                 Long last_key = null;
66                 while (true) {
67                         result.setKeys(last_key, null);
68                         QueryResult<OrderedRows<Long, String, String>> columnSlice = result.execute();
69                         OrderedRows<Long, String, String> rows = columnSlice.get();
70                         Iterator<Row<Long, String, String>> rowsIterator = rows.iterator();
71                         while (rowsIterator.hasNext()) {
72                                 Row<Long, String, String> row = rowsIterator.next();
73                                 last_key = row.getKey();
74                                 dateSort.add(last_key);
75                         }
76                         if (rows.getCount() < row_count)
77                                 break;
78                 }
79                 Collections.sort(dateSort);
80                 return dateSort.get(0);
81         }
82
83         // query execution time for the period from dateInStringSt till
84         // dateInStringEnd
85         public List<DataBase> readLength(String dateInStringSt, String dateInStringEnd) {
86                 long dateWorkSt = DateParsing(dateInStringSt);
87                 long dateWorkEnd = DateParsing(dateInStringEnd);
88                 if (CheckDate(dateWorkSt) && CheckDate(dateWorkEnd)) {
89                         query = new ArrayList<DataBase>();
90                         List<Integer> totalTime = new ArrayList<Integer>();
91                         for (int i = 0; i < 4; i++)
92                                 totalTime.add(i, 0);
93                         while (dateWorkSt <= dateWorkEnd) {
94                                 List<Integer> timeResult = new ArrayList<Integer>();
95                                 SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
96                                                 StringSerializer.get(), StringSerializer.get());
97                                 result.setColumnFamily("ProteinData");
98                                 result.setKey(dateWorkSt);
99                                 result.setRange(null, null, false, Integer.MAX_VALUE);
100                                 QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
101                                 List<HColumn<String, String>> col = columnSlice.get().getColumns();
102                                 if (!col.isEmpty()) {
103                                         Iterator<HColumn<String, String>> itCol = col.iterator();
104                                         for (int i = 0; i < 4; i++)
105                                                 timeResult.add(i, 0);
106                                         while (itCol.hasNext()) {
107                                                 String id = itCol.next().getName();
108                                                 long lenResult = CountID(id);
109                                                 if (lenResult <= 30)
110                                                         timeResult.set(0, timeResult.get(0) + 1);
111                                                 else if (lenResult > 30 && lenResult <= 60)
112                                                         timeResult.set(1, timeResult.get(1) + 1);
113                                                 else if (lenResult > 60 && lenResult <= 120)
114                                                         timeResult.set(2, timeResult.get(2) + 1);
115                                                 else {
116                                                         timeResult.set(3, timeResult.get(3) + 1);
117                                                         // System.out.println(lenResult + "; " + id);
118                                                 }
119                                         }
120                                         DataBase db = new DataBase();
121                                         db.setTimeRez(timeResult);
122                                         db.setDate(DateFormat(dateWorkSt));
123                                         query.add(db);
124                                 }
125                                 dateWorkSt += MILLISECONDS_PER_DAY;
126                         }
127                 } else
128                         System.out.println("Wrong date");
129                 return query;
130         }
131
132         // query by a protein sequence
133         public List<DataBase> readProt(String protIn) {
134                 query = new ArrayList<DataBase>();
135                 SliceQuery<String, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
136                                 StringSerializer.get(), StringSerializer.get());
137                 result.setColumnFamily("ProteinRow");
138                 result.setKey(protIn);
139                 result.setRange(null, null, false, Integer.MAX_VALUE);
140                 QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
141                 Iterator<HColumn<String, String>> it = columnSlice.get().getColumns().iterator();
142                 while (it.hasNext()) {
143                         HColumn<String, String> col = it.next();
144                         DataBase db = new DataBase();
145                         db.setProt(protIn);
146                         db.setId(col.getName());
147                         db.setJpred(col.getValue());
148                         query.add(db);
149                 }
150                 return query;
151         }
152
153         // query by a protein sequence
154         public List<DataBase> readProtID() {
155                 query = new ArrayList<DataBase>();
156                 int row_count = 100000000;
157                 RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
158                                 StringSerializer.get(), StringSerializer.get());
159                 result.setColumnFamily("ProteinRow");
160                 result.setRange(null, null, false, Integer.MAX_VALUE);
161                 result.setRowCount(row_count);
162                 String last_key = null;
163                 while (true) {
164                         result.setKeys(last_key, null);
165                         QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
166                         OrderedRows<String, String, String> rows = columnSlice.get();
167                         Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
168                         while (rowsIterator.hasNext()) {
169                                 Row<String, String, String> row = rowsIterator.next();
170                                 last_key = row.getKey();
171                                 if (row.getColumnSlice().getColumns().size() > 3) {
172                                         DataBase db = new DataBase();
173                                         db.setProt(last_key);
174                                         db.setTotalId(row.getColumnSlice().getColumns().size());
175                                         query.add(db);
176                                 }
177                         }
178                         if (rows.getCount() < row_count)
179                                 break;
180                 }
181                 return query;
182         }
183
184         // query by a part of sequence
185         public List<DataBase> readPart(String protIn) {
186                 int row_count = 10000;
187                 query = new ArrayList<DataBase>();
188                 RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
189                                 StringSerializer.get(), StringSerializer.get());
190                 result.setColumnFamily("ProteinRow");
191                 result.setRange(null, null, false, Integer.MAX_VALUE);
192                 result.setRowCount(row_count);
193                 String last_key = null;
194                 while (true) {
195                         result.setKeys(last_key, null);
196                         QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
197                         OrderedRows<String, String, String> rows = columnSlice.get();
198                         Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
199                         while (rowsIterator.hasNext()) {
200                                 Row<String, String, String> row = rowsIterator.next();
201                                 last_key = row.getKey();
202                                 if (last_key.matches("(.*)" + protIn + "(.*)")) {
203                                         Iterator<HColumn<String, String>> it = row.getColumnSlice().getColumns().iterator();
204                                         while (it.hasNext()) {
205                                                 HColumn<String, String> col = it.next();
206                                                 List<String> subProt = new ArrayList<String>();
207                                                 String subStr = last_key;
208                                                 while (subStr.length() > 0 && subStr.contains(protIn)) {
209                                                         String first = subStr.substring(0, subStr.indexOf(protIn));
210                                                         if (first.length() > 0)
211                                                                 subProt.add(first);
212                                                         subProt.add(protIn);
213                                                         subStr = subStr.substring(subStr.indexOf(protIn) + protIn.length(), subStr.length());
214                                                 }
215                                                 if (subStr.length() > 0)
216                                                         subProt.add(subStr);
217                                                 DataBase db = new DataBase();
218                                                 db.setProt(last_key);
219                                                 db.setId(col.getName());
220                                                 db.setJpred(col.getValue());
221                                                 db.setSubProt(subProt);
222                                                 query.add(db);
223                                         }
224                                 }
225                         }
226                         if (rows.getCount() < row_count)
227                                 break;
228                 }
229                 return query;
230         }
231
232         // convert String to Date
233         private static long DateParsing(String datInput) {
234                 if (datInput == null) {
235                         return 0;
236                 }
237                 long dateWorkSt = 0;
238                 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
239                 try {
240                         dateWorkSt = formatter.parse(datInput).getTime();
241                 } catch (ParseException e) {
242                         e.printStackTrace();
243                 }
244                 return dateWorkSt;
245         }
246
247         // convert String to Date
248         private static long TimeConvert(String datInput) {
249                 long dateWorkSt = 0;
250                 if (datInput == null) {
251                         return dateWorkSt;
252                 }
253                 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss");
254                 try {
255                         dateWorkSt = formatter.parse(datInput).getTime();
256                 } catch (ParseException e) {
257                         e.printStackTrace();
258                 }
259                 // System.out.println(datInput + "start reverce" +
260                 // DateFormat1(dateWorkSt));
261                 return dateWorkSt;
262         }
263
264         // convert long to date in string format
265         private static String DateFormat(long inDate) {
266                 SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
267                 String dateString = datformat.format(new Date(inDate));
268                 return dateString;
269         }
270
271         private static String DateFormat1(long inDate) {
272                 SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss");
273                 String dateString = datformat.format(new Date(inDate));
274                 return dateString;
275         }
276
277         public static String DateFormatYYMMDD(long indate) {
278                 SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd");
279                 String dateString = datformat.format(new Date(indate));
280                 return dateString;
281         }
282
283         public long CountID(String id) {
284                 SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
285                                 StringSerializer.get(), StringSerializer.get());
286                 sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
287                 QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
288                 String datBegin = result.get().getColumnByName("DataBegin").getValue();
289                 String datEnd = result.get().getColumnByName("DataEnd").getValue();
290
291                 long datBeginLong = TimeConvert(datBegin);
292                 long datEndLong = TimeConvert(datEnd);
293                 return (datEndLong - datBeginLong) / 1000;
294
295         }
296
297         public static boolean CheckDate(long indate) {
298                 if (indate == 0) {
299                         return false;
300                 }
301                 StatisticsProt sp = new StatisticsProt();
302                 Calendar cal = Calendar.getInstance();
303                 String currentDate = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH);
304                 if (indate >= sp.earliestDate() && indate <= DateParsing(currentDate)) {
305                         return true;
306                 }
307                 return false;
308         }
309
310         public boolean isThisDateValid(String dateToValidate) {
311                 if (dateToValidate == null) {
312                         return false;
313                 }
314                 SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd");
315                 try {
316                         // if not valid, it will throw ParseException
317                         sdf.setLenient(false);
318                         Date date = sdf.parse(dateToValidate);
319                         // System.out.println(date);
320                 } catch (ParseException e) {
321                         e.printStackTrace();
322                         return false;
323                 }
324                 return true;
325         }
326
327 }