1 package compbio.statistic;
import java.text.ParseException;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.Iterator;
import java.util.List;

import compbio.cassandra.CassandraNativeConnector;
import compbio.cassandra.DataBase;
15 public class StatisticsProt {
16 private CassandraNativeConnector cc = new CassandraNativeConnector();
17 private ArrayList<DataBase> query;
18 private static long currentDate = 0;
19 private static long earlestDate = 0;
22 * query: the period from date1 till date2
24 public List<DataBase> readDetails(String date1, String date2) {
26 if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
27 System.out.println("Wrong date: point 1");
31 long dateStart = DateParsing(date1);
32 long dateEnd = DateParsing(date2);
33 if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate) || dateStart > dateEnd)
35 if (dateStart < earlestDate)
36 dateStart = earlestDate;
37 if (dateEnd > currentDate)
38 dateStart = currentDate;
39 System.out.println("StatisticsProt.readDetails: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
40 System.out.println("StatisticsProt.readDetails: Start date " + date1 + ": int representation = " + dateStart);
41 System.out.println("StatisticsProt.readDetails: End date " + date2 + ": int representation = " + dateEnd);
42 Calendar start = Calendar.getInstance();
43 start.setTime(new Date(dateStart));
44 Calendar end = Calendar.getInstance();
45 end.setTime(new Date(dateEnd));
46 query = new ArrayList<DataBase>();
49 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
50 SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
51 StringSerializer.get(), StringSerializer.get());
52 result.setColumnFamily("ProteinData");
53 result.setKey(date.getTime());
54 result.setRange(null, null, false, Integer.MAX_VALUE);
55 QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
57 System.out.print("Day " + day + ": dataStart = " + date + ": ");
58 if (!columnSlice.get().getColumns().isEmpty()) {
59 DataBase db = new DataBase(DateFormat(date.getTime()), columnSlice.get().getColumns().size());
61 System.out.println("data exist");
63 System.out.println("no data");
67 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
72 * query: execution time for the period from date1 till date2
74 public List<DataBase> readLength(String date1, String date2) {
75 if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
76 System.out.println("Wrong date: point 3");
81 long dateStart = DateParsing(date1);
82 long dateEnd = DateParsing(date2);
83 if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate) || dateStart > dateEnd)
85 if (dateStart < earlestDate)
86 dateStart = earlestDate;
87 if (dateEnd > currentDate)
88 dateStart = currentDate;
89 System.out.println("StatisticsProt.readLength: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
90 System.out.println("StatisticsProt.readLength: Start date is " + date1 + ": int representation = " + dateStart);
91 System.out.println("StatisticsProt.readLength: End date is " + date2 + ": int representation = " + dateEnd);
92 Calendar start = Calendar.getInstance();
93 start.setTime(new Date(dateStart));
94 Calendar end = Calendar.getInstance();
95 end.setTime(new Date(dateEnd));
96 query = new ArrayList<DataBase>();
97 List<Integer> totalTime = new ArrayList<Integer>();
98 for (int i = 0; i < nbins; i++)
101 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
102 List<Integer> timeResult = new ArrayList<Integer>();
103 SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
104 StringSerializer.get(), StringSerializer.get());
105 result.setColumnFamily("ProteinData");
106 result.setKey(date.getTime());
107 result.setRange(null, null, false, Integer.MAX_VALUE);
108 QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
109 List<HColumn<String, String>> col = columnSlice.get().getColumns();
110 if (!col.isEmpty()) {
111 Iterator<HColumn<String, String>> itCol = col.iterator();
112 for (int i = 0; i < nbins; i++)
113 timeResult.add(i, 0);
114 // split all jobs into nbins bins
115 while (itCol.hasNext()) {
116 String id = itCol.next().getName();
117 long lenResult = CountID(id);
119 timeResult.set(0, timeResult.get(0) + 1);
120 else if (lenResult > 30 && lenResult <= 60)
121 timeResult.set(1, timeResult.get(1) + 1);
122 else if (lenResult > 60 && lenResult <= 120)
123 timeResult.set(2, timeResult.get(2) + 1);
124 else if (lenResult > 120 && lenResult <= 600)
125 timeResult.set(3, timeResult.get(3) + 1);
127 timeResult.set(4, timeResult.get(4) + 1);
130 for (int i = 0; i < nbins; i++)
131 totalTime.set(i, totalTime.get(i) + timeResult.get(i));
132 DataBase db = new DataBase();
133 db.setTimeRez(timeResult);
134 db.setDate(DateFormat(date.getTime()));
139 DataBase db = new DataBase();
140 db.setTimeTotalExec(totalTime);
142 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
147 * query: protein sequence
149 public List<DataBase> readProteins(String protIn) {
150 query = new ArrayList<DataBase>();
152 SliceQuery<String, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
153 StringSerializer.get(), StringSerializer.get());
154 result.setColumnFamily("ProteinRow");
155 result.setKey(protIn);
156 result.setRange(null, null, false, Integer.MAX_VALUE);
157 QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
158 Iterator<HColumn<String, String>> it = columnSlice.get().getColumns().iterator();
159 while (it.hasNext()) {
160 HColumn<String, String> col = it.next();
161 String name = col.getName();
162 if (name.matches("(.*)jnetpred")) {
163 DataBase db = new DataBase();
165 db.setId(col.getName());
166 db.setJpred(col.getValue());
175 * query by a protein sequence
177 public List<DataBase> readProtID(int counter) {
178 query = new ArrayList<DataBase>();
181 RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
182 StringSerializer.get(), StringSerializer.get());
183 result.setColumnFamily("ProteinRow");
184 result.setRange(null, null, false, 100);
185 result.setRowCount(row_count);
186 String last_key = null;
188 result.setKeys(last_key, null);
189 QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
190 OrderedRows<String, String, String> rows = columnSlice.get();
191 Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
192 while (rowsIterator.hasNext()) {
193 Row<String, String, String> row = rowsIterator.next();
194 last_key = row.getKey();
195 List<HColumn<String, String>> clms = row.getColumnSlice().getColumns();
197 //for (HColumn<String, String> cln : clms) {
198 // String name = cln.getName();
199 // if (name.matches("(.*)jnetpred")) {
203 int npred = clms.size();
204 if (npred > counter) {
205 DataBase db = new DataBase();
206 db.setProt(last_key);
207 db.setTotalId(npred);
211 if (rows.getCount() < row_count)
218 * query by a part of sequence
220 public List<DataBase> readPart(String protIn) {
221 int row_count = 10000;
222 query = new ArrayList<DataBase>();
224 RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
225 StringSerializer.get(), StringSerializer.get());
226 result.setColumnFamily("ProteinRow");
227 result.setRange(null, null, false, Integer.MAX_VALUE);
228 result.setRowCount(row_count);
229 String last_key = null;
231 result.setKeys(last_key, null);
232 QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
233 OrderedRows<String, String, String> rows = columnSlice.get();
234 Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
235 while (rowsIterator.hasNext()) {
236 Row<String, String, String> row = rowsIterator.next();
237 last_key = row.getKey();
238 if (last_key.matches("(.*)" + protIn + "(.*)")) {
239 Iterator<HColumn<String, String>> it = row.getColumnSlice().getColumns().iterator();
240 while (it.hasNext()) {
241 HColumn<String, String> col = it.next();
242 List<String> subProt = new ArrayList<String>();
243 String subStr = last_key;
244 while (subStr.length() > 0 && subStr.contains(protIn)) {
245 String first = subStr.substring(0, subStr.indexOf(protIn));
246 if (first.length() > 0)
249 subStr = subStr.substring(subStr.indexOf(protIn) + protIn.length(), subStr.length());
251 if (subStr.length() > 0)
253 String name = col.getName();
254 if (name.matches("(.*)jnetpred")) {
255 DataBase db = new DataBase();
256 db.setProt(last_key);
257 db.setId(col.getName());
258 db.setJpred(col.getValue());
259 db.setSubProt(subProt);
265 if (rows.getCount() < row_count)
273 * convert String date into long date (miliseconds since the epoch start)
275 private static long DateParsing(String datInput) {
276 if (datInput == null) {
280 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
282 dateWorkSt = formatter.parse(datInput).getTime();
283 } catch (ParseException e) {
290 * convert String date:time into long date:time (miliseconds since the epoch start)
292 private static long TimeConvert(String datInput) {
294 if (datInput == null) {
297 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss");
299 dateWorkSt = formatter.parse(datInput).getTime();
300 } catch (ParseException e) {
306 // convert long to date in string format
307 private static String DateFormat(long inDate) {
308 SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
309 String dateString = datformat.format(new Date(inDate));
316 public static String DateFormatYYMMDD(long indate) {
317 SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd");
318 String dateString = datformat.format(new Date(indate));
325 public long CountID(String id) {
327 SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
328 StringSerializer.get(), StringSerializer.get());
329 sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
330 QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
331 String datBegin = result.get().getColumnByName("DataBegin").getValue();
332 String datEnd = result.get().getColumnByName("DataEnd").getValue();
334 long datBeginLong = TimeConvert(datBegin);
335 long datEndLong = TimeConvert(datEnd);
336 return (datEndLong - datBeginLong) / 1000;
342 * set earlest date and current dates.
343 * earlestDate is static and should be set at the 1st call
344 * currentDate should be re-calculated every time
346 private static void SetDateRange() {
347 if (0 == earlestDate) {
348 StatisticsProt sp = new StatisticsProt();
349 earlestDate = sp.earliestDate();
350 System.out.println("Set earlest Date = " + earlestDate);
352 Calendar cal = Calendar.getInstance();
353 currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH));
356 public boolean isThisDateValid(String dateToValidate) {
357 if (dateToValidate == null || dateToValidate.equals("")) {
358 System.out.println("Undefined date");
361 SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd");
363 // if not valid, this will throw ParseException
364 sdf.setLenient(false);
365 Date date = sdf.parse(dateToValidate);
366 } catch (ParseException e) {
374 * find the earliest date in the database
376 public long earliestDate() {
377 ArrayList<Long> dateSort = new ArrayList<Long>();
378 int row_count = 10000;
380 RangeSlicesQuery<Long, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), LongSerializer.get(),
381 StringSerializer.get(), StringSerializer.get());
382 result.setColumnFamily("ProteinData");
383 result.setRange(null, null, false, Integer.MAX_VALUE);
384 result.setRowCount(row_count);
385 Long last_key = null;
387 result.setKeys(last_key, null);
388 QueryResult<OrderedRows<Long, String, String>> columnSlice = result.execute();
389 OrderedRows<Long, String, String> rows = columnSlice.get();
390 Iterator<Row<Long, String, String>> rowsIterator = rows.iterator();
391 while (rowsIterator.hasNext()) {
392 Row<Long, String, String> row = rowsIterator.next();
393 last_key = row.getKey();
394 dateSort.add(last_key);
396 if (rows.getCount() < row_count)
399 Collections.sort(dateSort);
400 return dateSort.get(0);