1 package compbio.statistic;
3 import java.text.ParseException;
4 import java.text.SimpleDateFormat;
5 import java.util.ArrayList;
6 import java.util.Calendar;
7 import java.util.Collections;
9 import java.util.Iterator;
10 import java.util.List;
12 import me.prettyprint.cassandra.serializers.LongSerializer;
13 import me.prettyprint.cassandra.serializers.StringSerializer;
14 import me.prettyprint.hector.api.beans.ColumnSlice;
15 import me.prettyprint.hector.api.beans.HColumn;
16 import me.prettyprint.hector.api.beans.OrderedRows;
17 import me.prettyprint.hector.api.beans.Row;
18 import me.prettyprint.hector.api.factory.HFactory;
19 import me.prettyprint.hector.api.query.QueryResult;
20 import me.prettyprint.hector.api.query.RangeSlicesQuery;
21 import me.prettyprint.hector.api.query.SliceQuery;
22 import compbio.cassandra.CassandraCreate;
23 import compbio.cassandra.DataBase;
/**
 * Statistics queries over the Cassandra column families "ProteinData", "ProteinRow" and
 * "ProteinLog", executed through the Hector client API.
 */
25 public class StatisticsProt {
26 // private final static long MILLISECONDS_PER_DAY = 1000L * 60 * 60 * 24;
// Provides the Hector keyspace (cc.GetKeyspace()) that every query in this class runs against.
27 private CassandraCreate cc = new CassandraCreate();
// Result list of the most recent query; each read* method replaces it with a fresh list.
28 private ArrayList<DataBase> query;
// Upper bound of the queryable period, in milliseconds; assigned by SetDateRange().
29 private static long currentDate = 0;
// Lower bound of the queryable period, in milliseconds; 0 means "not looked up yet"
// (see the check in SetDateRange()). The identifier is spelled "earlestDate" throughout.
30 private static long earlestDate = 0;
32 /* query: the period from date1 till date2 */
33 public List<DataBase> readDetails(String date1, String date2) {
35 if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
36 System.out.println("Wrong date: point 1");
40 long dateStart = DateParsing(date1);
41 long dateEnd = DateParsing(date2);
42 if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate)
43 || dateStart > dateEnd)
45 if (dateStart < earlestDate)
46 dateStart = earlestDate;
47 if (dateEnd > currentDate)
48 dateStart = currentDate;
49 System.out.println("StatisticsProt.readDetails: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
50 System.out.println("StatisticsProt.readDetails: Start date " + date1 + ": int representation = " + dateStart);
51 System.out.println("StatisticsProt.readDetails: End date " + date2 + ": int representation = " + dateEnd);
52 Calendar start = Calendar.getInstance();
53 start.setTime(new Date(dateStart));
54 Calendar end = Calendar.getInstance();
55 end.setTime(new Date(dateEnd));
56 query = new ArrayList<DataBase>();
58 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
59 SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
60 StringSerializer.get(), StringSerializer.get());
61 result.setColumnFamily("ProteinData");
62 result.setKey(date.getTime());
63 result.setRange(null, null, false, Integer.MAX_VALUE);
64 QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
66 System.out.print("Day " + day + ": dataStart = " + date + ": ");
67 if (!columnSlice.get().getColumns().isEmpty()) {
68 DataBase db = new DataBase(DateFormat(date.getTime()), columnSlice.get().getColumns().size());
70 System.out.println("data exist");
72 System.out.println("no data");
74 // dateStart += MILLISECONDS_PER_DAY;
76 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
81 * query: execution time for the period from date1 till date2
83 public List<DataBase> readLength(String date1, String date2) {
84 if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
85 System.out.println("Wrong date: point 3");
89 long dateStart = DateParsing(date1);
90 long dateEnd = DateParsing(date2);
91 if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate)
92 || dateStart > dateEnd)
94 if (dateStart < earlestDate)
95 dateStart = earlestDate;
96 if (dateEnd > currentDate)
97 dateStart = currentDate;
98 System.out.println("StatisticsProt.readLength: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
99 System.out.println("StatisticsProt.readLength: Start date is " + date1 + ": int representation = " + dateStart);
100 System.out.println("StatisticsProt.readLength: End date is " + date2 + ": int representation = " + dateEnd);
101 Calendar start = Calendar.getInstance();
102 start.setTime(new Date(dateStart));
103 Calendar end = Calendar.getInstance();
104 end.setTime(new Date(dateEnd));
105 query = new ArrayList<DataBase>();
106 List<Integer> totalTime = new ArrayList<Integer>();
107 for (int i = 0; i < 4; i++)
109 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
110 List<Integer> timeResult = new ArrayList<Integer>();
111 SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
112 StringSerializer.get(), StringSerializer.get());
113 result.setColumnFamily("ProteinData");
114 result.setKey(date.getTime());
115 result.setRange(null, null, false, Integer.MAX_VALUE);
116 QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
117 List<HColumn<String, String>> col = columnSlice.get().getColumns();
118 if (!col.isEmpty()) {
119 Iterator<HColumn<String, String>> itCol = col.iterator();
120 for (int i = 0; i < 4; i++)
121 timeResult.add(i, 0);
122 while (itCol.hasNext()) {
123 String id = itCol.next().getName();
124 long lenResult = CountID(id);
126 timeResult.set(0, timeResult.get(0) + 1);
127 else if (lenResult > 30 && lenResult <= 60)
128 timeResult.set(1, timeResult.get(1) + 1);
129 else if (lenResult > 60 && lenResult <= 120)
130 timeResult.set(2, timeResult.get(2) + 1);
132 timeResult.set(3, timeResult.get(3) + 1);
135 for (int i = 0; i < 4; i++)
136 totalTime.set(i, totalTime.get(i) + timeResult.get(i));
137 DataBase db = new DataBase();
138 db.setTimeRez(timeResult);
139 db.setDate(DateFormat(date.getTime()));
142 // dateStart += MILLISECONDS_PER_DAY;
144 DataBase db = new DataBase();
145 db.setTimeTotalExec(totalTime);
147 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
// Looks up the "ProteinRow" row whose key is exactly protIn and walks over all of its columns.
// For every column whose name matches the pattern "(.*)jnetpred" a DataBase is created that
// carries the column name as id and the column value as jpred.
// protIn: the row key to look up (the header comment below calls it the protein sequence).
151 /* query: protein sequence */
152 public List<DataBase> readProteins(String protIn) {
// Start from an empty result list; the previous query's result is discarded.
153 query = new ArrayList<DataBase>();
154 SliceQuery<String, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
155 StringSerializer.get(), StringSerializer.get());
156 result.setColumnFamily("ProteinRow");
157 result.setKey(protIn);
// No column-name bounds, not reversed, no practical limit on the number of columns.
158 result.setRange(null, null, false, Integer.MAX_VALUE);
159 QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
160 Iterator<HColumn<String, String>> it = columnSlice.get().getColumns().iterator();
161 while (it.hasNext()) {
162 HColumn<String, String> col = it.next();
163 String name = col.getName();
// Only columns whose name ends in "jnetpred" are reported.
164 if (name.matches("(.*)jnetpred")) {
165 DataBase db = new DataBase();
167 db.setId(col.getName());
168 db.setJpred(col.getValue());
// Scans the "ProteinRow" column family page by page and reports rows for which npred exceeds
// counter: each reported DataBase carries the row key as prot and npred as totalId.
// NOTE(review): npred is presumably the number of columns in the row whose name matches
// "(.*)jnetpred" - confirm against the counting code.
// NOTE(review): the header comment below says "query by a protein sequence", but the method
// takes a numeric threshold, not a sequence - the comment looks copied from another method.
175 // query by a protein sequence
176 public List<DataBase> readProtID(int counter) {
177 query = new ArrayList<DataBase>();
// Page size; large enough that the whole column family is normally fetched in a single page.
178 int row_count = 100000000;
179 RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
180 StringSerializer.get(), StringSerializer.get());
181 result.setColumnFamily("ProteinRow");
182 result.setRange(null, null, false, Integer.MAX_VALUE);
183 result.setRowCount(row_count);
// Start key of the next page; null means "start from the beginning".
184 String last_key = null;
// NOTE(review): the next page starts AT the last key already seen; if the start key is
// inclusive, that row is processed twice whenever more than one page is read - confirm.
186 result.setKeys(last_key, null);
187 QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
188 OrderedRows<String, String, String> rows = columnSlice.get();
189 Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
190 while (rowsIterator.hasNext()) {
191 Row<String, String, String> row = rowsIterator.next();
192 last_key = row.getKey();
193 List<HColumn<String, String>> clms = row.getColumnSlice().getColumns();
195 for (HColumn<String, String> cln : clms) {
196 String name = cln.getName();
197 if (name.matches("(.*)jnetpred")) {
// Report only rows whose count is strictly greater than the requested threshold.
201 if (npred > counter) {
202 DataBase db = new DataBase();
203 db.setProt(last_key);
204 db.setTotalId(npred);
// A page shorter than the page size means the end of the column family was reached.
208 if (rows.getCount() < row_count)
214 // query by a part of sequence
215 public List<DataBase> readPart(String protIn) {
216 int row_count = 10000;
217 query = new ArrayList<DataBase>();
218 RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
219 StringSerializer.get(), StringSerializer.get());
220 result.setColumnFamily("ProteinRow");
221 result.setRange(null, null, false, Integer.MAX_VALUE);
222 result.setRowCount(row_count);
223 String last_key = null;
225 result.setKeys(last_key, null);
226 QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
227 OrderedRows<String, String, String> rows = columnSlice.get();
228 Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
229 while (rowsIterator.hasNext()) {
230 Row<String, String, String> row = rowsIterator.next();
231 last_key = row.getKey();
232 if (last_key.matches("(.*)" + protIn + "(.*)")) {
233 Iterator<HColumn<String, String>> it = row.getColumnSlice().getColumns().iterator();
234 while (it.hasNext()) {
235 HColumn<String, String> col = it.next();
236 List<String> subProt = new ArrayList<String>();
237 String subStr = last_key;
238 while (subStr.length() > 0 && subStr.contains(protIn)) {
239 String first = subStr.substring(0, subStr.indexOf(protIn));
240 if (first.length() > 0)
243 subStr = subStr.substring(subStr.indexOf(protIn) + protIn.length(), subStr.length());
245 if (subStr.length() > 0)
247 String name = col.getName();
248 if (name.matches("(.*)jnetpred")) {
249 DataBase db = new DataBase();
250 db.setProt(last_key);
251 db.setId(col.getName());
252 db.setJpred(col.getValue());
253 db.setSubProt(subProt);
259 if (rows.getCount() < row_count)
// Parses a "yyyy/MM/dd" string and yields the parsed date as milliseconds (Date.getTime()).
// Unlike isThisDateValid, the formatter here is not switched to strict parsing
// (setLenient(false) is not called).
265 // convert String to Date
266 private static long DateParsing(String datInput) {
// A missing input is handled separately, before any parsing is attempted.
267 if (datInput == null) {
271 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
273 dateWorkSt = formatter.parse(datInput).getTime();
// Input that does not match the pattern ends up here instead of propagating to the caller.
274 } catch (ParseException e) {
// Parses a "yyyy/MM/dd:hh:mm:ss" timestamp string and yields it as milliseconds
// (Date.getTime()). CountID uses it for the "DataBegin"/"DataEnd" values of a job.
// NOTE(review): in SimpleDateFormat "hh" is the 12-hour clock field (1-12), "HH" the 24-hour
// one. If the stored timestamps use a 24-hour clock, an hour of 12 is read as 0 and durations
// crossing noon come out wrong - confirm the stored format before changing the pattern.
280 // convert String to Date
281 private static long TimeConvert(String datInput) {
// A missing input is handled separately, before any parsing is attempted.
283 if (datInput == null) {
286 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss");
288 dateWorkSt = formatter.parse(datInput).getTime();
// Input that does not match the pattern ends up here instead of propagating to the caller.
289 } catch (ParseException e) {
295 // convert long to date in string format
296 private static String DateFormat(long inDate) {
297 SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
298 String dateString = datformat.format(new Date(inDate));
303 * private static String DateFormat1(long inDate) { SimpleDateFormat
304 * datformat = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss"); String
305 * dateString = datformat.format(new Date(inDate)); return dateString; }
307 public static String DateFormatYYMMDD(long indate) {
308 SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd");
309 String dateString = datformat.format(new Date(indate));
313 public long CountID(String id) {
314 SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
315 StringSerializer.get(), StringSerializer.get());
316 sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
317 QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
318 String datBegin = result.get().getColumnByName("DataBegin").getValue();
319 String datEnd = result.get().getColumnByName("DataEnd").getValue();
321 long datBeginLong = TimeConvert(datBegin);
322 long datEndLong = TimeConvert(datEnd);
323 return (datEndLong - datBeginLong) / 1000;
326 private static void SetDateRange() {
327 if (0 == earlestDate) {
328 StatisticsProt sp = new StatisticsProt();
329 earlestDate = sp.earliestDate();
330 System.out.println("Set earlest Date = " + earlestDate);
332 Calendar cal = Calendar.getInstance();
333 currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH));
// Checks whether dateToValidate is a date in "yyyy/MM/dd" form, using strict
// (non-lenient) parsing. A null or empty argument is reported as "Undefined date"
// on standard output.
// NOTE(review): presumably yields false for null/empty input and for a ParseException and
// true otherwise - confirm against the return statements.
336 public boolean isThisDateValid(String dateToValidate) {
337 if (dateToValidate == null || dateToValidate.equals("")) {
338 System.out.println("Undefined date");
341 SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd");
343 // if not valid, this will throw ParseException
344 sdf.setLenient(false);
// The parsed value itself is not used afterwards; parse() is called only for its exception.
345 Date date = sdf.parse(dateToValidate);
346 } catch (ParseException e) {
353 // find the earliest date
354 public long earliestDate() {
355 ArrayList<Long> dateSort = new ArrayList<Long>();
356 int row_count = 10000;
357 RangeSlicesQuery<Long, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), LongSerializer.get(),
358 StringSerializer.get(), StringSerializer.get());
359 result.setColumnFamily("ProteinData");
360 result.setRange(null, null, false, Integer.MAX_VALUE);
361 result.setRowCount(row_count);
362 Long last_key = null;
364 result.setKeys(last_key, null);
365 QueryResult<OrderedRows<Long, String, String>> columnSlice = result.execute();
366 OrderedRows<Long, String, String> rows = columnSlice.get();
367 Iterator<Row<Long, String, String>> rowsIterator = rows.iterator();
368 while (rowsIterator.hasNext()) {
369 Row<Long, String, String> row = rowsIterator.next();
370 last_key = row.getKey();
371 dateSort.add(last_key);
373 if (rows.getCount() < row_count)
376 Collections.sort(dateSort);
377 return dateSort.get(0);