1 package compbio.statistic;
3 import java.text.ParseException;
4 import java.text.SimpleDateFormat;
5 import java.util.ArrayList;
6 import java.util.Calendar;
7 import java.util.Collections;
9 import java.util.Iterator;
10 import java.util.List;
12 import me.prettyprint.cassandra.serializers.LongSerializer;
13 import me.prettyprint.cassandra.serializers.StringSerializer;
14 import me.prettyprint.hector.api.beans.ColumnSlice;
15 import me.prettyprint.hector.api.beans.HColumn;
16 import me.prettyprint.hector.api.beans.OrderedRows;
17 import me.prettyprint.hector.api.beans.Row;
18 import me.prettyprint.hector.api.factory.HFactory;
19 import me.prettyprint.hector.api.query.QueryResult;
20 import me.prettyprint.hector.api.query.RangeSlicesQuery;
21 import me.prettyprint.hector.api.query.SliceQuery;
22 import compbio.cassandra.CassandraCreate;
23 import compbio.cassandra.DataBase;
25 public class StatisticsProt {
// Length of one day in milliseconds; the date loops below advance their row key by this step.
26 private final static long MILLISECONDS_PER_DAY = 1000L * 60 * 60 * 24;
// Supplies the keyspace (cc.GetKeyspace()) passed to every Hector query built in this class.
27 private CassandraCreate cc = new CassandraCreate();
// Result holder; every read* method assigns it a fresh ArrayList before querying.
28 private ArrayList<DataBase> query;
// Upper bound used when clamping requested dates; assigned in SetDateRange() from today's
// year/month/day via DateParsing. Stays 0 until SetDateRange() has run.
29 private static long currentDate = 0;
// Lower bound used when clamping requested dates ("earlest" = earliest); SetDateRange()
// fills it from earliestDate() only while it is still 0.
30 private static long earlestDate = 0;
32 /* query: the period from date1 till date2 */
/**
 * Walks the "ProteinData" column family day by day for the period date1..date2 and
 * builds one DataBase(formatted date, number of columns) for every day whose row is
 * not empty. Progress is printed to standard output.
 *
 * @param date1 start of the period, checked with isThisDateValid and parsed by
 *              DateParsing (pattern "yyyy/MM/dd")
 * @param date2 end of the period, same format as date1
 */
33 public List<DataBase> readDetails(String date1, String date2) {
// Both dates must pass validation; otherwise a diagnostic is printed.
34 if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
35 System.out.println("Wrong date: point 1");
40 long dateStart = DateParsing(date1);
41 long dateEnd = DateParsing(date2);
// Clamp the start of the period using the static bounds earlestDate / currentDate.
42 if (dateStart < earlestDate)
43 dateStart = earlestDate;
44 if (dateStart > currentDate)
45 dateStart = currentDate - MILLISECONDS_PER_DAY;
// NOTE(review): the next two checks test dateEnd but assign dateStart, so dateEnd is
// never adjusted and dateStart can be overwritten. This looks like a copy-paste slip
// (dateEnd was probably the intended target) - confirm and fix.
46 if (dateEnd < earlestDate)
47 dateStart = earlestDate + MILLISECONDS_PER_DAY;
48 if (dateEnd > currentDate)
49 dateStart = currentDate;
50 System.out.println("StatisticsProt.readDetails: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
51 System.out.println("StatisticsProt.readDetails: Start date " + date1 + ": int representation = " + dateStart);
52 System.out.println("StatisticsProt.readDetails: End date " + date2 + ": int representation = " + dateEnd);
54 query = new ArrayList<DataBase>();
// One slice query per day: the row key is the day's timestamp, stepped by MILLISECONDS_PER_DAY.
56 while (dateStart <= dateEnd) {
57 SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
58 StringSerializer.get(), StringSerializer.get());
59 result.setColumnFamily("ProteinData");
60 result.setKey(dateStart);
// No column-name bounds, not reversed, column count limited only by Integer.MAX_VALUE.
61 result.setRange(null, null, false, Integer.MAX_VALUE);
62 QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
64 System.out.print("Day " + day + ": dataStart = " + dateStart + ": ");
65 if (!columnSlice.get().getColumns().isEmpty()) {
// The DataBase entry carries the day formatted by DateFormat ("dd/MM/yyyy") and the
// number of columns found in that day's row.
66 DataBase db = new DataBase(DateFormat(dateStart), columnSlice.get().getColumns().size());
68 System.out.println("data exist");
70 System.out.println("no data");
72 dateStart += MILLISECONDS_PER_DAY;
// NOTE(review): the message prefix says "readLength" although this is readDetails - confirm
// whether the log text should be corrected.
74 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
79 * query: execution time for the period from date1 till date2
/**
 * Builds a per-day histogram of job execution times for the period date1..date2.
 * For each day row in "ProteinData", every column name is treated as a job id, its
 * duration is obtained from CountID(id), and one of four counters in timeResult is
 * incremented. totalTime accumulates the same four counters over all days.
 *
 * @param date1 start of the period, checked with isThisDateValid and parsed by
 *              DateParsing (pattern "yyyy/MM/dd")
 * @param date2 end of the period, same format as date1
 */
81 public List<DataBase> readLength(String date1, String date2) {
82 if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
83 System.out.println("Wrong date: point 3");
88 long dateStart = DateParsing(date1);
89 long dateEnd = DateParsing(date2);
// Clamp the start of the period using the static bounds earlestDate / currentDate.
90 if (dateStart < earlestDate)
91 dateStart = earlestDate;
92 if (dateStart > currentDate)
93 dateStart = currentDate - MILLISECONDS_PER_DAY;
// NOTE(review): as in readDetails, these two checks test dateEnd but assign dateStart;
// dateEnd is never adjusted. Probably a copy-paste slip - confirm and fix.
94 if (dateEnd < earlestDate)
95 dateStart = earlestDate + MILLISECONDS_PER_DAY;
96 if (dateEnd > currentDate)
97 dateStart = currentDate;
98 System.out.println("StatisticsProt.readLength: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
99 System.out.println("StatisticsProt.readLength: Start date is " + date1 + ": int representation = " + dateStart);
100 System.out.println("StatisticsProt.readLength: End date is " + date2 + ": int representation = " + dateEnd);
102 query = new ArrayList<DataBase>();
// Running totals of the four duration buckets across the whole period.
103 List<Integer> totalTime = new ArrayList<Integer>();
// presumably this loop seeds totalTime with four zeros (totalTime.set(i, ...) below needs
// the slots to exist) - TODO confirm
104 for (int i = 0; i < 4; i++)
106 while (dateStart <= dateEnd) {
// Per-day counters, one slot per duration bucket.
107 List<Integer> timeResult = new ArrayList<Integer>();
108 SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
109 StringSerializer.get(), StringSerializer.get());
110 result.setColumnFamily("ProteinData");
111 result.setKey(dateStart);
112 result.setRange(null, null, false, Integer.MAX_VALUE);
113 QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
114 List<HColumn<String, String>> col = columnSlice.get().getColumns();
115 if (!col.isEmpty()) {
116 Iterator<HColumn<String, String>> itCol = col.iterator();
117 for (int i = 0; i < 4; i++)
118 timeResult.add(i, 0);
119 while (itCol.hasNext()) {
// The column name is used as the job id; CountID runs a further query per id and
// returns (end - begin) / 1000.
120 String id = itCol.next().getName();
121 long lenResult = CountID(id);
// Bucket 1 covers (30, 60] and bucket 2 covers (60, 120]; buckets 0 and 3 are
// presumably "<= 30" and "> 120" - TODO confirm.
123 timeResult.set(0, timeResult.get(0) + 1);
124 else if (lenResult > 30 && lenResult <= 60)
125 timeResult.set(1, timeResult.get(1) + 1);
126 else if (lenResult > 60 && lenResult <= 120)
127 timeResult.set(2, timeResult.get(2) + 1);
129 timeResult.set(3, timeResult.get(3) + 1);
// Fold this day's counters into the period totals.
132 for (int i = 0; i < 4; i++)
133 totalTime.set(i, totalTime.get(i) + timeResult.get(i));
// Per-day entry: the four counters plus the day formatted by DateFormat ("dd/MM/yyyy").
134 DataBase db = new DataBase();
135 db.setTimeRez(timeResult);
136 db.setDate(DateFormat(dateStart));
139 dateStart += MILLISECONDS_PER_DAY;
// Summary entry carrying the totals over the whole period.
141 DataBase db = new DataBase();
142 db.setTimeTotalExec(totalTime);
144 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
148 /* query: protein sequence */
/**
 * Looks up a single row of the "ProteinRow" column family and builds a DataBase for
 * each of its columns whose name ends in "jnetpred".
 *
 * @param protIn row key to look up (the comment above calls it a protein sequence)
 */
149 public List<DataBase> readProteins(String protIn) {
150 query = new ArrayList<DataBase>();
151 SliceQuery<String, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
152 StringSerializer.get(), StringSerializer.get());
153 result.setColumnFamily("ProteinRow");
154 result.setKey(protIn);
// No column-name bounds, not reversed, column count limited only by Integer.MAX_VALUE.
155 result.setRange(null, null, false, Integer.MAX_VALUE);
156 QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
157 Iterator<HColumn<String, String>> it = columnSlice.get().getColumns().iterator();
158 while (it.hasNext()) {
159 HColumn<String, String> col = it.next();
160 String name = col.getName();
// String.matches must match the whole name, so this keeps names ending in "jnetpred".
161 if (name.matches("(.*)jnetpred")) {
// The entry records the column name as id and the column value as the prediction (jpred).
162 DataBase db = new DataBase();
164 db.setId(col.getName());
165 db.setJpred(col.getValue());
172 // query: proteins (row keys) whose npred value - apparently the number of jnetpred columns in the row - is at least counter
/**
 * Scans rows of the "ProteinRow" column family and builds a DataBase (row key plus
 * npred) for rows where npred is at least counter.
 *
 * @param counter minimum npred value a row needs in order to be reported; npred is
 *                presumably the number of "jnetpred" columns in the row - TODO confirm
 */
173 public List<DataBase> readProtID(int counter) {
174 query = new ArrayList<DataBase>();
// NOTE(review): asks for up to 100,000,000 rows per request, each with all of its
// columns - confirm this is intentional.
175 int row_count = 100000000;
176 RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
177 StringSerializer.get(), StringSerializer.get());
178 result.setColumnFamily("ProteinRow");
179 result.setRange(null, null, false, Integer.MAX_VALUE);
180 result.setRowCount(row_count);
// last_key is the start key of the next range request; null means start from the beginning.
181 String last_key = null;
183 result.setKeys(last_key, null);
184 QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
185 OrderedRows<String, String, String> rows = columnSlice.get();
186 Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
187 while (rowsIterator.hasNext()) {
188 Row<String, String, String> row = rowsIterator.next();
// Remember the key of the row being processed; it is also what gets stored in the entry.
189 last_key = row.getKey();
190 List<HColumn<String, String>> clms = row.getColumnSlice().getColumns();
192 for (HColumn<String, String> cln : clms) {
193 String name = cln.getName();
// Full-string match, i.e. column names ending in "jnetpred".
194 if (name.matches("(.*)jnetpred")) {
198 if (npred >= counter) {
199 DataBase db = new DataBase();
200 db.setProt(last_key);
201 db.setTotalId(npred);
// A page with fewer rows than requested means there is nothing further to fetch.
205 if (rows.getCount() < row_count)
211 // query by a part of sequence
/**
 * Scans rows of the "ProteinRow" column family, 10000 rows per request, and for every
 * row key that contains protIn builds a DataBase per "jnetpred" column, holding the
 * key, the column name, the column value and the key split around protIn.
 *
 * @param protIn fragment to search for inside the row keys
 */
212 public List<DataBase> readPart(String protIn) {
213 int row_count = 10000;
214 query = new ArrayList<DataBase>();
215 RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
216 StringSerializer.get(), StringSerializer.get());
217 result.setColumnFamily("ProteinRow");
218 result.setRange(null, null, false, Integer.MAX_VALUE);
219 result.setRowCount(row_count);
// last_key is the start key of the next range request; null means start from the beginning.
220 String last_key = null;
222 result.setKeys(last_key, null);
223 QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
224 OrderedRows<String, String, String> rows = columnSlice.get();
225 Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
226 while (rowsIterator.hasNext()) {
227 Row<String, String, String> row = rowsIterator.next();
228 last_key = row.getKey();
// NOTE(review): protIn is inserted into the regular expression unescaped, so regex
// metacharacters in it change the match, while contains()/indexOf() below treat it
// literally. Confirm that callers only pass plain letters.
229 if (last_key.matches("(.*)" + protIn + "(.*)")) {
230 Iterator<HColumn<String, String>> it = row.getColumnSlice().getColumns().iterator();
231 while (it.hasNext()) {
232 HColumn<String, String> col = it.next();
// Split the key around each occurrence of protIn. The split depends only on the key,
// yet it is recomputed for every column of the row.
233 List<String> subProt = new ArrayList<String>();
234 String subStr = last_key;
// NOTE(review): for an empty protIn, contains() is always true and the substring call
// below does not shorten subStr - confirm that empty input is rejected earlier.
235 while (subStr.length() > 0 && subStr.contains(protIn)) {
// Text before the next occurrence of protIn.
236 String first = subStr.substring(0, subStr.indexOf(protIn));
237 if (first.length() > 0)
// Continue with the text after this occurrence.
240 subStr = subStr.substring(subStr.indexOf(protIn) + protIn.length(), subStr.length());
// Whatever remains after the last occurrence.
242 if (subStr.length() > 0)
244 String name = col.getName();
// Full-string match, i.e. column names ending in "jnetpred".
245 if (name.matches("(.*)jnetpred")) {
246 DataBase db = new DataBase();
247 db.setProt(last_key);
248 db.setId(col.getName());
249 db.setJpred(col.getValue());
250 db.setSubProt(subProt);
// A page with fewer rows than requested means there is nothing further to fetch.
256 if (rows.getCount() < row_count)
262 // convert String to Date
// Parses a "yyyy/MM/dd" date string with SimpleDateFormat and takes Date.getTime() of the
// result, i.e. milliseconds since the epoch in the JVM's default time zone.
263 private static long DateParsing(String datInput) {
// A null input is handled separately, before any parsing is attempted.
264 if (datInput == null) {
268 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
270 dateWorkSt = formatter.parse(datInput).getTime();
// Malformed input is caught here rather than propagated to the caller.
271 } catch (ParseException e) {
277 // convert String to Date
// Parses a "yyyy/MM/dd:hh:mm:ss" timestamp string with SimpleDateFormat and takes
// Date.getTime() of the result (milliseconds since the epoch).
278 private static long TimeConvert(String datInput) {
// A null input is handled separately, before any parsing is attempted.
280 if (datInput == null) {
// NOTE(review): "hh" is the 12-hour clock field (1-12) in SimpleDateFormat; if the stored
// timestamps use a 24-hour clock, "HH" is probably what is wanted (with "hh", an hour
// value of 12 is read as midnight) - confirm against the data.
283 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss");
285 dateWorkSt = formatter.parse(datInput).getTime();
// Malformed input is caught here rather than propagated to the caller.
286 } catch (ParseException e) {
292 // convert long to date in string format
// Formats a millisecond timestamp as a "dd/MM/yyyy" string (day first), using the
// JVM's default time zone.
293 private static String DateFormat(long inDate) {
294 SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
295 String dateString = datformat.format(new Date(inDate));
300 * private static String DateFormat1(long inDate) { SimpleDateFormat
301 * datformat = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss"); String
302 * dateString = datformat.format(new Date(inDate)); return dateString; }
// Formats a millisecond timestamp as a "yyyy/MM/dd" string - the same pattern that
// DateParsing and isThisDateValid accept. Despite the method name, the year is written
// with four digits.
304 public static String DateFormatYYMMDD(long indate) {
305 SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd");
306 String dateString = datformat.format(new Date(indate));
/**
 * Computes how long the job with the given id took, from the "DataBegin" and "DataEnd"
 * columns of its row in the "ProteinLog" column family.
 *
 * @param id row key in "ProteinLog"
 * @return (TimeConvert(DataEnd) - TimeConvert(DataBegin)) / 1000; since TimeConvert works
 *         with Date.getTime() milliseconds, this is a duration in whole seconds
 */
310 public long CountID(String id) {
311 SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
312 StringSerializer.get(), StringSerializer.get());
// Reads at most 100 columns of the row, with no column-name bounds.
313 sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
314 QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
// NOTE(review): the getColumnByName results are dereferenced without a null check;
// presumably a missing "DataBegin"/"DataEnd" column yields null and a
// NullPointerException here - confirm against the Hector API.
315 String datBegin = result.get().getColumnByName("DataBegin").getValue();
316 String datEnd = result.get().getColumnByName("DataEnd").getValue();
318 long datBeginLong = TimeConvert(datBegin);
319 long datEndLong = TimeConvert(datEnd);
// Integer division drops any fractional second.
320 return (datEndLong - datBeginLong) / 1000;
// Initialises the static date bounds used for clamping: earlestDate (only while it is
// still 0) and currentDate.
323 private static void SetDateRange() {
// earlestDate doubles as the "not yet initialised" flag: 0 means earliestDate() has not
// been consulted yet.
324 if (0 == earlestDate) {
// earliestDate() is an instance method, so a throwaway instance is created to call it.
325 StatisticsProt sp = new StatisticsProt();
326 earlestDate = sp.earliestDate();
327 System.out.println("Set earlest Date = " + earlestDate);
// currentDate is today's date: the calendar fields are joined into a "year/month/day"
// string and run through DateParsing. Calendar.MONTH is zero-based, hence the + 1.
329 Calendar cal = Calendar.getInstance();
330 currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH));
/**
 * Checks that a string is a real calendar date in "yyyy/MM/dd" form, using a
 * non-lenient SimpleDateFormat parse.
 *
 * @param dateToValidate candidate date string; null and "" are reported as
 *                       "Undefined date" on standard output
 */
333 public boolean isThisDateValid(String dateToValidate) {
334 if (dateToValidate == null || dateToValidate.equals("")) {
335 System.out.println("Undefined date");
338 SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd");
340 // if not valid, this will throw ParseException
// Non-lenient mode rejects out-of-range fields (for example month 13) instead of
// rolling them over.
// NOTE(review): DateFormat.parse(String) may stop before the end of the input, so
// trailing text after a valid date would presumably still pass - confirm whether
// that matters here.
341 sdf.setLenient(false);
342 Date date = sdf.parse(dateToValidate);
343 } catch (ParseException e) {
350 // find the earliest date
/**
 * Finds the smallest row key in the "ProteinData" column family by collecting the
 * keys returned by a range query, sorting them ascending and taking the first.
 *
 * @return the smallest collected key
 */
351 public long earliestDate() {
352 ArrayList<Long> dateSort = new ArrayList<Long>();
// Rows are requested 10000 at a time.
353 int row_count = 10000;
354 RangeSlicesQuery<Long, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), LongSerializer.get(),
355 StringSerializer.get(), StringSerializer.get());
356 result.setColumnFamily("ProteinData");
// NOTE(review): only the row keys are used below, yet the range asks for every column
// of every row - a narrower column range would presumably be enough; confirm.
357 result.setRange(null, null, false, Integer.MAX_VALUE);
358 result.setRowCount(row_count);
// last_key is the start key of the next range request; null means start from the beginning.
359 Long last_key = null;
361 result.setKeys(last_key, null);
362 QueryResult<OrderedRows<Long, String, String>> columnSlice = result.execute();
363 OrderedRows<Long, String, String> rows = columnSlice.get();
364 Iterator<Row<Long, String, String>> rowsIterator = rows.iterator();
365 while (rowsIterator.hasNext()) {
366 Row<Long, String, String> row = rowsIterator.next();
367 last_key = row.getKey();
368 dateSort.add(last_key);
// A page with fewer rows than requested means there is nothing further to fetch.
370 if (rows.getCount() < row_count)
// The keys are sorted here, so the order in which the query returned rows does not matter.
373 Collections.sort(dateSort);
// NOTE(review): get(0) throws IndexOutOfBoundsException when no keys were collected -
// confirm the column family can never be empty when this is called.
374 return dateSort.get(0);