1 package combio.statistic;
3 import java.text.ParseException;
4 import java.text.SimpleDateFormat;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.Collections;
9 import java.util.Iterator;
10 import java.util.List;
12 import me.prettyprint.cassandra.serializers.LongSerializer;
13 import me.prettyprint.cassandra.serializers.StringSerializer;
14 import me.prettyprint.hector.api.beans.ColumnSlice;
15 import me.prettyprint.hector.api.beans.HColumn;
16 import me.prettyprint.hector.api.beans.OrderedRows;
17 import me.prettyprint.hector.api.beans.Row;
18 import me.prettyprint.hector.api.factory.HFactory;
19 import me.prettyprint.hector.api.query.QueryResult;
20 import me.prettyprint.hector.api.query.RangeSlicesQuery;
21 import me.prettyprint.hector.api.query.SliceQuery;
22 import combio.cassandra.CassandraCreate;
23 import combio.cassandra.DataBase;
25 public class StatisticsProt {
// Length of one day in milliseconds (1000 ms * 60 s * 60 min * 24 h); used as the
// step when the read* methods advance from one day key to the next.
26 private final static long MILLISECONDS_PER_DAY = 1000L * 60 * 60 * 24;
// Project helper; its GetKeyspace() supplies the keyspace for every query built in this class.
27 private CassandraCreate cc = new CassandraCreate();
// Result list; each public read* method assigns a fresh ArrayList to it before querying.
28 private ArrayList<DataBase> query;
30 // query for the period from date1 till date2
/**
 * Walks the period day by day and, for each day, counts the columns stored under
 * that day's key in the "ProteinData" column family.
 *
 * NOTE(review): the statement that stores db and the method's return are not
 * visible in this excerpt; presumably each db is appended to query and query is
 * returned - confirm against the full file.
 *
 * @param dateInStringSt  first day of the period, parsed by DateParsing (pattern "yyyy/MM/dd")
 * @param dateInStringEnd last day of the period (inclusive), same pattern
 * @return not visible in this excerpt; presumably the list of per-day entries
 */
31 public List<DataBase> readDetail(String dateInStringSt, String dateInStringEnd) {
32 long dateWorkSt = DateParsing(dateInStringSt);
33 long dateWorkEnd = DateParsing(dateInStringEnd);
34 query = new ArrayList<DataBase>();
// Inclusive loop: the end day itself is queried as well.
35 while (dateWorkSt <= dateWorkEnd) {
36 SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(), StringSerializer.get(), StringSerializer.get());
37 result.setColumnFamily("ProteinData");
// The row key is the day's timestamp in milliseconds, as produced by DateParsing.
38 result.setKey(dateWorkSt);
// No start/end column bound, not reversed, column limit of Integer.MAX_VALUE.
39 result.setRange(null, null, false, Integer.MAX_VALUE);
40 QueryResult <ColumnSlice<String, String>> columnSlice = result.execute();
// One entry per day: the formatted date plus the number of columns found for it.
41 DataBase db = new DataBase(DateFormat(dateWorkSt), columnSlice.get().getColumns().size());
// NOTE(review): a fixed 24 h step assumes every day is exactly 24 h long in the
// parser's time zone - confirm the keys still line up across DST changes.
43 dateWorkSt += MILLISECONDS_PER_DAY ;
48 // query jobs for the period from dateInStringSt till dateInStringEnd
/**
 * For each day of the period, tallies the columns stored under that day's key in
 * "ProteinData" into four counters, bucketed by the value CountID returns for the
 * column name (used as the job id).
 *
 * NOTE(review): several statements are not visible in this excerpt - the body of
 * the for loop, the conditions guarding counters 0 and 3, the step that stores db,
 * and the return. Presumably the for loop seeds timeResult with four zeros (the
 * set(0..3) calls need existing elements), counter 0 covers lenResult <= 1 and
 * counter 3 covers lenResult > 20 - confirm against the full file.
 *
 * NOTE(review): CountID, as visible, returns a difference of two getTime() values
 * (milliseconds); confirm the 1 / 10 / 20 thresholds are intended in that unit.
 *
 * @param dateInStringSt  first day of the period, parsed by DateParsing (pattern "yyyy/MM/dd")
 * @param dateInStringEnd last day of the period (inclusive), same pattern
 * @return not visible in this excerpt; presumably the list of per-day entries
 */
49 public List<DataBase> readLength(String dateInStringSt, String dateInStringEnd) {
50 query = new ArrayList<DataBase>();
51 long dateWorkSt = DateParsing(dateInStringSt);
52 long dateWorkEnd = DateParsing(dateInStringEnd);
// Inclusive loop: the end day itself is processed as well.
53 while (dateWorkSt <= dateWorkEnd) {
// Per-day counters; indexes 0..3 are incremented below.
54 List<Integer> timeResult = new ArrayList<Integer>();
55 SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(), StringSerializer.get(), StringSerializer.get());
56 result.setColumnFamily("ProteinData");
57 result.setKey(dateWorkSt);
// No start/end column bound, not reversed, column limit of Integer.MAX_VALUE.
58 result.setRange(null, null, false, Integer.MAX_VALUE);
59 QueryResult <ColumnSlice<String, String>> columnSlice = result.execute();
60 List<HColumn<String, String>> col = columnSlice.get().getColumns();
61 Iterator<HColumn<String, String>> itCol = col.iterator();
// Loop body not visible here; presumably initialises the four counters to 0.
62 for (int i = 0; i < 4; i++)
64 while (itCol.hasNext()) {
// The column name is passed to CountID as the job id; CountID runs one more query per column.
65 String id = itCol.next().getName();
66 long lenResult = CountID(id);
// Counter 0 (its guarding condition is not visible in this excerpt).
68 timeResult.set(0, timeResult.get(0) + 1);
69 else if (lenResult > 1 && lenResult <= 10)
70 timeResult.set(1, timeResult.get(1) + 1);
71 else if (lenResult > 10 && lenResult <= 20)
72 timeResult.set(2, timeResult.get(2) + 1);
// Counter 3 (its guarding branch is not visible in this excerpt).
74 timeResult.set(3, timeResult.get(3) + 1);
76 DataBase db = new DataBase();
77 db.setTimeRez(timeResult);
78 db.setDate(DateFormat(dateWorkSt));
// Reads the counters of the first stored entry; 'test' is not used in the visible lines.
80 List<Integer> test = query.get(0).getTimeRez();
// NOTE(review): a fixed 24 h step assumes every day is exactly 24 h long in the
// parser's time zone - confirm the keys still line up across DST changes.
81 dateWorkSt += MILLISECONDS_PER_DAY ;
86 //query by a protein name
/**
 * Looks up the row whose key equals protIn in the "ProteinRow" column family and
 * builds one DataBase per column of that row, with the column name as id and the
 * column value as jpred.
 *
 * NOTE(review): the statements that store db and return the result are not
 * visible in this excerpt; presumably each db is appended to query and query is
 * returned - confirm against the full file.
 *
 * @param protIn row key to look up (exact match)
 * @return not visible in this excerpt; presumably the list of entries built from the row
 */
87 public List<DataBase> readProt(String protIn) {
88 query = new ArrayList<DataBase>();
89 SliceQuery<String, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get());
90 result.setColumnFamily("ProteinRow");
91 result.setKey(protIn);
// No start/end column bound, not reversed, column limit of Integer.MAX_VALUE.
92 result.setRange(null, null, false, Integer.MAX_VALUE);
93 QueryResult <ColumnSlice<String, String>> columnSlice = result.execute();
94 Iterator <HColumn<String, String>> it = columnSlice.get().getColumns().iterator();
95 while (it.hasNext()) {
96 HColumn<String, String> col = it.next();
97 DataBase db = new DataBase();
// Column name -> id, column value -> jpred.
99 db.setId(col.getName());
100 db.setJpred(col.getValue());
107 // query by a partial sequence: finds proteins whose row key contains the given sequence
/**
 * Scans rows of the "ProteinRow" column family in pages of row_count rows and, for
 * every row whose key contains protIn, builds one DataBase per column carrying the
 * key (prot), the column name (id), the column value (jpred) and a list of key
 * fragments (subProt).
 *
 * NOTE(review): the paging loop header, the statements that fill subProt, the step
 * that stores db, the loop exit and the return are not visible in this excerpt;
 * the comments below describe only the visible lines.
 *
 * @param protIn fragment to search for inside row keys; it is concatenated into a
 *               regular expression unescaped, so regex metacharacters in it are
 *               interpreted as pattern syntax
 * @return not visible in this excerpt; presumably the list of matching entries
 */
108 public List<DataBase> readPart(String protIn) {
// Page size for the range query.
109 int row_count = 10000;
110 query = new ArrayList<DataBase>();
111 RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get());
112 result.setColumnFamily("ProteinRow");
// No start/end column bound, not reversed, column limit of Integer.MAX_VALUE per row.
113 result.setRange(null, null, false, Integer.MAX_VALUE);
114 result.setRowCount(row_count);
// Start key of the next page; null for the first page.
115 String last_key = null;
// NOTE(review): presumably the start key is inclusive, so the last row of one page
// may be visited again at the start of the next - verify against Hector's docs.
117 result.setKeys(last_key, null);
118 QueryResult <OrderedRows<String,String, String>> columnSlice = result.execute();
119 OrderedRows<String, String, String> rows = columnSlice.get();
120 Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
121 while (rowsIterator.hasNext()) {
122 Row<String, String, String> row = rowsIterator.next();
// Remember the key of the row just read; it also serves as the next page's start key.
123 last_key = row.getKey();
// Whole-key regex match, i.e. "key contains protIn" when protIn has no regex metacharacters.
124 if (last_key.matches("(.*)" + protIn + "(.*)")) {
125 Iterator <HColumn<String, String>> it = row.getColumnSlice().getColumns().iterator();
126 while (it.hasNext()) {
127 HColumn<String, String> col = it.next();
// Fragment list is rebuilt for every column although it depends only on the key.
128 List<String> subProt = new ArrayList<String>();
129 String subStr = last_key;
// Walk the key, consuming one occurrence of protIn per iteration.
// NOTE(review): with an empty protIn, contains() is true and the substring call
// below does not shorten subStr - check that this loop cannot run forever.
130 while (subStr.length() > 0 && subStr.contains(protIn)) {
// Text preceding the next occurrence of protIn.
131 String first = subStr.substring(0, subStr.indexOf(protIn));
// Statement guarded by this condition is not visible; presumably it adds 'first' to subProt.
132 if (first.length() > 0)
// Drop everything up to and including the occurrence just handled.
135 subStr = subStr.substring(subStr.indexOf(protIn) + protIn.length(), subStr.length());
// Statement guarded by this condition is not visible; presumably it adds the remaining tail to subProt.
137 if (subStr.length() > 0)
139 DataBase db = new DataBase();
140 db.setProt(last_key);
141 db.setId(col.getName());
142 db.setJpred(col.getValue());
143 db.setSubProt(subProt);
// A page shorter than row_count means no more rows; the guarded statement is not
// visible here, presumably it leaves the paging loop.
148 if (rows.getCount() < row_count)
154 // convert a date string (yyyy/MM/dd) to a timestamp in milliseconds
/**
 * Parses a date string with the pattern "yyyy/MM/dd" and takes the resulting
 * Date's getTime() value (milliseconds since the epoch).
 *
 * NOTE(review): the declaration of dateWorkSt, the opening of the try block, the
 * body of the catch and the return are not visible in this excerpt; what happens
 * on a ParseException cannot be told from here.
 *
 * @param datInput date text expected in the form yyyy/MM/dd
 * @return not visible in this excerpt; presumably dateWorkSt
 */
155 private static long DateParsing(String datInput) {
// A new formatter is created on every call (SimpleDateFormat is not thread-safe).
157 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
159 dateWorkSt = formatter.parse(datInput).getTime();
160 } catch (ParseException e) {
166 // convert a date-and-time string (yyyy/MM/dd:hh:mm:ss) to a timestamp in milliseconds
/**
 * Parses a timestamp string with the pattern "yyyy/MM/dd:hh:mm:ss" and takes the
 * resulting Date's getTime() value (milliseconds since the epoch).
 *
 * NOTE(review): in SimpleDateFormat "hh" is the 12-hour clock field (1-12) while
 * "HH" is the 24-hour field; if the stored timestamps use 24-hour time, values in
 * the 12:xx hour would be read as 00:xx - confirm whether "HH" was intended.
 *
 * NOTE(review): the declaration of dateWorkSt, the opening of the try block, the
 * body of the catch and the return are not visible in this excerpt.
 *
 * @param datInput timestamp text expected in the form yyyy/MM/dd:hh:mm:ss
 * @return not visible in this excerpt; presumably dateWorkSt
 */
167 private static long TimeConvert(String datInput) {
// A new formatter is created on every call (SimpleDateFormat is not thread-safe).
169 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss");
171 dateWorkSt = formatter.parse(datInput).getTime();
172 } catch (ParseException e) {
175 // System.out.println("start reverce" + DateFormat1(dateWorkSt));
179 // convert long to date in string format
/**
 * Formats a millisecond timestamp as a "dd/MM/yyyy" string.
 *
 * NOTE(review): the return statement is not visible in this excerpt; presumably
 * dateString is returned. Date is used here but no import for it appears among
 * the visible import lines - confirm it is imported in the full file.
 *
 * @param inDate timestamp in milliseconds, passed to the Date(long) constructor
 * @return not visible in this excerpt; presumably the formatted date
 */
180 private static String DateFormat(long inDate){
181 SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
182 String dateString = datformat.format(new Date(inDate));
/**
 * Formats a millisecond timestamp as a "dd/MM/yyyy:hh:mm:ss" string.
 *
 * NOTE(review): "hh" is the 12-hour clock field in SimpleDateFormat and the
 * pattern has no AM/PM marker, so morning and afternoon times format identically -
 * confirm whether "HH" was intended. The return statement is not visible in this
 * excerpt; presumably dateString is returned.
 *
 * @param inDate timestamp in milliseconds, passed to the Date(long) constructor
 * @return not visible in this excerpt; presumably the formatted timestamp
 */
186 private static String DateFormat1(long inDate){
187 SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy:hh:mm:ss");
188 String dateString = datformat.format(new Date(inDate));
/**
 * Reads the "DataBegin" and "DataEnd" columns of the row keyed by id in the
 * "ProteinLog" column family, converts both with TimeConvert and returns end minus
 * begin.
 *
 * NOTE(review): TimeConvert's result comes from Date.getTime(), so the difference
 * is presumably in milliseconds - confirm callers expect that unit.
 *
 * @param id row key in "ProteinLog"
 * @return the converted "DataEnd" value minus the converted "DataBegin" value
 */
192 public long CountID(String id) {
193 SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get());
// Empty start/end column bounds, not reversed, at most 100 columns fetched.
194 sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
195 QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
// NOTE(review): the getColumnByName results are dereferenced without a null check;
// presumably a missing column yields null and a NullPointerException here - verify.
196 String datBegin = result.get().getColumnByName("DataBegin").getValue();
197 String datEnd = result.get().getColumnByName("DataEnd").getValue();
198 long datBeginLong = TimeConvert(datBegin);
199 long datEndLong = TimeConvert(datEnd);
200 return datEndLong-datBeginLong;