1 package compbio.statistic;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.Iterator;
import java.util.List;

import me.prettyprint.cassandra.serializers.LongSerializer;
import me.prettyprint.cassandra.serializers.StringSerializer;
import me.prettyprint.hector.api.beans.ColumnSlice;
import me.prettyprint.hector.api.beans.HColumn;
import me.prettyprint.hector.api.beans.OrderedRows;
import me.prettyprint.hector.api.beans.Row;
import me.prettyprint.hector.api.factory.HFactory;
import me.prettyprint.hector.api.query.QueryResult;
import me.prettyprint.hector.api.query.RangeSlicesQuery;
import me.prettyprint.hector.api.query.SliceQuery;
import compbio.cassandra.CassandraCreate;
import compbio.cassandra.DataBase;
25 public class StatisticsProt {
// Length of one day in milliseconds; the date-range queries advance their row key by this step.
private final static long MILLISECONDS_PER_DAY = 1000L * 60 * 60 * 24;
// Source of the Cassandra keyspace handle (cc.GetKeyspace()) passed to every Hector query below.
private CassandraCreate cc = new CassandraCreate();
// Result list that the read* methods re-create and fill.
// NOTE(review): shared mutable state - presumably one instance must not serve concurrent calls; confirm.
private ArrayList<DataBase> query;
30 // query for the period from date1 till date2
31 public List<DataBase> readDetail(String dateInStringSt, String dateInStringEnd) {
32 if (!isThisDateValid(dateInStringSt))
34 long dateWorkSt = DateParsing(dateInStringSt);
35 long dateWorkEnd = DateParsing(dateInStringEnd);
36 if (CheckDate(dateWorkSt) && CheckDate(dateWorkEnd)) {
37 query = new ArrayList<DataBase>();
38 while (dateWorkSt <= dateWorkEnd) {
39 SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
40 StringSerializer.get(), StringSerializer.get());
41 result.setColumnFamily("ProteinData");
42 result.setKey(dateWorkSt);
43 result.setRange(null, null, false, Integer.MAX_VALUE);
44 QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
45 if (!columnSlice.get().getColumns().isEmpty()) {
46 DataBase db = new DataBase(DateFormat(dateWorkSt), columnSlice.get().getColumns().size());
49 dateWorkSt += MILLISECONDS_PER_DAY;
52 System.out.println("Wrong date");
56 // find the earliest date
57 public long earliestDate() {
58 ArrayList<Long> dateSort = new ArrayList<Long>();
59 int row_count = 10000;
60 RangeSlicesQuery<Long, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), LongSerializer.get(),
61 StringSerializer.get(), StringSerializer.get());
62 result.setColumnFamily("ProteinData");
63 result.setRange(null, null, false, Integer.MAX_VALUE);
64 result.setRowCount(row_count);
67 result.setKeys(last_key, null);
68 QueryResult<OrderedRows<Long, String, String>> columnSlice = result.execute();
69 OrderedRows<Long, String, String> rows = columnSlice.get();
70 Iterator<Row<Long, String, String>> rowsIterator = rows.iterator();
71 while (rowsIterator.hasNext()) {
72 Row<Long, String, String> row = rowsIterator.next();
73 last_key = row.getKey();
74 dateSort.add(last_key);
76 if (rows.getCount() < row_count)
79 Collections.sort(dateSort);
80 return dateSort.get(0);
// query execution time for the period from dateInStringSt till dateInStringEnd
85 public List<DataBase> readLength(String dateInStringSt, String dateInStringEnd) {
86 long dateWorkSt = DateParsing(dateInStringSt);
87 long dateWorkEnd = DateParsing(dateInStringEnd);
88 if (CheckDate(dateWorkSt) && CheckDate(dateWorkEnd)) {
89 query = new ArrayList<DataBase>();
90 List<Integer> totalTime = new ArrayList<Integer>();
91 for (int i = 0; i < 4; i++)
93 while (dateWorkSt <= dateWorkEnd) {
94 List<Integer> timeResult = new ArrayList<Integer>();
95 SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
96 StringSerializer.get(), StringSerializer.get());
97 result.setColumnFamily("ProteinData");
98 result.setKey(dateWorkSt);
99 result.setRange(null, null, false, Integer.MAX_VALUE);
100 QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
101 List<HColumn<String, String>> col = columnSlice.get().getColumns();
102 if (!col.isEmpty()) {
103 Iterator<HColumn<String, String>> itCol = col.iterator();
104 for (int i = 0; i < 4; i++)
105 timeResult.add(i, 0);
106 while (itCol.hasNext()) {
107 String id = itCol.next().getName();
108 long lenResult = CountID(id);
110 timeResult.set(0, timeResult.get(0) + 1);
111 else if (lenResult > 30 && lenResult <= 60)
112 timeResult.set(1, timeResult.get(1) + 1);
113 else if (lenResult > 60 && lenResult <= 120)
114 timeResult.set(2, timeResult.get(2) + 1);
116 timeResult.set(3, timeResult.get(3) + 1);
117 // System.out.println(lenResult + "; " + id);
120 DataBase db = new DataBase();
121 db.setTimeRez(timeResult);
122 db.setDate(DateFormat(dateWorkSt));
125 dateWorkSt += MILLISECONDS_PER_DAY;
128 System.out.println("Wrong date");
132 // query by a protein sequence
/**
 * Reads the "ProteinRow" row whose key is the given protein sequence and
 * walks over its columns, creating one DataBase object per column with the
 * column name stored as the id and the column value stored as the jpred field.
 *
 * NOTE(review): the statement between the DataBase construction and setId(),
 * the statement that stores each entry, and the return are not present in
 * this copy of the method - restore them before relying on this description.
 *
 * @param protIn protein sequence used as the row key
 */
public List<DataBase> readProt(String protIn) {
query = new ArrayList<DataBase>();
SliceQuery<String, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
StringSerializer.get(), StringSerializer.get());
result.setColumnFamily("ProteinRow");
// the protein sequence itself is the row key
result.setKey(protIn);
// no start/end column bound, forward order, no practical limit on the column count
result.setRange(null, null, false, Integer.MAX_VALUE);
QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
Iterator<HColumn<String, String>> it = columnSlice.get().getColumns().iterator();
// one DataBase entry per column of the row
while (it.hasNext()) {
HColumn<String, String> col = it.next();
DataBase db = new DataBase();
db.setId(col.getName());
db.setJpred(col.getValue());
// list the protein sequences that have more than three columns stored
154 public List<DataBase> readProtID() {
155 query = new ArrayList<DataBase>();
156 int row_count = 100000000;
157 RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
158 StringSerializer.get(), StringSerializer.get());
159 result.setColumnFamily("ProteinRow");
160 result.setRange(null, null, false, Integer.MAX_VALUE);
161 result.setRowCount(row_count);
162 String last_key = null;
164 result.setKeys(last_key, null);
165 QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
166 OrderedRows<String, String, String> rows = columnSlice.get();
167 Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
168 while (rowsIterator.hasNext()) {
169 Row<String, String, String> row = rowsIterator.next();
170 last_key = row.getKey();
171 if (row.getColumnSlice().getColumns().size() > 3) {
172 DataBase db = new DataBase();
173 db.setProt(last_key);
174 db.setTotalId(row.getColumnSlice().getColumns().size());
178 if (rows.getCount() < row_count)
184 // query by a part of sequence
185 public List<DataBase> readPart(String protIn) {
186 int row_count = 10000;
187 query = new ArrayList<DataBase>();
188 RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
189 StringSerializer.get(), StringSerializer.get());
190 result.setColumnFamily("ProteinRow");
191 result.setRange(null, null, false, Integer.MAX_VALUE);
192 result.setRowCount(row_count);
193 String last_key = null;
195 result.setKeys(last_key, null);
196 QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
197 OrderedRows<String, String, String> rows = columnSlice.get();
198 Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
199 while (rowsIterator.hasNext()) {
200 Row<String, String, String> row = rowsIterator.next();
201 last_key = row.getKey();
202 if (last_key.matches("(.*)" + protIn + "(.*)")) {
203 Iterator<HColumn<String, String>> it = row.getColumnSlice().getColumns().iterator();
204 while (it.hasNext()) {
205 HColumn<String, String> col = it.next();
206 List<String> subProt = new ArrayList<String>();
207 String subStr = last_key;
208 while (subStr.length() > 0 && subStr.contains(protIn)) {
209 String first = subStr.substring(0, subStr.indexOf(protIn));
210 if (first.length() > 0)
213 subStr = subStr.substring(subStr.indexOf(protIn) + protIn.length(), subStr.length());
215 if (subStr.length() > 0)
217 DataBase db = new DataBase();
218 db.setProt(last_key);
219 db.setId(col.getName());
220 db.setJpred(col.getValue());
221 db.setSubProt(subProt);
226 if (rows.getCount() < row_count)
232 // convert String to Date
/**
 * Parses a date string laid out as "yyyy/MM/dd" with SimpleDateFormat and
 * takes the millisecond timestamp (Date.getTime()) of the parsed date.
 *
 * NOTE(review): the body of the null branch, the declaration of dateWorkSt,
 * the try opener, the catch body and the return statement are not present in
 * this copy of the method, so the value produced for a null or unparsable
 * input cannot be confirmed from here - restore and verify.
 *
 * @param datInput date string, expected as yyyy/MM/dd
 */
private static long DateParsing(String datInput) {
// null input is handled separately, before any parsing is attempted
if (datInput == null) {
SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
// getTime() yields the parsed date as milliseconds since the epoch
dateWorkSt = formatter.parse(datInput).getTime();
} catch (ParseException e) {
// convert a date-and-time String to a millisecond timestamp
248 private static long TimeConvert(String datInput) {
250 if (datInput == null) {
253 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss");
255 dateWorkSt = formatter.parse(datInput).getTime();
256 } catch (ParseException e) {
259 // System.out.println(datInput + "start reverce" +
260 // DateFormat1(dateWorkSt));
264 // convert long to date in string format
265 private static String DateFormat(long inDate) {
266 SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
267 String dateString = datformat.format(new Date(inDate));
271 private static String DateFormat1(long inDate) {
272 SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss");
273 String dateString = datformat.format(new Date(inDate));
277 public static String DateFormatYYMMDD(long indate) {
278 SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd");
279 String dateString = datformat.format(new Date(indate));
283 public long CountID(String id) {
284 SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
285 StringSerializer.get(), StringSerializer.get());
286 sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
287 QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
288 String datBegin = result.get().getColumnByName("DataBegin").getValue();
289 String datEnd = result.get().getColumnByName("DataEnd").getValue();
291 long datBeginLong = TimeConvert(datBegin);
292 long datEndLong = TimeConvert(datEnd);
293 return (datEndLong - datBeginLong) / 1000;
/**
 * Tests a millisecond timestamp against the span from the earliest stored row
 * key (earliestDate()) to today's date.
 *
 * NOTE(review): the statements between the signature and the first local
 * variable, and the return statements around the range test, are not present
 * in this copy of the method - restore them and confirm which value each
 * branch returns.
 *
 * @param indate time in milliseconds to test
 */
public static boolean CheckDate(long indate) {
// earliestDate() is an instance method, so a throwaway instance is created for the call
StatisticsProt sp = new StatisticsProt();
Calendar cal = Calendar.getInstance();
// Calendar.MONTH is zero-based, hence the + 1; the text follows the year/month/day layout DateParsing reads
String currentDate = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH);
// in range: not before the earliest row key and not after today's date as parsed by DateParsing
if (indate >= sp.earliestDate() && indate <= DateParsing(currentDate)) {
310 public boolean isThisDateValid(String dateToValidate) {
311 if (dateToValidate == null) {
314 SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd");
316 // if not valid, it will throw ParseException
317 sdf.setLenient(false);
318 Date date = sdf.parse(dateToValidate);
319 // System.out.println(date);
320 } catch (ParseException e) {