+package compbio.statistic;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Collections;
+import java.util.Date;
+import java.util.Iterator;
+import java.util.List;
+
+import me.prettyprint.cassandra.serializers.LongSerializer;
+import me.prettyprint.cassandra.serializers.StringSerializer;
+import me.prettyprint.hector.api.beans.ColumnSlice;
+import me.prettyprint.hector.api.beans.HColumn;
+import me.prettyprint.hector.api.beans.OrderedRows;
+import me.prettyprint.hector.api.beans.Row;
+import me.prettyprint.hector.api.factory.HFactory;
+import me.prettyprint.hector.api.query.QueryResult;
+import me.prettyprint.hector.api.query.RangeSlicesQuery;
+import me.prettyprint.hector.api.query.SliceQuery;
+import compbio.cassandra.CassandraCreate;
+import compbio.cassandra.DataBase;
+
+public class StatisticsProt {
+ private final static long MILLISECONDS_PER_DAY = 1000L * 60 * 60 * 24;
+ private CassandraCreate cc = new CassandraCreate();
+ private ArrayList<DataBase> query;
+
+ // query for the period from date1 till date2
+ public List<DataBase> readDetail(String dateInStringSt, String dateInStringEnd) {
+ if (!isThisDateValid(dateInStringSt))
+ return null;
+ long dateWorkSt = DateParsing(dateInStringSt);
+ long dateWorkEnd = DateParsing(dateInStringEnd);
+ if (CheckDate(dateWorkSt) && CheckDate(dateWorkEnd)) {
+ query = new ArrayList<DataBase>();
+ while (dateWorkSt <= dateWorkEnd) {
+ SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
+ StringSerializer.get(), StringSerializer.get());
+ result.setColumnFamily("ProteinData");
+ result.setKey(dateWorkSt);
+ result.setRange(null, null, false, Integer.MAX_VALUE);
+ QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
+ if (!columnSlice.get().getColumns().isEmpty()) {
+ DataBase db = new DataBase(DateFormat(dateWorkSt), columnSlice.get().getColumns().size());
+ query.add(db);
+ }
+ dateWorkSt += MILLISECONDS_PER_DAY;
+ }
+ } else
+ System.out.println("Wrong date");
+ return query;
+ }
+
+ // find the earliest date
+ public long earliestDate() {
+ ArrayList<Long> dateSort = new ArrayList<Long>();
+ int row_count = 10000;
+ RangeSlicesQuery<Long, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), LongSerializer.get(),
+ StringSerializer.get(), StringSerializer.get());
+ result.setColumnFamily("ProteinData");
+ result.setRange(null, null, false, Integer.MAX_VALUE);
+ result.setRowCount(row_count);
+ Long last_key = null;
+ while (true) {
+ result.setKeys(last_key, null);
+ QueryResult<OrderedRows<Long, String, String>> columnSlice = result.execute();
+ OrderedRows<Long, String, String> rows = columnSlice.get();
+ Iterator<Row<Long, String, String>> rowsIterator = rows.iterator();
+ while (rowsIterator.hasNext()) {
+ Row<Long, String, String> row = rowsIterator.next();
+ last_key = row.getKey();
+ dateSort.add(last_key);
+ }
+ if (rows.getCount() < row_count)
+ break;
+ }
+ Collections.sort(dateSort);
+ return dateSort.get(0);
+ }
+
+ // query execution time for the period from dateInStringSt till
+ // dateInStringEnd
+ public List<DataBase> readLength(String dateInStringSt, String dateInStringEnd) {
+ long dateWorkSt = DateParsing(dateInStringSt);
+ long dateWorkEnd = DateParsing(dateInStringEnd);
+ if (CheckDate(dateWorkSt) && CheckDate(dateWorkEnd)) {
+ query = new ArrayList<DataBase>();
+ List<Integer> totalTime = new ArrayList<Integer>();
+ for (int i = 0; i < 4; i++)
+ totalTime.add(i, 0);
+ while (dateWorkSt <= dateWorkEnd) {
+ List<Integer> timeResult = new ArrayList<Integer>();
+ SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
+ StringSerializer.get(), StringSerializer.get());
+ result.setColumnFamily("ProteinData");
+ result.setKey(dateWorkSt);
+ result.setRange(null, null, false, Integer.MAX_VALUE);
+ QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
+ List<HColumn<String, String>> col = columnSlice.get().getColumns();
+ if (!col.isEmpty()) {
+ Iterator<HColumn<String, String>> itCol = col.iterator();
+ for (int i = 0; i < 4; i++)
+ timeResult.add(i, 0);
+ while (itCol.hasNext()) {
+ String id = itCol.next().getName();
+ long lenResult = CountID(id);
+ if (lenResult <= 30)
+ timeResult.set(0, timeResult.get(0) + 1);
+ else if (lenResult > 30 && lenResult <= 60)
+ timeResult.set(1, timeResult.get(1) + 1);
+ else if (lenResult > 60 && lenResult <= 120)
+ timeResult.set(2, timeResult.get(2) + 1);
+ else {
+ timeResult.set(3, timeResult.get(3) + 1);
+ // System.out.println(lenResult + "; " + id);
+ }
+ }
+ DataBase db = new DataBase();
+ db.setTimeRez(timeResult);
+ db.setDate(DateFormat(dateWorkSt));
+ query.add(db);
+ }
+ dateWorkSt += MILLISECONDS_PER_DAY;
+ }
+ } else
+ System.out.println("Wrong date");
+ return query;
+ }
+
+ // query by a protein sequence
+ public List<DataBase> readProt(String protIn) {
+ query = new ArrayList<DataBase>();
+ SliceQuery<String, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
+ StringSerializer.get(), StringSerializer.get());
+ result.setColumnFamily("ProteinRow");
+ result.setKey(protIn);
+ result.setRange(null, null, false, Integer.MAX_VALUE);
+ QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
+ Iterator<HColumn<String, String>> it = columnSlice.get().getColumns().iterator();
+ while (it.hasNext()) {
+ HColumn<String, String> col = it.next();
+ DataBase db = new DataBase();
+ db.setProt(protIn);
+ db.setId(col.getName());
+ db.setJpred(col.getValue());
+ query.add(db);
+ }
+ return query;
+ }
+
+ // query by a protein sequence
+ public List<DataBase> readProtID() {
+ query = new ArrayList<DataBase>();
+ int row_count = 100000000;
+ RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
+ StringSerializer.get(), StringSerializer.get());
+ result.setColumnFamily("ProteinRow");
+ result.setRange(null, null, false, Integer.MAX_VALUE);
+ result.setRowCount(row_count);
+ String last_key = null;
+ while (true) {
+ result.setKeys(last_key, null);
+ QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
+ OrderedRows<String, String, String> rows = columnSlice.get();
+ Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
+ while (rowsIterator.hasNext()) {
+ Row<String, String, String> row = rowsIterator.next();
+ last_key = row.getKey();
+ if (row.getColumnSlice().getColumns().size() > 3) {
+ DataBase db = new DataBase();
+ db.setProt(last_key);
+ db.setTotalId(row.getColumnSlice().getColumns().size());
+ query.add(db);
+ }
+ }
+ if (rows.getCount() < row_count)
+ break;
+ }
+ return query;
+ }
+
+ // query by a part of sequence
+ public List<DataBase> readPart(String protIn) {
+ int row_count = 10000;
+ query = new ArrayList<DataBase>();
+ RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
+ StringSerializer.get(), StringSerializer.get());
+ result.setColumnFamily("ProteinRow");
+ result.setRange(null, null, false, Integer.MAX_VALUE);
+ result.setRowCount(row_count);
+ String last_key = null;
+ while (true) {
+ result.setKeys(last_key, null);
+ QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
+ OrderedRows<String, String, String> rows = columnSlice.get();
+ Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
+ while (rowsIterator.hasNext()) {
+ Row<String, String, String> row = rowsIterator.next();
+ last_key = row.getKey();
+ if (last_key.matches("(.*)" + protIn + "(.*)")) {
+ Iterator<HColumn<String, String>> it = row.getColumnSlice().getColumns().iterator();
+ while (it.hasNext()) {
+ HColumn<String, String> col = it.next();
+ List<String> subProt = new ArrayList<String>();
+ String subStr = last_key;
+ while (subStr.length() > 0 && subStr.contains(protIn)) {
+ String first = subStr.substring(0, subStr.indexOf(protIn));
+ if (first.length() > 0)
+ subProt.add(first);
+ subProt.add(protIn);
+ subStr = subStr.substring(subStr.indexOf(protIn) + protIn.length(), subStr.length());
+ }
+ if (subStr.length() > 0)
+ subProt.add(subStr);
+ DataBase db = new DataBase();
+ db.setProt(last_key);
+ db.setId(col.getName());
+ db.setJpred(col.getValue());
+ db.setSubProt(subProt);
+ query.add(db);
+ }
+ }
+ }
+ if (rows.getCount() < row_count)
+ break;
+ }
+ return query;
+ }
+
+ // convert String to Date
+ private static long DateParsing(String datInput) {
+ if (datInput == null) {
+ return 0;
+ }
+ long dateWorkSt = 0;
+ SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
+ try {
+ dateWorkSt = formatter.parse(datInput).getTime();
+ } catch (ParseException e) {
+ e.printStackTrace();
+ }
+ return dateWorkSt;
+ }
+
+ // convert String to Date
+ private static long TimeConvert(String datInput) {
+ long dateWorkSt = 0;
+ if (datInput == null) {
+ return dateWorkSt;
+ }
+ SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss");
+ try {
+ dateWorkSt = formatter.parse(datInput).getTime();
+ } catch (ParseException e) {
+ e.printStackTrace();
+ }
+ // System.out.println(datInput + "start reverce" +
+ // DateFormat1(dateWorkSt));
+ return dateWorkSt;
+ }
+
+ // convert long to date in string format
+ private static String DateFormat(long inDate) {
+ SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
+ String dateString = datformat.format(new Date(inDate));
+ return dateString;
+ }
+
+ private static String DateFormat1(long inDate) {
+ SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss");
+ String dateString = datformat.format(new Date(inDate));
+ return dateString;
+ }
+
+ public static String DateFormatYYMMDD(long indate) {
+ SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd");
+ String dateString = datformat.format(new Date(indate));
+ return dateString;
+ }
+
+ public long CountID(String id) {
+ SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
+ StringSerializer.get(), StringSerializer.get());
+ sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
+ QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
+ String datBegin = result.get().getColumnByName("DataBegin").getValue();
+ String datEnd = result.get().getColumnByName("DataEnd").getValue();
+
+ long datBeginLong = TimeConvert(datBegin);
+ long datEndLong = TimeConvert(datEnd);
+ return (datEndLong - datBeginLong) / 1000;
+
+ }
+
+ public static boolean CheckDate(long indate) {
+ if (indate == 0) {
+ return false;
+ }
+ StatisticsProt sp = new StatisticsProt();
+ Calendar cal = Calendar.getInstance();
+ String currentDate = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH);
+ if (indate >= sp.earliestDate() && indate <= DateParsing(currentDate)) {
+ return true;
+ }
+ return false;
+ }
+
+ public boolean isThisDateValid(String dateToValidate) {
+ if (dateToValidate == null) {
+ return false;
+ }
+ SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd");
+ try {
+ // if not valid, it will throw ParseException
+ sdf.setLenient(false);
+ Date date = sdf.parse(dateToValidate);
+ // System.out.println(date);
+ } catch (ParseException e) {
+ e.printStackTrace();
+ return false;
+ }
+ return true;
+ }
+
+}