-package combio.statistic;
-
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Date;
-import java.util.Iterator;
-import java.util.List;
-
-import me.prettyprint.cassandra.serializers.LongSerializer;
-import me.prettyprint.cassandra.serializers.StringSerializer;
-import me.prettyprint.hector.api.beans.ColumnSlice;
-import me.prettyprint.hector.api.beans.HColumn;
-import me.prettyprint.hector.api.beans.OrderedRows;
-import me.prettyprint.hector.api.beans.Row;
-import me.prettyprint.hector.api.factory.HFactory;
-import me.prettyprint.hector.api.query.QueryResult;
-import me.prettyprint.hector.api.query.RangeSlicesQuery;
-import me.prettyprint.hector.api.query.SliceQuery;
-import combio.cassandra.CassandraCreate;
-import combio.cassandra.DataBase;
-
-/**
- * Read-side statistics queries over the Cassandra column families
- * {@code ProteinData}, {@code ProteinRow} and {@code ProteinLog}.
- * <p>
- * Every query method builds and returns its own result list; the only state
- * kept on the instance is the {@code CassandraCreate} handle used to obtain
- * the keyspace.
- */
-public class StatisticsProt {
-    private static final long MILLISECONDS_PER_DAY = 1000L * 60 * 60 * 24;
-
-    /** Number of duration buckets reported per day by {@link #readLength}. */
-    private static final int DURATION_BUCKETS = 4;
-
-    /** Page size used when scanning all rows of {@code ProteinRow}. */
-    private static final int ROW_PAGE_SIZE = 10000;
-
-    private final CassandraCreate cc = new CassandraCreate();
-
-    /**
-     * Counts the columns stored in {@code ProteinData} for every day of an
-     * inclusive date range.
-     *
-     * @param dateInStringSt  first day, formatted {@code yyyy/MM/dd}
-     * @param dateInStringEnd last day (inclusive), formatted {@code yyyy/MM/dd}
-     * @return one {@code DataBase} per day, built from the day formatted as
-     *         {@code dd/MM/yyyy} and the number of columns found for that day;
-     *         empty when the start lies after the end
-     * @throws IllegalArgumentException if either date cannot be parsed
-     */
-    public List<DataBase> readDetail(String dateInStringSt, String dateInStringEnd) {
-        long day = parseDay(dateInStringSt);
-        long lastDay = parseDay(dateInStringEnd);
-        List<DataBase> perDay = new ArrayList<DataBase>();
-        // NOTE(review): stepping by a fixed 24h assumes the row keys are spaced
-        // exactly 24h apart. If keys are local-midnight timestamps in a zone with
-        // DST, days after a clock change would be missed - confirm how keys are written.
-        for (; day <= lastDay; day += MILLISECONDS_PER_DAY) {
-            perDay.add(new DataBase(formatDay(day), fetchDayColumns(day).size()));
-        }
-        return perDay;
-    }
-
-    /**
-     * Builds, for every day of an inclusive date range, a four-bucket histogram
-     * of the values returned by {@link #CountID(String)} for each column name
-     * stored under that day in {@code ProteinData}.
-     *
-     * @param dateInStringSt  first day, formatted {@code yyyy/MM/dd}
-     * @param dateInStringEnd last day (inclusive), formatted {@code yyyy/MM/dd}
-     * @return one {@code DataBase} per day carrying the day formatted as
-     *         {@code dd/MM/yyyy} and the list of four bucket counts
-     * @throws IllegalArgumentException if either date, or a stored timestamp,
-     *                                  cannot be parsed
-     * @throws IllegalStateException    if a job has no begin/end entry in
-     *                                  {@code ProteinLog}
-     */
-    public List<DataBase> readLength(String dateInStringSt, String dateInStringEnd) {
-        long day = parseDay(dateInStringSt);
-        long lastDay = parseDay(dateInStringEnd);
-        List<DataBase> perDay = new ArrayList<DataBase>();
-        // NOTE(review): same fixed-24h stepping caveat as in readDetail.
-        for (; day <= lastDay; day += MILLISECONDS_PER_DAY) {
-            int[] counts = new int[DURATION_BUCKETS];
-            for (HColumn<String, String> job : fetchDayColumns(day)) {
-                // The column name is used as the row key of the job in ProteinLog.
-                counts[bucketFor(CountID(job.getName()))]++;
-            }
-            List<Integer> histogram = new ArrayList<Integer>(DURATION_BUCKETS);
-            for (int count : counts) {
-                histogram.add(count);
-            }
-            DataBase db = new DataBase();
-            db.setTimeRez(histogram);
-            db.setDate(formatDay(day));
-            perDay.add(db);
-        }
-        return perDay;
-    }
-
-    /**
-     * Looks up a single {@code ProteinRow} row by its exact key.
-     *
-     * @param protIn row key to look up
-     * @return one {@code DataBase} per column of the row, carrying the key, the
-     *         column name (as id) and the column value (as jpred); empty when
-     *         the row has no columns
-     */
-    public List<DataBase> readProt(String protIn) {
-        SliceQuery<String, String, String> rowQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get());
-        rowQuery.setColumnFamily("ProteinRow");
-        rowQuery.setKey(protIn);
-        rowQuery.setRange(null, null, false, Integer.MAX_VALUE);
-        List<DataBase> found = new ArrayList<DataBase>();
-        for (HColumn<String, String> col : rowQuery.execute().get().getColumns()) {
-            DataBase db = new DataBase();
-            db.setProt(protIn);
-            db.setId(col.getName());
-            db.setJpred(col.getValue());
-            found.add(db);
-        }
-        return found;
-    }
-
-    /**
-     * Scans every row of {@code ProteinRow} and reports those whose key contains
-     * the given fragment as a literal substring.
-     *
-     * @param protIn fragment to search for; {@code null} or empty yields an
-     *               empty result
-     * @return one {@code DataBase} per column of every matching row, carrying
-     *         the row key, the column name (as id), the column value (as jpred)
-     *         and the key split into alternating non-matching / matching pieces
-     */
-    public List<DataBase> readPart(String protIn) {
-        List<DataBase> found = new ArrayList<DataBase>();
-        if (protIn == null || protIn.length() == 0) {
-            // An empty fragment matches everywhere and cannot be used to split a key.
-            return found;
-        }
-        RangeSlicesQuery<String, String, String> scan = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get());
-        scan.setColumnFamily("ProteinRow");
-        scan.setRange(null, null, false, Integer.MAX_VALUE);
-        scan.setRowCount(ROW_PAGE_SIZE);
-        String lastKey = null;
-        while (true) {
-            // The start key of a range query is inclusive, so every page after the
-            // first begins with the row that ended the previous page.
-            String pageStartKey = lastKey;
-            scan.setKeys(pageStartKey, null);
-            OrderedRows<String, String, String> rows = scan.execute().get();
-            for (Row<String, String, String> row : rows) {
-                String key = row.getKey();
-                lastKey = key;
-                if (key.equals(pageStartKey)) {
-                    continue; // already handled as the last row of the previous page
-                }
-                if (!key.contains(protIn)) {
-                    continue;
-                }
-                List<String> pieces = splitAroundMatches(key, protIn);
-                for (HColumn<String, String> col : row.getColumnSlice().getColumns()) {
-                    DataBase db = new DataBase();
-                    db.setProt(key);
-                    db.setId(col.getName());
-                    db.setJpred(col.getValue());
-                    // Each result gets its own list so callers may modify it freely.
-                    db.setSubProt(new ArrayList<String>(pieces));
-                    found.add(db);
-                }
-            }
-            if (rows.getCount() < ROW_PAGE_SIZE) {
-                break; // short page: nothing left to scan
-            }
-        }
-        return found;
-    }
-
-    /**
-     * Returns the difference, in milliseconds, between the {@code DataEnd} and
-     * {@code DataBegin} timestamps stored for a job in {@code ProteinLog}.
-     *
-     * @param id row key of the job in {@code ProteinLog}
-     * @return end time minus begin time, in milliseconds
-     * @throws IllegalStateException    if either column is missing for the job
-     * @throws IllegalArgumentException if a stored timestamp is not formatted
-     *                                  {@code yyyy/MM/dd:HH:mm:ss}
-     */
-    public long CountID(String id) {
-        SliceQuery<String, String, String> logQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get());
-        logQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
-        ColumnSlice<String, String> logColumns = logQuery.execute().get();
-        HColumn<String, String> begin = logColumns.getColumnByName("DataBegin");
-        HColumn<String, String> end = logColumns.getColumnByName("DataEnd");
-        if (begin == null || end == null) {
-            throw new IllegalStateException("ProteinLog row '" + id + "' has no DataBegin/DataEnd column");
-        }
-        long beginMillis = parseTimestamp(begin.getValue());
-        long endMillis = parseTimestamp(end.getValue());
-        return endMillis - beginMillis;
-    }
-
-    /** Fetches all columns stored in {@code ProteinData} under the given day key. */
-    private List<HColumn<String, String>> fetchDayColumns(long dayKey) {
-        SliceQuery<Long, String, String> dayQuery = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(), StringSerializer.get(), StringSerializer.get());
-        dayQuery.setColumnFamily("ProteinData");
-        dayQuery.setKey(dayKey);
-        dayQuery.setRange(null, null, false, Integer.MAX_VALUE);
-        return dayQuery.execute().get().getColumns();
-    }
-
-    /**
-     * Maps a value returned by {@link #CountID(String)} to its histogram bucket:
-     * 0 for values up to 1, 1 for (1, 10], 2 for (10, 20], 3 for anything larger.
-     */
-    private static int bucketFor(long duration) {
-        // NOTE(review): CountID yields milliseconds, while these thresholds look
-        // like a coarser unit - confirm the intended unit before changing either.
-        if (duration <= 1) {
-            return 0;
-        }
-        if (duration <= 10) {
-            return 1;
-        }
-        if (duration <= 20) {
-            return 2;
-        }
-        return 3;
-    }
-
-    /**
-     * Splits {@code key} into the pieces between occurrences of {@code fragment}
-     * and the occurrences themselves, in order, omitting empty pieces.
-     * {@code fragment} must be non-empty.
-     */
-    private static List<String> splitAroundMatches(String key, String fragment) {
-        List<String> pieces = new ArrayList<String>();
-        int from = 0;
-        int hit = key.indexOf(fragment, from);
-        while (hit >= 0) {
-            if (hit > from) {
-                pieces.add(key.substring(from, hit));
-            }
-            pieces.add(fragment);
-            from = hit + fragment.length();
-            hit = key.indexOf(fragment, from);
-        }
-        if (from < key.length()) {
-            pieces.add(key.substring(from));
-        }
-        return pieces;
-    }
-
-    /**
-     * Parses a {@code yyyy/MM/dd} date into epoch milliseconds.
-     *
-     * @throws IllegalArgumentException if the text does not match the pattern
-     */
-    private static long parseDay(String datInput) {
-        // SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
-        SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
-        try {
-            return formatter.parse(datInput).getTime();
-        } catch (ParseException e) {
-            // Falling back to 0 would make callers iterate day by day from 1970.
-            throw new IllegalArgumentException("Date is not in yyyy/MM/dd format: " + datInput, e);
-        }
-    }
-
-    /**
-     * Parses a {@code yyyy/MM/dd:HH:mm:ss} timestamp into epoch milliseconds.
-     *
-     * @throws IllegalArgumentException if the text does not match the pattern
-     */
-    private static long parseTimestamp(String datInput) {
-        // HH (0-23), not hh (1-12): without an AM/PM marker the 12-hour field
-        // turns "12:xx:xx" into 00:xx:xx.
-        SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:HH:mm:ss");
-        try {
-            return formatter.parse(datInput).getTime();
-        } catch (ParseException e) {
-            throw new IllegalArgumentException("Timestamp is not in yyyy/MM/dd:HH:mm:ss format: " + datInput, e);
-        }
-    }
-
-    /** Formats epoch milliseconds as {@code dd/MM/yyyy}. */
-    private static String formatDay(long inDate) {
-        return new SimpleDateFormat("dd/MM/yyyy").format(new Date(inDate));
-    }
-
-    /**
-     * Formats epoch milliseconds as {@code dd/MM/yyyy:HH:mm:ss}. Not called from
-     * within this class at present.
-     */
-    private static String formatTimestamp(long inDate) {
-        return new SimpleDateFormat("dd/MM/yyyy:HH:mm:ss").format(new Date(inDate));
-    }
-
-}