+++ /dev/null
-package compbio.statistic;
-
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Calendar;
-import java.util.Collections;
-import java.util.Date;
-import java.util.Iterator;
-import java.util.List;
-
-import compbio.cassandra.CassandraNativeConnector;
-import compbio.cassandra.DataBase;
-
-public class StatisticsProt {
- private CassandraNativeConnector cc = new CassandraNativeConnector();
- private ArrayList<DataBase> query;
- private static long currentDate = 0;
- private static long earlestDate = 0;
-
- /*
- * query: the period from date1 till date2
- * */
- public List<DataBase> readDetails(String date1, String date2) {
-
- if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
- System.out.println("Wrong date: point 1");
- return null;
- }
- SetDateRange();
- long dateStart = DateParsing(date1);
- long dateEnd = DateParsing(date2);
- if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate) || dateStart > dateEnd)
- return null;
- if (dateStart < earlestDate)
- dateStart = earlestDate;
- if (dateEnd > currentDate)
- dateStart = currentDate;
- System.out.println("StatisticsProt.readDetails: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
- System.out.println("StatisticsProt.readDetails: Start date " + date1 + ": int representation = " + dateStart);
- System.out.println("StatisticsProt.readDetails: End date " + date2 + ": int representation = " + dateEnd);
- Calendar start = Calendar.getInstance();
- start.setTime(new Date(dateStart));
- Calendar end = Calendar.getInstance();
- end.setTime(new Date(dateEnd));
- query = new ArrayList<DataBase>();
- int day = 0;
- /*
- for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
- SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
- StringSerializer.get(), StringSerializer.get());
- result.setColumnFamily("ProteinData");
- result.setKey(date.getTime());
- result.setRange(null, null, false, Integer.MAX_VALUE);
- QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
- ++day;
- System.out.print("Day " + day + ": dataStart = " + date + ": ");
- if (!columnSlice.get().getColumns().isEmpty()) {
- DataBase db = new DataBase(DateFormat(date.getTime()), columnSlice.get().getColumns().size());
- query.add(db);
- System.out.println("data exist");
- } else {
- System.out.println("no data");
- }
- }
- */
- System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
- return query;
- }
-
- /*
- * query: execution time for the period from date1 till date2
- * */
- public List<DataBase> readLength(String date1, String date2) {
- if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
- System.out.println("Wrong date: point 3");
- return null;
- }
- SetDateRange();
- int nbins = 5;
- long dateStart = DateParsing(date1);
- long dateEnd = DateParsing(date2);
- if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate) || dateStart > dateEnd)
- return null;
- if (dateStart < earlestDate)
- dateStart = earlestDate;
- if (dateEnd > currentDate)
- dateStart = currentDate;
- System.out.println("StatisticsProt.readLength: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
- System.out.println("StatisticsProt.readLength: Start date is " + date1 + ": int representation = " + dateStart);
- System.out.println("StatisticsProt.readLength: End date is " + date2 + ": int representation = " + dateEnd);
- Calendar start = Calendar.getInstance();
- start.setTime(new Date(dateStart));
- Calendar end = Calendar.getInstance();
- end.setTime(new Date(dateEnd));
- query = new ArrayList<DataBase>();
- List<Integer> totalTime = new ArrayList<Integer>();
- for (int i = 0; i < nbins; i++)
- totalTime.add(i, 0);
- /*
- for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
- List<Integer> timeResult = new ArrayList<Integer>();
- SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
- StringSerializer.get(), StringSerializer.get());
- result.setColumnFamily("ProteinData");
- result.setKey(date.getTime());
- result.setRange(null, null, false, Integer.MAX_VALUE);
- QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
- List<HColumn<String, String>> col = columnSlice.get().getColumns();
- if (!col.isEmpty()) {
- Iterator<HColumn<String, String>> itCol = col.iterator();
- for (int i = 0; i < nbins; i++)
- timeResult.add(i, 0);
- // split all jobs into nbins bins
- while (itCol.hasNext()) {
- String id = itCol.next().getName();
- long lenResult = CountID(id);
- if (lenResult <= 30)
- timeResult.set(0, timeResult.get(0) + 1);
- else if (lenResult > 30 && lenResult <= 60)
- timeResult.set(1, timeResult.get(1) + 1);
- else if (lenResult > 60 && lenResult <= 120)
- timeResult.set(2, timeResult.get(2) + 1);
- else if (lenResult > 120 && lenResult <= 600)
- timeResult.set(3, timeResult.get(3) + 1);
- else {
- timeResult.set(4, timeResult.get(4) + 1);
- }
- }
- for (int i = 0; i < nbins; i++)
- totalTime.set(i, totalTime.get(i) + timeResult.get(i));
- DataBase db = new DataBase();
- db.setTimeRez(timeResult);
- db.setDate(DateFormat(date.getTime()));
- query.add(db);
- }
- }
- */
- DataBase db = new DataBase();
- db.setTimeTotalExec(totalTime);
- query.add(db);
- System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
- return query;
- }
-
- /*
- * query: protein sequence
- * */
- public List<DataBase> readProteins(String protIn) {
- query = new ArrayList<DataBase>();
- /*
- SliceQuery<String, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
- StringSerializer.get(), StringSerializer.get());
- result.setColumnFamily("ProteinRow");
- result.setKey(protIn);
- result.setRange(null, null, false, Integer.MAX_VALUE);
- QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
- Iterator<HColumn<String, String>> it = columnSlice.get().getColumns().iterator();
- while (it.hasNext()) {
- HColumn<String, String> col = it.next();
- String name = col.getName();
- if (name.matches("(.*)jnetpred")) {
- DataBase db = new DataBase();
- db.setProt(protIn);
- db.setId(col.getName());
- db.setJpred(col.getValue());
- query.add(db);
- }
- }
- */
- return query;
- }
-
- /*
- * query by a protein sequence
- * */
- public List<DataBase> readProtID(int counter) {
- query = new ArrayList<DataBase>();
- int row_count = 100;
- /*
- RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
- StringSerializer.get(), StringSerializer.get());
- result.setColumnFamily("ProteinRow");
- result.setRange(null, null, false, 100);
- result.setRowCount(row_count);
- String last_key = null;
- while (true) {
- result.setKeys(last_key, null);
- QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
- OrderedRows<String, String, String> rows = columnSlice.get();
- Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
- while (rowsIterator.hasNext()) {
- Row<String, String, String> row = rowsIterator.next();
- last_key = row.getKey();
- List<HColumn<String, String>> clms = row.getColumnSlice().getColumns();
- //int npred = 0;
- //for (HColumn<String, String> cln : clms) {
- // String name = cln.getName();
- // if (name.matches("(.*)jnetpred")) {
- // ++npred;
- // }
- //}
- int npred = clms.size();
- if (npred > counter) {
- DataBase db = new DataBase();
- db.setProt(last_key);
- db.setTotalId(npred);
- query.add(db);
- }
- }
- if (rows.getCount() < row_count)
- break;
- }*/
- return query;
- }
-
- /*
- * query by a part of sequence
- * */
- public List<DataBase> readPart(String protIn) {
- int row_count = 10000;
- query = new ArrayList<DataBase>();
- /*
- RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
- StringSerializer.get(), StringSerializer.get());
- result.setColumnFamily("ProteinRow");
- result.setRange(null, null, false, Integer.MAX_VALUE);
- result.setRowCount(row_count);
- String last_key = null;
- while (true) {
- result.setKeys(last_key, null);
- QueryResult<OrderedRows<String, String, String>> columnSlice = result.execute();
- OrderedRows<String, String, String> rows = columnSlice.get();
- Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
- while (rowsIterator.hasNext()) {
- Row<String, String, String> row = rowsIterator.next();
- last_key = row.getKey();
- if (last_key.matches("(.*)" + protIn + "(.*)")) {
- Iterator<HColumn<String, String>> it = row.getColumnSlice().getColumns().iterator();
- while (it.hasNext()) {
- HColumn<String, String> col = it.next();
- List<String> subProt = new ArrayList<String>();
- String subStr = last_key;
- while (subStr.length() > 0 && subStr.contains(protIn)) {
- String first = subStr.substring(0, subStr.indexOf(protIn));
- if (first.length() > 0)
- subProt.add(first);
- subProt.add(protIn);
- subStr = subStr.substring(subStr.indexOf(protIn) + protIn.length(), subStr.length());
- }
- if (subStr.length() > 0)
- subProt.add(subStr);
- String name = col.getName();
- if (name.matches("(.*)jnetpred")) {
- DataBase db = new DataBase();
- db.setProt(last_key);
- db.setId(col.getName());
- db.setJpred(col.getValue());
- db.setSubProt(subProt);
- query.add(db);
- }
- }
- }
- }
- if (rows.getCount() < row_count)
- break;
- }
- */
- return query;
- }
-
- /*
- * convert String date into long date (miliseconds since the epoch start)
- */
- private static long DateParsing(String datInput) {
- if (datInput == null) {
- return 0;
- }
- long dateWorkSt = 0;
- SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
- try {
- dateWorkSt = formatter.parse(datInput).getTime();
- } catch (ParseException e) {
- e.printStackTrace();
- }
- return dateWorkSt;
- }
-
- /*
- * convert String date:time into long date:time (miliseconds since the epoch start)
- */
- private static long TimeConvert(String datInput) {
- long dateWorkSt = 0;
- if (datInput == null) {
- return dateWorkSt;
- }
- SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss");
- try {
- dateWorkSt = formatter.parse(datInput).getTime();
- } catch (ParseException e) {
- e.printStackTrace();
- }
- return dateWorkSt;
- }
-
- // convert long to date in string format
- private static String DateFormat(long inDate) {
- SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
- String dateString = datformat.format(new Date(inDate));
- return dateString;
- }
-
- /*
- * convert ???
- */
- public static String DateFormatYYMMDD(long indate) {
- SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd");
- String dateString = datformat.format(new Date(indate));
- return dateString;
- }
-
- /*
- * ???
- */
- public long CountID(String id) {
- /*
- SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
- StringSerializer.get(), StringSerializer.get());
- sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
- QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
- String datBegin = result.get().getColumnByName("DataBegin").getValue();
- String datEnd = result.get().getColumnByName("DataEnd").getValue();
-
- long datBeginLong = TimeConvert(datBegin);
- long datEndLong = TimeConvert(datEnd);
- return (datEndLong - datBeginLong) / 1000;
- */
- return 0;
- }
-
- /*
- * set earlest date and current dates.
- * earlestDate is static and should be set at the 1st call
- * currentDate should be re-calculated every time
- */
- private static void SetDateRange() {
- if (0 == earlestDate) {
- StatisticsProt sp = new StatisticsProt();
- earlestDate = sp.earliestDate();
- System.out.println("Set earlest Date = " + earlestDate);
- }
- Calendar cal = Calendar.getInstance();
- currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH));
- }
-
- public boolean isThisDateValid(String dateToValidate) {
- if (dateToValidate == null || dateToValidate.equals("")) {
- System.out.println("Undefined date");
- return false;
- }
- SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd");
- try {
- // if not valid, this will throw ParseException
- sdf.setLenient(false);
- Date date = sdf.parse(dateToValidate);
- } catch (ParseException e) {
- e.printStackTrace();
- return false;
- }
- return true;
- }
-
- /*
- * find the earliest date in the database
- */
- public long earliestDate() {
- ArrayList<Long> dateSort = new ArrayList<Long>();
- int row_count = 10000;
- /*
- RangeSlicesQuery<Long, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), LongSerializer.get(),
- StringSerializer.get(), StringSerializer.get());
- result.setColumnFamily("ProteinData");
- result.setRange(null, null, false, Integer.MAX_VALUE);
- result.setRowCount(row_count);
- Long last_key = null;
- while (true) {
- result.setKeys(last_key, null);
- QueryResult<OrderedRows<Long, String, String>> columnSlice = result.execute();
- OrderedRows<Long, String, String> rows = columnSlice.get();
- Iterator<Row<Long, String, String>> rowsIterator = rows.iterator();
- while (rowsIterator.hasNext()) {
- Row<Long, String, String> row = rowsIterator.next();
- last_key = row.getKey();
- dateSort.add(last_key);
- }
- if (rows.getCount() < row_count)
- break;
- }*/
- Collections.sort(dateSort);
- return dateSort.get(0);
- }
-}