X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=server%2Fcompbio%2Fstatistic%2FStatisticsProt.java;h=05375c094c7640ce53f343110b900c1d3de4fc84;hb=refs%2Fheads%2FNewWebsite;hp=0294747594bc89c7b40481353acb5c4e7cc1f5e2;hpb=5058bbff61abfcc34d734124545f86b2029b3373;p=proteocache.git diff --git a/server/compbio/statistic/StatisticsProt.java b/server/compbio/statistic/StatisticsProt.java old mode 100644 new mode 100755 index 0294747..05375c0 --- a/server/compbio/statistic/StatisticsProt.java +++ b/server/compbio/statistic/StatisticsProt.java @@ -9,113 +9,109 @@ import java.util.Date; import java.util.Iterator; import java.util.List; -import me.prettyprint.cassandra.serializers.LongSerializer; -import me.prettyprint.cassandra.serializers.StringSerializer; -import me.prettyprint.hector.api.beans.ColumnSlice; -import me.prettyprint.hector.api.beans.HColumn; -import me.prettyprint.hector.api.beans.OrderedRows; -import me.prettyprint.hector.api.beans.Row; -import me.prettyprint.hector.api.factory.HFactory; -import me.prettyprint.hector.api.query.QueryResult; -import me.prettyprint.hector.api.query.RangeSlicesQuery; -import me.prettyprint.hector.api.query.SliceQuery; -import compbio.cassandra.CassandraCreate; +import compbio.cassandra.CassandraNativeConnector; import compbio.cassandra.DataBase; public class StatisticsProt { - private final static long MILLISECONDS_PER_DAY = 1000L * 60 * 60 * 24; - private CassandraCreate cc = new CassandraCreate(); + private CassandraNativeConnector cc = new CassandraNativeConnector(); private ArrayList query; private static long currentDate = 0; private static long earlestDate = 0; - /* query: the period from date1 till date2 */ + /* + * query: the period from date1 till date2 + * */ public List readDetails(String date1, String date2) { + if (!isThisDateValid(date1) || !isThisDateValid(date2)) { System.out.println("Wrong date: point 1"); return null; } SetDateRange(); - long dateStart = DateParsing(date1); long dateEnd = DateParsing(date2); + if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate) || dateStart > dateEnd) + return null; if (dateStart < earlestDate) dateStart = earlestDate; - if (dateStart > currentDate) - dateStart = currentDate - MILLISECONDS_PER_DAY; - if (dateEnd < earlestDate) - dateStart = earlestDate + MILLISECONDS_PER_DAY; if (dateEnd > currentDate) dateStart = currentDate; System.out.println("StatisticsProt.readDetails: earlestDate = " + earlestDate + ", currentDate = " + currentDate); System.out.println("StatisticsProt.readDetails: Start date " + date1 + ": int representation = " + dateStart); System.out.println("StatisticsProt.readDetails: End date " + date2 + ": int representation = " + dateEnd); - + Calendar start = Calendar.getInstance(); + start.setTime(new Date(dateStart)); + Calendar end = Calendar.getInstance(); + end.setTime(new Date(dateEnd)); query = new ArrayList(); int day = 0; - while (dateStart <= dateEnd) { + /* + for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) { SliceQuery result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(), StringSerializer.get(), StringSerializer.get()); result.setColumnFamily("ProteinData"); - result.setKey(dateStart); + result.setKey(date.getTime()); result.setRange(null, null, false, Integer.MAX_VALUE); QueryResult> columnSlice = result.execute(); ++day; - System.out.print("Day " + day + ": dataStart = " + dateStart + ": "); + System.out.print("Day " + day + ": dataStart = " + date + ": "); if (!columnSlice.get().getColumns().isEmpty()) { - DataBase db = new DataBase(DateFormat(dateStart), columnSlice.get().getColumns().size()); + DataBase db = new DataBase(DateFormat(date.getTime()), columnSlice.get().getColumns().size()); query.add(db); System.out.println("data exist"); } else { System.out.println("no data"); } - dateStart += MILLISECONDS_PER_DAY; } + */ System.out.println("StatisticsProt.readLength: total number of dates = " + query.size()); return query; } /* * query: execution time for the period from date1 till date2 - */ + * */ public List readLength(String date1, String date2) { if (!isThisDateValid(date1) || !isThisDateValid(date2)) { System.out.println("Wrong date: point 3"); return null; } SetDateRange(); - + int nbins = 5; long dateStart = DateParsing(date1); long dateEnd = DateParsing(date2); + if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate) || dateStart > dateEnd) + return null; if (dateStart < earlestDate) dateStart = earlestDate; - if (dateStart > currentDate) - dateStart = currentDate - MILLISECONDS_PER_DAY; - if (dateEnd < earlestDate) - dateStart = earlestDate + MILLISECONDS_PER_DAY; if (dateEnd > currentDate) dateStart = currentDate; System.out.println("StatisticsProt.readLength: earlestDate = " + earlestDate + ", currentDate = " + currentDate); System.out.println("StatisticsProt.readLength: Start date is " + date1 + ": int representation = " + dateStart); System.out.println("StatisticsProt.readLength: End date is " + date2 + ": int representation = " + dateEnd); - + Calendar start = Calendar.getInstance(); + start.setTime(new Date(dateStart)); + Calendar end = Calendar.getInstance(); + end.setTime(new Date(dateEnd)); query = new ArrayList(); List totalTime = new ArrayList(); - for (int i = 0; i < 4; i++) + for (int i = 0; i < nbins; i++) totalTime.add(i, 0); - while (dateStart <= dateEnd) { + /* + for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) { List timeResult = new ArrayList(); SliceQuery result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(), StringSerializer.get(), StringSerializer.get()); result.setColumnFamily("ProteinData"); - result.setKey(dateStart); + result.setKey(date.getTime()); result.setRange(null, null, false, Integer.MAX_VALUE); QueryResult> columnSlice = result.execute(); List> col = columnSlice.get().getColumns(); if (!col.isEmpty()) { Iterator> itCol = col.iterator(); - for (int i = 0; i < 4; i++) + for (int i = 0; i < nbins; i++) timeResult.add(i, 0); + // split all jobs into nbins bins while (itCol.hasNext()) { String id = itCol.next().getName(); long lenResult = CountID(id); @@ -125,24 +121,34 @@ public class StatisticsProt { timeResult.set(1, timeResult.get(1) + 1); else if (lenResult > 60 && lenResult <= 120) timeResult.set(2, timeResult.get(2) + 1); - else { + else if (lenResult > 120 && lenResult <= 600) timeResult.set(3, timeResult.get(3) + 1); + else { + timeResult.set(4, timeResult.get(4) + 1); } } + for (int i = 0; i < nbins; i++) + totalTime.set(i, totalTime.get(i) + timeResult.get(i)); DataBase db = new DataBase(); db.setTimeRez(timeResult); - db.setDate(DateFormat(dateStart)); + db.setDate(DateFormat(date.getTime())); query.add(db); } - dateStart += MILLISECONDS_PER_DAY; } + */ + DataBase db = new DataBase(); + db.setTimeTotalExec(totalTime); + query.add(db); System.out.println("StatisticsProt.readLength: total number of dates = " + query.size()); return query; } - /* query: protein sequence */ + /* + * query: protein sequence + * */ public List readProteins(String protIn) { query = new ArrayList(); + /* SliceQuery result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get()); result.setColumnFamily("ProteinRow"); @@ -161,17 +167,21 @@ public class StatisticsProt { query.add(db); } } + */ return query; } - // query by a protein sequence - public List readProtID() { + /* + * query by a protein sequence + * */ + public List readProtID(int counter) { query = new ArrayList(); - int row_count = 100000000; + int row_count = 100; + /* RangeSlicesQuery result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get()); result.setColumnFamily("ProteinRow"); - result.setRange(null, null, false, Integer.MAX_VALUE); + result.setRange(null, null, false, 100); result.setRowCount(row_count); String last_key = null; while (true) { @@ -183,14 +193,15 @@ public class StatisticsProt { Row row = rowsIterator.next(); last_key = row.getKey(); List> clms = row.getColumnSlice().getColumns(); - int npred = 0; - for (HColumn cln : clms) { - String name = cln.getName(); - if (name.matches("(.*)jnetpred")) { - ++npred; - } - } - if (npred > 3) { + //int npred = 0; + //for (HColumn cln : clms) { + // String name = cln.getName(); + // if (name.matches("(.*)jnetpred")) { + // ++npred; + // } + //} + int npred = clms.size(); + if (npred > counter) { DataBase db = new DataBase(); db.setProt(last_key); db.setTotalId(npred); @@ -199,14 +210,17 @@ public class StatisticsProt { } if (rows.getCount() < row_count) break; - } + }*/ return query; } - // query by a part of sequence + /* + * query by a part of sequence + * */ public List readPart(String protIn) { int row_count = 10000; query = new ArrayList(); + /* RangeSlicesQuery result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get()); result.setColumnFamily("ProteinRow"); @@ -251,10 +265,13 @@ public class StatisticsProt { if (rows.getCount() < row_count) break; } + */ return query; } - // convert String to Date + /* + * convert String date into long date (miliseconds since the epoch start) + */ private static long DateParsing(String datInput) { if (datInput == null) { return 0; @@ -269,7 +286,9 @@ public class StatisticsProt { return dateWorkSt; } - // convert String to Date + /* + * convert String date:time into long date:time (miliseconds since the epoch start) + */ private static long TimeConvert(String datInput) { long dateWorkSt = 0; if (datInput == null) { @@ -292,9 +311,7 @@ public class StatisticsProt { } /* - * private static String DateFormat1(long inDate) { SimpleDateFormat - * datformat = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss"); String - * dateString = datformat.format(new Date(inDate)); return dateString; } + * convert ??? */ public static String DateFormatYYMMDD(long indate) { SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd"); @@ -302,7 +319,11 @@ public class StatisticsProt { return dateString; } + /* + * ??? + */ public long CountID(String id) { + /* SliceQuery sliceQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get()); sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100); @@ -313,8 +334,15 @@ public class StatisticsProt { long datBeginLong = TimeConvert(datBegin); long datEndLong = TimeConvert(datEnd); return (datEndLong - datBeginLong) / 1000; + */ + return 0; } + /* + * set earlest date and current dates. + * earlestDate is static and should be set at the 1st call + * currentDate should be re-calculated every time + */ private static void SetDateRange() { if (0 == earlestDate) { StatisticsProt sp = new StatisticsProt(); @@ -342,10 +370,13 @@ public class StatisticsProt { return true; } - // find the earliest date + /* + * find the earliest date in the database + */ public long earliestDate() { ArrayList dateSort = new ArrayList(); int row_count = 10000; + /* RangeSlicesQuery result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), LongSerializer.get(), StringSerializer.get(), StringSerializer.get()); result.setColumnFamily("ProteinData"); @@ -364,7 +395,7 @@ public class StatisticsProt { } if (rows.getCount() < row_count) break; - } + }*/ Collections.sort(dateSort); return dateSort.get(0); }