From 5058bbff61abfcc34d734124545f86b2029b3373 Mon Sep 17 00:00:00 2001 From: Sasha Sherstnev Date: Mon, 28 Oct 2013 20:20:16 +0000 Subject: [PATCH] Optimize code --- server/compbio/statistic/StatisticsProt.java | 294 +++++++++++++++----------- 1 file changed, 169 insertions(+), 125 deletions(-) diff --git a/server/compbio/statistic/StatisticsProt.java b/server/compbio/statistic/StatisticsProt.java index 02e7a59..0294747 100644 --- a/server/compbio/statistic/StatisticsProt.java +++ b/server/compbio/statistic/StatisticsProt.java @@ -26,111 +26,122 @@ public class StatisticsProt { private final static long MILLISECONDS_PER_DAY = 1000L * 60 * 60 * 24; private CassandraCreate cc = new CassandraCreate(); private ArrayList query; + private static long currentDate = 0; + private static long earlestDate = 0; - // query for the period from date1 till date2 - public List readDetail(String dateInStringSt, String dateInStringEnd) { - if (!isThisDateValid(dateInStringSt)) + /* query: the period from date1 till date2 */ + public List readDetails(String date1, String date2) { + if (!isThisDateValid(date1) || !isThisDateValid(date2)) { + System.out.println("Wrong date: point 1"); return null; - long dateWorkSt = DateParsing(dateInStringSt); - long dateWorkEnd = DateParsing(dateInStringEnd); - if (CheckDate(dateWorkSt) && CheckDate(dateWorkEnd)) { - query = new ArrayList(); - while (dateWorkSt <= dateWorkEnd) { - SliceQuery result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(), - StringSerializer.get(), StringSerializer.get()); - result.setColumnFamily("ProteinData"); - result.setKey(dateWorkSt); - result.setRange(null, null, false, Integer.MAX_VALUE); - QueryResult> columnSlice = result.execute(); - if (!columnSlice.get().getColumns().isEmpty()) { - DataBase db = new DataBase(DateFormat(dateWorkSt), columnSlice.get().getColumns().size()); - query.add(db); - } - dateWorkSt += MILLISECONDS_PER_DAY; + } + SetDateRange(); + + long dateStart = DateParsing(date1); + long dateEnd = DateParsing(date2); + if (dateStart < earlestDate) + dateStart = earlestDate; + if (dateStart > currentDate) + dateStart = currentDate - MILLISECONDS_PER_DAY; + if (dateEnd < earlestDate) + dateStart = earlestDate + MILLISECONDS_PER_DAY; + if (dateEnd > currentDate) + dateStart = currentDate; + System.out.println("StatisticsProt.readDetails: earlestDate = " + earlestDate + ", currentDate = " + currentDate); + System.out.println("StatisticsProt.readDetails: Start date " + date1 + ": int representation = " + dateStart); + System.out.println("StatisticsProt.readDetails: End date " + date2 + ": int representation = " + dateEnd); + + query = new ArrayList(); + int day = 0; + while (dateStart <= dateEnd) { + SliceQuery result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(), + StringSerializer.get(), StringSerializer.get()); + result.setColumnFamily("ProteinData"); + result.setKey(dateStart); + result.setRange(null, null, false, Integer.MAX_VALUE); + QueryResult> columnSlice = result.execute(); + ++day; + System.out.print("Day " + day + ": dataStart = " + dateStart + ": "); + if (!columnSlice.get().getColumns().isEmpty()) { + DataBase db = new DataBase(DateFormat(dateStart), columnSlice.get().getColumns().size()); + query.add(db); + System.out.println("data exist"); + } else { + System.out.println("no data"); } - } else - System.out.println("Wrong date"); + dateStart += MILLISECONDS_PER_DAY; + } + System.out.println("StatisticsProt.readLength: total number of dates = " + query.size()); return query; } - // find the earliest date - public long earliestDate() { - ArrayList dateSort = new ArrayList(); - int row_count = 10000; - RangeSlicesQuery result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), LongSerializer.get(), - StringSerializer.get(), StringSerializer.get()); - result.setColumnFamily("ProteinData"); - result.setRange(null, null, false, Integer.MAX_VALUE); - result.setRowCount(row_count); - Long last_key = null; - while (true) { - result.setKeys(last_key, null); - QueryResult> columnSlice = result.execute(); - OrderedRows rows = columnSlice.get(); - Iterator> rowsIterator = rows.iterator(); - while (rowsIterator.hasNext()) { - Row row = rowsIterator.next(); - last_key = row.getKey(); - dateSort.add(last_key); - } - if (rows.getCount() < row_count) - break; + /* + * query: execution time for the period from date1 till date2 + */ + public List readLength(String date1, String date2) { + if (!isThisDateValid(date1) || !isThisDateValid(date2)) { + System.out.println("Wrong date: point 3"); + return null; } - Collections.sort(dateSort); - return dateSort.get(0); - } + SetDateRange(); - // query execution time for the period from dateInStringSt till - // dateInStringEnd - public List readLength(String dateInStringSt, String dateInStringEnd) { - long dateWorkSt = DateParsing(dateInStringSt); - long dateWorkEnd = DateParsing(dateInStringEnd); - if (CheckDate(dateWorkSt) && CheckDate(dateWorkEnd)) { - query = new ArrayList(); - List totalTime = new ArrayList(); - for (int i = 0; i < 4; i++) - totalTime.add(i, 0); - while (dateWorkSt <= dateWorkEnd) { - List timeResult = new ArrayList(); - SliceQuery result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(), - StringSerializer.get(), StringSerializer.get()); - result.setColumnFamily("ProteinData"); - result.setKey(dateWorkSt); - result.setRange(null, null, false, Integer.MAX_VALUE); - QueryResult> columnSlice = result.execute(); - List> col = columnSlice.get().getColumns(); - if (!col.isEmpty()) { - Iterator> itCol = col.iterator(); - for (int i = 0; i < 4; i++) - timeResult.add(i, 0); - while (itCol.hasNext()) { - String id = itCol.next().getName(); - long lenResult = CountID(id); - if (lenResult <= 30) - timeResult.set(0, timeResult.get(0) + 1); - else if (lenResult > 30 && lenResult <= 60) - timeResult.set(1, timeResult.get(1) + 1); - else if (lenResult > 60 && lenResult <= 120) - timeResult.set(2, timeResult.get(2) + 1); - else { - timeResult.set(3, timeResult.get(3) + 1); - // System.out.println(lenResult + "; " + id); - } + long dateStart = DateParsing(date1); + long dateEnd = DateParsing(date2); + if (dateStart < earlestDate) + dateStart = earlestDate; + if (dateStart > currentDate) + dateStart = currentDate - MILLISECONDS_PER_DAY; + if (dateEnd < earlestDate) + dateStart = earlestDate + MILLISECONDS_PER_DAY; + if (dateEnd > currentDate) + dateStart = currentDate; + System.out.println("StatisticsProt.readLength: earlestDate = " + earlestDate + ", currentDate = " + currentDate); + System.out.println("StatisticsProt.readLength: Start date is " + date1 + ": int representation = " + dateStart); + System.out.println("StatisticsProt.readLength: End date is " + date2 + ": int representation = " + dateEnd); + + query = new ArrayList(); + List totalTime = new ArrayList(); + for (int i = 0; i < 4; i++) + totalTime.add(i, 0); + while (dateStart <= dateEnd) { + List timeResult = new ArrayList(); + SliceQuery result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(), + StringSerializer.get(), StringSerializer.get()); + result.setColumnFamily("ProteinData"); + result.setKey(dateStart); + result.setRange(null, null, false, Integer.MAX_VALUE); + QueryResult> columnSlice = result.execute(); + List> col = columnSlice.get().getColumns(); + if (!col.isEmpty()) { + Iterator> itCol = col.iterator(); + for (int i = 0; i < 4; i++) + timeResult.add(i, 0); + while (itCol.hasNext()) { + String id = itCol.next().getName(); + long lenResult = CountID(id); + if (lenResult <= 30) + timeResult.set(0, timeResult.get(0) + 1); + else if (lenResult > 30 && lenResult <= 60) + timeResult.set(1, timeResult.get(1) + 1); + else if (lenResult > 60 && lenResult <= 120) + timeResult.set(2, timeResult.get(2) + 1); + else { + timeResult.set(3, timeResult.get(3) + 1); } - DataBase db = new DataBase(); - db.setTimeRez(timeResult); - db.setDate(DateFormat(dateWorkSt)); - query.add(db); } - dateWorkSt += MILLISECONDS_PER_DAY; + DataBase db = new DataBase(); + db.setTimeRez(timeResult); + db.setDate(DateFormat(dateStart)); + query.add(db); } - } else - System.out.println("Wrong date"); + dateStart += MILLISECONDS_PER_DAY; + } + System.out.println("StatisticsProt.readLength: total number of dates = " + query.size()); return query; } - // query by a protein sequence - public List readProt(String protIn) { + /* query: protein sequence */ + public List readProteins(String protIn) { query = new ArrayList(); SliceQuery result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get()); @@ -141,11 +152,14 @@ public class StatisticsProt { Iterator> it = columnSlice.get().getColumns().iterator(); while (it.hasNext()) { HColumn col = it.next(); - DataBase db = new DataBase(); - db.setProt(protIn); - db.setId(col.getName()); - db.setJpred(col.getValue()); - query.add(db); + String name = col.getName(); + if (name.matches("(.*)jnetpred")) { + DataBase db = new DataBase(); + db.setProt(protIn); + db.setId(col.getName()); + db.setJpred(col.getValue()); + query.add(db); + } } return query; } @@ -168,10 +182,18 @@ public class StatisticsProt { while (rowsIterator.hasNext()) { Row row = rowsIterator.next(); last_key = row.getKey(); - if (row.getColumnSlice().getColumns().size() > 3) { + List> clms = row.getColumnSlice().getColumns(); + int npred = 0; + for (HColumn cln : clms) { + String name = cln.getName(); + if (name.matches("(.*)jnetpred")) { + ++npred; + } + } + if (npred > 3) { DataBase db = new DataBase(); db.setProt(last_key); - db.setTotalId(row.getColumnSlice().getColumns().size()); + db.setTotalId(npred); query.add(db); } } @@ -214,12 +236,15 @@ public class StatisticsProt { } if (subStr.length() > 0) subProt.add(subStr); - DataBase db = new DataBase(); - db.setProt(last_key); - db.setId(col.getName()); - db.setJpred(col.getValue()); - db.setSubProt(subProt); - query.add(db); + String name = col.getName(); + if (name.matches("(.*)jnetpred")) { + DataBase db = new DataBase(); + db.setProt(last_key); + db.setId(col.getName()); + db.setJpred(col.getValue()); + db.setSubProt(subProt); + query.add(db); + } } } } @@ -256,8 +281,6 @@ public class StatisticsProt { } catch (ParseException e) { e.printStackTrace(); } - // System.out.println(datInput + "start reverce" + - // DateFormat1(dateWorkSt)); return dateWorkSt; } @@ -268,12 +291,11 @@ public class StatisticsProt { return dateString; } - private static String DateFormat1(long inDate) { - SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss"); - String dateString = datformat.format(new Date(inDate)); - return dateString; - } - + /* + * private static String DateFormat1(long inDate) { SimpleDateFormat + * datformat = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss"); String + * dateString = datformat.format(new Date(inDate)); return dateString; } + */ public static String DateFormatYYMMDD(long indate) { SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd"); String dateString = datformat.format(new Date(indate)); @@ -291,32 +313,28 @@ public class StatisticsProt { long datBeginLong = TimeConvert(datBegin); long datEndLong = TimeConvert(datEnd); return (datEndLong - datBeginLong) / 1000; - } - public static boolean CheckDate(long indate) { - if (indate == 0) { - return false; + private static void SetDateRange() { + if (0 == earlestDate) { + StatisticsProt sp = new StatisticsProt(); + earlestDate = sp.earliestDate(); + System.out.println("Set earlest Date = " + earlestDate); } - StatisticsProt sp = new StatisticsProt(); Calendar cal = Calendar.getInstance(); - String currentDate = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH); - if (indate >= sp.earliestDate() && indate <= DateParsing(currentDate)) { - return true; - } - return false; + currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH)); } public boolean isThisDateValid(String dateToValidate) { - if (dateToValidate == null) { + if (dateToValidate == null || dateToValidate.equals("")) { + System.out.println("Undefined date"); return false; } SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd"); try { - // if not valid, it will throw ParseException + // if not valid, this will throw ParseException sdf.setLenient(false); Date date = sdf.parse(dateToValidate); - // System.out.println(date); } catch (ParseException e) { e.printStackTrace(); return false; @@ -324,4 +342,30 @@ public class StatisticsProt { return true; } + // find the earliest date + public long earliestDate() { + ArrayList dateSort = new ArrayList(); + int row_count = 10000; + RangeSlicesQuery result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), LongSerializer.get(), + StringSerializer.get(), StringSerializer.get()); + result.setColumnFamily("ProteinData"); + result.setRange(null, null, false, Integer.MAX_VALUE); + result.setRowCount(row_count); + Long last_key = null; + while (true) { + result.setKeys(last_key, null); + QueryResult> columnSlice = result.execute(); + OrderedRows rows = columnSlice.get(); + Iterator> rowsIterator = rows.iterator(); + while (rowsIterator.hasNext()) { + Row row = rowsIterator.next(); + last_key = row.getKey(); + dateSort.add(last_key); + } + if (rows.getCount() < row_count) + break; + } + Collections.sort(dateSort); + return dateSort.get(0); + } } -- 1.7.10.2