new readers for the queries
[proteocache.git] / server / compbio / statistic / CassandraRequester.java
index 1906c97..0d2ffc0 100755 (executable)
@@ -4,42 +4,57 @@ import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Calendar;
-import java.util.Collections;
 import java.util.Date;
-import java.util.Iterator;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
+import compbio.beans.DateBean;
+import compbio.beans.JobBean;
+import compbio.beans.ProteinBean;
+import compbio.beans.Total;
+import compbio.beans.TotalByCounterBean;
+import compbio.beans.TotalJobsStatisticBean;
+import compbio.beans.UserBean;
 import compbio.cassandra.CassandraNativeConnector;
+import compbio.cassandra.CassandraReaderOld;
 import compbio.cassandra.DataBase;
+import compbio.cassandra.Pair;
+import compbio.engine.JobStatus;
 
 public class CassandraRequester {
-       private CassandraNativeConnector DBInstance = new CassandraNativeConnector();
+       private CassandraReaderOld db = new CassandraReaderOld();
        private ArrayList<DataBase> query;
        private static long currentDate = 0;
        private static long earlestDate = 0;
-
+       private final static SimpleDateFormat formatYYMMDD = new SimpleDateFormat("yyyy/MM/dd");
+       private final static SimpleDateFormat formatDDMMYY = new SimpleDateFormat("dd/MM/yyyy");
 
        /*
         * query: execution time for the period from date1 till date2
-        * */
+        */
        public List<DataBase> extractExecutionTime(String date1, String date2) {
-               if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
-                       System.out.println("Wrong date: point 3");
+               if (null == date1) {
+                       date1 = "1970/1/1";
+               }
+               if (null == date2) {
+                       date1 = "2100/1/1";
+               }
+               if (!isThisDateValid(date1, formatYYMMDD) || !isThisDateValid(date2, formatYYMMDD)) {
+                       System.out.println("CassandraRequester.extractExecutionTime: wrong format for date1 " + date1 + "or date2 " + date2);
                        return null;
                }
                SetDateRange();
                int nbins = 5;
-               long dateStart = DateParsing(date1);
-               long dateEnd = DateParsing(date2);
-               if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate) || dateStart > dateEnd)
+               long dateStart = DateParsing(date1, formatYYMMDD);
+               long dateEnd = DateParsing(date2, formatYYMMDD);
+               if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd)
                        return null;
                if (dateStart < earlestDate)
                        dateStart = earlestDate;
                if (dateEnd > currentDate)
                        dateStart = currentDate;
 
-               System.out.println("CassandraRequester.extractExecutionTime: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
-
                Calendar start = Calendar.getInstance();
                start.setTime(new Date(dateStart));
                Calendar end = Calendar.getInstance();
@@ -48,24 +63,34 @@ public class CassandraRequester {
                List<Integer> totalTime = new ArrayList<Integer>();
                for (int i = 0; i < nbins; i++)
                        totalTime.add(i, 0);
-               /*
+               List<Pair<String, String>> res = db.ReadProteinDataTable();
+               List<Pair<Date, Long>> numres = new ArrayList<Pair<Date, Long>>();
+
+               for (Pair<String, String> entry : res) {
+                       SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
+                       try {
+                               Date jobstartdate = dateformatter.parse(entry.getElement0());
+                               long date = jobstartdate.getTime();
+                               if (dateStart <= date && date <= dateEnd) {
+                                       SimpleDateFormat datetimeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
+                                       Date jobstarttime = datetimeformatter.parse(entry.getElement0());
+                                       Date jobendtime = datetimeformatter.parse(entry.getElement1());
+                                       long diff = (jobendtime.getTime() - jobstarttime.getTime()) / 1000;
+                                       Pair<Date, Long> pair = new Pair<Date, Long>(jobstartdate, Long.valueOf(diff));
+                                       numres.add(pair);
+                               }
+                       } catch (ParseException e) {
+                               e.printStackTrace();
+                       }
+               }
+
                for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
                        List<Integer> timeResult = new ArrayList<Integer>();
-                       SliceQuery<Long, String, String> result = HFactory.createSliceQuery(DBInstance.GetKeyspace(), LongSerializer.get(),
-                                       StringSerializer.get(), StringSerializer.get());
-                       result.setColumnFamily("ProteinData");
-                       result.setKey(date.getTime());
-                       result.setRange(null, null, false, Integer.MAX_VALUE);
-                       QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
-                       List<HColumn<String, String>> col = columnSlice.get().getColumns();
-                       if (!col.isEmpty()) {
-                               Iterator<HColumn<String, String>> itCol = col.iterator();
-                               for (int i = 0; i < nbins; i++)
-                                       timeResult.add(i, 0);
-                               // split all jobs into nbins bins
-                               while (itCol.hasNext()) {
-                                       String id = itCol.next().getName();
-                                       long lenResult = CountID(id);
+                       for (int i = 0; i < nbins; i++)
+                               timeResult.add(i, 0);
+                       for (Pair<Date, Long> p : numres) {
+                               if (date.equals(p.getElement0())) {
+                                       long lenResult = p.getElement1().longValue();
                                        if (lenResult <= 30)
                                                timeResult.set(0, timeResult.get(0) + 1);
                                        else if (lenResult > 30 && lenResult <= 60)
@@ -78,48 +103,212 @@ public class CassandraRequester {
                                                timeResult.set(4, timeResult.get(4) + 1);
                                        }
                                }
-                               for (int i = 0; i < nbins; i++)
-                                       totalTime.set(i, totalTime.get(i) + timeResult.get(i));
-                               DataBase db = new DataBase();
-                               db.setTimeRez(timeResult);
-                               db.setDate(DateFormat(date.getTime()));
-                               query.add(db);
                        }
+                       for (int i = 0; i < nbins; i++)
+                               totalTime.set(i, totalTime.get(i) + timeResult.get(i));
+                       DataBase db = new DataBase();
+                       db.setTimeRez(timeResult);
+                       db.setDate(DateFormat(date.getTime()));
+                       query.add(db);
                }
-               */
+
                DataBase db = new DataBase();
                db.setTimeTotalExec(totalTime);
                query.add(db);
-               System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
                return query;
        }
 
-       /* 
-        * convert String date into long date (miliseconds since the epoch start)
+       /*
+        * query: total number of jobs for the period from date1 till date2 (both read with formatYYMMDD). Every calendar day in the range is looked up with ReadDateTable; days that return null are skipped, the others are stored under their DateFormat string and summed into the whole-period Total. DateParsing returns 0 for a null date, so a missing bound becomes the epoch start.
+        */
+       public TotalJobsStatisticBean countJobs(String date1, String date2) {
+               /*
+                * NOTE(review): input validation is currently disabled. The disabled code is kept for reference,
+                * with the default for a null date2 corrected to assign date2 (it used to assign date1):
+                * if (null == date1) { date1 = "1970/1/1"; }
+                * if (null == date2) { date2 = "2100/1/1"; }
+                * if (!isThisDateValid(date1, formatYYMMDD) || !isThisDateValid(date2, formatYYMMDD)) {
+                *         System.out.println("CassandraRequester.countJobs: wrong format for date1 " + date1 + "or date2 " + date2);
+                *         return null;
+                * }
+                */
+               SetDateRange();
+               long dateStart = DateParsing(date1, formatYYMMDD);
+               long dateEnd = DateParsing(date2, formatYYMMDD);
+               /*
+                * NOTE(review): the range check and clamping are disabled as well (upper clamp corrected to assign dateEnd, it used to assign dateStart):
+                * if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd) return null;
+                * if (dateStart < earlestDate) dateStart = earlestDate;
+                * if (dateEnd > currentDate) dateEnd = currentDate;
+                */
+               Calendar start = Calendar.getInstance();
+               start.setTime(new Date(dateStart));
+               Calendar end = Calendar.getInstance();
+               end.setTime(new Date(dateEnd));
+               TotalJobsStatisticBean query = new TotalJobsStatisticBean();
+               Total wholeTotal = new Total(0, 0, 0, 0, 0);
+               for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
+                       Total res = db.ReadDateTable(date.getTime());
+                       if (res == null)
+                               continue;
+                       query.setDateTotal(DateFormat(date.getTime()), res);
+                       wholeTotal.setTotal(res.getTotal() + wholeTotal.getTotal());
+                       wholeTotal.setTotalOK(res.getTotalOK() + wholeTotal.getTotalOK());
+                       wholeTotal.setTotalStopped(res.getTotalStopped() + wholeTotal.getTotalStopped());
+                       wholeTotal.setTotalError(res.getTotalError() + wholeTotal.getTotalError());
+                       wholeTotal.setTotalTimeOut(res.getTotalTimeOut() + wholeTotal.getTotalTimeOut());
+               }
+               query.setWholeTotal(wholeTotal);
+               return query;
+       }
+
+       /*
+        * query: jobs (with their sequences) for one day, given as dd/MM/yyyy, and one job status
         */
-       private static long DateParsing(String datInput) {
-               if (datInput == null) {
-                       return 0;
+       public DateBean readJobByDay(String date, JobStatus status) {
+               if (!isThisDateValid(date, formatDDMMYY)) {
+                       System.out.println("CassandraRequester.readJobByDay: Wrong date format for " + date);
+                       return null;
                }
-               long dateWorkSt = 0;
-               SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
-               try {
-                       dateWorkSt = formatter.parse(datInput).getTime();
-               } catch (ParseException e) {
-                       e.printStackTrace();
+               SetDateRange();
+               long day = DateParsing(date, formatDDMMYY);
+               if (day < earlestDate || day > currentDate)
+                       return null;
+               
+               if (status == JobStatus.OK) {
+                       return db.ReadProteinData(day, date);
                }
-               return dateWorkSt;
+               return db.ReadFailedJobs(day, date, status);
        }
 
        /*
-        * convert String date:time into long date:time (miliseconds since the epoch start)
+        * query: protein sequence
         */
-       private static long TimeConvert(String datInput) {
-               long dateWorkSt = 0;
+       public List<ProteinBean> readProteins(String protIn, String searchtype) {
+               // "whole" is answered by ReadWholeSequence, every other search type by ReadPartOfSequence
+               final boolean wholeSearch = searchtype.equals("whole");
+               final List<ProteinBean> proteins = wholeSearch ? db.ReadWholeSequence(protIn) : db.ReadPartOfSequence(protIn);
+               if (null == proteins) {
+                       return null;
+               }
+               // only an explicit "partial" search gets each sequence split around protIn
+               if (!searchtype.equals("partial")) {
+                       return proteins;
+               }
+               for (ProteinBean protein : proteins) {
+                       protein.setSubProt(CreateSubprot(protein.getSequence(), protIn));
+               }
+               return proteins;
+       }
+       
+       // query: protein feature -- returns the entries (non-empty key) whose prediction string has more than percent % of its characters contained in feature; null when the DB read returns null.
+       // feature is a set of literal characters: it is no longer spliced into a regex, so an empty string or characters such as ] ^ \ - cannot throw or change the match.
+       public Map<String, String> readProteinsPrediction(String feature, int percent) {
+               Map<String, String> result = db.ReadProtein();
+               if (result == null)
+                       return null;
+               Map<String, String> query = new HashMap<String, String>();
+               for (Map.Entry<String, String> entry : result.entrySet()) {
+                       String pred = entry.getValue();
+                       int hits = 0;
+                       for (int i = 0; i < pred.length(); i++)
+                               hits += feature.indexOf(pred.charAt(i)) >= 0 ? 1 : 0;
+                       if (hits > pred.length() * percent / 100 && !entry.getKey().equals(""))
+                               query.put(entry.getKey(), pred);
+               }
+               return query;
+       }
+
+       /*
+        * query protein sequences with number of jobs: one bean (name = map key, totaljobs = counter) for every entry whose counter is strictly greater than minimalcounter; null when the DB read returns null
+        */
+       public List<TotalByCounterBean> readProteinByCounter(int minimalcounter) {
+               final Map<String, Integer> counters = db.ReadProteinSequenceByCounter();
+               if (null == counters)
+                       return null;
+               final List<TotalByCounterBean> selected = new ArrayList<TotalByCounterBean>();
+               for (final Map.Entry<String, Integer> counted : counters.entrySet()) {
+                       if (counted.getValue() <= minimalcounter)
+                               continue;
+                       TotalByCounterBean item = new TotalByCounterBean();
+                       item.setTotaljobs(counted.getValue());
+                       item.setName(counted.getKey());
+                       selected.add(item);
+               }
+               return selected;
+       }
+
+       /*
+        * query ip with number of jobs: one bean (name = map key, totaljobs = counter) for every entry whose counter is strictly greater than minimalcounter; null when the DB read returns null
+        */
+       public List<TotalByCounterBean> readIpByCounter(Integer minimalcounter) {
+               final Map<String, Integer> counters = db.ReadIpByCounter();
+               if (null == counters)
+                       return null;
+               // a missing threshold is treated as 0
+               final int threshold = (minimalcounter != null) ? minimalcounter.intValue() : 0;
+               final List<TotalByCounterBean> selected = new ArrayList<TotalByCounterBean>();
+               for (final Map.Entry<String, Integer> counted : counters.entrySet()) {
+                       if (counted.getValue() <= threshold)
+                               continue;
+                       TotalByCounterBean item = new TotalByCounterBean();
+                       item.setTotaljobs(counted.getValue());
+                       item.setName(counted.getKey());
+                       selected.add(item);
+               }
+               return selected;
+       }
+
+       /*
+        * query jobs log info: passes jobid to ReadJobLog and returns its result unchanged; a null jobid gives null without calling the DB reader
+        */
+       public JobBean readJobLog(String jobid) {
+               // no lookup is attempted without a job id
+               JobBean logEntry = (null == jobid) ? null : db.ReadJobLog(jobid);
+               return logEntry;
+       }
+
+       /*
+        * query jobs by ip: wraps the map returned by ReadIpWithJobs(ip) in a UserBean created for that ip; null when ip is null or the reader returns null
+        */
+       public UserBean readIp(String ip) {
+               Map<String, String[]> jobsOfIp = (null == ip) ? null : db.ReadIpWithJobs(ip);
+               if (null == jobsOfIp) {
+                       // either no ip was given or the reader returned nothing
+                       return null;
+               }
+               UserBean user = new UserBean(ip);
+               user.setMainInfo(jobsOfIp);
+               return user;
+       }
+
+       /*
+        * create list of parts of protein sequence: consecutive pieces that are either an occurrence of subprot or the text between occurrences (empty pieces are dropped). An empty subprot is never searched for, because it matches at index 0 without consuming anything and the loop would never end; the protein is then returned as a single piece.
+        */
+       private static List<String> CreateSubprot(String protein, String subprot) {
+               List<String> sub = new ArrayList<String>();
+               // from = start of the part not yet emitted; each pass emits the gap before a match (if any) and the match itself
+               int from = 0;
+               for (int hit = subprot.isEmpty() ? -1 : protein.indexOf(subprot); hit >= 0; hit = protein.indexOf(subprot, from)) {
+                       if (hit > from)
+                               sub.add(protein.substring(from, hit));
+                       sub.add(subprot);
+                       from = hit + subprot.length();
+               }
+               if (from < protein.length())
+                       sub.add(protein.substring(from));
+               return sub;
+       }
+
+       /*
+        * convert String date into long date (milliseconds since the epoch start)
+        */
+       private static long DateParsing(String datInput, SimpleDateFormat formatter) {
                if (datInput == null) {
-                       return dateWorkSt;
+                       return 0;
                }
-               SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss");
+               long dateWorkSt = 0;
+
                try {
                        dateWorkSt = formatter.parse(datInput).getTime();
                } catch (ParseException e) {
@@ -131,59 +320,27 @@ public class CassandraRequester {
        // convert long to date in string format
        private static String DateFormat(long inDate) {
                SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
-               String dateString = datformat.format(new Date(inDate));
-               return dateString;
+               return datformat.format(new Date(inDate));
        }
 
        /*
-        * convert ???
-        */
-       public static String DateFormatYYMMDD(long indate) {
-               SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd");
-               String dateString = datformat.format(new Date(indate));
-               return dateString;
-       }
-
-       /*
-        * ???
-        */
-       public long CountID(String id) {
-               /*
-               SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(DBInstance.GetKeyspace(), StringSerializer.get(),
-                               StringSerializer.get(), StringSerializer.get());
-               sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
-               QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
-               String datBegin = result.get().getColumnByName("DataBegin").getValue();
-               String datEnd = result.get().getColumnByName("DataEnd").getValue();
-
-               long datBeginLong = TimeConvert(datBegin);
-               long datEndLong = TimeConvert(datEnd);
-               return (datEndLong - datBeginLong) / 1000;
-               */
-               return 0;
-       }
-
-       /*
-        * set earlest date and current dates. 
-        * earlestDate is static and should be set at the 1st call
-        * currentDate should be re-calculated every time
+        * set earliest and current dates. earlestDate is static and is fetched only
+        * while it is still 0 (i.e. on the 1st call); currentDate is re-calculated on every call
         */
        private static void SetDateRange() {
+               Calendar cal = Calendar.getInstance();
+               currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH),
+                               formatYYMMDD);
                if (0 == earlestDate) {
-                       StatisticsProt sp = new StatisticsProt();
-                       earlestDate = sp.earliestDate();
-                       System.out.println("Set earlest Date = " + earlestDate);
+                       CassandraRequester cr = new CassandraRequester();
+                       earlestDate = cr.earliestDate();
                }
-               Calendar cal = Calendar.getInstance();
-               currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH));
        }
 
-       public boolean isThisDateValid(String dateToValidate) {
+       public boolean isThisDateValid(String dateToValidate, SimpleDateFormat sdf) {
                if (dateToValidate == null || dateToValidate.equals("")) {
-                       System.out.println("Undefined date");
                        return false;
                }
-               SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd");
                try {
                        // if not valid, this will throw ParseException
                        sdf.setLenient(false);
@@ -199,31 +356,8 @@ public class CassandraRequester {
         * find the earliest date in the database
         */
        public long earliestDate() {
-               /*
-               ArrayList<Long> dateSort = new ArrayList<Long>();
-               int row_count = 10000;
-               RangeSlicesQuery<Long, String, String> result = HFactory.createRangeSlicesQuery(DBInstance.GetKeyspace(), LongSerializer.get(),
-                               StringSerializer.get(), StringSerializer.get());
-               result.setColumnFamily("ProteinData");
-               result.setRange(null, null, false, Integer.MAX_VALUE);
-               result.setRowCount(row_count);
-               Long last_key = null;
-               while (true) {
-                       result.setKeys(last_key, null);
-                       QueryResult<OrderedRows<Long, String, String>> columnSlice = result.execute();
-                       OrderedRows<Long, String, String> rows = columnSlice.get();
-                       Iterator<Row<Long, String, String>> rowsIterator = rows.iterator();
-                       while (rowsIterator.hasNext()) {
-                               Row<Long, String, String> row = rowsIterator.next();
-                               last_key = row.getKey();
-                               dateSort.add(last_key);
-                       }
-                       if (rows.getCount() < row_count)
-                               break;
-               }
-               Collections.sort(dateSort);
-               return dateSort.get(0);
-               */
-               return 0;
+               earlestDate = CassandraNativeConnector.getEarliestDateInDB();
+               return earlestDate;
        }
+
 }