Centralize initial examples of job id and IP
[proteocache.git] / server / compbio / statistic / CassandraRequester.java
index 1906c97..0ce458b 100755 (executable)
@@ -4,42 +4,57 @@ import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Calendar;
-import java.util.Collections;
 import java.util.Date;
-import java.util.Iterator;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
+import compbio.cassandra.DateBean;
+import compbio.cassandra.ProteinBean;
 import compbio.cassandra.CassandraNativeConnector;
+import compbio.cassandra.CassandraReaderOld;
 import compbio.cassandra.DataBase;
+import compbio.cassandra.Pair;
+import compbio.cassandra.JobBean;
+import compbio.cassandra.Total;
+import compbio.cassandra.TotalByCounterBean;
+import compbio.cassandra.TotalJobsStatisticBean;
+import compbio.cassandra.UserBean;
+import compbio.engine.JobStatus;
 
 public class CassandraRequester {
-       private CassandraNativeConnector DBInstance = new CassandraNativeConnector();
+       private CassandraReaderOld db = new CassandraReaderOld();
        private ArrayList<DataBase> query;
        private static long currentDate = 0;
        private static long earlestDate = 0;
-
+       private final static SimpleDateFormat formatYYMMDD = new SimpleDateFormat("yyyy/MM/dd");
+       private final static SimpleDateFormat formatDDMMYY = new SimpleDateFormat("dd/MM/yyyy");
 
        /*
         * query: execution time for the period from date1 till date2
-        * */
+        */
        public List<DataBase> extractExecutionTime(String date1, String date2) {
-               if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
-                       System.out.println("Wrong date: point 3");
+               if (null == date1) {
+                       date1 = "1970/1/1";
+               }
+               if (null == date2) {
+                       date1 = "2100/1/1";
+               }
+               if (!isThisDateValid(date1, formatYYMMDD) || !isThisDateValid(date2, formatYYMMDD)) {
+                       System.out.println("CassandraRequester.extractExecutionTime: wrong format for date1 " + date1 + "or date2 " + date2);
                        return null;
                }
                SetDateRange();
                int nbins = 5;
-               long dateStart = DateParsing(date1);
-               long dateEnd = DateParsing(date2);
-               if ((dateStart < earlestDate && dateEnd < earlestDate) || (dateStart > currentDate && dateEnd > currentDate) || dateStart > dateEnd)
+               long dateStart = DateParsing(date1, formatYYMMDD);
+               long dateEnd = DateParsing(date2, formatYYMMDD);
+               if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd)
                        return null;
                if (dateStart < earlestDate)
                        dateStart = earlestDate;
                if (dateEnd > currentDate)
                        dateStart = currentDate;
 
-               System.out.println("CassandraRequester.extractExecutionTime: earlestDate = " + earlestDate + ", currentDate = " + currentDate);
-
                Calendar start = Calendar.getInstance();
                start.setTime(new Date(dateStart));
                Calendar end = Calendar.getInstance();
@@ -48,24 +63,34 @@ public class CassandraRequester {
                List<Integer> totalTime = new ArrayList<Integer>();
                for (int i = 0; i < nbins; i++)
                        totalTime.add(i, 0);
-               /*
+               List<Pair<String, String>> res = db.ReadProteinDataTable();
+               List<Pair<Date, Long>> numres = new ArrayList<Pair<Date, Long>>();
+
+               for (Pair<String, String> entry : res) {
+                       SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
+                       try {
+                               Date jobstartdate = dateformatter.parse(entry.getElement0());
+                               long date = jobstartdate.getTime();
+                               if (dateStart <= date && date <= dateEnd) {
+                                       SimpleDateFormat datetimeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
+                                       Date jobstarttime = datetimeformatter.parse(entry.getElement0());
+                                       Date jobendtime = datetimeformatter.parse(entry.getElement1());
+                                       long diff = (jobendtime.getTime() - jobstarttime.getTime()) / 1000;
+                                       Pair<Date, Long> pair = new Pair<Date, Long>(jobstartdate, Long.valueOf(diff));
+                                       numres.add(pair);
+                               }
+                       } catch (ParseException e) {
+                               e.printStackTrace();
+                       }
+               }
+
                for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
                        List<Integer> timeResult = new ArrayList<Integer>();
-                       SliceQuery<Long, String, String> result = HFactory.createSliceQuery(DBInstance.GetKeyspace(), LongSerializer.get(),
-                                       StringSerializer.get(), StringSerializer.get());
-                       result.setColumnFamily("ProteinData");
-                       result.setKey(date.getTime());
-                       result.setRange(null, null, false, Integer.MAX_VALUE);
-                       QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
-                       List<HColumn<String, String>> col = columnSlice.get().getColumns();
-                       if (!col.isEmpty()) {
-                               Iterator<HColumn<String, String>> itCol = col.iterator();
-                               for (int i = 0; i < nbins; i++)
-                                       timeResult.add(i, 0);
-                               // split all jobs into nbins bins
-                               while (itCol.hasNext()) {
-                                       String id = itCol.next().getName();
-                                       long lenResult = CountID(id);
+                       for (int i = 0; i < nbins; i++)
+                               timeResult.add(i, 0);
+                       for (Pair<Date, Long> p : numres) {
+                               if (date.equals(p.getElement0())) {
+                                       long lenResult = p.getElement1().longValue();
                                        if (lenResult <= 30)
                                                timeResult.set(0, timeResult.get(0) + 1);
                                        else if (lenResult > 30 && lenResult <= 60)
@@ -78,112 +103,241 @@ public class CassandraRequester {
                                                timeResult.set(4, timeResult.get(4) + 1);
                                        }
                                }
-                               for (int i = 0; i < nbins; i++)
-                                       totalTime.set(i, totalTime.get(i) + timeResult.get(i));
-                               DataBase db = new DataBase();
-                               db.setTimeRez(timeResult);
-                               db.setDate(DateFormat(date.getTime()));
-                               query.add(db);
                        }
+                       for (int i = 0; i < nbins; i++)
+                               totalTime.set(i, totalTime.get(i) + timeResult.get(i));
+                       DataBase db = new DataBase();
+                       db.setTimeRez(timeResult);
+                       db.setDate(DateFormat(date.getTime()));
+                       query.add(db);
                }
-               */
+
                DataBase db = new DataBase();
                db.setTimeTotalExec(totalTime);
                query.add(db);
-               System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
                return query;
        }
 
-       /* 
-        * convert String date into long date (miliseconds since the epoch start)
+       /*
+        * query: total number of jobs for the period from date1 till date2
         */
-       private static long DateParsing(String datInput) {
-               if (datInput == null) {
-                       return 0;
+       /**
+        * Collects the per-day job totals for every day from date1 to date2
+        * (both inclusive) and their sum over the whole period.
+        *
+        * @param date1
+        *            first day, parsed with formatYYMMDD (yyyy/MM/dd)
+        * @param date2
+        *            last day, parsed with formatYYMMDD (yyyy/MM/dd)
+        * @return bean holding one Total per day for which db.ReadDateTable
+        *         returned a value, plus the accumulated whole total
+        */
+       public TotalJobsStatisticBean countJobs(String date1, String date2) {
+               // NOTE(review): the input validation below is disabled. DateParsing
+               // returns 0 for a null or unparsable string, so such a date silently
+               // becomes the epoch start. The disabled code also assigns date1 where
+               // date2 looks intended - confirm before re-enabling it.
+               /*
+                * if (null == date1) { date1 = "1970/1/1"; } if (null == date2) { date1
+                * = "2100/1/1"; } if (!isThisDateValid(date1, formatYYMMDD) ||
+                * !isThisDateValid(date2, formatYYMMDD)) { System.out.println(
+                * "CassandraRequester.countJobs: wrong format for date1 " + date1 +
+                * "or date2 " + date2); return null; }
+                */
+               SetDateRange();
+               long dateStart = DateParsing(date1, formatYYMMDD);
+               long dateEnd = DateParsing(date2, formatYYMMDD);
+               // NOTE(review): clamping to [earlestDate, currentDate] is disabled as
+               // well; its last statement assigns dateStart where dateEnd looks
+               // intended - confirm before re-enabling it.
+               /*
+                * if (dateEnd < earlestDate || dateStart > currentDate || dateStart >
+                * dateEnd) return null; if (dateStart < earlestDate) dateStart =
+                * earlestDate; if (dateEnd > currentDate) dateStart = currentDate;
+                */
+               Calendar start = Calendar.getInstance();
+               start.setTime(new Date(dateStart));
+               Calendar end = Calendar.getInstance();
+               end.setTime(new Date(dateEnd));
+               TotalJobsStatisticBean query = new TotalJobsStatisticBean();
+               Total wholeTotal = new Total(0, 0, 0, 0, 0);
+               // walk the period one calendar day at a time, end day included
+               for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
+                       Total res = db.ReadDateTable(date.getTime());
+                       // days without a result are left out of the bean entirely
+                       if (res == null)
+                               continue;
+                       query.setDateTotal(DateFormat(date.getTime()), res);
+                       // add this day's counters to the running sum for the period
+                       wholeTotal.setTotal(res.getTotal() + wholeTotal.getTotal());
+                       wholeTotal.setTotalOK(res.getTotalOK() + wholeTotal.getTotalOK());
+                       wholeTotal.setTotalStopped(res.getTotalStopped() + wholeTotal.getTotalStopped());
+                       wholeTotal.setTotalError(res.getTotalError() + wholeTotal.getTotalError());
+                       wholeTotal.setTotalTimeOut(res.getTotalTimeOut() + wholeTotal.getTotalTimeOut());
                }
-               long dateWorkSt = 0;
-               SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
-               try {
-                       dateWorkSt = formatter.parse(datInput).getTime();
-               } catch (ParseException e) {
-                       e.printStackTrace();
+               query.setWholeTotal(wholeTotal);
+               return query;
+       }
+
+       /*
+        * query: jobs and sequence at date
+        */
+       /**
+        * Reads the jobs of one day that finished with the given status.
+        *
+        * @param date
+        *            the day, in formatDDMMYY (dd/MM/yyyy) notation
+        * @param status
+        *            JobStatus.OK is served by db.ReadProteinData, every other
+        *            status by db.ReadFailedJobs
+        * @return the bean delivered by the reader, or null when the date is
+        *         malformed or lies outside [earlestDate, currentDate]
+        */
+       public DateBean readJobByDay(String date, JobStatus status) {
+               if (!isThisDateValid(date, formatDDMMYY)) {
+                       System.out.println("CassandraRequester.readJobByDay: Wrong date format for " + date);
+                       return null;
                }
-               return dateWorkSt;
+               // refresh currentDate (and earlestDate on first use) before the range check
+               SetDateRange();
+               long day = DateParsing(date, formatDDMMYY);
+               if (day < earlestDate || day > currentDate)
+                       return null;
+
+               if (status == JobStatus.OK) {
+                       return db.ReadProteinData(day, date);
+               }
+               return db.ReadFailedJobs(day, date, status);
        }
 
        /*
-        * convert String date:time into long date:time (miliseconds since the epoch start)
+        * query: protein sequence
         */
-       private static long TimeConvert(String datInput) {
-               long dateWorkSt = 0;
-               if (datInput == null) {
-                       return dateWorkSt;
+       /**
+        * Looks up proteins for a query sequence through db.ReadWholeSequence or
+        * db.ReadPartOfSequence.
+        *
+        * @param protIn
+        *            sequence, or sequence fragment, to search for
+        * @param searchtype
+        *            "whole" selects db.ReadWholeSequence; any other value,
+        *            including null, selects db.ReadPartOfSequence. Only
+        *            "partial" additionally fills the sub-protein list of each hit
+        * @return the beans delivered by the reader, or null when the reader
+        *         returned null
+        */
+       public List<ProteinBean> readProteins(String protIn, String searchtype) {
+               // constant-first equals: a null searchtype no longer throws a
+               // NullPointerException, it is handled like any other non-"whole" value
+               List<ProteinBean> result;
+               if ("whole".equals(searchtype))
+                       result = db.ReadWholeSequence(protIn);
+               else
+                       result = db.ReadPartOfSequence(protIn);
+               if (result == null)
+                       return null;
+
+               if ("partial".equals(searchtype)) {
+                       for (ProteinBean entry : result) {
+                               entry.setSubProt(CreateSubprot(entry.getSequence(), protIn));
+                       }
                }
-               SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss");
-               try {
-                       dateWorkSt = formatter.parse(datInput).getTime();
-               } catch (ParseException e) {
-                       e.printStackTrace();
+               return result;
+       }
+
+       /*
+        * query: protein feature
+        */
+       /**
+        * Selects the entries of db.ReadProtein() whose prediction string
+        * consists of feature characters for more than the given percentage.
+        *
+        * @param feature
+        *            characters to count; the text is spliced into the regex
+        *            character class "[^...]", so regex metacharacters in it keep
+        *            their regex meaning
+        * @param percent
+        *            threshold in percent of the prediction length (integer
+        *            arithmetic, strictly greater-than comparison)
+        * @return map of key to prediction for every qualifying entry with a
+        *         non-empty key, or null when db.ReadProtein() returned null
+        */
+       public Map<String, String> readProteinsPrediction(String feature, int percent) {
+               Map<String, String> result = db.ReadProtein();
+               if (result == null)
+                       return null;
+               Map<String, String> query = new HashMap<String, String>();
+               // nothing to scan: return before the pattern is compiled, exactly as
+               // the per-entry replaceAll never ran on an empty map
+               if (result.isEmpty())
+                       return query;
+               // loop-invariant, so compiled once instead of once per entry; it
+               // strips every character that is not part of the feature set
+               java.util.regex.Pattern notFeature = java.util.regex.Pattern.compile("[^" + feature + "]");
+               for (Map.Entry<String, String> entry : result.entrySet()) {
+                       String pred = entry.getValue();
+                       // entries without a prediction or with an empty key never qualify
+                       if (pred == null || "".equals(entry.getKey()))
+                               continue;
+                       int featureLength = notFeature.matcher(pred).replaceAll("").length();
+                       if (featureLength > pred.length() * percent / 100) {
+                               query.put(entry.getKey(), pred);
+                       }
                }
-               return dateWorkSt;
+               return query;
        }
 
-       // convert long to date in string format
-       private static String DateFormat(long inDate) {
-               SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
-               String dateString = datformat.format(new Date(inDate));
-               return dateString;
+       /**
+        * Lists the entries of db.ReadProteinSequenceByCounter() whose counter is
+        * strictly greater than the given threshold.
+        *
+        * @param minimalcounter
+        *            exclusive lower bound for the counter
+        * @return one bean per qualifying entry (name = map key, totaljobs =
+        *         counter), or null when the reader returned null
+        */
+       public List<TotalByCounterBean> readProteinByCounter(int minimalcounter) {
+               Map<String, Integer> counters = db.ReadProteinSequenceByCounter();
+               if (null == counters)
+                       return null;
+               List<TotalByCounterBean> selected = new ArrayList<TotalByCounterBean>();
+               for (Map.Entry<String, Integer> item : counters.entrySet()) {
+                       // skip everything at or below the threshold
+                       if (item.getValue() <= minimalcounter)
+                               continue;
+                       TotalByCounterBean bean = new TotalByCounterBean();
+                       bean.setTotaljobs(item.getValue());
+                       bean.setName(item.getKey());
+                       selected.add(bean);
+               }
+               return selected;
+       }
+
+       /**
+        * Lists the entries of db.ReadIpByCounter() whose counter is strictly
+        * greater than the given threshold.
+        *
+        * @param minimalcounter
+        *            exclusive lower bound for the counter; null is treated as 0
+        * @return one bean per qualifying entry (name = map key, totaljobs =
+        *         counter), or null when the reader returned null
+        */
+       public List<TotalByCounterBean> readIpByCounter(Integer minimalcounter) {
+               Map<String, Integer> counters = db.ReadIpByCounter();
+               if (null == counters)
+                       return null;
+               // a missing threshold selects every entry with a counter above zero
+               int threshold = (null == minimalcounter) ? 0 : minimalcounter.intValue();
+               List<TotalByCounterBean> selected = new ArrayList<TotalByCounterBean>();
+               for (Map.Entry<String, Integer> item : counters.entrySet()) {
+                       if (item.getValue() <= threshold)
+                               continue;
+                       TotalByCounterBean bean = new TotalByCounterBean();
+                       bean.setTotaljobs(item.getValue());
+                       bean.setName(item.getKey());
+                       selected.add(bean);
+               }
+               return selected;
        }
 
        /*
-        * convert ???
+        * query jobs log info
         */
-       public static String DateFormatYYMMDD(long indate) {
-               SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd");
-               String dateString = datformat.format(new Date(indate));
-               return dateString;
+       /**
+        * Fetches the log record of a job through db.ReadJobLog.
+        *
+        * @param jobid
+        *            identifier of the job
+        * @return whatever db.ReadJobLog delivers, or null when jobid is null
+        */
+       public JobBean readJobLog(String jobid) {
+               // without an id there is nothing to look up
+               return (null == jobid) ? null : db.ReadJobLog(jobid);
        }
 
        /*
-        * ???
+        * query jobs by ipStructureJobLog
         */
-       public long CountID(String id) {
-               /*
-               SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(DBInstance.GetKeyspace(), StringSerializer.get(),
-                               StringSerializer.get(), StringSerializer.get());
-               sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
-               QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
-               String datBegin = result.get().getColumnByName("DataBegin").getValue();
-               String datEnd = result.get().getColumnByName("DataEnd").getValue();
+       /**
+        * Builds a UserBean for an IP from the data of db.ReadIpWithJobs.
+        *
+        * @param ip
+        *            the IP to look up
+        * @return a bean carrying the reader's result as its main info, or null
+        *         when ip is null or the reader returned null
+        */
+       public UserBean readIp(String ip) {
+               // the reader is only consulted for a non-null ip
+               Map<String, String[]> jobs = (null == ip) ? null : db.ReadIpWithJobs(ip);
+               if (null == jobs)
+                       return null;
+               UserBean user = new UserBean(ip);
+               user.setMainInfo(jobs);
+               return user;
+       }
+
+       /**
+        * Cuts a protein sequence into consecutive pieces so that every
+        * non-overlapping occurrence of subprot (scanned left to right) is a
+        * piece of its own; joining the pieces gives the original sequence back.
+        *
+        * @param protein
+        *            sequence to cut; null or empty yields an empty list
+        * @param subprot
+        *            fragment to isolate; for null or empty the whole sequence is
+        *            returned as the single piece
+        * @return the pieces in sequence order, never null
+        */
+       private static List<String> CreateSubprot(String protein, String subprot) {
+               List<String> sub = new ArrayList<String>();
+               if (protein == null || protein.length() == 0)
+                       return sub;
+               // an empty fragment matches at every position without consuming
+               // anything, which made the previous substring loop run forever
+               if (subprot == null || subprot.length() == 0) {
+                       sub.add(protein);
+                       return sub;
+               }
+               int from = 0;
+               int hit = protein.indexOf(subprot, from);
+               while (hit >= 0) {
+                       // text between the previous match and this one
+                       if (hit > from)
+                               sub.add(protein.substring(from, hit));
+                       sub.add(subprot);
+                       from = hit + subprot.length();
+                       hit = protein.indexOf(subprot, from);
+               }
+               // tail after the last match (the whole sequence when nothing matched)
+               if (from < protein.length())
+                       sub.add(protein.substring(from));
+               return sub;
+       }
+
+       /**
+        * Converts a date string into milliseconds since the epoch start.
+        *
+        * @param datInput
+        *            the date text; may be null
+        * @param formatter
+        *            format used to parse datInput
+        * @return the parsed time in milliseconds, or 0 when datInput is null or
+        *         cannot be parsed (the ParseException stack trace is printed)
+        */
+       private static long DateParsing(String datInput, SimpleDateFormat formatter) {
+               if (null != datInput) {
+                       try {
+                               return formatter.parse(datInput).getTime();
+                       } catch (ParseException e) {
+                               e.printStackTrace();
+                       }
+               }
+               // null input and parse failures both fall back to the epoch start
+               return 0;
+       }
 
-               long datBeginLong = TimeConvert(datBegin);
-               long datEndLong = TimeConvert(datEnd);
-               return (datEndLong - datBeginLong) / 1000;
-               */
-               return 0;
+       /**
+        * Renders a point in time as a dd/MM/yyyy string.
+        *
+        * @param inDate
+        *            milliseconds since the epoch start
+        * @return the formatted date
+        */
+       private static String DateFormat(long inDate) {
+               Date moment = new Date(inDate);
+               return new SimpleDateFormat("dd/MM/yyyy").format(moment);
        }
 
        /*
-        * set earlest date and current dates. 
-        * earlestDate is static and should be set at the 1st call
-        * currentDate should be re-calculated every time
+        * set earliest and current dates. earlestDate is static and should be
+        * set at the 1st call; currentDate should be re-calculated every time
         */
        private static void SetDateRange() {
+               // currentDate: today's year/month/day is formatted as text and
+               // re-parsed with formatYYMMDD, which drops the time of day
+               Calendar cal = Calendar.getInstance();
+               currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH),
+                               formatYYMMDD);
                if (0 == earlestDate) {
-                       StatisticsProt sp = new StatisticsProt();
-                       earlestDate = sp.earliestDate();
-                       System.out.println("Set earlest Date = " + earlestDate);
+                       // earlestDate is fetched only while it is still 0; a throw-away
+                       // instance is created solely to call earliestDate()
+                       CassandraRequester cr = new CassandraRequester();
+                       earlestDate = cr.earliestDate();
                }
-               Calendar cal = Calendar.getInstance();
-               currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH));
        }
 
-       public boolean isThisDateValid(String dateToValidate) {
+       public boolean isThisDateValid(String dateToValidate, SimpleDateFormat sdf) {
                if (dateToValidate == null || dateToValidate.equals("")) {
-                       System.out.println("Undefined date");
                        return false;
                }
-               SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd");
                try {
                        // if not valid, this will throw ParseException
                        sdf.setLenient(false);
@@ -199,31 +353,26 @@ public class CassandraRequester {
         * find the earliest date in the database
         */
        public long earliestDate() {
-               /*
-               ArrayList<Long> dateSort = new ArrayList<Long>();
-               int row_count = 10000;
-               RangeSlicesQuery<Long, String, String> result = HFactory.createRangeSlicesQuery(DBInstance.GetKeyspace(), LongSerializer.get(),
-                               StringSerializer.get(), StringSerializer.get());
-               result.setColumnFamily("ProteinData");
-               result.setRange(null, null, false, Integer.MAX_VALUE);
-               result.setRowCount(row_count);
-               Long last_key = null;
-               while (true) {
-                       result.setKeys(last_key, null);
-                       QueryResult<OrderedRows<Long, String, String>> columnSlice = result.execute();
-                       OrderedRows<Long, String, String> rows = columnSlice.get();
-                       Iterator<Row<Long, String, String>> rowsIterator = rows.iterator();
-                       while (rowsIterator.hasNext()) {
-                               Row<Long, String, String> row = rowsIterator.next();
-                               last_key = row.getKey();
-                               dateSort.add(last_key);
-                       }
-                       if (rows.getCount() < row_count)
-                               break;
+               // side effect: the value obtained from CassandraNativeConnector is
+               // also stored in the static earlestDate field before it is returned
+               earlestDate = CassandraNativeConnector.getEarliestDateInDB();
+               return earlestDate;
+       }
+
+       /**
+        * prepares an example of either job id or IP for the DB
+        * 
+        * @param exampletype
+        *            defines which example you need (an existing job from the DB -
+        *            jobid, an IP - "ip")
+        * @return a string representation of the requested example, if the example
+        *         type is not known empty string is returned
+        */
+       public String getExample(String exampletype) {
+               // constant-first equals: a null type is simply "not known" and yields
+               // the empty string instead of a NullPointerException
+               if ("jobid".equals(exampletype)) {
+                       return "jp_NzBOJKo";
+               } else if ("ip".equals(exampletype)) {
+                       return "127.0.0.1";
                }
-               Collections.sort(dateSort);
-               return dateSort.get(0);
-               */
-               return 0;
+               return "";
        }
+
 }