Merge branch 'master' of https://source.jalview.org/git/proteocache into NewWebsite
[proteocache.git] / server / compbio / statistic / CassandraRequester.java
diff --git a/server/compbio/statistic/CassandraRequester.java b/server/compbio/statistic/CassandraRequester.java
new file mode 100755 (executable)
index 0000000..2da38e7
--- /dev/null
@@ -0,0 +1,287 @@
+package compbio.statistic;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import compbio.cassandra.CassandraNativeConnector;
+import compbio.cassandra.CassandraReader;
+import compbio.cassandra.DataBase;
+import compbio.cassandra.Pair;
+import compbio.cassandra.StructureJobLog;
+import compbio.cassandra.StructureProteinPrediction;
+
+public class CassandraRequester {
+       // Reader through which the query methods of this class fetch their data.
+       private CassandraReader db = new CassandraReader();
+       // Result list of the most recent list-returning query; each such method re-creates it.
+       private ArrayList<DataBase> query;
+       // Current date in epoch milliseconds; re-computed by SetDateRange() on every call.
+       private static long currentDate = 0;
+       // Earliest date found in the database (epoch milliseconds); 0 means "not fetched yet".
+       private static long earlestDate = 0;
+
+       /**
+        * query: execution time for the period from date1 till date2.
+        *
+        * For every day of the period one DataBase record is produced that holds
+        * the number of jobs started on that day, split into five execution time
+        * bins (up to 30s, up to 60s, up to 120s, up to 600s, longer). One more
+        * record with the totals over all days is appended at the end of the list.
+        *
+        * The period is clamped to [earliest date in the database, current date].
+        *
+        * @param date1
+        *            first day of the period ("yyyy/MM/dd"); null means "1970/1/1"
+        * @param date2
+        *            last day of the period ("yyyy/MM/dd"); null means "2100/1/1"
+        * @return the daily records followed by the totals record, or null if one
+        *         of the dates is not valid or the period does not overlap the
+        *         range covered by the database
+        */
+       public List<DataBase> extractExecutionTime(String date1, String date2) {
+               if (null == date1) {
+                       date1 = "1970/1/1";
+               }
+               if (null == date2) {
+                       // the default end date belongs to date2; assigning it to date1
+                       // left date2 null and the request was always rejected
+                       date2 = "2100/1/1";
+               }
+               if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
+                       System.out.println("Wrong date: point 3");
+                       return null;
+               }
+               SetDateRange();
+               final int nbins = 5;
+               long dateStart = DateParsing(date1);
+               long dateEnd = DateParsing(date2);
+               if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd) {
+                       return null;
+               }
+               if (dateStart < earlestDate) {
+                       dateStart = earlestDate;
+               }
+               if (dateEnd > currentDate) {
+                       // clamp the END of the period; moving the start here would make
+                       // the day loop below run up to the requested (future) end date
+                       dateEnd = currentDate;
+               }
+
+               Calendar start = Calendar.getInstance();
+               start.setTime(new Date(dateStart));
+               Calendar end = Calendar.getInstance();
+               end.setTime(new Date(dateEnd));
+               query = new ArrayList<DataBase>();
+               List<Integer> totalTime = emptyHistogram(nbins);
+               List<Pair<String, String>> res = db.ReadProteinDataTable();
+               List<Pair<Date, Long>> numres = new ArrayList<Pair<Date, Long>>();
+
+               // the formatters are local to this call, so one instance of each is
+               // enough for all rows
+               SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
+               SimpleDateFormat datetimeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
+               for (Pair<String, String> entry : res) {
+                       try {
+                               // day of the job start (time of day is ignored by this pattern)
+                               Date jobstartdate = dateformatter.parse(entry.getElement0());
+                               long date = jobstartdate.getTime();
+                               if (dateStart <= date && date <= dateEnd) {
+                                       Date jobstarttime = datetimeformatter.parse(entry.getElement0());
+                                       Date jobendtime = datetimeformatter.parse(entry.getElement1());
+                                       // execution time in seconds
+                                       long diff = (jobendtime.getTime() - jobstarttime.getTime()) / 1000;
+                                       numres.add(new Pair<Date, Long>(jobstartdate, Long.valueOf(diff)));
+                               }
+                       } catch (ParseException e) {
+                               // a row with an unparsable time stamp is skipped
+                               e.printStackTrace();
+                       }
+               }
+
+               for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
+                       List<Integer> timeResult = emptyHistogram(nbins);
+                       for (Pair<Date, Long> p : numres) {
+                               if (date.equals(p.getElement0())) {
+                                       int bin = executionTimeBin(p.getElement1().longValue());
+                                       timeResult.set(bin, timeResult.get(bin) + 1);
+                               }
+                       }
+                       for (int i = 0; i < nbins; i++) {
+                               totalTime.set(i, totalTime.get(i) + timeResult.get(i));
+                       }
+                       DataBase dayRecord = new DataBase();
+                       dayRecord.setTimeRez(timeResult);
+                       dayRecord.setDate(DateFormat(date.getTime()));
+                       query.add(dayRecord);
+               }
+
+               DataBase totals = new DataBase();
+               totals.setTimeTotalExec(totalTime);
+               query.add(totals);
+               System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
+               return query;
+       }
+
+       /*
+        * create a histogram with nbins bins, all set to zero
+        */
+       private static List<Integer> emptyHistogram(int nbins) {
+               List<Integer> bins = new ArrayList<Integer>(nbins);
+               for (int i = 0; i < nbins; i++) {
+                       bins.add(0);
+               }
+               return bins;
+       }
+
+       /*
+        * map an execution time in seconds to the index of its histogram bin
+        */
+       private static int executionTimeBin(long seconds) {
+               if (seconds <= 30) {
+                       return 0;
+               }
+               if (seconds <= 60) {
+                       return 1;
+               }
+               if (seconds <= 120) {
+                       return 2;
+               }
+               if (seconds <= 600) {
+                       return 3;
+               }
+               return 4;
+       }
+       
+       /**
+        * query: total number of jobs for the period from date1 till date2.
+        *
+        * The period is clamped to [earliest date in the database, current date];
+        * for every day of the clamped period the number of jobs is read from the
+        * database.
+        *
+        * @param date1
+        *            first day of the period ("yyyy/MM/dd"); null means "1970/1/1"
+        * @param date2
+        *            last day of the period ("yyyy/MM/dd"); null means "2100/1/1"
+        * @return one record per day with the job count and the formatted date, or
+        *         null if one of the dates is not valid or the period does not
+        *         overlap the range covered by the database
+        */
+       public List<DataBase> countJobs(String date1, String date2) {
+               if (null == date1) {
+                       date1 = "1970/1/1";
+               }
+               if (null == date2) {
+                       // the default end date belongs to date2; assigning it to date1
+                       // left date2 null and the request was always rejected
+                       date2 = "2100/1/1";
+               }
+               if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
+                       System.out.println("Wrong date: point 3");
+                       return null;
+               }
+               SetDateRange();
+               long dateStart = DateParsing(date1);
+               long dateEnd = DateParsing(date2);
+               if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd) {
+                       return null;
+               }
+               if (dateStart < earlestDate) {
+                       dateStart = earlestDate;
+               }
+               if (dateEnd > currentDate) {
+                       // clamp the END of the period; moving the start here would make
+                       // the day loop below run up to the requested (future) end date
+                       dateEnd = currentDate;
+               }
+
+               Calendar start = Calendar.getInstance();
+               start.setTime(new Date(dateStart));
+               Calendar end = Calendar.getInstance();
+               end.setTime(new Date(dateEnd));
+               query = new ArrayList<DataBase>();
+               for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
+                       long jobsOnDay = db.ReadDateTable(date.getTime());
+                       DataBase dayRecord = new DataBase();
+                       dayRecord.setTotal((int) jobsOnDay);
+                       dayRecord.setDate(DateFormat(date.getTime()));
+                       query.add(dayRecord);
+               }
+               System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
+               return query;
+       }
+       /**
+        * query: protein sequence.
+        *
+        * @param protIn
+        *            the sequence (or the part of a sequence) to look for
+        * @param flag
+        *            "whole" looks protIn up with ReadWholeSequence, any other
+        *            value (including null) with ReadPartOfSequence; with "part"
+        *            every record additionally gets the found sequence split
+        *            around the occurrences of protIn
+        * @return one record per prediction of every sequence returned by the
+        *         database
+        */
+       public List<DataBase> readProteins(String protIn, String flag) {
+               query = new ArrayList<DataBase>();
+               List<StructureProteinPrediction> res;
+               // constant-first comparison: a null flag no longer throws a
+               // NullPointerException but is handled like any non-"whole" flag
+               if ("whole".equals(flag)) {
+                       res = db.ReadWholeSequence(protIn);
+               } else {
+                       res = db.ReadPartOfSequence(protIn);
+               }
+               boolean splitSequence = "part".equals(flag);
+               for (StructureProteinPrediction entry : res) {
+                       Map<String, String> pred = entry.getPrediction();
+                       // only the prediction values are used, so iterate them directly
+                       // (typed, instead of a raw Iterator over the entry set)
+                       for (String prediction : pred.values()) {
+                               DataBase record = new DataBase();
+                               record.setProt(entry.getSequence());
+                               record.setId(entry.getJobid());
+                               record.setJpred(prediction);
+                               if (splitSequence) {
+                                       record.setSubProt(CreateSubprot(entry.getSequence(), protIn));
+                               }
+                               query.add(record);
+                       }
+               }
+               return query;
+       }
+       
+       /*
+        * query: protein sequences together with their number of jobs; only
+        * sequences whose counter is strictly greater than minimalcounter are
+        * reported
+        */
+       public List<DataBase> readProteinByCounter(int minimalcounter) {
+               query = new ArrayList<DataBase>();
+               Map<String, Integer> counters = db.ReadProteinSequenceByCounter();
+               Iterator<Map.Entry<String, Integer>> walker = counters.entrySet().iterator();
+               while (walker.hasNext()) {
+                       Map.Entry<String, Integer> item = walker.next();
+                       Integer jobs = item.getValue();
+                       if (jobs <= minimalcounter) {
+                               // counter too small, sequence is not reported
+                               continue;
+                       }
+                       DataBase record = new DataBase();
+                       record.setTotalId(jobs);
+                       record.setProt(item.getKey());
+                       query.add(record);
+               }
+               return query;
+       }
+       
+       /*
+        * query: log info of the job with the given id, wrapped into a DataBase
+        * record
+        */
+       public DataBase readJobLog(String jobid) {
+               StructureJobLog jobLog = db.ReadJobLog(jobid);
+               DataBase record = new DataBase();
+               record.setLogInfo(jobLog);
+               return record;
+       }
+       /**
+        * create list of parts of protein sequence: the sequence is cut at every
+        * (non-overlapping, left to right) occurrence of subprot, and the pieces
+        * between the occurrences as well as the occurrences themselves are
+        * returned in their original order. Joining the list gives the protein.
+        *
+        * @param protein
+        *            the full sequence; null or empty gives an empty list
+        * @param subprot
+        *            the part to look for; if it is null or empty there is nothing
+        *            to cut at and the protein is returned as a single part
+        * @return the parts of the sequence, never null
+        */
+       private static List<String> CreateSubprot (String protein, String subprot) {
+               List<String> sub = new ArrayList<String>();
+               if (protein == null || protein.length() == 0) {
+                       return sub;
+               }
+               if (subprot == null || subprot.length() == 0) {
+                       // every string contains "", and cutting at "" never shortens the
+                       // rest of the sequence: without this guard the loop never ends
+                       sub.add(protein);
+                       return sub;
+               }
+               int from = 0;
+               int hit = protein.indexOf(subprot, from);
+               while (hit >= 0) {
+                       if (hit > from) {
+                               sub.add(protein.substring(from, hit));
+                       }
+                       sub.add(subprot);
+                       from = hit + subprot.length();
+                       hit = protein.indexOf(subprot, from);
+               }
+               if (from < protein.length()) {
+                       sub.add(protein.substring(from));
+               }
+               return sub;
+       }
+       /*
+        * convert a String date ("yyyy/MM/dd") into a long date (milliseconds
+        * since the epoch start); null and unparsable input give 0
+        */
+       private static long DateParsing(String datInput) {
+               if (null == datInput) {
+                       return 0;
+               }
+               try {
+                       Date parsed = new SimpleDateFormat("yyyy/MM/dd").parse(datInput);
+                       return parsed.getTime();
+               } catch (ParseException parseFailure) {
+                       parseFailure.printStackTrace();
+                       return 0;
+               }
+       }
+
+       // render a date given as epoch milliseconds as a "dd/MM/yyyy" string
+       private static String DateFormat(long inDate) {
+               Date moment = new Date(inDate);
+               return new SimpleDateFormat("dd/MM/yyyy").format(moment);
+       }
+
+       /*
+        * set the earliest and the current dates. currentDate is re-calculated on
+        * every call; earlestDate is static and is fetched only while it still has
+        * its initial value 0, i.e. normally at the 1st call
+        */
+       private static void SetDateRange() {
+               Calendar now = Calendar.getInstance();
+               int year = now.get(Calendar.YEAR);
+               int month = now.get(Calendar.MONTH) + 1; // Calendar.MONTH is zero-based
+               int day = now.get(Calendar.DAY_OF_MONTH);
+               currentDate = DateParsing(year + "/" + month + "/" + day);
+               if (0 != earlestDate) {
+                       return;
+               }
+               CassandraRequester cr = new CassandraRequester();
+               earlestDate = cr.earliestDate();
+               System.out.println("Set earlest Date = " + earlestDate);
+       }
+
+       /*
+        * check that the string is a date in the format "yyyy/MM/dd"; the check is
+        * strict (not lenient), so out-of-range fields such as month 13 are
+        * rejected. null and empty strings are not valid
+        */
+       public boolean isThisDateValid(String dateToValidate) {
+               if (null == dateToValidate || dateToValidate.length() == 0) {
+                       System.out.println("Undefined date");
+                       return false;
+               }
+               SimpleDateFormat strictFormat = new SimpleDateFormat("yyyy/MM/dd");
+               strictFormat.setLenient(false);
+               boolean valid;
+               try {
+                       // only the outcome matters: parse throws ParseException for a
+                       // date that is not valid
+                       strictFormat.parse(dateToValidate);
+                       valid = true;
+               } catch (ParseException parseFailure) {
+                       parseFailure.printStackTrace();
+                       valid = false;
+               }
+               return valid;
+       }
+
+       /*
+        * ask the database connector for the earliest date, remember it in the
+        * static earlestDate field and return it
+        */
+       public long earliestDate() {
+               long earliest = CassandraNativeConnector.getEarliestDateInDB();
+               earlestDate = earliest;
+               return earliest;
+       }
+       
+}