1 package compbio.statistic;
3 import java.text.ParseException;
4 import java.text.SimpleDateFormat;
5 import java.util.ArrayList;
6 import java.util.Calendar;
8 import java.util.Iterator;
12 import compbio.cassandra.CassandraNativeConnector;
13 import compbio.cassandra.CassandraReader;
14 import compbio.cassandra.DataBase;
15 import compbio.cassandra.Pair;
16 import compbio.cassandra.StructureJobLog;
17 import compbio.cassandra.StructureProteinPrediction;
19 public class CassandraRequester {
20 private CassandraReader db = new CassandraReader();
21 private ArrayList<DataBase> query;
22 private static long currentDate = 0;
23 private static long earlestDate = 0;
24 private final static SimpleDateFormat formatYYMMDD = new SimpleDateFormat("yyyy/MM/dd");
25 private final static SimpleDateFormat formatDDMMYY = new SimpleDateFormat("dd/MM/yyyy");
28 * query: execution time for the period from date1 till date2
30 public List<DataBase> extractExecutionTime(String date1, String date2) {
37 if (!isThisDateValid(date1,formatYYMMDD) || !isThisDateValid(date2,formatYYMMDD)) {
38 System.out.println("Wrong date: point 3");
43 long dateStart = DateParsing(date1, formatYYMMDD);
44 long dateEnd = DateParsing(date2, formatYYMMDD);
45 if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd)
47 if (dateStart < earlestDate)
48 dateStart = earlestDate;
49 if (dateEnd > currentDate)
50 dateStart = currentDate;
52 Calendar start = Calendar.getInstance();
53 start.setTime(new Date(dateStart));
54 Calendar end = Calendar.getInstance();
55 end.setTime(new Date(dateEnd));
56 query = new ArrayList<DataBase>();
57 List<Integer> totalTime = new ArrayList<Integer>();
58 for (int i = 0; i < nbins; i++)
60 List<Pair<String, String>> res = db.ReadProteinDataTable();
61 List<Pair<Date, Long>> numres = new ArrayList<Pair<Date, Long>>();
63 for (Pair<String, String> entry : res) {
64 SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
66 Date jobstartdate = dateformatter.parse(entry.getElement0());
67 long date = jobstartdate.getTime();
68 if (dateStart <= date && date <= dateEnd) {
69 SimpleDateFormat datetimeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
70 Date jobstarttime = datetimeformatter.parse(entry.getElement0());
71 Date jobendtime = datetimeformatter.parse(entry.getElement1());
72 long diff = (jobendtime.getTime() - jobstarttime.getTime()) / 1000;
73 Pair<Date, Long> pair = new Pair<Date, Long>(jobstartdate, Long.valueOf(diff));
76 } catch (ParseException e) {
81 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
82 List<Integer> timeResult = new ArrayList<Integer>();
83 for (int i = 0; i < nbins; i++)
85 for (Pair<Date, Long> p : numres) {
86 if (date.equals(p.getElement0())) {
87 long lenResult = p.getElement1().longValue();
89 timeResult.set(0, timeResult.get(0) + 1);
90 else if (lenResult > 30 && lenResult <= 60)
91 timeResult.set(1, timeResult.get(1) + 1);
92 else if (lenResult > 60 && lenResult <= 120)
93 timeResult.set(2, timeResult.get(2) + 1);
94 else if (lenResult > 120 && lenResult <= 600)
95 timeResult.set(3, timeResult.get(3) + 1);
97 timeResult.set(4, timeResult.get(4) + 1);
101 for (int i = 0; i < nbins; i++)
102 totalTime.set(i, totalTime.get(i) + timeResult.get(i));
103 DataBase db = new DataBase();
104 db.setTimeRez(timeResult);
105 db.setDate(DateFormat(date.getTime()));
109 DataBase db = new DataBase();
110 db.setTimeTotalExec(totalTime);
112 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
117 * query: total number of jobs for the period from date1 till date2
119 public List<DataBase> countJobs(String date1, String date2) {
126 if (!isThisDateValid(date1, formatYYMMDD) || !isThisDateValid(date2, formatYYMMDD)) {
127 System.out.println("Wrong date: point 3");
131 long dateStart = DateParsing(date1, formatYYMMDD);
132 long dateEnd = DateParsing(date2, formatYYMMDD);
133 if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd)
135 if (dateStart < earlestDate)
136 dateStart = earlestDate;
137 if (dateEnd > currentDate)
138 dateStart = currentDate;
140 Calendar start = Calendar.getInstance();
141 start.setTime(new Date(dateStart));
142 Calendar end = Calendar.getInstance();
143 end.setTime(new Date(dateEnd));
144 query = new ArrayList<DataBase>();
145 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
146 List<Long> res = db.ReadDateTable(date.getTime());
149 DataBase bean = new DataBase();
150 bean.setTotal((int)(long)res.get(0));
151 bean.setTotalOK((int)(long)res.get(1));
152 bean.setTotalStopped((int)(long)res.get(2));
153 bean.setTotalError((int)(long)res.get(3));
154 bean.setTotalTimeOut((int)(long)res.get(4));
155 bean.setDate(DateFormat(date.getTime()));
158 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
163 * query: jobs and sequence at date
// Looks up the jobs stored for a single day and turns each returned pair into a DataBase bean.
// The date string is validated against formatDDMMYY ("dd/MM/yyyy") and converted with DateParsing
// before the database is queried.
165 public List<DataBase> readJobByDay (String date) {
// debug trace of the raw request parameter
166 System.out.println(date);
170 if (!isThisDateValid(date, formatDDMMYY)) {
171 System.out.println("Wrong date: point 3");
// numeric form of the requested day, as produced by DateParsing
175 long day = DateParsing(date, formatDDMMYY);
176 System.out.println(day);
// the requested day lies outside the [earlestDate, currentDate] window
// NOTE(review): the statement guarded by this test is not visible here -- confirm what is returned
177 if (day < earlestDate || day > currentDate)
179 List<Pair<String, String>> res = db.ReadProteinData(day);
// start a fresh result list in the query field
182 query = new ArrayList<DataBase>();
183 for (Pair<String, String> entry : res) {
184 DataBase bean = new DataBase();
// element0 is stored as the bean id, element1 is stored via setProt
186 bean.setId(entry.getElement0());
187 bean.setProt(entry.getElement1());
// the message text mentions "dates", but the number printed is the size of the query list
190 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
194 * query: protein sequence
// Searches the stored predictions by protein sequence and wraps every hit in a DataBase bean.
// flag "whole" queries db.ReadWholeSequence(protIn); the other visible query is
// db.ReadPartOfSequence(protIn), whose hits additionally get a split sequence when flag is "part".
196 public List<DataBase> readProteins(String protIn, String flag) {
// debug trace: length of the search string
199 System.out.println(protIn.length());
200 query = new ArrayList<DataBase>();
201 List<StructureProteinPrediction> res;
202 if (flag.equals("whole"))
203 res = db.ReadWholeSequence(protIn);
// an empty search string is never passed to ReadPartOfSequence; res is null in that case
// NOTE(review): the line that introduces this alternative is not visible here -- presumably an else; confirm
205 res = (protIn.length() > 0) ? db.ReadPartOfSequence(protIn) : null;
// NOTE(review): res can be null after the assignment above -- confirm that a guard precedes this loop
208 for (StructureProteinPrediction entry : res) {
209 DataBase bean = new DataBase();
// copy sequence, job id and the Jnetpred value of the hit into the bean
210 bean.setProt(entry.getSequence());
211 bean.setId(entry.getJobid());
212 bean.setJpred(entry.getJnetpred());
// for "part" searches also store the sequence cut into pieces around every match of protIn
213 if (flag.equals("part"))
214 bean.setSubProt(CreateSubprot (entry.getSequence(), protIn));
222 * query protein sequences with number of jobs
// Builds DataBase beans from the map returned by db.ReadProteinSequenceByCounter().
// An entry is used only if its value is strictly greater than minimalcounter and its key is not empty.
224 public List<DataBase> readProteinByCounter(int minimalcounter) {
// start a fresh result list in the query field
225 query = new ArrayList<DataBase>();
226 Map<String, Integer> map = db.ReadProteinSequenceByCounter();
227 for (Map.Entry<String, Integer> entry : map.entrySet()) {
// strict comparison: a value equal to minimalcounter is skipped
228 if (entry.getValue() > minimalcounter && entry.getKey().length() > 0) {
229 DataBase bean = new DataBase();
// the map value is stored via setTotalId, the map key via setProt
230 bean.setTotalId(entry.getValue());
231 bean.setProt(entry.getKey());
239 * query jobs log info
// Reads the log record of one job through db.ReadJobLog and stores it in a new DataBase bean.
241 public DataBase readJobLog(String jobid) {
244 StructureJobLog res = db.ReadJobLog(jobid);
// local bean; inside this method the name hides the query field of the class
247 DataBase query = new DataBase();
248 query.setLogInfo(res);
253 * create list of parts of protein sequence;
255 private static List<String> CreateSubprot (String protein, String subprot) {
256 List<String> sub = new ArrayList<String>();
257 String subStr = protein;
258 while (subStr.length() > 0 && subStr.contains(subprot)) {
259 String first = subStr.substring(0, subStr.indexOf(subprot));
260 if (first.length() > 0)
263 subStr = subStr.substring(subStr.indexOf(subprot) + subprot.length(), subStr.length());
265 if (subStr.length() > 0)
270 * convert String date into long date (milliseconds since the epoch start)
// Converts a date string into a long time stamp (milliseconds since the epoch) using the supplied formatter.
272 private static long DateParsing(String datInput, SimpleDateFormat formatter) {
// NOTE(review): the value handed back for a null input is not visible here -- confirm
273 if (datInput == null) {
// parse the text and keep the resulting Date as its getTime() value
279 dateWorkSt = formatter.parse(datInput).getTime();
// NOTE(review): the handling of unparsable input is not visible here -- confirm what callers receive
280 } catch (ParseException e) {
286 // convert long to date in string format
287 private static String DateFormat(long inDate) {
288 SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
289 return datformat.format(new Date(inDate));
293 * set the earliest and the current date. earlestDate is static and is set
294 * only on the 1st call; currentDate is re-calculated on every call
296 private static void SetDateRange() {
297 Calendar cal = Calendar.getInstance();
298 currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH), formatYYMMDD);
299 if (0 == earlestDate) {
300 CassandraRequester cr = new CassandraRequester();
301 earlestDate = cr.earliestDate();
302 System.out.println("Set earlest Date = " + earlestDate);
306 public boolean isThisDateValid(String dateToValidate, SimpleDateFormat sdf) {
307 if (dateToValidate == null || dateToValidate.equals("")) {
308 System.out.println("Undefined date");
312 // if not valid, this will throw ParseException
313 sdf.setLenient(false);
314 Date date = sdf.parse(dateToValidate);
315 } catch (ParseException e) {
323 * find the earliest date in the database
325 public long earliestDate() {
326 earlestDate = CassandraNativeConnector.getEarliestDateInDB();