1 package compbio.statistic;
3 import java.text.ParseException;
4 import java.text.SimpleDateFormat;
5 import java.util.ArrayList;
6 import java.util.Calendar;
8 import java.util.Iterator;
12 import compbio.cassandra.CassandraNativeConnector;
13 import compbio.cassandra.DataBase;
14 import compbio.cassandra.Pair;
15 import compbio.cassandra.StructureJobLog;
16 import compbio.cassandra.StructureProteinPrediction;
18 public class CassandraRequester {
19 private CassandraNativeConnector DBInstance = new CassandraNativeConnector();
20 private ArrayList<DataBase> query;
21 private static long currentDate = 0;
22 private static long earlestDate = 0;
25 * query: execution time for the period from date1 till date2
27 public List<DataBase> extractExecutionTime(String date1, String date2) {
34 if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
35 System.out.println("Wrong date: point 3");
40 long dateStart = DateParsing(date1);
41 long dateEnd = DateParsing(date2);
42 if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd)
44 if (dateStart < earlestDate)
45 dateStart = earlestDate;
46 if (dateEnd > currentDate)
47 dateStart = currentDate;
49 Calendar start = Calendar.getInstance();
50 start.setTime(new Date(dateStart));
51 Calendar end = Calendar.getInstance();
52 end.setTime(new Date(dateEnd));
53 query = new ArrayList<DataBase>();
54 List<Integer> totalTime = new ArrayList<Integer>();
55 for (int i = 0; i < nbins; i++)
57 List<Pair<String, String>> res = DBInstance.ReadProteinDataTable();
58 List<Pair<Date, Long>> numres = new ArrayList<Pair<Date, Long>>();
60 for (Pair<String, String> entry : res) {
61 SimpleDateFormat dateformatter = new SimpleDateFormat("yyyy/MM/dd");
63 Date jobstartdate = dateformatter.parse(entry.getElement0());
64 long date = jobstartdate.getTime();
65 if (dateStart <= date && date <= dateEnd) {
66 SimpleDateFormat datetimeformatter = new SimpleDateFormat("yyyy/MM/dd:H:m:s");
67 Date jobstarttime = datetimeformatter.parse(entry.getElement0());
68 Date jobendtime = datetimeformatter.parse(entry.getElement1());
69 long diff = (jobendtime.getTime() - jobstarttime.getTime()) / 1000;
70 Pair<Date, Long> pair = new Pair<Date, Long>(jobstartdate, Long.valueOf(diff));
73 } catch (ParseException e) {
78 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
79 List<Integer> timeResult = new ArrayList<Integer>();
80 for (int i = 0; i < nbins; i++)
82 for (Pair<Date, Long> p : numres) {
83 if (date.equals(p.getElement0())) {
84 long lenResult = p.getElement1().longValue();
86 timeResult.set(0, timeResult.get(0) + 1);
87 else if (lenResult > 30 && lenResult <= 60)
88 timeResult.set(1, timeResult.get(1) + 1);
89 else if (lenResult > 60 && lenResult <= 120)
90 timeResult.set(2, timeResult.get(2) + 1);
91 else if (lenResult > 120 && lenResult <= 600)
92 timeResult.set(3, timeResult.get(3) + 1);
94 timeResult.set(4, timeResult.get(4) + 1);
98 for (int i = 0; i < nbins; i++)
99 totalTime.set(i, totalTime.get(i) + timeResult.get(i));
100 DataBase db = new DataBase();
101 db.setTimeRez(timeResult);
102 db.setDate(DateFormat(date.getTime()));
106 DataBase db = new DataBase();
107 db.setTimeTotalExec(totalTime);
109 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
114 * query: total number of jobs for the period from date1 till date2
116 public List<DataBase> countJobs(String date1, String date2) {
123 if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
124 System.out.println("Wrong date: point 3");
128 long dateStart = DateParsing(date1);
129 long dateEnd = DateParsing(date2);
130 if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd)
132 if (dateStart < earlestDate)
133 dateStart = earlestDate;
134 if (dateEnd > currentDate)
135 dateStart = currentDate;
137 Calendar start = Calendar.getInstance();
138 start.setTime(new Date(dateStart));
139 Calendar end = Calendar.getInstance();
140 end.setTime(new Date(dateEnd));
141 query = new ArrayList<DataBase>();
142 for (Date date = start.getTime(); !start.after(end); start.add(Calendar.DATE, 1), date = start.getTime()) {
143 Integer res = DBInstance.ReadDateTable(date.getTime());
146 DataBase db = new DataBase();
148 db.setDate(DateFormat(date.getTime()));
151 System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
155 * query: protein sequence
157 public List<DataBase> readProteins(String protIn, String flag) {
158 query = new ArrayList<DataBase>();
159 List<StructureProteinPrediction> res;
160 if (flag.equals("whole"))
161 res = DBInstance.ReadWholeSequence(protIn);
163 res = DBInstance.ReadPartOfSequence(protIn);
164 for (StructureProteinPrediction entry : res) {
165 Map<String,String> pred = entry.getPrediction();
166 Iterator it = pred.entrySet().iterator();
167 while (it.hasNext()) {
168 DataBase db = new DataBase();
169 db.setProt(entry.getSequence());
170 Map.Entry pairs = (Map.Entry)it.next();
171 db.setId(entry.getJobid());
172 db.setJpred(pairs.getValue().toString());
173 if (flag.equals("part"))
174 db.setSubProt(CreateSubprot (entry.getSequence(), protIn));
182 * query protein sequences with number of jobs
184 public List<DataBase> readProteinByCounter(int counter) {
185 query = new ArrayList<DataBase>();
186 Map<String, Integer> map = DBInstance.ReadProteinDataByCounter();
187 for (Map.Entry<String, Integer> entry : map.entrySet()) {
188 if (entry.getValue() > counter) {
189 DataBase db = new DataBase();
190 db.setTotalId(entry.getValue());
191 db.setProt(entry.getKey());
199 * query jobs log info
201 public DataBase readJobLog(String jobid) {
202 // query = new ArrayList<DataBase>();
203 StructureJobLog res = DBInstance.ReadJobLog(jobid);
204 DataBase query = new DataBase();
205 query.setLogInfo(res);
210 * create list of parts of protein sequence;
212 private static List<String> CreateSubprot (String protein, String subprot) {
213 List<String> sub = new ArrayList<String>();
214 String subStr = protein;
215 while (subStr.length() > 0 && subStr.contains(subprot)) {
216 String first = subStr.substring(0, subStr.indexOf(subprot));
217 if (first.length() > 0)
220 subStr = subStr.substring(subStr.indexOf(subprot) + subprot.length(), subStr.length());
222 if (subStr.length() > 0)
227 * convert String date into long date (miliseconds since the epoch start)
229 private static long DateParsing(String datInput) {
230 if (datInput == null) {
234 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
236 dateWorkSt = formatter.parse(datInput).getTime();
237 } catch (ParseException e) {
243 // convert long to date in string format
244 private static String DateFormat(long inDate) {
245 SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
246 return datformat.format(new Date(inDate));
250 * set earlest date and current dates. earlestDate is static and should be
251 * set at the 1st call currentDate should be re-calculated every time
253 private static void SetDateRange() {
254 Calendar cal = Calendar.getInstance();
255 currentDate = DateParsing(cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH));
256 if (0 == earlestDate) {
257 CassandraRequester cr = new CassandraRequester();
258 earlestDate = cr.earliestDate();
259 System.out.println("Set earlest Date = " + earlestDate);
263 public boolean isThisDateValid(String dateToValidate) {
264 if (dateToValidate == null || dateToValidate.equals("")) {
265 System.out.println("Undefined date");
268 SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd");
270 // if not valid, this will throw ParseException
271 sdf.setLenient(false);
272 Date date = sdf.parse(dateToValidate);
273 } catch (ParseException e) {
281 * find the earliest date in the database
283 public long earliestDate() {
284 earlestDate = DBInstance.getEarliestDateInDB();