Add real NoSQL sources
[jabaws.git] / webservices / compbio / nosql / statistic / StatisticsProt.java
1 package combio.statistic;
2
3 import java.text.ParseException;
4 import java.text.SimpleDateFormat;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.Collections;
8 import java.util.Date;
9 import java.util.Iterator;
10 import java.util.List;
11
12 import me.prettyprint.cassandra.serializers.LongSerializer;
13 import me.prettyprint.cassandra.serializers.StringSerializer;
14 import me.prettyprint.hector.api.beans.ColumnSlice;
15 import me.prettyprint.hector.api.beans.HColumn;
16 import me.prettyprint.hector.api.beans.OrderedRows;
17 import me.prettyprint.hector.api.beans.Row;
18 import me.prettyprint.hector.api.factory.HFactory;
19 import me.prettyprint.hector.api.query.QueryResult;
20 import me.prettyprint.hector.api.query.RangeSlicesQuery;
21 import me.prettyprint.hector.api.query.SliceQuery;
22 import combio.cassandra.CassandraCreate;
23 import combio.cassandra.DataBase;
24
25 public class StatisticsProt {
26         private final static long MILLISECONDS_PER_DAY = 1000L * 60 * 60 * 24;
27         private CassandraCreate cc = new CassandraCreate();
28         private ArrayList<DataBase> query;
29         
30         // query for the period from date1 till date2
31                public List<DataBase> readDetail(String dateInStringSt, String dateInStringEnd) {
32                    long dateWorkSt = DateParsing(dateInStringSt);
33                    long dateWorkEnd = DateParsing(dateInStringEnd);                        
34                    query = new ArrayList<DataBase>();
35                    while (dateWorkSt <= dateWorkEnd) {
36                         SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(), StringSerializer.get(), StringSerializer.get());
37                         result.setColumnFamily("ProteinData");
38                         result.setKey(dateWorkSt);
39                         result.setRange(null, null, false, Integer.MAX_VALUE); 
40                         QueryResult <ColumnSlice<String, String>> columnSlice = result.execute();
41                         DataBase db = new DataBase(DateFormat(dateWorkSt), columnSlice.get().getColumns().size());
42                         query.add(db);
43                         dateWorkSt +=  MILLISECONDS_PER_DAY ;
44                     } 
45                    return query;
46         }
47         
48         // query jobs for the period from dateInStringSt till dateInStringEnd
49         public List<DataBase>  readLength(String dateInStringSt, String dateInStringEnd) {              
50                 query = new ArrayList<DataBase>();
51         long dateWorkSt = DateParsing(dateInStringSt);
52         long dateWorkEnd = DateParsing(dateInStringEnd);                        
53         while (dateWorkSt <= dateWorkEnd) {
54                 List<Integer> timeResult = new ArrayList<Integer>();
55                 SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(), StringSerializer.get(), StringSerializer.get());
56             result.setColumnFamily("ProteinData");
57             result.setKey(dateWorkSt);
58             result.setRange(null, null, false, Integer.MAX_VALUE); 
59             QueryResult <ColumnSlice<String, String>> columnSlice = result.execute();
60             List<HColumn<String, String>> col = columnSlice.get().getColumns();
61             Iterator<HColumn<String, String>> itCol = col.iterator();    
62             for (int i = 0; i < 4; i++) 
63                 timeResult.add(i, 0);
64             while (itCol.hasNext()) {
65                 String id = itCol.next().getName();             
66                 long lenResult = CountID(id);                   
67                 if (lenResult <= 1) 
68                         timeResult.set(0,  timeResult.get(0) + 1);
69                 else if (lenResult > 1 && lenResult <= 10) 
70                         timeResult.set(1, timeResult.get(1) + 1);
71                 else if (lenResult > 10 && lenResult <= 20) 
72                         timeResult.set(2, timeResult.get(2) + 1);
73                 else 
74                         timeResult.set(3, timeResult.get(3) + 1);
75             }  
76             DataBase db = new DataBase();
77             db.setTimeRez(timeResult);
78             db.setDate(DateFormat(dateWorkSt));
79             query.add(db);
80             List<Integer> test = query.get(0).getTimeRez();
81             dateWorkSt +=  MILLISECONDS_PER_DAY ;
82         }         
83         return query;
84     }
85         
86         //query by a protein name
87         public List<DataBase> readProt(String protIn) {     
88         query = new ArrayList<DataBase>();
89         SliceQuery<String, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get());
90         result.setColumnFamily("ProteinRow");
91         result.setKey(protIn);
92         result.setRange(null, null, false, Integer.MAX_VALUE); 
93         QueryResult <ColumnSlice<String, String>> columnSlice = result.execute();             
94         Iterator <HColumn<String, String>> it = columnSlice.get().getColumns().iterator();
95         while (it.hasNext()) {
96                 HColumn<String, String> col = it.next();
97                 DataBase db = new DataBase();
98                 db.setProt(protIn);
99                 db.setId(col.getName());
100                 db.setJpred(col.getValue());      
101                 query.add(db);
102         }    
103         return query;
104     }
105         
106         
107         //query by a sequence (whether exists proteins contains this sequence in the name)
108         public List<DataBase> readPart(String protIn) {     
109                 int row_count = 10000;
110         query = new ArrayList<DataBase>();
111         RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get());
112         result.setColumnFamily("ProteinRow");
113         result.setRange(null, null, false, Integer.MAX_VALUE); 
114         result.setRowCount(row_count);
115         String last_key = null;     
116         while (true) {
117                 result.setKeys(last_key, null);
118                 QueryResult <OrderedRows<String,String, String>> columnSlice = result.execute(); 
119                 OrderedRows<String, String, String> rows = columnSlice.get();
120                 Iterator<Row<String, String, String>> rowsIterator = rows.iterator();             
121             while (rowsIterator.hasNext()) {
122                 Row<String, String, String> row = rowsIterator.next();
123                 last_key = row.getKey(); 
124                 if (last_key.matches("(.*)" + protIn + "(.*)")) {
125                         Iterator <HColumn<String, String>> it = row.getColumnSlice().getColumns().iterator();
126                         while (it.hasNext()) {
127                                 HColumn<String, String> col = it.next();
128                                 List<String> subProt = new ArrayList<String>();
129                                 String subStr = last_key;
130                                 while (subStr.length() > 0 && subStr.contains(protIn)) {
131                                         String first = subStr.substring(0, subStr.indexOf(protIn));
132                                         if (first.length() > 0)
133                                                 subProt.add(first);
134                                         subProt.add(protIn);
135                                         subStr = subStr.substring(subStr.indexOf(protIn) + protIn.length(), subStr.length());
136                                 }
137                                 if (subStr.length() > 0)
138                                         subProt.add(subStr);
139                                 DataBase db = new DataBase();
140                                 db.setProt(last_key);
141                                 db.setId(col.getName());
142                                 db.setJpred(col.getValue());
143                                 db.setSubProt(subProt);
144                                 query.add(db);
145                         }
146                 }
147             }
148             if (rows.getCount() < row_count)
149                 break;
150         }
151         return query;
152     }
153         
154         // convert String to Date
155         private static long DateParsing(String datInput) {
156                 long dateWorkSt = 0;
157                 SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); 
158                 try {                           
159                         dateWorkSt = formatter.parse(datInput).getTime();                       
160         } catch (ParseException e) {
161                         e.printStackTrace();
162                 }       
163             return dateWorkSt;
164         }
165         
166         // convert String to Date
167                 private static long TimeConvert(String datInput) {
168                         long dateWorkSt = 0;
169                         SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss"); 
170                         try {                           
171                                 dateWorkSt = formatter.parse(datInput).getTime();                       
172                 } catch (ParseException e) {
173                                 e.printStackTrace();
174                         }
175         //              System.out.println("start reverce" + DateFormat1(dateWorkSt));
176                     return dateWorkSt;
177                 }
178         
179         //  convert long to date in string format
180         private static String DateFormat(long inDate){
181         SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
182         String dateString = datformat.format(new Date(inDate));
183                 return dateString;
184         }
185         
186         private static String DateFormat1(long inDate){
187         SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy:hh:mm:ss");
188         String dateString = datformat.format(new Date(inDate));
189                 return dateString;
190         }
191         
192         public long CountID(String id) {
193                 SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get());
194                 sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
195                 QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
196                 String datBegin = result.get().getColumnByName("DataBegin").getValue();
197                 String datEnd = result.get().getColumnByName("DataEnd").getValue();
198                 long datBeginLong = TimeConvert(datBegin);
199                 long datEndLong = TimeConvert(datEnd);
200                 return datEndLong-datBeginLong;
201                  
202         }
203         
204 }