first nosql version
[jabaws.git] / webservices / compbio / nosql / src / combio / statistic / StatisticsProt.java
diff --git a/webservices/compbio/nosql/src/combio/statistic/StatisticsProt.java b/webservices/compbio/nosql/src/combio/statistic/StatisticsProt.java
new file mode 100644 (file)
index 0000000..72cad07
--- /dev/null
@@ -0,0 +1,204 @@
+package combio.statistic;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Date;
+import java.util.Iterator;
+import java.util.List;
+
+import me.prettyprint.cassandra.serializers.LongSerializer;
+import me.prettyprint.cassandra.serializers.StringSerializer;
+import me.prettyprint.hector.api.beans.ColumnSlice;
+import me.prettyprint.hector.api.beans.HColumn;
+import me.prettyprint.hector.api.beans.OrderedRows;
+import me.prettyprint.hector.api.beans.Row;
+import me.prettyprint.hector.api.factory.HFactory;
+import me.prettyprint.hector.api.query.QueryResult;
+import me.prettyprint.hector.api.query.RangeSlicesQuery;
+import me.prettyprint.hector.api.query.SliceQuery;
+import combio.cassandra.CassandraCreate;
+import combio.cassandra.DataBase;
+
+public class StatisticsProt {
+       // Length of one day in milliseconds; readDetail and readLength add it to their row key to move to the next day.
+       private final static long MILLISECONDS_PER_DAY = 1000L * 60 * 60 * 24;
+       // Supplies the keyspace handle (cc.GetKeyspace()) that every query in this class is created with.
+       private CassandraCreate cc = new CassandraCreate();
+       // Reference to the result list of the most recent read* call; each read* method points it at a fresh list.
+       // NOTE(review): this is per-instance state, so calls running concurrently on one instance would all write it - confirm how instances are shared.
+       private ArrayList<DataBase> query;
+       
+       // query for the period from date1 till date2
+              public List<DataBase> readDetail(String dateInStringSt, String dateInStringEnd) {
+                  long dateWorkSt = DateParsing(dateInStringSt);
+                  long dateWorkEnd = DateParsing(dateInStringEnd);                        
+                  query = new ArrayList<DataBase>();
+                  while (dateWorkSt <= dateWorkEnd) {
+                       SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(), StringSerializer.get(), StringSerializer.get());
+                       result.setColumnFamily("ProteinData");
+                       result.setKey(dateWorkSt);
+                       result.setRange(null, null, false, Integer.MAX_VALUE); 
+                       QueryResult <ColumnSlice<String, String>> columnSlice = result.execute();
+                       DataBase db = new DataBase(DateFormat(dateWorkSt), columnSlice.get().getColumns().size());
+                       query.add(db);
+                       dateWorkSt +=  MILLISECONDS_PER_DAY ;
+                   } 
+                  return query;
+       }
+       
+       // query jobs for the period from dateInStringSt till dateInStringEnd
+       public List<DataBase>  readLength(String dateInStringSt, String dateInStringEnd) {              
+               query = new ArrayList<DataBase>();
+       long dateWorkSt = DateParsing(dateInStringSt);
+       long dateWorkEnd = DateParsing(dateInStringEnd);                        
+       while (dateWorkSt <= dateWorkEnd) {
+               List<Integer> timeResult = new ArrayList<Integer>();
+               SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(), StringSerializer.get(), StringSerializer.get());
+            result.setColumnFamily("ProteinData");
+            result.setKey(dateWorkSt);
+            result.setRange(null, null, false, Integer.MAX_VALUE); 
+            QueryResult <ColumnSlice<String, String>> columnSlice = result.execute();
+            List<HColumn<String, String>> col = columnSlice.get().getColumns();
+            Iterator<HColumn<String, String>> itCol = col.iterator();    
+            for (int i = 0; i < 4; i++) 
+               timeResult.add(i, 0);
+            while (itCol.hasNext()) {
+               String id = itCol.next().getName();             
+               long lenResult = CountID(id);                   
+               if (lenResult <= 1) 
+                       timeResult.set(0,  timeResult.get(0) + 1);
+               else if (lenResult > 1 && lenResult <= 10) 
+                       timeResult.set(1, timeResult.get(1) + 1);
+               else if (lenResult > 10 && lenResult <= 20) 
+                       timeResult.set(2, timeResult.get(2) + 1);
+                else 
+                       timeResult.set(3, timeResult.get(3) + 1);
+            }  
+            DataBase db = new DataBase();
+            db.setTimeRez(timeResult);
+            db.setDate(DateFormat(dateWorkSt));
+            query.add(db);
+            List<Integer> test = query.get(0).getTimeRez();
+            dateWorkSt +=  MILLISECONDS_PER_DAY ;
+        }         
+       return query;
+    }
+       
+       //query by a protein name
+       public List<DataBase> readProt(String protIn) {     
+       query = new ArrayList<DataBase>();
+       SliceQuery<String, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get());
+        result.setColumnFamily("ProteinRow");
+        result.setKey(protIn);
+        result.setRange(null, null, false, Integer.MAX_VALUE); 
+        QueryResult <ColumnSlice<String, String>> columnSlice = result.execute();             
+        Iterator <HColumn<String, String>> it = columnSlice.get().getColumns().iterator();
+        while (it.hasNext()) {
+               HColumn<String, String> col = it.next();
+               DataBase db = new DataBase();
+               db.setProt(protIn);
+               db.setId(col.getName());
+               db.setJpred(col.getValue());      
+               query.add(db);
+        }    
+       return query;
+    }
+       
+       
+       // query by a sequence (finds the proteins whose name contains this sequence)
+       public List<DataBase> readPart(String protIn) {     
+               int row_count = 10000;
+       query = new ArrayList<DataBase>();
+       RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get());
+        result.setColumnFamily("ProteinRow");
+        result.setRange(null, null, false, Integer.MAX_VALUE); 
+        result.setRowCount(row_count);
+        String last_key = null;     
+        while (true) {
+               result.setKeys(last_key, null);
+               QueryResult <OrderedRows<String,String, String>> columnSlice = result.execute(); 
+               OrderedRows<String, String, String> rows = columnSlice.get();
+               Iterator<Row<String, String, String>> rowsIterator = rows.iterator();             
+            while (rowsIterator.hasNext()) {
+               Row<String, String, String> row = rowsIterator.next();
+                last_key = row.getKey(); 
+                if (last_key.matches("(.*)" + protIn + "(.*)")) {
+                       Iterator <HColumn<String, String>> it = row.getColumnSlice().getColumns().iterator();
+                       while (it.hasNext()) {
+                               HColumn<String, String> col = it.next();
+                               List<String> subProt = new ArrayList<String>();
+                               String subStr = last_key;
+                               while (subStr.length() > 0 && subStr.contains(protIn)) {
+                                       String first = subStr.substring(0, subStr.indexOf(protIn));
+                                       if (first.length() > 0)
+                                               subProt.add(first);
+                                       subProt.add(protIn);
+                                       subStr = subStr.substring(subStr.indexOf(protIn) + protIn.length(), subStr.length());
+                               }
+                               if (subStr.length() > 0)
+                                       subProt.add(subStr);
+                               DataBase db = new DataBase();
+                               db.setProt(last_key);
+                               db.setId(col.getName());
+                               db.setJpred(col.getValue());
+                               db.setSubProt(subProt);
+                               query.add(db);
+                       }
+                }
+            }
+            if (rows.getCount() < row_count)
+                break;
+        }
+       return query;
+    }
+       
+       // convert a date String to a time in milliseconds
+       /**
+        * Parses a day written as yyyy/MM/dd into milliseconds since the epoch, using the default
+        * time zone of the JVM.
+        * <p>
+        * A value that cannot be parsed is rejected instead of being mapped to 0: the callers use
+        * the result as the start or end of a day-by-day loop, and a start of 0 would make them
+        * walk every day since 1970.
+        *
+        * @param datInput the day to parse, in the form yyyy/MM/dd
+        * @return the parsed instant in milliseconds since the epoch
+        * @throws IllegalArgumentException if datInput is null or cannot be parsed
+        */
+       private static long DateParsing(String datInput) {
+               if (datInput == null) {
+                       throw new IllegalArgumentException("Date is null, expected yyyy/MM/dd");
+               }
+               // SimpleDateFormat is not thread-safe, so each call uses its own instance
+               SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
+               try {
+                       return formatter.parse(datInput).getTime();
+               } catch (ParseException e) {
+                       throw new IllegalArgumentException("Cannot parse date '" + datInput + "', expected yyyy/MM/dd", e);
+               }
+       }
+       
+       // convert a date-and-time String to a time in milliseconds
+       /**
+        * Parses a timestamp written as yyyy/MM/dd:HH:mm:ss into milliseconds since the epoch,
+        * using the default time zone of the JVM.
+        * <p>
+        * The hour is read with HH (hour of day, 0-23). With the 12-hour letter hh and no am/pm
+        * marker in the pattern, an hour value of 12 is read as 0, which shifts such a timestamp
+        * by twelve hours.
+        * NOTE(review): this assumes the parsed strings use a 24-hour clock - confirm against the
+        * code that writes them.
+        * <p>
+        * A value that cannot be parsed is reported on standard error and yields 0.
+        *
+        * @param datInput the timestamp to parse
+        * @return the parsed instant in milliseconds since the epoch, or 0 if parsing failed
+        */
+       private static long TimeConvert(String datInput) {
+               long dateWorkSt = 0;
+               // SimpleDateFormat is not thread-safe, so each call uses its own instance
+               SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:HH:mm:ss");
+               try {
+                       dateWorkSt = formatter.parse(datInput).getTime();
+               } catch (ParseException e) {
+                       // best effort: the failure is reported and 0 is returned
+                       e.printStackTrace();
+               }
+               return dateWorkSt;
+       }
+       
+       //  convert long to date in string format
+       /**
+        * Formats a time given in milliseconds since the epoch as dd/MM/yyyy, using the default
+        * time zone of the JVM.
+        *
+        * @param inDate the instant to format, in milliseconds since the epoch
+        * @return the day of that instant as dd/MM/yyyy
+        */
+       private static String DateFormat(long inDate){
+               // a fresh formatter per call, because SimpleDateFormat is not thread-safe
+               return new SimpleDateFormat("dd/MM/yyyy").format(new Date(inDate));
+       }
+       
+       /**
+        * Formats a time given in milliseconds since the epoch as dd/MM/yyyy:HH:mm:ss, using the
+        * default time zone of the JVM.
+        * <p>
+        * The hour is written with HH (hour of day, 0-23); the 12-hour letter hh without an am/pm
+        * marker would print the same text for a morning and an afternoon time.
+        *
+        * @param inDate the instant to format, in milliseconds since the epoch
+        * @return the formatted date and time
+        */
+       private static String DateFormat1(long inDate){
+               // a fresh formatter per call, because SimpleDateFormat is not thread-safe
+               SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy:HH:mm:ss");
+               return datformat.format(new Date(inDate));
+       }
+       
+       public long CountID(String id) {
+               SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get());
+               sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
+               QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
+               String datBegin = result.get().getColumnByName("DataBegin").getValue();
+               String datEnd = result.get().getColumnByName("DataEnd").getValue();
+               long datBeginLong = TimeConvert(datBegin);
+               long datEndLong = TimeConvert(datEnd);
+               return datEndLong-datBeginLong;
+                
+       }
+       
+}