fix data report, protein report
[proteocache.git] / server / compbio / statistic / CassandraRequester.java
index 9885d64..5335023 100755 (executable)
@@ -5,11 +5,14 @@ import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.Date;
+import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 
 import compbio.cassandra.CassandraNativeConnector;
 import compbio.cassandra.DataBase;
 import compbio.cassandra.Pair;
+import compbio.cassandra.StructureProteinPrediction;
 
 public class CassandraRequester {
        private CassandraNativeConnector DBInstance = new CassandraNativeConnector();
@@ -105,7 +108,100 @@ public class CassandraRequester {
                System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
                return query;
        }
+       
+       /**
+        * Query: total number of jobs for every day of the period from date1 till
+        * date2 (both ends included).
+        *
+        * A null date1 is replaced by "1970/1/1" and a null date2 by "2100/1/1".
+        * After SetDateRange() has been called, the period is clamped to the
+        * earlestDate and currentDate fields.
+        *
+        * @param date1 first day of the period, or null for no lower limit
+        * @param date2 last day of the period, or null for no upper limit
+        * @return one DataBase record (date and total) per day for which
+        *         ReadDateTable returned a value; null if a date is rejected by
+        *         isThisDateValid, if the period is reversed, or if it lies
+        *         completely outside the earlestDate..currentDate range
+        */
+       public List<DataBase> countJobs(String date1, String date2) {
+               if (null == date1) {
+                       date1 = "1970/1/1";
+               }
+               if (null == date2) {
+                       // the default end date belongs to date2 (it used to overwrite date1)
+                       date2 = "2100/1/1";
+               }
+               if (!isThisDateValid(date1) || !isThisDateValid(date2)) {
+                       System.out.println("Wrong date: point 3");
+                       return null;
+               }
+               SetDateRange();
+               long dateStart = DateParsing(date1);
+               long dateEnd = DateParsing(date2);
+               if (dateEnd < earlestDate || dateStart > currentDate || dateStart > dateEnd) {
+                       return null;
+               }
+               if (dateStart < earlestDate) {
+                       dateStart = earlestDate;
+               }
+               if (dateEnd > currentDate) {
+                       // clamp the END of the period (it used to move dateStart instead)
+                       dateEnd = currentDate;
+               }
+
+               Calendar start = Calendar.getInstance();
+               start.setTime(new Date(dateStart));
+               Calendar end = Calendar.getInstance();
+               end.setTime(new Date(dateEnd));
+               query = new ArrayList<DataBase>();
+               // walk the period one calendar day at a time, end day included
+               while (!start.after(end)) {
+                       long day = start.getTimeInMillis();
+                       Integer res = DBInstance.ReadDateTable(day);
+                       if (res != null) {
+                               DataBase db = new DataBase();
+                               db.setTotal(res);
+                               db.setDate(DateFormat(day));
+                               query.add(db);
+                       }
+                       start.add(Calendar.DATE, 1);
+               }
+               System.out.println("StatisticsProt.readLength: total number of dates = " + query.size());
+               return query;
+       }
+       /**
+        * Query: protein sequence.
+        *
+        * Searches the database for protIn either as a whole sequence (flag
+        * "whole", via ReadWholeSequence) or as a part of a sequence (any other
+        * flag, via ReadPartOfSequence) and creates one DataBase record per
+        * prediction value of every entry found.
+        *
+        * @param protIn the sequence (or sub-sequence) to look for
+        * @param flag   "whole" selects the whole-sequence search; "part"
+        *               additionally stores the sequence split around protIn in
+        *               each record; a null flag is handled like any other
+        *               non-"whole" value
+        * @return the records built from the search result; an empty list if the
+        *         database call returned null or nothing
+        */
+       public List<DataBase> readProteins(String protIn, String flag) {
+               query = new ArrayList<DataBase>();
+               List<StructureProteinPrediction> res;
+               // constant-first comparison: a null flag must not raise a NullPointerException
+               if ("whole".equals(flag)) {
+                       res = DBInstance.ReadWholeSequence(protIn);
+               } else {
+                       res = DBInstance.ReadPartOfSequence(protIn);
+               }
+               if (res == null) {
+                       return query;
+               }
+               boolean partialSearch = "part".equals(flag);
+               for (StructureProteinPrediction entry : res) {
+                       Map<String,String> pred = entry.getPrediction();
+                       // only the prediction values are needed, the keys are not used
+                       for (String prediction : pred.values()) {
+                               DataBase db = new DataBase();
+                               db.setProt(entry.getSequence());
+                               db.setId(entry.getJobid());
+                               db.setJpred(prediction);
+                               if (partialSearch) {
+                                       db.setSubProt(CreateSubprot (entry.getSequence(), protIn));
+                               }
+                               query.add(db);
+                       }
+               }
+               return query;
+       }
+       
+       /**
+        * Query: protein sequences with number of jobs.
+        *
+        * The database call is still disabled, so the query field is reset and an
+        * empty list is returned whatever the value of counter.
+        *
+        * @param counter number of jobs (not used yet)
+        * @return an empty list
+        */
+       public List<DataBase> readProteinByCounter(int counter) {
+               // disabled: List<Pair<String, String>> res = DBInstance.ReadProteinDataByCounter(counter);
+               List<DataBase> nothingYet = new ArrayList<DataBase>();
+               query = nothingYet;
+               return nothingYet;
+       }
+       
+       
+       /**
+        * Cut a protein sequence into consecutive pieces around every occurrence
+        * of subprot. The occurrences themselves are kept as separate pieces, so
+        * joining the returned list gives the protein back, e.g.
+        * ("AABAA", "B") gives [AA, B, AA].
+        *
+        * @param protein the full sequence; null or empty gives an empty list
+        * @param subprot the fragment to cut around; if it is null or empty the
+        *                protein is returned as a single piece (an empty fragment
+        *                used to make the loop run forever)
+        * @return the pieces in their original order, never null
+        */
+       private static List<String> CreateSubprot (String protein, String subprot) {
+               List<String> sub = new ArrayList<String>();
+               if (protein == null || protein.length() == 0) {
+                       return sub;
+               }
+               if (subprot == null || subprot.length() == 0) {
+                       // nothing to cut around: the whole protein is the only piece
+                       sub.add(protein);
+                       return sub;
+               }
+               int from = 0;
+               int hit = protein.indexOf(subprot, from);
+               while (hit >= 0) {
+                       if (hit > from) {
+                               sub.add(protein.substring(from, hit));
+                       }
+                       sub.add(subprot);
+                       // continue the scan right behind the occurrence just consumed
+                       from = hit + subprot.length();
+                       hit = protein.indexOf(subprot, from);
+               }
+               if (from < protein.length()) {
+                       sub.add(protein.substring(from));
+               }
+               return sub;
+       }
        /*
         * convert String date into long date (milliseconds since the epoch start)
         */
@@ -164,7 +260,8 @@ public class CassandraRequester {
         * find the earliest date in the database
         */
        public long earliestDate() {
-               earlestDate =  DBInstance.getEarliestDateInDB();
+               earlestDate = DBInstance.getEarliestDateInDB(); // store the value read from the database in the earlestDate field
                return earlestDate;
        }
+       
 }