X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=webservices%2Fcompbio%2Fnosql%2Fstatistic%2FStatisticsProt.java;fp=webservices%2Fcompbio%2Fnosql%2Fstatistic%2FStatisticsProt.java;h=72cad070cde339ab5bd18af9335fdbc19eee49c3;hb=1b3f78859f281e8e3142b543a384e5cfbe366839;hp=0000000000000000000000000000000000000000;hpb=8b45c19de3f4e652033186bf6b9dc9b3e9205a64;p=jabaws.git diff --git a/webservices/compbio/nosql/statistic/StatisticsProt.java b/webservices/compbio/nosql/statistic/StatisticsProt.java new file mode 100644 index 0000000..72cad07 --- /dev/null +++ b/webservices/compbio/nosql/statistic/StatisticsProt.java @@ -0,0 +1,204 @@ +package combio.statistic; + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Date; +import java.util.Iterator; +import java.util.List; + +import me.prettyprint.cassandra.serializers.LongSerializer; +import me.prettyprint.cassandra.serializers.StringSerializer; +import me.prettyprint.hector.api.beans.ColumnSlice; +import me.prettyprint.hector.api.beans.HColumn; +import me.prettyprint.hector.api.beans.OrderedRows; +import me.prettyprint.hector.api.beans.Row; +import me.prettyprint.hector.api.factory.HFactory; +import me.prettyprint.hector.api.query.QueryResult; +import me.prettyprint.hector.api.query.RangeSlicesQuery; +import me.prettyprint.hector.api.query.SliceQuery; +import combio.cassandra.CassandraCreate; +import combio.cassandra.DataBase; + +public class StatisticsProt { + private final static long MILLISECONDS_PER_DAY = 1000L * 60 * 60 * 24; + private CassandraCreate cc = new CassandraCreate(); + private ArrayList query; + + // query for the period from date1 till date2 + public List readDetail(String dateInStringSt, String dateInStringEnd) { + long dateWorkSt = DateParsing(dateInStringSt); + long dateWorkEnd = DateParsing(dateInStringEnd); + query = new ArrayList(); + while (dateWorkSt <= dateWorkEnd) { + SliceQuery result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(), StringSerializer.get(), StringSerializer.get()); + result.setColumnFamily("ProteinData"); + result.setKey(dateWorkSt); + result.setRange(null, null, false, Integer.MAX_VALUE); + QueryResult > columnSlice = result.execute(); + DataBase db = new DataBase(DateFormat(dateWorkSt), columnSlice.get().getColumns().size()); + query.add(db); + dateWorkSt += MILLISECONDS_PER_DAY ; + } + return query; + } + + // query jobs for the period from dateInStringSt till dateInStringEnd + public List readLength(String dateInStringSt, String dateInStringEnd) { + query = new ArrayList(); + long dateWorkSt = DateParsing(dateInStringSt); + long dateWorkEnd = DateParsing(dateInStringEnd); + while (dateWorkSt <= dateWorkEnd) { + List timeResult = new ArrayList(); + SliceQuery result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(), StringSerializer.get(), StringSerializer.get()); + result.setColumnFamily("ProteinData"); + result.setKey(dateWorkSt); + result.setRange(null, null, false, Integer.MAX_VALUE); + QueryResult > columnSlice = result.execute(); + List> col = columnSlice.get().getColumns(); + Iterator> itCol = col.iterator(); + for (int i = 0; i < 4; i++) + timeResult.add(i, 0); + while (itCol.hasNext()) { + String id = itCol.next().getName(); + long lenResult = CountID(id); + if (lenResult <= 1) + timeResult.set(0, timeResult.get(0) + 1); + else if (lenResult > 1 && lenResult <= 10) + timeResult.set(1, timeResult.get(1) + 1); + else if (lenResult > 10 && lenResult <= 20) + timeResult.set(2, timeResult.get(2) + 1); + else + timeResult.set(3, timeResult.get(3) + 1); + } + DataBase db = new DataBase(); + db.setTimeRez(timeResult); + db.setDate(DateFormat(dateWorkSt)); + query.add(db); + List test = query.get(0).getTimeRez(); + dateWorkSt += MILLISECONDS_PER_DAY ; + } + return query; + } + + //query by a protein name + public List readProt(String protIn) { + query = new ArrayList(); + SliceQuery result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get()); + result.setColumnFamily("ProteinRow"); + result.setKey(protIn); + result.setRange(null, null, false, Integer.MAX_VALUE); + QueryResult > columnSlice = result.execute(); + Iterator > it = columnSlice.get().getColumns().iterator(); + while (it.hasNext()) { + HColumn col = it.next(); + DataBase db = new DataBase(); + db.setProt(protIn); + db.setId(col.getName()); + db.setJpred(col.getValue()); + query.add(db); + } + return query; + } + + + //query by a sequence (whether exists proteins contains this sequence in the name) + public List readPart(String protIn) { + int row_count = 10000; + query = new ArrayList(); + RangeSlicesQuery result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get()); + result.setColumnFamily("ProteinRow"); + result.setRange(null, null, false, Integer.MAX_VALUE); + result.setRowCount(row_count); + String last_key = null; + while (true) { + result.setKeys(last_key, null); + QueryResult > columnSlice = result.execute(); + OrderedRows rows = columnSlice.get(); + Iterator> rowsIterator = rows.iterator(); + while (rowsIterator.hasNext()) { + Row row = rowsIterator.next(); + last_key = row.getKey(); + if (last_key.matches("(.*)" + protIn + "(.*)")) { + Iterator > it = row.getColumnSlice().getColumns().iterator(); + while (it.hasNext()) { + HColumn col = it.next(); + List subProt = new ArrayList(); + String subStr = last_key; + while (subStr.length() > 0 && subStr.contains(protIn)) { + String first = subStr.substring(0, subStr.indexOf(protIn)); + if (first.length() > 0) + subProt.add(first); + subProt.add(protIn); + subStr = subStr.substring(subStr.indexOf(protIn) + protIn.length(), subStr.length()); + } + if (subStr.length() > 0) + subProt.add(subStr); + DataBase db = new DataBase(); + db.setProt(last_key); + db.setId(col.getName()); + db.setJpred(col.getValue()); + db.setSubProt(subProt); + query.add(db); + } + } + } + if (rows.getCount() < row_count) + break; + } + return query; + } + + // convert String to Date + private static long DateParsing(String datInput) { + long dateWorkSt = 0; + SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); + try { + dateWorkSt = formatter.parse(datInput).getTime(); + } catch (ParseException e) { + e.printStackTrace(); + } + return dateWorkSt; + } + + // convert String to Date + private static long TimeConvert(String datInput) { + long dateWorkSt = 0; + SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss"); + try { + dateWorkSt = formatter.parse(datInput).getTime(); + } catch (ParseException e) { + e.printStackTrace(); + } + // System.out.println("start reverce" + DateFormat1(dateWorkSt)); + return dateWorkSt; + } + + // convert long to date in string format + private static String DateFormat(long inDate){ + SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy"); + String dateString = datformat.format(new Date(inDate)); + return dateString; + } + + private static String DateFormat1(long inDate){ + SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy:hh:mm:ss"); + String dateString = datformat.format(new Date(inDate)); + return dateString; + } + + public long CountID(String id) { + SliceQuery sliceQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(), StringSerializer.get(), StringSerializer.get()); + sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100); + QueryResult> result = sliceQuery.execute(); + String datBegin = result.get().getColumnByName("DataBegin").getValue(); + String datEnd = result.get().getColumnByName("DataEnd").getValue(); + long datBeginLong = TimeConvert(datBegin); + long datEndLong = TimeConvert(datEnd); + return datEndLong-datBeginLong; + + } + +}