--- /dev/null
+package compbio.cassandra;
+
+import java.util.Arrays;
+import java.util.List;
+
+import me.prettyprint.cassandra.serializers.LongSerializer;
+import me.prettyprint.cassandra.serializers.StringSerializer;
+import me.prettyprint.cassandra.service.ThriftKsDef;
+import me.prettyprint.hector.api.Cluster;
+import me.prettyprint.hector.api.Keyspace;
+import me.prettyprint.hector.api.beans.ColumnSlice;
+import me.prettyprint.hector.api.ddl.ColumnFamilyDefinition;
+import me.prettyprint.hector.api.ddl.ComparatorType;
+import me.prettyprint.hector.api.ddl.KeyspaceDefinition;
+import me.prettyprint.hector.api.factory.HFactory;
+import me.prettyprint.hector.api.mutation.Mutator;
+import me.prettyprint.hector.api.query.QueryResult;
+import me.prettyprint.hector.api.query.SliceQuery;
+
+/**
+ * Thin wrapper around the Hector client: opens the connection to the local
+ * Cassandra node, creates the schema on first use and buffers/flushes the
+ * insertions produced while parsing the job list.
+ *
+ * All connection state is static, so every instance of this class shares the
+ * same cluster, keyspace and pending mutations.
+ */
+public class CassandraCreate {
+    private static Keyspace ksp;
+    private static Cluster cluster;
+    private static Mutator<Long> mutatorLong;
+    private static Mutator<String> mutatorString;
+    private static Mutator<String> mutatorLog;
+    StringSerializer ss = StringSerializer.get();
+    LongSerializer ls = LongSerializer.get();
+
+    /**
+     * Connects to the cluster at 127.0.0.1:9160 and opens "ProteinKeyspace".
+     * When the keyspace does not exist yet it is created together with the
+     * column families ProteinRow, ProteinLog and ProteinData.
+     */
+    public void Connection() {
+        cluster = HFactory.getOrCreateCluster("Protein Cluster", "127.0.0.1:9160");
+        KeyspaceDefinition keyspaceDef = cluster.describeKeyspace("ProteinKeyspace");
+        /*
+         * If keyspace does not exist, the CFs don't exist either. => create
+         * them.
+         */
+        if (keyspaceDef == null) { // create column family
+            System.out.println("ProteinKeyspace has been null");
+            ColumnFamilyDefinition cfProtein = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinRow",
+                    ComparatorType.ASCIITYPE);
+            ColumnFamilyDefinition cfLog = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinLog",
+                    ComparatorType.ASCIITYPE);
+            ColumnFamilyDefinition cfData = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinData",
+                    ComparatorType.ASCIITYPE);
+
+            KeyspaceDefinition newKeyspace = HFactory.createKeyspaceDefinition("ProteinKeyspace", ThriftKsDef.DEF_STRATEGY_CLASS, 1,
+                    Arrays.asList(cfProtein, cfLog, cfData));
+            /*
+             * Add the schema to the cluster. "true" as the second param means
+             * that Hector will be blocked until all nodes see the change.
+             */
+            cluster.addKeyspace(newKeyspace, true);
+            // NOTE(review): the keyspace definition above already lists the
+            // three column families; confirm that adding them again does not
+            // fail on a fresh cluster.
+            cluster.addColumnFamily(cfProtein, true);
+            cluster.addColumnFamily(cfLog, true);
+            cluster.addColumnFamily(cfData, true);
+        } else {
+            System.out.println("Data loaded");
+        }
+        ksp = HFactory.createKeyspace("ProteinKeyspace", cluster);
+        System.out.println("Cassandra has been connected");
+    }
+
+    /**
+     * Creates fresh mutators and loads the jobs of the last 4 days from
+     * http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat,
+     * then flushes whatever is still buffered.
+     */
+    public void Parsing() {
+        /* CF ProteinRow store protein and prediction */
+        mutatorString = HFactory.createMutator(ksp, ss);
+
+        /*
+         * ProteinLog stores logging info: IP, job id, start date and end date
+         */
+        mutatorLog = HFactory.createMutator(ksp, ss);
+
+        /* CF ProteinData store id and protein per data */
+        mutatorLong = HFactory.createMutator(ksp, ls);
+
+        System.out.println("Parsing......");
+        String in = "http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat";
+        DataParsing datParsing = new DataParsing();
+        datParsing.Parsing(in, 4);
+        flushData();
+    }
+
+    /** Sends all insertions buffered by {@link #InsertData} to the database. */
+    public void flushData() {
+        mutatorString.execute();
+        mutatorLong.execute();
+        mutatorLog.execute();
+    }
+
+    /**
+     * Shuts down the connection pool. Does nothing when {@link #Connection()}
+     * never got as far as creating the cluster object.
+     */
+    public void Closing() {
+        if (cluster == null) {
+            return;
+        }
+        cluster.getConnectionManager().shutdown();
+        System.out.println("Cassandra has been shut down");
+    }
+
+    /**
+     * Checks whether the job id exists in the DB, i.e. whether the ProteinLog
+     * row keyed by the id has at least one column.
+     *
+     * @param jobid
+     *            job identifier used as the ProteinLog row key
+     * @return true if a row with columns exists for the id
+     */
+    public boolean CheckID(String jobid) {
+        SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(ksp, ss, ss, ss);
+        // a single column is enough to answer "does the row exist?"
+        sliceQuery.setColumnFamily("ProteinLog").setKey(jobid).setRange("", "", false, 1);
+        QueryResult<ColumnSlice<String, String>> result = sliceQuery.execute();
+        return !result.get().getColumns().isEmpty();
+    }
+
+    /**
+     * Buffers the insertions for one job; nothing is written until
+     * {@link #flushData()} is called.
+     * <ul>
+     * <li>ProteinLog, row = job id: ip, dates, statuses and the protein</li>
+     * <li>ProteinRow, row = protein: one column "id;predictionName" per
+     * prediction, new lines removed from the value</li>
+     * <li>ProteinData, row = dataWork: column job id -> protein</li>
+     * </ul>
+     */
+    public void InsertData(long dataWork, String dataBegin, String dataEnd, String ip, String id, String statusEx, String statusFinal,
+            String protein, List<FastaSequence> jnetpred) {
+        mutatorLog.addInsertion(id, "ProteinLog", HFactory.createColumn("ip", ip, ss, ss))
+                .addInsertion(id, "ProteinLog", HFactory.createColumn("DataBegin", dataBegin, ss, ss))
+                .addInsertion(id, "ProteinLog", HFactory.createColumn("DataEnd", dataEnd, ss, ss))
+                .addInsertion(id, "ProteinLog", HFactory.createColumn("Status ex", statusEx, ss, ss))
+                .addInsertion(id, "ProteinLog", HFactory.createColumn("Status final", statusFinal, ss, ss))
+                .addInsertion(id, "ProteinLog", HFactory.createColumn("Protein", protein, ss, ss));
+        for (FastaSequence prediction : jnetpred) {
+            String namepred = prediction.getId();
+            String pred = prediction.getSequence().replaceAll("\n", "");
+            mutatorString.addInsertion(protein, "ProteinRow", HFactory.createColumn(id + ";" + namepred, pred, ss, ss));
+        }
+        mutatorLong.addInsertion(dataWork, "ProteinData", HFactory.createColumn(id, protein, ss, ss));
+    }
+
+    /** @return the keyspace opened by {@link #Connection()}, or null before that call */
+    public Keyspace GetKeyspace() {
+        return ksp;
+    }
+}
--- /dev/null
+package compbio.cassandra;
+
+import java.util.List;
+
+/**
+ * Plain value holder for one row of a report. Which fields are filled depends
+ * on the query that produced the object; unset fields keep their Java
+ * defaults (null / 0).
+ */
+public class DataBase {
+    /** Date label of the row. */
+    String date;
+    /** Counter associated with the date. */
+    int total;
+    /** NOTE(review): meaning not visible here - presumably a number of ids. */
+    int totalId;
+    /** Job identifier. */
+    String id;
+    /** Protein sequence. */
+    String prot;
+    /** Prediction string belonging to the protein. */
+    String jpred;
+    /** NOTE(review): presumably fragments of the protein; confirm with the caller. */
+    List<String> subProt;
+    /** Per-bucket counters of a timing report. */
+    List<Integer> timeRez;
+
+    /** Creates an empty holder. */
+    public DataBase() {
+    }
+
+    /** Creates a holder with the date label and its counter already set. */
+    public DataBase(String dat, int total) {
+        this.date = dat;
+        this.total = total;
+    }
+
+    // ---- date -----------------------------------------------------------
+
+    public String getDate() {
+        return this.date;
+    }
+
+    public void setDate(String dat) {
+        this.date = dat;
+    }
+
+    // ---- counters -------------------------------------------------------
+
+    public int getTotal() {
+        return this.total;
+    }
+
+    public void setTotal(int tot) {
+        this.total = tot;
+    }
+
+    public int getTotalId() {
+        return this.totalId;
+    }
+
+    public void setTotalId(int totId) {
+        this.totalId = totId;
+    }
+
+    // ---- job / protein --------------------------------------------------
+
+    public String getId() {
+        return this.id;
+    }
+
+    public void setId(String id) {
+        this.id = id;
+    }
+
+    public String getProt() {
+        return this.prot;
+    }
+
+    public void setProt(String prot) {
+        this.prot = prot;
+    }
+
+    public String getJpred() {
+        return this.jpred;
+    }
+
+    public void setJpred(String jpred) {
+        this.jpred = jpred;
+    }
+
+    // ---- lists ----------------------------------------------------------
+
+    public List<String> getSubProt() {
+        return this.subProt;
+    }
+
+    public void setSubProt(List<String> subProt) {
+        this.subProt = subProt;
+    }
+
+    public List<Integer> getTimeRez() {
+        return this.timeRez;
+    }
+
+    public void setTimeRez(List<Integer> timeRez) {
+        this.timeRez = timeRez;
+    }
+
+}
--- /dev/null
+package compbio.cassandra;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLConnection;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.List;
+
+public class DataParsing {
+ private CassandraCreate cc = new CassandraCreate();
+ private String dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results";
+
+ public void setDirPrefix (String newprefix) {
+ this.dirprefix = newprefix;
+ }
+
+ public void Parsing(String input, int nDays) {
+ Calendar cal = Calendar.getInstance();
+ cal.add(Calendar.DATE, -nDays);
+ for (int i = 0; i < nDays; ++i) {
+ cal.add(Calendar.DATE, 1);
+ int month = cal.get(Calendar.MONTH) + 1;
+ int year = cal.get(Calendar.YEAR);
+ int day = cal.get(Calendar.DATE);
+ String date = year + "/" + month + "/" + day;
+ ParsingForDate(input, date);
+ }
+ }
+
+ private void ParsingForDate(String input, String date) {
+ int totalcount = 0;
+ int countNoData = 0;
+ int countUnclearFASTAid = 0;
+ int countinsertions = 0;
+ int countinserted = 0;
+ int counAlignments = 0;
+ int countStrange = 0;
+
+ System.out.println("Inserting jobs for " + date);
+ try {
+ URL url = new URL(input);
+ URLConnection conn = url.openConnection();
+ BufferedReader alljobs = new BufferedReader(new InputStreamReader(conn.getInputStream()));
+ String line;
+
+ while ((line = alljobs.readLine()) != null) {
+ if (line.matches(date + "(.*)jp_[^\\s]+")) {
+ String[] table = line.split("\\s+");
+ String id = table[table.length - 1];
+ totalcount++;
+ if (!cc.CheckID(id)) {
+ URL urltable = new URL(dirprefix + "/" + id + "/" + id + ".concise.fasta");
+ HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection();
+ int responsecode = httpConnection.getResponseCode();
+ if (199 < responsecode && responsecode < 300) {
+ try {
+ final FastaReader fr = new FastaReader(urltable.openStream());
+ final List<FastaSequence> seqs = new ArrayList<FastaSequence>();
+ String newprotein = "";
+ while (fr.hasNext()) {
+ final FastaSequence fs = fr.next();
+ if (fs.getId().equals("QUERY") || fs.getId().equals(id))
+ newprotein = fs.getSequence().replaceAll("\n", "");
+ else
+ seqs.add(fs);
+ }
+ if (newprotein.equals("")) {
+ countUnclearFASTAid++;
+ } else {
+ SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
+ String dateInString1 = table[0].substring(0, table[0].indexOf(":"));
+ long dateWork1 = 0;
+ try {
+ Date dat1 = formatter.parse(dateInString1);
+ dateWork1 = dat1.getTime();
+ } catch (ParseException e) {
+ e.printStackTrace();
+ }
+ cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs);
+ ++countinsertions;
+ // flush every 100 insertions
+ if (0 == countinsertions % 100) {
+ cc.flushData();
+ }
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ } else {
+ countNoData++;
+ }
+ } else {
+ ++countinserted;
+ }
+ } else {
+ if (line.matches(date + "(.*)Sequence0/(.*)")) {
+ ++counAlignments;
+ } else {
+ ++countStrange;
+ }
+ }
+ }
+ alljobs.close();
+ System.out.println("Total number of jobs = " + totalcount);
+ System.out.println(" " + countinserted + " jobs inserted already");
+ System.out.println(" " + counAlignments + " jalview jobs");
+ System.out.println(" " + countStrange + " not analysed jobs");
+ System.out.println(" " + countNoData + " jobs without *.concise.fasta file");
+ System.out.println(" " + countUnclearFASTAid + " jobs with unclear FASTA protein id in *.concise.fasta");
+ System.out.println(" " + countinsertions + " new job insertions\n");
+ } catch (MalformedURLException e) {
+ e.printStackTrace();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+}
--- /dev/null
+package compbio.cassandra;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.InputStream;
+import java.util.Iterator;
+import java.util.Scanner;
+
+//import compbio.util.Util;
+
+/**
+ * Reads files with FASTA formatted sequences. All the information in the FASTA
+ * header is preserved including trailing white spaces. All the white spaces are
+ * removed from the sequence.
+ *
+ * Examples of the correct input:
+ *
+ * <pre>
+ *
+ * >zedpshvyzg
+ * GCQDKNNIAELNEIMGTTRSPSDWQHMKGASPRAEIGLTGKKDSWWRHCCSKEFNKTPPPIHPDMKRWGWMWNRENFEKFLIDNFLNPPCPRLMLTKGTWWRHEDLCHEIFWSTLRWLCLGNQSFSAMIWGHLCECHRMIWWESNEHMFWLKFRRALKKMNSNGPCMGPDNREWMITNRMGKEFCGPAFAGDCQSCWRKCHKTNKICFNEKKGTPTKIDHEQKDIMDILKDIDNHRNWKQCQLWLLTSKSTDQESTTMLTWSTWRDFFIIIKQPFDHKCRGALDANGDFQIAAELKWPAPMIILRQNQKTMHDKSCHHFFTNRCPLMHTTRANDKQCSWHTRKQFICQQDFTTWQHRPDTHRILPSWCMSTRRKNHIKNTPALAFSTCEMGDLPNGWAPGTIILQRQFTQAIKLPQETTGWPRCDPKFDHWNMSKWLRQLLGRDDEMIPPQCD
+ *
+ * >xovkactesa
+ * CPLSKWWNRRAFLSHTANHWMILMTWEGPHDGESKMRIAMMKWSPCKPTMSHFRCGLDAWAEPIRQIACESTFRM
+ * FCTTPRPIHKLTEMWGHMNGWTGAFCRQLECEWMMPPRHPHPCTSTFNNNKKRLIGQIPNEGKQLFINFQKPQHG
+ * FSESDIWIWKDNPTAWHEGLTIAGIGDGQHCWNWMPMPWSGAPTSNALIEFWTWLGMIGTRCKTQGMWWDAMNHH
+ * DQFELSANAHIAAHHMEKKMILKPDDRNLGDDTWMPPGKIWMRMFAKNTNACWPEGCRDDNEEDDCGTHNLHRMC
+ *
+ * >ntazzewyvv
+ * CGCKIF D D NMKDNNRHG TDIKKHGFMH IRHPE KRDDC FDNHCIMPKHRRWGLWD
+ * EASINM AQQWRSLPPSRIMKLNG HGCDCMHSHMEAD DTKQSGIKGTFWNG HDAQWLCRWG
+ * EFITEA WWGRWGAITFFHAH ENKNEIQECSDQNLKE SRTTCEIID TCHLFTRHLDGW
+ * RCEKCQANATHMTW ACTKSCAEQW FCAKELMMN
+ * W KQMGWRCKIFRKLFRDNCWID FELPWWPICFCCKGLSTKSHSAHDGDQCRRW WPDCARDWLGPGIRGEF
+ * FCTHICQQLQRNFWCGCFRWNIEKRMFEIFDDNMAAHWKKCMHFKFLIRIHRHGPITMKMTWCRSGCCFGKTRRLPDSSFISAFLDPKHHRDGSGMMMWSSEMRSCAIPDPQQAWNQGKWIGQIKDWNICFAWPIRENQQCWATPHEMPSGFHFILEKWDALAHPHMHIRQKKCWAWAFLSLMSSTHSDMATFQWAIPGHNIWSNWDNIICGWPRI
+ *
+ * > 12 d t y wi k jbke
+ * KLSHHDCD
+ * N
+ * H
+ * HSKCTEPHCGNSHQMLHRDP
+ * CCDQCQSWEAENWCASMRKAILF
+ *
+ * </pre>
+ *
+ * @author Peter Troshin
+ * @version 1.0 April 2011
+ *
+ */
+public class FastaReader implements Iterator<FastaSequence> {
+
+    /**
+     * Delimiter for the scanner
+     */
+    private static final String DELIM = ">";
+
+    private final Scanner input;
+
+    /**
+     * Header data can contain non-ASCII symbols and read in UTF8. A JVM
+     * shutdown hook is registered that closes the file if {@link #close()}
+     * was not called.
+     *
+     * @param inputFile
+     *            the file containing the list of FASTA formatted sequences to
+     *            read from
+     * @throws FileNotFoundException
+     *             if the input file is not found
+     */
+    public FastaReader(final String inputFile) throws FileNotFoundException {
+        input = new Scanner(new File(inputFile), "UTF8");
+        input.useDelimiter(DELIM);
+        Runtime.getRuntime().addShutdownHook(new Thread() {
+
+            @Override
+            public void run() {
+                if (input != null) {
+                    input.close();
+                }
+            }
+        });
+    }
+
+    /**
+     * Reads FASTA records from a stream, decoding it as UTF8 like the file
+     * based constructor does. No shutdown hook is registered for streams, so
+     * the client should call {@link #close()} (which also closes the stream)
+     * or close the stream itself.
+     *
+     * @param inputStream
+     *            the stream to read the FASTA formatted sequences from
+     * @throws FileNotFoundException
+     *             never thrown by this constructor; kept in the signature for
+     *             existing callers
+     */
+    public FastaReader(final InputStream inputStream)
+            throws FileNotFoundException {
+        input = new Scanner(inputStream, "UTF8");
+        input.useDelimiter(DELIM);
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * @throws IllegalStateException
+     *             if the close method was called on this instance
+     */
+    @Override
+    public boolean hasNext() {
+        return input.hasNext();
+    }
+
+    /**
+     * Reads the next FastaSequence from the input
+     *
+     * @throws AssertionError
+     *             if the record has no new line separating header and sequence
+     * @throws IllegalStateException
+     *             if the close method was called on this instance
+     * @throws java.util.NoSuchElementException
+     *             if there were no more FastaSequence's.
+     */
+    @Override
+    public FastaSequence next() {
+        String fastaHeader = input.next();
+        // A '>' inside the header line splits the record into several tokens:
+        // keep gluing tokens together until the header line is complete.
+        while (fastaHeader.indexOf("\n") < 0 && input.hasNext()) {
+            fastaHeader = fastaHeader.concat(">");
+            fastaHeader = fastaHeader.concat(input.next());
+        }
+        return FastaReader.toFastaSequence(fastaHeader);
+    }
+
+    /**
+     * Not implemented
+     */
+    @Override
+    public void remove() {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Call this method to close the connection to the input if you want to
+     * free up the resources. No further reading on this instance of the
+     * FastaReader will be possible after calling this method.
+     */
+    public void close() {
+        input.close();
+    }
+
+    /**
+     * Splits one raw record into its header (first line, carriage returns
+     * removed) and the rest. The sequence part is returned as it was read,
+     * starting with the new line that ended the header.
+     */
+    private static FastaSequence toFastaSequence(final String singleFastaEntry) {
+        int nlineidx = singleFastaEntry.indexOf("\n");
+        if (nlineidx < 0) {
+            throw new AssertionError(
+                    "The FASTA sequence must contain the header information"
+                            + " separated by the new line from the sequence. Given sequence does not appear to "
+                            + "contain the header! Given data:\n "
+                            + singleFastaEntry);
+        }
+        String header = singleFastaEntry.substring(0, nlineidx);
+
+        // Get rid of the new line chars (should cover common cases)
+        header = header.replaceAll("\r", "");
+
+        String sequence = singleFastaEntry.substring(nlineidx);
+        return new FastaSequence(header, sequence);
+    }
+}
--- /dev/null
+package compbio.cassandra;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+
+//import compbio.util.SysPrefs;
+//import compbio.util.annotation.Immutable;
+
+/**
+ * A FASTA formatted sequence. Please note that this class does not make any
+ * assumptions as to what sequence it stores e.g. it could be nucleotide,
+ * protein or even gapped alignment sequence! Note that the sequence is stored
+ * exactly as it is passed to the constructor: white space characters (spaces,
+ * new lines etc) are NOT removed by this class, callers have to strip them.
+ *
+ * @author pvtroshin
+ *
+ * @version 1.0 September 2009
+ */
+
+@XmlAccessorType(XmlAccessType.FIELD)
+//@Immutable
+public class FastaSequence {
+
+ /**
+ * Sequence id
+ */
+ private String id;
+
+ // TODO what about gapped sequence here! should be indicated
+ /**
+ * Returns the string representation of sequence
+ */
+ private String sequence;
+
+ FastaSequence() {
+ // Default constructor for JaxB
+ }
+
+ /**
+ * Upon construction the any whitespace characters are removed from the
+ * sequence
+ *
+ * @param id
+ * @param sequence
+ */
+ public FastaSequence(String id, String sequence) {
+ this.id = id;
+ this.sequence = sequence;
+ }
+
+ /**
+ * Gets the value of id
+ *
+ * @return the value of id
+ */
+ public String getId() {
+ return this.id;
+ }
+
+ /**
+ * Gets the value of sequence
+ *
+ * @return the value of sequence
+ */
+ public String getSequence() {
+ return this.sequence;
+ }
+
+ public static int countMatchesInSequence(final String theString,
+ final String theRegExp) {
+ final Pattern p = Pattern.compile(theRegExp);
+ final Matcher m = p.matcher(theString);
+ int cnt = 0;
+ while (m.find()) {
+ cnt++;
+ }
+ return cnt;
+ }
+
+ public String getFormattedFasta() {
+ return getFormatedSequence(80);
+ }
+
+ /**
+ *
+ * @return one line name, next line sequence, no matter what the sequence
+ * length is
+ */
+/* public String getOnelineFasta() {
+ String fasta = ">" + getId() + SysPrefs.newlinechar;
+ fasta += getSequence() + SysPrefs.newlinechar;
+ return fasta;
+ }
+
+ /**
+ * Format sequence per width letter in one string. Without spaces.
+ *
+ * @return multiple line formated sequence, one line width letters length
+ *
+ */
+ public String getFormatedSequence(final int width) {
+ if (sequence == null) {
+ return "";
+ }
+
+ assert width >= 0 : "Wrong width parameter ";
+
+ final StringBuilder sb = new StringBuilder(sequence);
+ // int tail = nrOfWindows % WIN_SIZE;
+ // final int turns = (nrOfWindows - tail) / WIN_SIZE;
+
+ int tailLen = sequence.length() % width;
+ // add up inserted new line chars
+ int nchunks = (sequence.length() - tailLen) / width;
+ int nlineCharcounter = 0;
+ int insPos = 0;
+ for (int i = 1; i <= nchunks; i++) {
+ insPos = width * i + nlineCharcounter;
+ // to prevent inserting new line in the very end of a sequence then
+ // it would have failed.
+ if (sb.length() <= insPos) {
+ break;
+ }
+ sb.insert(insPos, "\n");
+ nlineCharcounter++;
+ }
+ // sb.insert(insPos + tailLen, "\n");
+ return sb.toString();
+ }
+
+ /**
+ *
+ * @return sequence length
+ */
+ public int getLength() {
+ return this.sequence.length();
+ }
+
+ /**
+ * Same as oneLineFasta
+ */
+// @Override
+// public String toString() {
+// return this.getOnelineFasta();
+ // }
+
+ @Override
+ public int hashCode() {
+ final int prime = 17;
+ int result = 1;
+ result = prime * result + ((id == null) ? 0 : id.hashCode());
+ result = prime * result
+ + ((sequence == null) ? 0 : sequence.hashCode());
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (obj == null) {
+ return false;
+ }
+ if (!(obj instanceof FastaSequence)) {
+ return false;
+ }
+ FastaSequence fs = (FastaSequence) obj;
+ if (!fs.getId().equals(this.getId())) {
+ return false;
+ }
+ if (!fs.getSequence().equalsIgnoreCase(this.getSequence())) {
+ return false;
+ }
+ return true;
+ }
+
+}
--- /dev/null
+package compbio.listeners;
+
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import javax.servlet.ServletContextEvent;
+import javax.servlet.ServletContextListener;
+import javax.servlet.annotation.WebListener;
+
+import compbio.cassandra.CassandraCreate;
+
+/**
+ * Application Lifecycle Listener implementation class ContextListener
+ *
+ */
+@WebListener
+public class ContextListener implements ServletContextListener {
+    private ScheduledExecutorService scheduler;
+    CassandraCreate cc = new CassandraCreate();
+
+    /**
+     * Connects to Cassandra and starts a background task that re-reads the
+     * job list immediately and then every 60 seconds.
+     *
+     * @see ServletContextListener#contextInitialized(ServletContextEvent)
+     */
+    @Override
+    public void contextInitialized(ServletContextEvent arg0) {
+        System.out.println("ProteoCache session start......");
+        cc.Connection();
+
+        scheduler = Executors.newSingleThreadScheduledExecutor();
+        scheduler.scheduleAtFixedRate(new Runnable() {
+            @Override
+            public void run() {
+                try {
+                    cc.Parsing();
+                } catch (RuntimeException e) {
+                    // an exception escaping from here would silently cancel
+                    // all future runs of the task
+                    e.printStackTrace();
+                }
+            }
+        }, 0, 60, TimeUnit.SECONDS);
+    }
+
+    /**
+     * Stops the background task first and only then closes the database
+     * connection the task is using.
+     *
+     * @see ServletContextListener#contextDestroyed(ServletContextEvent)
+     */
+    @Override
+    public void contextDestroyed(ServletContextEvent arg0) {
+        // scheduler is null when contextInitialized failed before creating it
+        if (scheduler != null) {
+            scheduler.shutdownNow();
+            try {
+                // give a running parse a moment to notice the interruption
+                scheduler.awaitTermination(10, TimeUnit.SECONDS);
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt();
+            }
+        }
+        cc.Closing();
+        System.out.println("Shut down ProteoCache......");
+    }
+
+}
--- /dev/null
+package compbio.listeners;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.servlet.ServletException;
+import javax.servlet.annotation.WebServlet;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import compbio.cassandra.*;
+import compbio.statistic.StatisticsProt;
+
+/**
+ * Servlet implementation class DetailList
+ */
+@WebServlet("/DetailList")
+public class DetailList extends HttpServlet {
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Placeholder: the request is accepted but nothing is queried and nothing
+     * is written to the response.
+     *
+     * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse
+     *      response)
+     */
+    @Override
+    protected void doGet(HttpServletRequest request,
+            HttpServletResponse response) throws ServletException, IOException {
+        // TODO not implemented yet. The query that was prepared here and then
+        // disabled was:
+        // new StatisticsProt().readDetail(request.getParameter("data1"),
+        // request.getParameter("data2"));
+    }
+
+    /**
+     * Placeholder, does nothing.
+     *
+     * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse
+     *      response)
+     */
+    @Override
+    protected void doPost(HttpServletRequest request,
+            HttpServletResponse response) throws ServletException, IOException {
+        // TODO Auto-generated method stub
+    }
+
+}
--- /dev/null
+package compbio.listeners;
+
+import java.io.IOException;
+import java.util.Calendar;
+import java.util.List;
+
+import javax.servlet.RequestDispatcher;
+import javax.servlet.ServletException;
+import javax.servlet.annotation.WebServlet;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import compbio.cassandra.DataBase;
+import compbio.statistic.StatisticsProt;
+
+/**
+ * Servlet implementation class LengthServlet
+ */
+@WebServlet("/LengthServlet")
+public class LengthServlet extends HttpServlet {
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Runs the execution time report and forwards to /ReportLength.jsp.
+     * Without the "option" parameter the period is taken from "data1" and
+     * "data2"; with it the period runs from the earliest stored date till
+     * today.
+     *
+     * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse
+     *      response)
+     */
+    @Override
+    protected void doGet(HttpServletRequest request,
+            HttpServletResponse response) throws ServletException, IOException {
+        final String flag = request.getParameter("option");
+        final String date1 = request.getParameter("data1");
+        final String date2 = request.getParameter("data2");
+        final StatisticsProt sp = new StatisticsProt();
+
+        final List<DataBase> result;
+        if (flag != null) {
+            // whole history: earliest stored day .. today
+            final Calendar cal = Calendar.getInstance();
+            final String dateB = StatisticsProt.DateFormatYYMMDD(sp.earliestDate());
+            final int year = cal.get(Calendar.YEAR);
+            final int month = cal.get(Calendar.MONTH) + 1;
+            final int day = cal.get(Calendar.DAY_OF_MONTH);
+            final String dateEnd = year + "/" + month + "/" + day;
+            result = sp.readLength(dateB, dateEnd);
+        } else {
+            result = sp.readLength(date1, date2);
+        }
+
+        request.setAttribute("data1", date1);
+        request.setAttribute("data2", date2);
+        request.setAttribute("result", result);
+        request.setAttribute("flag", flag);
+        final RequestDispatcher rd = request.getRequestDispatcher("/ReportLength.jsp");
+        rd.forward(request, response);
+    }
+
+    /**
+     * Handled exactly like a GET request.
+     *
+     * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse
+     *      response)
+     */
+    @Override
+    protected void doPost(HttpServletRequest request,
+            HttpServletResponse response) throws ServletException, IOException {
+        doGet(request, response);
+    }
+}
--- /dev/null
+package compbio.listeners;
+
+import java.io.IOException;
+import java.util.List;
+
+import javax.servlet.RequestDispatcher;
+import javax.servlet.ServletException;
+import javax.servlet.annotation.WebServlet;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import compbio.cassandra.DataBase;
+import compbio.statistic.StatisticsProt;
+
+/**
+ * Servlet implementation class ProtServlet
+ */
+@WebServlet("/ProtServlet")
+public class ProtServlet extends HttpServlet {
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Runs one of the protein queries and forwards to /ReportProt.jsp:
+     * <ul>
+     * <li>"option" present: {@code readProtID()}</li>
+     * <li>"protein" equals "whole": {@code readProt(prot)}</li>
+     * <li>anything else, including a missing "protein" parameter:
+     * {@code readPart(prot)}</li>
+     * </ul>
+     *
+     * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse
+     *      response)
+     */
+    @Override
+    protected void doGet(HttpServletRequest request,
+            HttpServletResponse response) throws ServletException, IOException {
+        List<DataBase> result;
+        String flag = request.getParameter("protein");
+        String prot = request.getParameter("prot");
+        String checkbox = request.getParameter("option");
+        StatisticsProt sp = new StatisticsProt();
+        if (checkbox != null) {
+            result = sp.readProtID();
+        } else if ("whole".equals(flag)) { // constant first: flag may be null
+            result = sp.readProt(prot);
+        } else {
+            result = sp.readPart(prot);
+        }
+        request.setAttribute("prot", prot);
+        request.setAttribute("flag", flag);
+        request.setAttribute("checkbox", checkbox);
+        request.setAttribute("result", result);
+        RequestDispatcher rd = request.getRequestDispatcher("/ReportProt.jsp");
+        rd.forward(request, response);
+    }
+
+    /**
+     * Handled exactly like a GET request.
+     *
+     * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse
+     *      response)
+     */
+    @Override
+    protected void doPost(HttpServletRequest request,
+            HttpServletResponse response) throws ServletException, IOException {
+        doGet(request, response);
+    }
+
+}
--- /dev/null
+package compbio.listeners;
+
+import java.io.IOException;
+import java.util.Calendar;
+import java.util.List;
+
+import javax.servlet.RequestDispatcher;
+import javax.servlet.ServletException;
+import javax.servlet.annotation.WebServlet;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import compbio.cassandra.*;
+import compbio.statistic.StatisticsProt;
+
+@WebServlet("/QueryServlet")
+public class QueryServlet extends HttpServlet {
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Runs the jobs-per-day report and forwards to /ReportNew.jsp. Without
+     * the "option" parameter the period is taken from "data1" and "data2";
+     * with it the period runs from the earliest stored date till today.
+     *
+     * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse
+     *      response)
+     */
+    @Override
+    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
+        final String flag = request.getParameter("option");
+        final String date1 = request.getParameter("data1");
+        final String date2 = request.getParameter("data2");
+        final StatisticsProt sp = new StatisticsProt();
+
+        final List<DataBase> result;
+        if (flag != null) {
+            // whole history: earliest stored day .. today
+            final Calendar cal = Calendar.getInstance();
+            final String dateB = StatisticsProt.DateFormatYYMMDD(sp.earliestDate());
+            final int year = cal.get(Calendar.YEAR);
+            final int month = cal.get(Calendar.MONTH) + 1;
+            final int day = cal.get(Calendar.DAY_OF_MONTH);
+            final String dateEnd = year + "/" + month + "/" + day;
+            result = sp.readDetail(dateB, dateEnd);
+        } else {
+            result = sp.readDetail(date1, date2);
+        }
+
+        request.setAttribute("data1", date1);
+        request.setAttribute("data2", date2);
+        request.setAttribute("result", result);
+        request.setAttribute("flag", flag);
+        final RequestDispatcher rd = request.getRequestDispatcher("/ReportNew.jsp");
+        rd.forward(request, response);
+    }
+
+    /**
+     * Handled exactly like a GET request.
+     *
+     * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse
+     *      response)
+     */
+    @Override
+    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
+        doGet(request, response);
+    }
+
+}
--- /dev/null
+package compbio.statistic;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Collections;
+import java.util.Date;
+import java.util.Iterator;
+import java.util.List;
+
+import me.prettyprint.cassandra.serializers.LongSerializer;
+import me.prettyprint.cassandra.serializers.StringSerializer;
+import me.prettyprint.hector.api.beans.ColumnSlice;
+import me.prettyprint.hector.api.beans.HColumn;
+import me.prettyprint.hector.api.beans.OrderedRows;
+import me.prettyprint.hector.api.beans.Row;
+import me.prettyprint.hector.api.factory.HFactory;
+import me.prettyprint.hector.api.query.QueryResult;
+import me.prettyprint.hector.api.query.RangeSlicesQuery;
+import me.prettyprint.hector.api.query.SliceQuery;
+import compbio.cassandra.CassandraCreate;
+import compbio.cassandra.DataBase;
+
+public class StatisticsProt {
+ private final static long MILLISECONDS_PER_DAY = 1000L * 60 * 60 * 24;
+ private CassandraCreate cc = new CassandraCreate();
+ private ArrayList<DataBase> query;
+
+    /**
+     * Query for the period from date1 till date2: counts the columns of the
+     * ProteinData row of every day in the period. Days without columns are
+     * left out of the result.
+     *
+     * @param dateInStringSt
+     *            first day of the period
+     * @param dateInStringEnd
+     *            last day of the period
+     * @return one entry (date, number of columns) per day that has data; null
+     *         when the start date is not valid; the previous content of the
+     *         query field when one of the dates fails CheckDate
+     */
+    public List<DataBase> readDetail(String dateInStringSt, String dateInStringEnd) {
+        if (!isThisDateValid(dateInStringSt))
+            return null;
+        long dateWorkSt = DateParsing(dateInStringSt);
+        long dateWorkEnd = DateParsing(dateInStringEnd);
+        if (CheckDate(dateWorkSt) && CheckDate(dateWorkEnd)) {
+            query = new ArrayList<DataBase>();
+            // Step with a Calendar rather than by a fixed 24 hours: the row
+            // keys are written as the parsed local day start, and a day is 23
+            // or 25 hours long when daylight saving time changes.
+            // NOTE(review): assumes DateParsing also returns the local day
+            // start - confirm.
+            Calendar day = Calendar.getInstance();
+            day.setTimeInMillis(dateWorkSt);
+            while (day.getTimeInMillis() <= dateWorkEnd) {
+                long dayKey = day.getTimeInMillis();
+                SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
+                        StringSerializer.get(), StringSerializer.get());
+                result.setColumnFamily("ProteinData");
+                result.setKey(dayKey);
+                result.setRange(null, null, false, Integer.MAX_VALUE);
+                QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
+                int jobs = columnSlice.get().getColumns().size();
+                if (jobs > 0) {
+                    query.add(new DataBase(DateFormat(dayKey), jobs));
+                }
+                day.add(Calendar.DATE, 1);
+            }
+        } else
+            System.out.println("Wrong date");
+        return query;
+    }
+
+    /**
+     * Find the earliest date: pages through the row keys of ProteinData,
+     * 10000 rows per request, and returns the smallest key.
+     *
+     * @return the smallest row key found
+     * @throws IndexOutOfBoundsException
+     *             if the column family has no rows
+     */
+    public long earliestDate() {
+        final int pageSize = 10000;
+        final ArrayList<Long> keys = new ArrayList<Long>();
+        RangeSlicesQuery<Long, String, String> pageQuery = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), LongSerializer.get(),
+                StringSerializer.get(), StringSerializer.get());
+        pageQuery.setColumnFamily("ProteinData");
+        pageQuery.setRange(null, null, false, Integer.MAX_VALUE);
+        pageQuery.setRowCount(pageSize);
+
+        Long startKey = null;
+        OrderedRows<Long, String, String> page;
+        do {
+            // the next page starts at the last key seen so far
+            pageQuery.setKeys(startKey, null);
+            page = pageQuery.execute().get();
+            for (Iterator<Row<Long, String, String>> it = page.iterator(); it.hasNext();) {
+                startKey = it.next().getKey();
+                keys.add(startKey);
+            }
+            // a page that is not full is the last one
+        } while (page.getCount() >= pageSize);
+
+        Collections.sort(keys);
+        return keys.get(0);
+    }
+
+    /**
+     * Query execution time for the period from dateInStringSt till
+     * dateInStringEnd. For every day that has jobs, the value CountID returns
+     * for each job is sorted into four buckets: up to 30, up to 60, up to 120
+     * and above 120.
+     *
+     * @param dateInStringSt
+     *            first day of the period
+     * @param dateInStringEnd
+     *            last day of the period
+     * @return one entry (date, four bucket counters) per day that has data;
+     *         the previous content of the query field when one of the dates
+     *         fails CheckDate
+     */
+    public List<DataBase> readLength(String dateInStringSt, String dateInStringEnd) {
+        long dateWorkSt = DateParsing(dateInStringSt);
+        long dateWorkEnd = DateParsing(dateInStringEnd);
+        if (CheckDate(dateWorkSt) && CheckDate(dateWorkEnd)) {
+            query = new ArrayList<DataBase>();
+            // Step with a Calendar rather than by a fixed 24 hours: the row
+            // keys are written as the parsed local day start, and a day is 23
+            // or 25 hours long when daylight saving time changes.
+            // NOTE(review): assumes DateParsing also returns the local day
+            // start - confirm.
+            Calendar day = Calendar.getInstance();
+            day.setTimeInMillis(dateWorkSt);
+            while (day.getTimeInMillis() <= dateWorkEnd) {
+                long dayKey = day.getTimeInMillis();
+                SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
+                        StringSerializer.get(), StringSerializer.get());
+                result.setColumnFamily("ProteinData");
+                result.setKey(dayKey);
+                result.setRange(null, null, false, Integer.MAX_VALUE);
+                QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
+                List<HColumn<String, String>> col = columnSlice.get().getColumns();
+                if (!col.isEmpty()) {
+                    List<Integer> timeResult = new ArrayList<Integer>();
+                    for (int i = 0; i < 4; i++)
+                        timeResult.add(0);
+                    for (HColumn<String, String> job : col) {
+                        // the column name is the job id
+                        long lenResult = CountID(job.getName());
+                        int bucket;
+                        if (lenResult <= 30)
+                            bucket = 0;
+                        else if (lenResult <= 60)
+                            bucket = 1;
+                        else if (lenResult <= 120)
+                            bucket = 2;
+                        else
+                            bucket = 3;
+                        timeResult.set(bucket, timeResult.get(bucket) + 1);
+                    }
+                    DataBase db = new DataBase();
+                    db.setTimeRez(timeResult);
+                    db.setDate(DateFormat(dayKey));
+                    query.add(db);
+                }
+                day.add(Calendar.DATE, 1);
+            }
+        } else
+            System.out.println("Wrong date");
+        return query;
+    }
+
+ // query by a protein sequence
+ /**
+  * Reads the "ProteinRow" row whose key is the given protein sequence and
+  * turns every column of that row into a DataBase entry.
+  *
+  * @param protIn protein sequence used as the row key
+  * @return one DataBase per column, with the sequence, the column name as id
+  *         and the column value as the Jpred result; empty when the row has
+  *         no columns. The same list is also stored in the query field.
+  */
+ public List<DataBase> readProt(String protIn) {
+     query = new ArrayList<DataBase>();
+     SliceQuery<String, String, String> rowQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
+             StringSerializer.get(), StringSerializer.get());
+     rowQuery.setColumnFamily("ProteinRow");
+     rowQuery.setKey(protIn);
+     rowQuery.setRange(null, null, false, Integer.MAX_VALUE);
+     ColumnSlice<String, String> slice = rowQuery.execute().get();
+     for (HColumn<String, String> column : slice.getColumns()) {
+         DataBase entry = new DataBase();
+         entry.setProt(protIn);
+         entry.setId(column.getName());
+         entry.setJpred(column.getValue());
+         query.add(entry);
+     }
+     return query;
+ }
+
+ // list the protein sequences whose ProteinRow row has more than 3 columns
+ /**
+  * Scans the whole "ProteinRow" column family and reports every row (protein
+  * sequence) that has more than 3 columns, together with its column count.
+  * <p>
+  * Rows are read in pages of 10000. Each following page starts at the last
+  * key of the previous page, so that first row is a repeat and is skipped;
+  * otherwise it would be reported twice.
+  *
+  * @return one DataBase per qualifying row, carrying the row key and the
+  *         number of columns; the same list is stored in the query field
+  */
+ public List<DataBase> readProtID() {
+     query = new ArrayList<DataBase>();
+     int row_count = 10000;
+     RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
+             StringSerializer.get(), StringSerializer.get());
+     result.setColumnFamily("ProteinRow");
+     result.setRange(null, null, false, Integer.MAX_VALUE);
+     result.setRowCount(row_count);
+     String last_key = null;
+     while (true) {
+         // key this page starts from; null only for the very first page
+         String pageStart = last_key;
+         result.setKeys(pageStart, null);
+         OrderedRows<String, String, String> rows = result.execute().get();
+         for (Row<String, String, String> row : rows) {
+             last_key = row.getKey();
+             if (last_key.equals(pageStart)) {
+                 // already handled as the last row of the previous page
+                 continue;
+             }
+             int idCount = row.getColumnSlice().getColumns().size();
+             if (idCount > 3) {
+                 DataBase db = new DataBase();
+                 db.setProt(last_key);
+                 db.setTotalId(idCount);
+                 query.add(db);
+             }
+         }
+         // a short page means the end of the column family was reached
+         if (rows.getCount() < row_count) {
+             break;
+         }
+     }
+     return query;
+ }
+
+ // query by a part of sequence
+ /**
+  * Finds every "ProteinRow" row whose key (the protein sequence) contains
+  * the given fragment and returns one DataBase entry per column of each
+  * such row.
+  * <p>
+  * The fragment is matched literally, the same way it is later used to cut
+  * the sequence into pieces, so characters that are special in regular
+  * expressions need no escaping. Rows are read in pages of 10000; the first
+  * row of every following page repeats the last row of the previous page
+  * and is skipped so that no match is reported twice.
+  *
+  * @param protIn fragment to look for inside the sequences
+  * @return one DataBase per column of every matching row, carrying the
+  *         sequence, the column name as id, the column value as the Jpred
+  *         result and the sequence split around the fragment; an empty list
+  *         when protIn is null or empty. The list is also stored in the
+  *         query field.
+  */
+ public List<DataBase> readPart(String protIn) {
+     int row_count = 10000;
+     query = new ArrayList<DataBase>();
+     // an empty fragment can never shorten the remainder while splitting
+     // (endless loop in the original); null cannot be searched for at all
+     if (protIn == null || protIn.length() == 0) {
+         return query;
+     }
+     RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
+             StringSerializer.get(), StringSerializer.get());
+     result.setColumnFamily("ProteinRow");
+     result.setRange(null, null, false, Integer.MAX_VALUE);
+     result.setRowCount(row_count);
+     String last_key = null;
+     while (true) {
+         // key this page starts from; null only for the very first page
+         String pageStart = last_key;
+         result.setKeys(pageStart, null);
+         OrderedRows<String, String, String> rows = result.execute().get();
+         for (Row<String, String, String> row : rows) {
+             last_key = row.getKey();
+             if (last_key.equals(pageStart)) {
+                 // already handled as the last row of the previous page
+                 continue;
+             }
+             if (!last_key.contains(protIn)) {
+                 continue;
+             }
+             // the split depends only on the row key, so compute it once per row
+             List<String> subProt = splitAroundFragment(last_key, protIn);
+             for (HColumn<String, String> col : row.getColumnSlice().getColumns()) {
+                 DataBase db = new DataBase();
+                 db.setProt(last_key);
+                 db.setId(col.getName());
+                 db.setJpred(col.getValue());
+                 // each entry keeps its own list, as before
+                 db.setSubProt(new ArrayList<String>(subProt));
+                 query.add(db);
+             }
+         }
+         // a short page means the end of the column family was reached
+         if (rows.getCount() < row_count) {
+             break;
+         }
+     }
+     return query;
+ }
+
+ // Cuts sequence into the pieces before, between and after every occurrence
+ // of fragment, keeping the occurrences themselves and dropping empty pieces.
+ // fragment must be non-empty.
+ private static List<String> splitAroundFragment(String sequence, String fragment) {
+     List<String> parts = new ArrayList<String>();
+     int from = 0;
+     int hit = sequence.indexOf(fragment, from);
+     while (hit >= 0) {
+         if (hit > from) {
+             parts.add(sequence.substring(from, hit));
+         }
+         parts.add(fragment);
+         from = hit + fragment.length();
+         hit = sequence.indexOf(fragment, from);
+     }
+     if (from < sequence.length()) {
+         parts.add(sequence.substring(from));
+     }
+     return parts;
+ }
+
+ // convert a yyyy/MM/dd String to milliseconds since the epoch (0 on failure)
+ /**
+  * Parses a date written as yyyy/MM/dd.
+  *
+  * @param datInput text to parse, may be null
+  * @return the parsed date in milliseconds since the epoch, or 0 when the
+  *         input is null or cannot be parsed (the stack trace is printed in
+  *         the latter case)
+  */
+ private static long DateParsing(String datInput) {
+     if (datInput != null) {
+         try {
+             return new SimpleDateFormat("yyyy/MM/dd").parse(datInput).getTime();
+         } catch (ParseException e) {
+             e.printStackTrace();
+         }
+     }
+     return 0;
+ }
+
+ // convert a date-time String to milliseconds since the epoch (0 on failure)
+ /**
+  * Parses a time stamp written as yyyy/MM/dd:HH:mm:ss.
+  * <p>
+  * The hour is read on the 24-hour clock. The pattern carries no AM/PM
+  * marker, and with the 12-hour letter an hour of "12" is read as 0, which
+  * corrupts any duration that starts or ends between 12:00 and 12:59.
+  *
+  * @param datInput text to parse, may be null
+  * @return the parsed instant in milliseconds since the epoch, or 0 when
+  *         the input is null or cannot be parsed (the stack trace is
+  *         printed in the latter case)
+  */
+ private static long TimeConvert(String datInput) {
+     long dateWorkSt = 0;
+     if (datInput == null) {
+         return dateWorkSt;
+     }
+     SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:HH:mm:ss");
+     try {
+         dateWorkSt = formatter.parse(datInput).getTime();
+     } catch (ParseException e) {
+         e.printStackTrace();
+     }
+     return dateWorkSt;
+ }
+
+ // convert long to date in string format
+ /**
+  * Renders a point in time as a dd/MM/yyyy date.
+  *
+  * @param inDate milliseconds since the epoch
+  * @return the formatted date
+  */
+ private static String DateFormat(long inDate) {
+     return new SimpleDateFormat("dd/MM/yyyy").format(new Date(inDate));
+ }
+
+ /**
+  * Renders a point in time as yyyy/MM/dd:HH:mm:ss.
+  * <p>
+  * The hour is written on the 24-hour clock: the pattern has no AM/PM
+  * marker, so a 12-hour value would make morning and afternoon times
+  * indistinguishable.
+  *
+  * @param inDate milliseconds since the epoch
+  * @return the formatted time stamp
+  */
+ private static String DateFormat1(long inDate) {
+     SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd:HH:mm:ss");
+     return datformat.format(new Date(inDate));
+ }
+
+ /**
+  * Renders a point in time as a yyyy/MM/dd date.
+  *
+  * @param indate milliseconds since the epoch
+  * @return the formatted date
+  */
+ public static String DateFormatYYMMDD(long indate) {
+     return new SimpleDateFormat("yyyy/MM/dd").format(new Date(indate));
+ }
+
+ /**
+  * Computes how long a job ran, from its "ProteinLog" row.
+  * <p>
+  * Reads up to 100 columns of the row keyed by the job id and converts the
+  * values of the "DataBegin" and "DataEnd" columns with TimeConvert.
+  *
+  * @param id job identifier used as the row key
+  * @return the difference between end and begin, in whole seconds
+  */
+ public long CountID(String id) {
+     SliceQuery<String, String, String> logQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
+             StringSerializer.get(), StringSerializer.get());
+     logQuery.setColumnFamily("ProteinLog");
+     logQuery.setKey(id);
+     logQuery.setRange("", "", false, 100);
+     ColumnSlice<String, String> logColumns = logQuery.execute().get();
+     // NOTE(review): presumably getColumnByName yields null for an absent
+     // column, in which case the next lines throw - confirm every job has both.
+     String begin = logColumns.getColumnByName("DataBegin").getValue();
+     String end = logColumns.getColumnByName("DataEnd").getValue();
+     long elapsedMillis = TimeConvert(end) - TimeConvert(begin);
+     return elapsedMillis / 1000;
+ }
+
+ /**
+  * Tells whether a date lies between the earliest stored date and today.
+  *
+  * @param indate date in milliseconds since the epoch; 0 stands for "no date"
+  * @return true when indate is not 0, is not before the value reported by
+  *         earliestDate() of a fresh StatisticsProt, and is not after the
+  *         start of the current day
+  */
+ public static boolean CheckDate(long indate) {
+     if (indate == 0) {
+         return false;
+     }
+     StatisticsProt stats = new StatisticsProt();
+     Calendar now = Calendar.getInstance();
+     StringBuilder today = new StringBuilder();
+     today.append(now.get(Calendar.YEAR));
+     today.append("/");
+     // Calendar months are zero-based
+     today.append(now.get(Calendar.MONTH) + 1);
+     today.append("/");
+     today.append(now.get(Calendar.DAY_OF_MONTH));
+     if (indate < stats.earliestDate()) {
+         return false;
+     }
+     return indate <= DateParsing(today.toString());
+ }
+
+ /**
+  * Checks that a text is a real calendar date written as yyyy/MM/dd.
+  * Parsing is strict, so out-of-range fields such as month 13 are rejected.
+  *
+  * @param dateToValidate text to check, may be null
+  * @return true when the text parses; false when it is null or does not
+  *         parse (the stack trace is printed in the latter case)
+  */
+ public boolean isThisDateValid(String dateToValidate) {
+     if (dateToValidate == null) {
+         return false;
+     }
+     SimpleDateFormat strictFormat = new SimpleDateFormat("yyyy/MM/dd");
+     strictFormat.setLenient(false);
+     try {
+         // only the success or failure of parsing matters; the value is not needed
+         strictFormat.parse(dateToValidate);
+         return true;
+     } catch (ParseException e) {
+         e.printStackTrace();
+         return false;
+     }
+ }
+
+}