From 3e72c944b3a4168dbd3eb8105fd7afc9c68c6fb1 Mon Sep 17 00:00:00 2001 From: Sasha Sherstnev Date: Thu, 24 Oct 2013 18:25:29 +0100 Subject: [PATCH] First working code --- datadb/compbio/cassandra/CassandraCreate.java | 127 ++++++++++ datadb/compbio/cassandra/DataBase.java | 87 +++++++ datadb/compbio/cassandra/DataParsing.java | 125 ++++++++++ datadb/compbio/cassandra/FastaReader.java | 173 +++++++++++++ datadb/compbio/cassandra/FastaSequence.java | 179 ++++++++++++++ server/compbio/listeners/ContextListener.java | 47 ++++ server/compbio/listeners/DetailList.java | 49 ++++ server/compbio/listeners/LengthServlet.java | 62 +++++ server/compbio/listeners/ProtServlet.java | 59 +++++ server/compbio/listeners/QueryServlet.java | 51 ++++ server/compbio/statistic/StatisticsProt.java | 327 +++++++++++++++++++++++++ 11 files changed, 1286 insertions(+) create mode 100644 datadb/compbio/cassandra/CassandraCreate.java create mode 100644 datadb/compbio/cassandra/DataBase.java create mode 100644 datadb/compbio/cassandra/DataParsing.java create mode 100644 datadb/compbio/cassandra/FastaReader.java create mode 100644 datadb/compbio/cassandra/FastaSequence.java create mode 100644 server/compbio/listeners/ContextListener.java create mode 100644 server/compbio/listeners/DetailList.java create mode 100644 server/compbio/listeners/LengthServlet.java create mode 100644 server/compbio/listeners/ProtServlet.java create mode 100644 server/compbio/listeners/QueryServlet.java create mode 100644 server/compbio/statistic/StatisticsProt.java diff --git a/datadb/compbio/cassandra/CassandraCreate.java b/datadb/compbio/cassandra/CassandraCreate.java new file mode 100644 index 0000000..54fffac --- /dev/null +++ b/datadb/compbio/cassandra/CassandraCreate.java @@ -0,0 +1,127 @@ +package compbio.cassandra; + +import java.util.Arrays; +import java.util.List; + +import me.prettyprint.cassandra.serializers.LongSerializer; +import me.prettyprint.cassandra.serializers.StringSerializer; +import 
me.prettyprint.cassandra.service.ThriftKsDef; +import me.prettyprint.hector.api.Cluster; +import me.prettyprint.hector.api.Keyspace; +import me.prettyprint.hector.api.beans.ColumnSlice; +import me.prettyprint.hector.api.ddl.ColumnFamilyDefinition; +import me.prettyprint.hector.api.ddl.ComparatorType; +import me.prettyprint.hector.api.ddl.KeyspaceDefinition; +import me.prettyprint.hector.api.factory.HFactory; +import me.prettyprint.hector.api.mutation.Mutator; +import me.prettyprint.hector.api.query.QueryResult; +import me.prettyprint.hector.api.query.SliceQuery; + +public class CassandraCreate { + private static Keyspace ksp; + private static Cluster cluster; + private static Mutator mutatorLong; + private static Mutator mutatorString; + private static Mutator mutatorLog; + StringSerializer ss = StringSerializer.get(); + LongSerializer ls = LongSerializer.get(); + + // connect to the cluster + public void Connection() { + cluster = HFactory.getOrCreateCluster("Protein Cluster", "127.0.0.1:9160"); + KeyspaceDefinition keyspaceDef = cluster.describeKeyspace("ProteinKeyspace"); + /* + * If keyspace does not exist, the CFs don't exist either. => create + * them. + */ + if (keyspaceDef == null) { // create column family + System.out.println("ProteinKeyspace has been null"); + ColumnFamilyDefinition cfProtein = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinRow", + ComparatorType.ASCIITYPE); + ColumnFamilyDefinition cfLog = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinLog", ComparatorType.ASCIITYPE); + ColumnFamilyDefinition cfData = HFactory.createColumnFamilyDefinition("ProteinKeyspace", "ProteinData", + ComparatorType.ASCIITYPE); + + KeyspaceDefinition newKeyspace = HFactory.createKeyspaceDefinition("ProteinKeyspace", ThriftKsDef.DEF_STRATEGY_CLASS, 1, + Arrays.asList(cfProtein, cfLog, cfData)); + /* + * Add the schema to the cluster. "true" as the second param means + * that Hector will be blocked until all nodes see the change. 
+ */ + cluster.addKeyspace(newKeyspace, true); + cluster.addColumnFamily(cfProtein, true); + cluster.addColumnFamily(cfLog, true); + cluster.addColumnFamily(cfData, true); + } else { + System.out.println("Data loaded"); + } + ksp = HFactory.createKeyspace("ProteinKeyspace", cluster); + System.out.println("Cassandra has been connected"); + } + + /* + * parsing data from + * http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat + */ + public void Parsing() { + /* CF ProteinRow store protein and prediction */ + mutatorString = HFactory.createMutator(ksp, ss); + + /* + * ProteinLog stores logging info: IP, job id, start date and end date + */ + mutatorLog = HFactory.createMutator(ksp, ss); + + /* CF ProteinData store id and protein per data */ + mutatorLong = HFactory.createMutator(ksp, ls); + + System.out.println("Parsing......"); + String in = "http://www.compbio.dundee.ac.uk/www-jpred/results/usage-new/alljobs.dat"; + DataParsing datParsing = new DataParsing(); + datParsing.Parsing(in, 4); + flushData(); + } + + public void flushData() { + mutatorString.execute(); + mutatorLong.execute(); + mutatorLog.execute(); + //System.out.println("Flush new data..."); + } + + public void Closing() { + cluster.getConnectionManager().shutdown(); + System.out.println("Cassandra has been shut down"); + } + + // check whether the job id exists in the DB + public boolean CheckID(String jobid) { + SliceQuery sliceQuery = HFactory.createSliceQuery(ksp, ss, ss, ss); + sliceQuery.setColumnFamily("ProteinLog").setKey(jobid).setRange("", "", false, 100); + QueryResult> result = sliceQuery.execute(); + if (result.get().getColumns().size() > 0) { + return true; + } + return false; + } + + public void InsertData(long dataWork, String dataBegin, String dataEnd, String ip, String id, String statusEx, String statusFinal, + String protein, List jnetpred) { + mutatorLog.addInsertion(id, "ProteinLog", HFactory.createColumn("ip", ip, ss, ss)) + .addInsertion(id, "ProteinLog", 
HFactory.createColumn("DataBegin", dataBegin, ss, ss)) + .addInsertion(id, "ProteinLog", HFactory.createColumn("DataEnd", dataEnd, ss, ss)) + .addInsertion(id, "ProteinLog", HFactory.createColumn("Status ex", statusEx, ss, ss)) + .addInsertion(id, "ProteinLog", HFactory.createColumn("Status final", statusFinal, ss, ss)) + .addInsertion(id, "ProteinLog", HFactory.createColumn("Protein", protein, ss, ss)); + for (int i = 0; i < jnetpred.size(); i++) { + String namepred = jnetpred.get(i).getId(); + String pred = jnetpred.get(i).getSequence().replaceAll("\n", ""); + mutatorString.addInsertion(protein, "ProteinRow", HFactory.createColumn(id + ";" + namepred, pred, ss, ss)); + } + mutatorLong.addInsertion(dataWork, "ProteinData", HFactory.createColumn(id, protein, ss, ss)); + } + + public Keyspace GetKeyspace() { + return ksp; + } +} diff --git a/datadb/compbio/cassandra/DataBase.java b/datadb/compbio/cassandra/DataBase.java new file mode 100644 index 0000000..c661e77 --- /dev/null +++ b/datadb/compbio/cassandra/DataBase.java @@ -0,0 +1,87 @@ +package compbio.cassandra; + +import java.util.List; + +public class DataBase { + String date; + int total; + int totalId; + String id; + String prot; + String jpred; + List subProt; + List timeRez; + + public DataBase() { + } + + public DataBase(String dat, int total) { + this.date = dat; + this.total = total; + } + + public void setDate(String dat) { + this.date = dat; + } + + public String getDate() { + return date; + } + + public void setTotal(int tot) { + this.total = tot; + } + + public int getTotal() { + return total; + } + + public void setTotalId(int totId) { + this.totalId = totId; + } + + public int getTotalId() { + return totalId; + } + + public void setProt(String prot) { + this.prot = prot; + } + + public String getProt() { + return prot; + } + + public void setJpred(String jpred) { + this.jpred = jpred; + } + + public String getJpred() { + return jpred; + } + + public void setId(String id) { + this.id = id; + } + + 
public String getId() { + return id; + } + + public void setSubProt(List subProt) { + this.subProt = subProt; + } + + public List getSubProt() { + return subProt; + } + + public void setTimeRez(List timeRez) { + this.timeRez = timeRez; + } + + public List getTimeRez() { + return timeRez; + } + +} diff --git a/datadb/compbio/cassandra/DataParsing.java b/datadb/compbio/cassandra/DataParsing.java new file mode 100644 index 0000000..d548f56 --- /dev/null +++ b/datadb/compbio/cassandra/DataParsing.java @@ -0,0 +1,125 @@ +package compbio.cassandra; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLConnection; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; +import java.util.List; + +public class DataParsing { + private CassandraCreate cc = new CassandraCreate(); + private String dirprefix = "http://www.compbio.dundee.ac.uk/www-jpred/results"; + + public void setDirPrefix (String newprefix) { + this.dirprefix = newprefix; + } + + public void Parsing(String input, int nDays) { + Calendar cal = Calendar.getInstance(); + cal.add(Calendar.DATE, -nDays); + for (int i = 0; i < nDays; ++i) { + cal.add(Calendar.DATE, 1); + int month = cal.get(Calendar.MONTH) + 1; + int year = cal.get(Calendar.YEAR); + int day = cal.get(Calendar.DATE); + String date = year + "/" + month + "/" + day; + ParsingForDate(input, date); + } + } + + private void ParsingForDate(String input, String date) { + int totalcount = 0; + int countNoData = 0; + int countUnclearFASTAid = 0; + int countinsertions = 0; + int countinserted = 0; + int counAlignments = 0; + int countStrange = 0; + + System.out.println("Inserting jobs for " + date); + try { + URL url = new URL(input); + URLConnection conn = url.openConnection(); + BufferedReader alljobs = 
new BufferedReader(new InputStreamReader(conn.getInputStream())); + String line; + + while ((line = alljobs.readLine()) != null) { + if (line.matches(date + "(.*)jp_[^\\s]+")) { + String[] table = line.split("\\s+"); + String id = table[table.length - 1]; + totalcount++; + if (!cc.CheckID(id)) { + URL urltable = new URL(dirprefix + "/" + id + "/" + id + ".concise.fasta"); + HttpURLConnection httpConnection = (HttpURLConnection) urltable.openConnection(); + int responsecode = httpConnection.getResponseCode(); + if (199 < responsecode && responsecode < 300) { + try { + final FastaReader fr = new FastaReader(urltable.openStream()); + final List seqs = new ArrayList(); + String newprotein = ""; + while (fr.hasNext()) { + final FastaSequence fs = fr.next(); + if (fs.getId().equals("QUERY") || fs.getId().equals(id)) + newprotein = fs.getSequence().replaceAll("\n", ""); + else + seqs.add(fs); + } + if (newprotein.equals("")) { + countUnclearFASTAid++; + } else { + SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd"); + String dateInString1 = table[0].substring(0, table[0].indexOf(":")); + long dateWork1 = 0; + try { + Date dat1 = formatter.parse(dateInString1); + dateWork1 = dat1.getTime(); + } catch (ParseException e) { + e.printStackTrace(); + } + cc.InsertData(dateWork1, table[0], table[1], table[2], id, "OK", "OK", newprotein, seqs); + ++countinsertions; + // flush every 100 insertions + if (0 == countinsertions % 100) { + cc.flushData(); + } + } + } catch (IOException e) { + e.printStackTrace(); + } + } else { + countNoData++; + } + } else { + ++countinserted; + } + } else { + if (line.matches(date + "(.*)Sequence0/(.*)")) { + ++counAlignments; + } else { + ++countStrange; + } + } + } + alljobs.close(); + System.out.println("Total number of jobs = " + totalcount); + System.out.println(" " + countinserted + " jobs inserted already"); + System.out.println(" " + counAlignments + " jalview jobs"); + System.out.println(" " + countStrange + " not analysed 
jobs"); + System.out.println(" " + countNoData + " jobs without *.concise.fasta file"); + System.out.println(" " + countUnclearFASTAid + " jobs with unclear FASTA protein id in *.concise.fasta"); + System.out.println(" " + countinsertions + " new job insertions\n"); + } catch (MalformedURLException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + } +} diff --git a/datadb/compbio/cassandra/FastaReader.java b/datadb/compbio/cassandra/FastaReader.java new file mode 100644 index 0000000..4783b14 --- /dev/null +++ b/datadb/compbio/cassandra/FastaReader.java @@ -0,0 +1,173 @@ +package compbio.cassandra; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.InputStream; +import java.util.Iterator; +import java.util.Scanner; + +//import compbio.util.Util; + +/** + * Reads files with FASTA formatted sequences. All the information in the FASTA + * header is preserved including trailing white spaces. All the white spaces are + * removed from the sequence. + * + * Examples of the correct input: + * + *
+ * 
+ * >zedpshvyzg
+ * GCQDKNNIAELNEIMGTTRSPSDWQHMKGASPRAEIGLTGKKDSWWRHCCSKEFNKTPPPIHPDMKRWGWMWNRENFEKFLIDNFLNPPCPRLMLTKGTWWRHEDLCHEIFWSTLRWLCLGNQSFSAMIWGHLCECHRMIWWESNEHMFWLKFRRALKKMNSNGPCMGPDNREWMITNRMGKEFCGPAFAGDCQSCWRKCHKTNKICFNEKKGTPTKIDHEQKDIMDILKDIDNHRNWKQCQLWLLTSKSTDQESTTMLTWSTWRDFFIIIKQPFDHKCRGALDANGDFQIAAELKWPAPMIILRQNQKTMHDKSCHHFFTNRCPLMHTTRANDKQCSWHTRKQFICQQDFTTWQHRPDTHRILPSWCMSTRRKNHIKNTPALAFSTCEMGDLPNGWAPGTIILQRQFTQAIKLPQETTGWPRCDPKFDHWNMSKWLRQLLGRDDEMIPPQCD
+ * 
+ * >xovkactesa
+ * CPLSKWWNRRAFLSHTANHWMILMTWEGPHDGESKMRIAMMKWSPCKPTMSHFRCGLDAWAEPIRQIACESTFRM
+ * FCTTPRPIHKLTEMWGHMNGWTGAFCRQLECEWMMPPRHPHPCTSTFNNNKKRLIGQIPNEGKQLFINFQKPQHG
+ * FSESDIWIWKDNPTAWHEGLTIAGIGDGQHCWNWMPMPWSGAPTSNALIEFWTWLGMIGTRCKTQGMWWDAMNHH
+ * DQFELSANAHIAAHHMEKKMILKPDDRNLGDDTWMPPGKIWMRMFAKNTNACWPEGCRDDNEEDDCGTHNLHRMC
+ * 
+ * >ntazzewyvv
+ * CGCKIF D D NMKDNNRHG TDIKKHGFMH IRHPE KRDDC FDNHCIMPKHRRWGLWD
+ * EASINM	AQQWRSLPPSRIMKLNG	HGCDCMHSHMEAD	DTKQSGIKGTFWNG	HDAQWLCRWG	
+ * EFITEA	WWGRWGAITFFHAH	ENKNEIQECSDQNLKE	SRTTCEIID   TCHLFTRHLDGW 
+ *   RCEKCQANATHMTW ACTKSCAEQW  FCAKELMMN    
+ *   W        KQMGWRCKIFRKLFRDNCWID  FELPWWPICFCCKGLSTKSHSAHDGDQCRRW    WPDCARDWLGPGIRGEF   
+ *   FCTHICQQLQRNFWCGCFRWNIEKRMFEIFDDNMAAHWKKCMHFKFLIRIHRHGPITMKMTWCRSGCCFGKTRRLPDSSFISAFLDPKHHRDGSGMMMWSSEMRSCAIPDPQQAWNQGKWIGQIKDWNICFAWPIRENQQCWATPHEMPSGFHFILEKWDALAHPHMHIRQKKCWAWAFLSLMSSTHSDMATFQWAIPGHNIWSNWDNIICGWPRI
+ * 
+ *    > 12 d t y wi 		k	jbke  	
+ *   KLSHHDCD
+ *    N
+ *     H
+ *     HSKCTEPHCGNSHQMLHRDP
+ *     CCDQCQSWEAENWCASMRKAILF
+ * 
+ * 
+ * + * @author Peter Troshin + * @version 1.0 April 2011 + * + */ +public class FastaReader implements Iterator { + + private final Scanner input; + /** + * Delimiter for the scanner + */ + private final String DELIM = ">"; + + /** + * Header data can contain non-ASCII symbols and read in UTF8 + * + * @param inputFile + * the file containing the list of FASTA formatted sequences to + * read from + * @throws FileNotFoundException + * if the input file is not found + * @throws IllegalStateException + * if the close method was called on this instance + * + */ + public FastaReader(final String inputFile) throws FileNotFoundException { + input = new Scanner(new File(inputFile), "UTF8"); + input.useDelimiter(DELIM); + Runtime.getRuntime().addShutdownHook(new Thread() { + + @Override + public void run() { + if (input != null) { + input.close(); + } + } + }); + } + + /** + * This class will not close the incoming stream! So the client should do + * so. + * + * @param inputStream + * @throws FileNotFoundException + */ + public FastaReader(final InputStream inputStream) + throws FileNotFoundException { + input = new Scanner(inputStream); + input.useDelimiter(DELIM); + } + + /** + * {@inheritDoc} + * + * @throws IllegalStateException + * if the close method was called on this instance + */ + @Override + public boolean hasNext() { + return input.hasNext(); + } + + /** + * Reads the next FastaSequence from the input + * + * @throws AssertionError + * if the header or the sequence is missing + * @throws IllegalStateException + * if the close method was called on this instance + * @throws MismatchException + * - if there were no more FastaSequence's. 
+ */ + @Override + public FastaSequence next() { + String fastaHeader = input.next(); + while (fastaHeader.indexOf("\n") < 0 && input.hasNext()) { + fastaHeader = fastaHeader.concat(">"); + fastaHeader = fastaHeader.concat(input.next()); + } + return FastaReader.toFastaSequence(fastaHeader); + } + + /** + * Not implemented + */ + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + /** + * Call this method to close the connection to the input file if you want to + * free up the resources. The connection will be closed on the JVM shutdown + * if this method was not called explicitly. No further reading on this + * instance of the FastaReader will be possible after calling this method. + */ + public void close() { + input.close(); + } + + private static FastaSequence toFastaSequence(final String singleFastaEntry) { + + // assert !Util.isEmpty(singleFastaEntry) : + // "Empty String where FASTA sequence is expected!"; + + int nlineidx = singleFastaEntry.indexOf("\n"); + if (nlineidx < 0) { + throw new AssertionError( + "The FASTA sequence must contain the header information" + + " separated by the new line from the sequence. Given sequence does not appear to " + + "contain the header! Given data:\n " + + singleFastaEntry); + } + String header = singleFastaEntry.substring(0, nlineidx); + + // Get rid of the new line chars (should cover common cases) + header = header.replaceAll("\r", ""); + + String sequence = singleFastaEntry.substring(nlineidx); + + /* + * if (Util.isEmpty(sequence)) { throw new AssertionError( + * "Empty sequences are not allowed! Please make sure the " + + * " data is in the FASTA format! 
Given data:\n " + singleFastaEntry); } + */ + return new FastaSequence(header, sequence); + } +} diff --git a/datadb/compbio/cassandra/FastaSequence.java b/datadb/compbio/cassandra/FastaSequence.java new file mode 100644 index 0000000..61f49c7 --- /dev/null +++ b/datadb/compbio/cassandra/FastaSequence.java @@ -0,0 +1,179 @@ +package compbio.cassandra; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; + +//import compbio.util.SysPrefs; +//import compbio.util.annotation.Immutable; + +/** + * A FASTA formatted sequence. Please note that this class does not make any + * assumptions as to what sequence it stores e.g. it could be nucleotide, + * protein or even gapped alignment sequence! The only guarantee it makes is + * that the sequence does not contain white space characters e.g. spaces, new + * lines etc + * + * @author pvtroshin + * + * @version 1.0 September 2009 + */ + +@XmlAccessorType(XmlAccessType.FIELD) +//@Immutable +public class FastaSequence { + + /** + * Sequence id + */ + private String id; + + // TODO what about gapped sequence here! 
should be indicated + /** + * Returns the string representation of sequence + */ + private String sequence; + + FastaSequence() { + // Default constructor for JaxB + } + + /** + * Upon construction the any whitespace characters are removed from the + * sequence + * + * @param id + * @param sequence + */ + public FastaSequence(String id, String sequence) { + this.id = id; + this.sequence = sequence; + } + + /** + * Gets the value of id + * + * @return the value of id + */ + public String getId() { + return this.id; + } + + /** + * Gets the value of sequence + * + * @return the value of sequence + */ + public String getSequence() { + return this.sequence; + } + + public static int countMatchesInSequence(final String theString, + final String theRegExp) { + final Pattern p = Pattern.compile(theRegExp); + final Matcher m = p.matcher(theString); + int cnt = 0; + while (m.find()) { + cnt++; + } + return cnt; + } + + public String getFormattedFasta() { + return getFormatedSequence(80); + } + + /** + * + * @return one line name, next line sequence, no matter what the sequence + * length is + */ +/* public String getOnelineFasta() { + String fasta = ">" + getId() + SysPrefs.newlinechar; + fasta += getSequence() + SysPrefs.newlinechar; + return fasta; + } + + /** + * Format sequence per width letter in one string. Without spaces. 
+ * + * @return multiple line formated sequence, one line width letters length + * + */ + public String getFormatedSequence(final int width) { + if (sequence == null) { + return ""; + } + + assert width >= 0 : "Wrong width parameter "; + + final StringBuilder sb = new StringBuilder(sequence); + // int tail = nrOfWindows % WIN_SIZE; + // final int turns = (nrOfWindows - tail) / WIN_SIZE; + + int tailLen = sequence.length() % width; + // add up inserted new line chars + int nchunks = (sequence.length() - tailLen) / width; + int nlineCharcounter = 0; + int insPos = 0; + for (int i = 1; i <= nchunks; i++) { + insPos = width * i + nlineCharcounter; + // to prevent inserting new line in the very end of a sequence then + // it would have failed. + if (sb.length() <= insPos) { + break; + } + sb.insert(insPos, "\n"); + nlineCharcounter++; + } + // sb.insert(insPos + tailLen, "\n"); + return sb.toString(); + } + + /** + * + * @return sequence length + */ + public int getLength() { + return this.sequence.length(); + } + + /** + * Same as oneLineFasta + */ +// @Override +// public String toString() { +// return this.getOnelineFasta(); + // } + + @Override + public int hashCode() { + final int prime = 17; + int result = 1; + result = prime * result + ((id == null) ? 0 : id.hashCode()); + result = prime * result + + ((sequence == null) ? 
0 : sequence.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (!(obj instanceof FastaSequence)) { + return false; + } + FastaSequence fs = (FastaSequence) obj; + if (!fs.getId().equals(this.getId())) { + return false; + } + if (!fs.getSequence().equalsIgnoreCase(this.getSequence())) { + return false; + } + return true; + } + +} diff --git a/server/compbio/listeners/ContextListener.java b/server/compbio/listeners/ContextListener.java new file mode 100644 index 0000000..2dfe373 --- /dev/null +++ b/server/compbio/listeners/ContextListener.java @@ -0,0 +1,47 @@ +package compbio.listeners; + +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +import javax.servlet.ServletContextEvent; +import javax.servlet.ServletContextListener; +import javax.servlet.annotation.WebListener; + +import compbio.cassandra.CassandraCreate; + +/** + * Application Lifecycle Listener implementation class ContextListener + * + */ +@WebListener +public class ContextListener implements ServletContextListener { + private ScheduledExecutorService scheduler; + CassandraCreate cc = new CassandraCreate(); + + /** + * @see ServletContextListener#contextInitialized(ServletContextEvent) + */ + public void contextInitialized(ServletContextEvent arg0) { + System.out.println("ProteoCache session start......"); + cc.Connection(); + + scheduler = Executors.newSingleThreadScheduledExecutor(); + scheduler.scheduleAtFixedRate(new Runnable() { + @Override + public void run() { + cc.Parsing(); + } + }, 0, 60, TimeUnit.SECONDS); + } + + /** + * @see ServletContextListener#contextDestroyed(ServletContextEvent) + */ + public void contextDestroyed(ServletContextEvent arg0) { + cc.Closing(); + System.out.println("Shut down ProteoCache......"); + scheduler.shutdownNow(); + } + +} diff --git a/server/compbio/listeners/DetailList.java 
b/server/compbio/listeners/DetailList.java new file mode 100644 index 0000000..ee3b260 --- /dev/null +++ b/server/compbio/listeners/DetailList.java @@ -0,0 +1,49 @@ +package compbio.listeners; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import javax.servlet.ServletException; +import javax.servlet.annotation.WebServlet; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import compbio.cassandra.*; +import compbio.statistic.StatisticsProt; + +/** + * Servlet implementation class DetailList + */ +@WebServlet("/DetailList") +public class DetailList extends HttpServlet { + private static final long serialVersionUID = 1L; + + /** + * @see HttpServlet#HttpServlet() + */ + + /** + * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse + * response) + */ + protected void doGet(HttpServletRequest request, + HttpServletResponse response) throws ServletException, IOException { + List result; + String date1 = request.getParameter("data1"); + String date2 = request.getParameter("data2"); + StatisticsProt sp = new StatisticsProt(); + // result = sp.readDetail(date1, date2); + } + + /** + * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse + * response) + */ + protected void doPost(HttpServletRequest request, + HttpServletResponse response) throws ServletException, IOException { + // TODO Auto-generated method stub + } + +} diff --git a/server/compbio/listeners/LengthServlet.java b/server/compbio/listeners/LengthServlet.java new file mode 100644 index 0000000..c87a34d --- /dev/null +++ b/server/compbio/listeners/LengthServlet.java @@ -0,0 +1,62 @@ +package compbio.listeners; + +import java.io.IOException; +import java.util.Calendar; +import java.util.List; + +import javax.servlet.RequestDispatcher; +import javax.servlet.ServletException; +import javax.servlet.annotation.WebServlet; +import 
javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import compbio.cassandra.DataBase; +import compbio.statistic.StatisticsProt; + +/** + * Servlet implementation class LengthServlet + */ +@WebServlet("/LengthServlet") +public class LengthServlet extends HttpServlet { + private static final long serialVersionUID = 1L; + + /** + * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse + * response) + */ + protected void doGet(HttpServletRequest request, + HttpServletResponse response) throws ServletException, IOException { + List result; + String flag = request.getParameter("option"); + String date1 = request.getParameter("data1"); + String date2 = request.getParameter("data2"); + StatisticsProt sp = new StatisticsProt(); + if (flag == null) + result = sp.readLength(date1, date2); + else { + Calendar cal = Calendar.getInstance(); + String dateB = StatisticsProt.DateFormatYYMMDD(sp.earliestDate()); + String dateEnd = cal.get(Calendar.YEAR) + "/" + + (cal.get(Calendar.MONTH) + 1) + "/" + + cal.get(Calendar.DAY_OF_MONTH); + result = sp.readLength(dateB, dateEnd); + } + request.setAttribute("data1", date1); + request.setAttribute("data2", date2); + request.setAttribute("result", result); + request.setAttribute("flag", flag); + RequestDispatcher rd = request + .getRequestDispatcher("/ReportLength.jsp"); + rd.forward(request, response); + } + + /** + * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse + * response) + */ + protected void doPost(HttpServletRequest request, + HttpServletResponse response) throws ServletException, IOException { + doGet(request, response); + } +} diff --git a/server/compbio/listeners/ProtServlet.java b/server/compbio/listeners/ProtServlet.java new file mode 100644 index 0000000..8ed2d2f --- /dev/null +++ b/server/compbio/listeners/ProtServlet.java @@ -0,0 +1,59 @@ +package compbio.listeners; + +import java.io.IOException; +import 
java.util.List; + +import javax.servlet.RequestDispatcher; +import javax.servlet.ServletException; +import javax.servlet.annotation.WebServlet; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import compbio.cassandra.DataBase; +import compbio.statistic.StatisticsProt; + +/** + * Servlet implementation class ProtServlet + */ +@WebServlet("/ProtServlet") +public class ProtServlet extends HttpServlet { + private static final long serialVersionUID = 1L; + + /** + * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse + * response) + */ + protected void doGet(HttpServletRequest request, + HttpServletResponse response) throws ServletException, IOException { + List result; + String flag = request.getParameter("protein"); + String prot = request.getParameter("prot"); + String checkbox = request.getParameter("option"); + StatisticsProt sp = new StatisticsProt(); + if (checkbox != null) { + result = sp.readProtID(); + } else { + if (flag.equals("whole")) + result = sp.readProt(prot); + else + result = sp.readPart(prot); + } + request.setAttribute("prot", prot); + request.setAttribute("flag", flag); + request.setAttribute("checkbox", checkbox); + request.setAttribute("result", result); + RequestDispatcher rd = request.getRequestDispatcher("/ReportProt.jsp"); + rd.forward(request, response); + } + + /** + * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse + * response) + */ + protected void doPost(HttpServletRequest request, + HttpServletResponse response) throws ServletException, IOException { + doGet(request, response); + } + +} diff --git a/server/compbio/listeners/QueryServlet.java b/server/compbio/listeners/QueryServlet.java new file mode 100644 index 0000000..1241b60 --- /dev/null +++ b/server/compbio/listeners/QueryServlet.java @@ -0,0 +1,51 @@ +package compbio.listeners; + +import java.io.IOException; +import java.util.Calendar; +import 
java.util.List; + +import javax.servlet.RequestDispatcher; +import javax.servlet.ServletException; +import javax.servlet.annotation.WebServlet; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import compbio.cassandra.*; +import compbio.statistic.StatisticsProt; + +@WebServlet("/QueryServlet") +public class QueryServlet extends HttpServlet { + private static final long serialVersionUID = 1L; + + /** + * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse + * response) + */ + protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + List result; + String flag = request.getParameter("option"); + String date1 = request.getParameter("data1"); + String date2 = request.getParameter("data2"); + StatisticsProt sp = new StatisticsProt(); + if (flag == null) + result = sp.readDetail(date1, date2); + else { + Calendar cal = Calendar.getInstance(); + String dateB = StatisticsProt.DateFormatYYMMDD(sp.earliestDate()); + String dateEnd = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH); + result = sp.readDetail(dateB, dateEnd); + } + request.setAttribute("data1", date1); + request.setAttribute("data2", date2); + request.setAttribute("result", result); + request.setAttribute("flag", flag); + RequestDispatcher rd = request.getRequestDispatcher("/ReportNew.jsp"); + rd.forward(request, response); + } + + protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + doGet(request, response); + } + +} diff --git a/server/compbio/statistic/StatisticsProt.java b/server/compbio/statistic/StatisticsProt.java new file mode 100644 index 0000000..02e7a59 --- /dev/null +++ b/server/compbio/statistic/StatisticsProt.java @@ -0,0 +1,327 @@ +package compbio.statistic; + +import java.text.ParseException; +import 
java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.Iterator;
import java.util.List;

import me.prettyprint.cassandra.serializers.LongSerializer;
import me.prettyprint.cassandra.serializers.StringSerializer;
import me.prettyprint.hector.api.beans.ColumnSlice;
import me.prettyprint.hector.api.beans.HColumn;
import me.prettyprint.hector.api.beans.OrderedRows;
import me.prettyprint.hector.api.beans.Row;
import me.prettyprint.hector.api.factory.HFactory;
import me.prettyprint.hector.api.query.QueryResult;
import me.prettyprint.hector.api.query.RangeSlicesQuery;
import me.prettyprint.hector.api.query.SliceQuery;

import compbio.cassandra.CassandraCreate;
import compbio.cassandra.DataBase;

/**
 * Read-side statistics over the Cassandra column families written by
 * {@link CassandraCreate}: per-day job counts ("ProteinData"), job-length
 * histograms ("ProteinLog") and look-ups by protein sequence ("ProteinRow").
 *
 * NOTE(review): the generic type parameters below are reconstructed from the
 * serializers passed to each Hector query; the patch text had them stripped.
 */
public class StatisticsProt {
	private final static long MILLISECONDS_PER_DAY = 1000L * 60 * 60 * 24;
	private CassandraCreate cc = new CassandraCreate();
	// result of the most recent query; returned (possibly null) by readers
	private ArrayList<DataBase> query;

	/**
	 * Counts the jobs stored in "ProteinData" for every day in the period
	 * [dateInStringSt, dateInStringEnd] (yyyy/MM/dd).
	 *
	 * @return one DataBase per day that has data, or null when either date
	 *         is missing or unparsable
	 */
	public List<DataBase> readDetail(String dateInStringSt, String dateInStringEnd) {
		// validate both dates up front; the original checked only the start
		// date, letting a malformed end date fall through to a stack trace
		if (!isThisDateValid(dateInStringSt) || !isThisDateValid(dateInStringEnd))
			return null;
		long dateWorkSt = DateParsing(dateInStringSt);
		long dateWorkEnd = DateParsing(dateInStringEnd);
		if (CheckDate(dateWorkSt) && CheckDate(dateWorkEnd)) {
			query = new ArrayList<DataBase>();
			while (dateWorkSt <= dateWorkEnd) {
				SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
						StringSerializer.get(), StringSerializer.get());
				result.setColumnFamily("ProteinData");
				result.setKey(dateWorkSt);
				result.setRange(null, null, false, Integer.MAX_VALUE);
				QueryResult<ColumnSlice<String, String>> columnSlice = result.execute();
				if (!columnSlice.get().getColumns().isEmpty()) {
					// one column per job: the column count is the day's total
					query.add(new DataBase(DateFormat(dateWorkSt), columnSlice.get().getColumns().size()));
				}
				dateWorkSt += MILLISECONDS_PER_DAY;
			}
		} else
			System.out.println("Wrong date");
		return query;
	}

	/**
	 * Finds the earliest day key present in "ProteinData" by paging over all
	 * row keys. Expensive: scans the whole column family.
	 *
	 * @return the earliest key (epoch millis), or 0 when the CF is empty
	 */
	public long earliestDate() {
		ArrayList<Long> dateSort = new ArrayList<Long>();
		final int row_count = 10000;
		RangeSlicesQuery<Long, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), LongSerializer.get(),
				StringSerializer.get(), StringSerializer.get());
		result.setColumnFamily("ProteinData");
		result.setRange(null, null, false, Integer.MAX_VALUE);
		result.setRowCount(row_count);
		Long last_key = null;
		while (true) {
			result.setKeys(last_key, null);
			OrderedRows<Long, String, String> rows = result.execute().get();
			Iterator<Row<Long, String, String>> rowsIterator = rows.iterator();
			// setKeys() is inclusive: every page after the first starts with
			// the previous page's last row — skip it to avoid duplicates
			if (last_key != null && rowsIterator.hasNext())
				rowsIterator.next();
			while (rowsIterator.hasNext()) {
				Row<Long, String, String> row = rowsIterator.next();
				last_key = row.getKey();
				dateSort.add(last_key);
			}
			if (rows.getCount() < row_count)
				break;
		}
		if (dateSort.isEmpty())
			return 0; // empty CF; CheckDate treats 0 as "no valid date"
		Collections.sort(dateSort);
		return dateSort.get(0);
	}

	/**
	 * Builds, for each day in the period, a histogram of job execution times
	 * with buckets <=30s, 31-60s, 61-120s and >120s (see CountID).
	 */
	public List<DataBase> readLength(String dateInStringSt, String dateInStringEnd) {
		long dateWorkSt = DateParsing(dateInStringSt);
		long dateWorkEnd = DateParsing(dateInStringEnd);
		if (CheckDate(dateWorkSt) && CheckDate(dateWorkEnd)) {
			query = new ArrayList<DataBase>();
			while (dateWorkSt <= dateWorkEnd) {
				SliceQuery<Long, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), LongSerializer.get(),
						StringSerializer.get(), StringSerializer.get());
				result.setColumnFamily("ProteinData");
				result.setKey(dateWorkSt);
				result.setRange(null, null, false, Integer.MAX_VALUE);
				List<HColumn<String, String>> col = result.execute().get().getColumns();
				if (!col.isEmpty()) {
					// four buckets, all initialised to zero (the original also
					// built an unused "totalTime" list — removed as dead code)
					List<Integer> timeResult = new ArrayList<Integer>();
					for (int i = 0; i < 4; i++)
						timeResult.add(i, 0);
					for (HColumn<String, String> column : col) {
						String id = column.getName();
						long lenResult = CountID(id); // job duration in seconds
						if (lenResult <= 30)
							timeResult.set(0, timeResult.get(0) + 1);
						else if (lenResult <= 60)
							timeResult.set(1, timeResult.get(1) + 1);
						else if (lenResult <= 120)
							timeResult.set(2, timeResult.get(2) + 1);
						else
							timeResult.set(3, timeResult.get(3) + 1);
					}
					DataBase db = new DataBase();
					db.setTimeRez(timeResult);
					db.setDate(DateFormat(dateWorkSt));
					query.add(db);
				}
				dateWorkSt += MILLISECONDS_PER_DAY;
			}
		} else
			System.out.println("Wrong date");
		return query;
	}

	/** Looks up all jobs stored for an exact protein sequence in "ProteinRow". */
	public List<DataBase> readProt(String protIn) {
		query = new ArrayList<DataBase>();
		SliceQuery<String, String, String> result = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
				StringSerializer.get(), StringSerializer.get());
		result.setColumnFamily("ProteinRow");
		result.setKey(protIn);
		result.setRange(null, null, false, Integer.MAX_VALUE);
		for (HColumn<String, String> col : result.execute().get().getColumns()) {
			DataBase db = new DataBase();
			db.setProt(protIn);
			db.setId(col.getName());   // column name is the job id
			db.setJpred(col.getValue()); // column value is the prediction
			query.add(db);
		}
		return query;
	}

	/**
	 * Lists proteins submitted more than 3 times (rows in "ProteinRow" with
	 * more than 3 columns), paging over all row keys.
	 */
	public List<DataBase> readProtID() {
		query = new ArrayList<DataBase>();
		final int row_count = 100000000;
		RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
				StringSerializer.get(), StringSerializer.get());
		result.setColumnFamily("ProteinRow");
		result.setRange(null, null, false, Integer.MAX_VALUE);
		result.setRowCount(row_count);
		String last_key = null;
		while (true) {
			result.setKeys(last_key, null);
			OrderedRows<String, String, String> rows = result.execute().get();
			Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
			// skip the repeated boundary row on pages after the first so the
			// same protein is not reported twice
			if (last_key != null && rowsIterator.hasNext())
				rowsIterator.next();
			while (rowsIterator.hasNext()) {
				Row<String, String, String> row = rowsIterator.next();
				last_key = row.getKey();
				int totalId = row.getColumnSlice().getColumns().size();
				if (totalId > 3) {
					DataBase db = new DataBase();
					db.setProt(last_key);
					db.setTotalId(totalId);
					query.add(db);
				}
			}
			if (rows.getCount() < row_count)
				break;
		}
		return query;
	}

	/**
	 * Finds all proteins whose sequence contains protIn as a substring and,
	 * for each stored job of a matching protein, records the sequence split
	 * into matched/unmatched fragments for highlighting.
	 */
	public List<DataBase> readPart(String protIn) {
		final int row_count = 10000;
		query = new ArrayList<DataBase>();
		RangeSlicesQuery<String, String, String> result = HFactory.createRangeSlicesQuery(cc.GetKeyspace(), StringSerializer.get(),
				StringSerializer.get(), StringSerializer.get());
		result.setColumnFamily("ProteinRow");
		result.setRange(null, null, false, Integer.MAX_VALUE);
		result.setRowCount(row_count);
		String last_key = null;
		while (true) {
			result.setKeys(last_key, null);
			OrderedRows<String, String, String> rows = result.execute().get();
			Iterator<Row<String, String, String>> rowsIterator = rows.iterator();
			if (last_key != null && rowsIterator.hasNext())
				rowsIterator.next(); // skip repeated boundary row (see readProtID)
			while (rowsIterator.hasNext()) {
				Row<String, String, String> row = rowsIterator.next();
				last_key = row.getKey();
				// contains() instead of matches("(.*)"+protIn+"(.*)"): the
				// query is a plain substring, not a regular expression
				if (!last_key.contains(protIn))
					continue;
				// fragments are identical for every column of this row
				List<String> subProt = splitSequence(last_key, protIn);
				for (HColumn<String, String> col : row.getColumnSlice().getColumns()) {
					DataBase db = new DataBase();
					db.setProt(last_key);
					db.setId(col.getName());
					db.setJpred(col.getValue());
					db.setSubProt(subProt);
					query.add(db);
				}
			}
			if (rows.getCount() < row_count)
				break;
		}
		return query;
	}

	// Splits seq into alternating unmatched/matched fragments, e.g.
	// splitSequence("ABCAB", "B") -> ["A", "B", "CA", "B"].
	private static List<String> splitSequence(String seq, String protIn) {
		List<String> subProt = new ArrayList<String>();
		String subStr = seq;
		while (subStr.length() > 0 && subStr.contains(protIn)) {
			String first = subStr.substring(0, subStr.indexOf(protIn));
			if (first.length() > 0)
				subProt.add(first);
			subProt.add(protIn);
			subStr = subStr.substring(subStr.indexOf(protIn) + protIn.length());
		}
		if (subStr.length() > 0)
			subProt.add(subStr);
		return subProt;
	}

	/** Parses a "yyyy/MM/dd" date; returns 0 for null or unparsable input. */
	private static long DateParsing(String datInput) {
		if (datInput == null)
			return 0;
		long dateWorkSt = 0;
		SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd");
		try {
			dateWorkSt = formatter.parse(datInput).getTime();
		} catch (ParseException e) {
			e.printStackTrace();
		}
		return dateWorkSt;
	}

	/**
	 * Parses a "yyyy/MM/dd:hh:mm:ss" timestamp; returns 0 for null or
	 * unparsable input. NOTE(review): "hh" is the 12-hour clock; if the
	 * timestamps written by CassandraCreate are 24-hour this should be "HH"
	 * — verify against the writer before changing.
	 */
	private static long TimeConvert(String datInput) {
		long dateWorkSt = 0;
		if (datInput == null)
			return dateWorkSt;
		SimpleDateFormat formatter = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss");
		try {
			dateWorkSt = formatter.parse(datInput).getTime();
		} catch (ParseException e) {
			e.printStackTrace();
		}
		return dateWorkSt;
	}

	/** Formats epoch millis as "dd/MM/yyyy" for display. */
	private static String DateFormat(long inDate) {
		SimpleDateFormat datformat = new SimpleDateFormat("dd/MM/yyyy");
		return datformat.format(new Date(inDate));
	}

	// Kept for debugging (was used by commented-out trace output).
	private static String DateFormat1(long inDate) {
		SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd:hh:mm:ss");
		return datformat.format(new Date(inDate));
	}

	/** Formats epoch millis as "yyyy/MM/dd", the format DateParsing accepts. */
	public static String DateFormatYYMMDD(long indate) {
		SimpleDateFormat datformat = new SimpleDateFormat("yyyy/MM/dd");
		return datformat.format(new Date(indate));
	}

	/**
	 * Returns the execution time of one job in seconds, computed from the
	 * "DataBegin"/"DataEnd" columns of its "ProteinLog" row.
	 *
	 * @return duration in seconds, or 0 when either column is missing
	 */
	public long CountID(String id) {
		SliceQuery<String, String, String> sliceQuery = HFactory.createSliceQuery(cc.GetKeyspace(), StringSerializer.get(),
				StringSerializer.get(), StringSerializer.get());
		sliceQuery.setColumnFamily("ProteinLog").setKey(id).setRange("", "", false, 100);
		ColumnSlice<String, String> slice = sliceQuery.execute().get();
		HColumn<String, String> begin = slice.getColumnByName("DataBegin");
		HColumn<String, String> end = slice.getColumnByName("DataEnd");
		if (begin == null || end == null)
			return 0; // incomplete log row: avoid the NPE the original threw
		return (TimeConvert(end.getValue()) - TimeConvert(begin.getValue())) / 1000;
	}

	/**
	 * True when indate (epoch millis) lies between the earliest stored date
	 * and today. NOTE(review): each call creates a StatisticsProt and runs
	 * earliestDate(), a full column-family scan — expensive for hot paths.
	 */
	public static boolean CheckDate(long indate) {
		if (indate == 0)
			return false;
		StatisticsProt sp = new StatisticsProt();
		Calendar cal = Calendar.getInstance();
		// Calendar.MONTH is 0-based, hence the +1
		String currentDate = cal.get(Calendar.YEAR) + "/" + (cal.get(Calendar.MONTH) + 1) + "/" + cal.get(Calendar.DAY_OF_MONTH);
		return indate >= sp.earliestDate() && indate <= DateParsing(currentDate);
	}

	/** True when dateToValidate is a strictly valid "yyyy/MM/dd" date. */
	public boolean isThisDateValid(String dateToValidate) {
		if (dateToValidate == null)
			return false;
		SimpleDateFormat sdf = new SimpleDateFormat("yyyy/MM/dd");
		sdf.setLenient(false); // reject impossible dates like 2013/02/30
		try {
			sdf.parse(dateToValidate);
		} catch (ParseException e) {
			e.printStackTrace();
			return false;
		}
		return true;
	}

}