Remove internal FastaReader/FastaSequence and replace this with classes from JABAWS
[proteocache.git] / datadb / compbio / cassandra / FastaReader.java
diff --git a/datadb/compbio/cassandra/FastaReader.java b/datadb/compbio/cassandra/FastaReader.java
deleted file mode 100644 (file)
index 4783b14..0000000
+++ /dev/null
@@ -1,173 +0,0 @@
-package compbio.cassandra;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.InputStream;
-import java.util.Iterator;
-import java.util.Scanner;
-
-//import compbio.util.Util;
-
-/**
- * Reads files with FASTA formatted sequences. All the information in the FASTA
- * header is preserved including trailing white spaces. All the white spaces are
- * removed from the sequence.
- * 
- * Examples of the correct input:
- * 
- * <pre>
- * 
- * >zedpshvyzg
- * GCQDKNNIAELNEIMGTTRSPSDWQHMKGASPRAEIGLTGKKDSWWRHCCSKEFNKTPPPIHPDMKRWGWMWNRENFEKFLIDNFLNPPCPRLMLTKGTWWRHEDLCHEIFWSTLRWLCLGNQSFSAMIWGHLCECHRMIWWESNEHMFWLKFRRALKKMNSNGPCMGPDNREWMITNRMGKEFCGPAFAGDCQSCWRKCHKTNKICFNEKKGTPTKIDHEQKDIMDILKDIDNHRNWKQCQLWLLTSKSTDQESTTMLTWSTWRDFFIIIKQPFDHKCRGALDANGDFQIAAELKWPAPMIILRQNQKTMHDKSCHHFFTNRCPLMHTTRANDKQCSWHTRKQFICQQDFTTWQHRPDTHRILPSWCMSTRRKNHIKNTPALAFSTCEMGDLPNGWAPGTIILQRQFTQAIKLPQETTGWPRCDPKFDHWNMSKWLRQLLGRDDEMIPPQCD
- * 
- * >xovkactesa
- * CPLSKWWNRRAFLSHTANHWMILMTWEGPHDGESKMRIAMMKWSPCKPTMSHFRCGLDAWAEPIRQIACESTFRM
- * FCTTPRPIHKLTEMWGHMNGWTGAFCRQLECEWMMPPRHPHPCTSTFNNNKKRLIGQIPNEGKQLFINFQKPQHG
- * FSESDIWIWKDNPTAWHEGLTIAGIGDGQHCWNWMPMPWSGAPTSNALIEFWTWLGMIGTRCKTQGMWWDAMNHH
- * DQFELSANAHIAAHHMEKKMILKPDDRNLGDDTWMPPGKIWMRMFAKNTNACWPEGCRDDNEEDDCGTHNLHRMC
- * 
- * >ntazzewyvv
- * CGCKIF D D NMKDNNRHG TDIKKHGFMH IRHPE KRDDC FDNHCIMPKHRRWGLWD
- * EASINM      AQQWRSLPPSRIMKLNG       HGCDCMHSHMEAD   DTKQSGIKGTFWNG  HDAQWLCRWG      
- * EFITEA      WWGRWGAITFFHAH  ENKNEIQECSDQNLKE        SRTTCEIID   TCHLFTRHLDGW 
- *   RCEKCQANATHMTW ACTKSCAEQW  FCAKELMMN    
- *   W        KQMGWRCKIFRKLFRDNCWID  FELPWWPICFCCKGLSTKSHSAHDGDQCRRW    WPDCARDWLGPGIRGEF   
- *   FCTHICQQLQRNFWCGCFRWNIEKRMFEIFDDNMAAHWKKCMHFKFLIRIHRHGPITMKMTWCRSGCCFGKTRRLPDSSFISAFLDPKHHRDGSGMMMWSSEMRSCAIPDPQQAWNQGKWIGQIKDWNICFAWPIRENQQCWATPHEMPSGFHFILEKWDALAHPHMHIRQKKCWAWAFLSLMSSTHSDMATFQWAIPGHNIWSNWDNIICGWPRI
- * 
- *    > 12 d t y wi            k       jbke    
- *   KLSHHDCD
- *    N
- *     H
- *     HSKCTEPHCGNSHQMLHRDP
- *     CCDQCQSWEAENWCASMRKAILF
- * 
- * </pre>
- * 
- * @author Peter Troshin
- * @version 1.0 April 2011
- * 
- */
-public class FastaReader implements Iterator<FastaSequence> {
-
-       private final Scanner input;
-       /**
-        * Delimiter for the scanner
-        */
-       private final String DELIM = ">";
-
-       /**
-        * Header data can contain non-ASCII symbols and read in UTF8
-        * 
-        * @param inputFile
-        *            the file containing the list of FASTA formatted sequences to
-        *            read from
-        * @throws FileNotFoundException
-        *             if the input file is not found
-        * @throws IllegalStateException
-        *             if the close method was called on this instance
-        * 
-        */
-       public FastaReader(final String inputFile) throws FileNotFoundException {
-               input = new Scanner(new File(inputFile), "UTF8");
-               input.useDelimiter(DELIM);
-               Runtime.getRuntime().addShutdownHook(new Thread() {
-
-                       @Override
-                       public void run() {
-                               if (input != null) {
-                                       input.close();
-                               }
-                       }
-               });
-       }
-
-       /**
-        * This class will not close the incoming stream! So the client should do
-        * so.
-        * 
-        * @param inputStream
-        * @throws FileNotFoundException
-        */
-       public FastaReader(final InputStream inputStream)
-                       throws FileNotFoundException {
-               input = new Scanner(inputStream);
-               input.useDelimiter(DELIM);
-       }
-
-       /**
-        * {@inheritDoc}
-        * 
-        * @throws IllegalStateException
-        *             if the close method was called on this instance
-        */
-       @Override
-       public boolean hasNext() {
-               return input.hasNext();
-       }
-
-       /**
-        * Reads the next FastaSequence from the input
-        * 
-        * @throws AssertionError
-        *             if the header or the sequence is missing
-        * @throws IllegalStateException
-        *             if the close method was called on this instance
-        * @throws MismatchException
-        *             - if there were no more FastaSequence's.
-        */
-       @Override
-       public FastaSequence next() {
-               String fastaHeader = input.next();
-               while (fastaHeader.indexOf("\n") < 0 && input.hasNext()) {
-                       fastaHeader = fastaHeader.concat(">");
-                       fastaHeader = fastaHeader.concat(input.next());
-               }
-               return FastaReader.toFastaSequence(fastaHeader);
-       }
-
-       /**
-        * Not implemented
-        */
-       @Override
-       public void remove() {
-               throw new UnsupportedOperationException();
-       }
-
-       /**
-        * Call this method to close the connection to the input file if you want to
-        * free up the resources. The connection will be closed on the JVM shutdown
-        * if this method was not called explicitly. No further reading on this
-        * instance of the FastaReader will be possible after calling this method.
-        */
-       public void close() {
-               input.close();
-       }
-
-       private static FastaSequence toFastaSequence(final String singleFastaEntry) {
-
-               // assert !Util.isEmpty(singleFastaEntry) :
-               // "Empty String where FASTA sequence is expected!";
-
-               int nlineidx = singleFastaEntry.indexOf("\n");
-               if (nlineidx < 0) {
-                       throw new AssertionError(
-                                       "The FASTA sequence must contain the header information"
-                                                       + " separated by the new line from the sequence. Given sequence does not appear to "
-                                                       + "contain the header! Given data:\n "
-                                                       + singleFastaEntry);
-               }
-               String header = singleFastaEntry.substring(0, nlineidx);
-
-               // Get rid of the new line chars (should cover common cases)
-               header = header.replaceAll("\r", "");
-
-               String sequence = singleFastaEntry.substring(nlineidx);
-
-               /*
-                * if (Util.isEmpty(sequence)) { throw new AssertionError(
-                * "Empty sequences are not allowed! Please make sure the " +
-                * " data is in the FASTA format! Given data:\n " + singleFastaEntry); }
-                */
-               return new FastaSequence(header, sequence);
-       }
-}