package compbio.data.sequence; import java.io.File; import java.io.FileNotFoundException; import java.util.Iterator; import java.util.Scanner; import compbio.util.Util; /** * Reads files with FASTA formatted sequences. All the information in the FASTA * header is preserved including trailing white spaces. All the white spaces are * removed from the sequence. * * Examples of the correct input: * *
* * >zedpshvyzg * GCQDKNNIAELNEIMGTTRSPSDWQHMKGASPRAEIGLTGKKDSWWRHCCSKEFNKTPPPIHPDMKRWGWMWNRENFEKFLIDNFLNPPCPRLMLTKGTWWRHEDLCHEIFWSTLRWLCLGNQSFSAMIWGHLCECHRMIWWESNEHMFWLKFRRALKKMNSNGPCMGPDNREWMITNRMGKEFCGPAFAGDCQSCWRKCHKTNKICFNEKKGTPTKIDHEQKDIMDILKDIDNHRNWKQCQLWLLTSKSTDQESTTMLTWSTWRDFFIIIKQPFDHKCRGALDANGDFQIAAELKWPAPMIILRQNQKTMHDKSCHHFFTNRCPLMHTTRANDKQCSWHTRKQFICQQDFTTWQHRPDTHRILPSWCMSTRRKNHIKNTPALAFSTCEMGDLPNGWAPGTIILQRQFTQAIKLPQETTGWPRCDPKFDHWNMSKWLRQLLGRDDEMIPPQCD * * >xovkactesa * CPLSKWWNRRAFLSHTANHWMILMTWEGPHDGESKMRIAMMKWSPCKPTMSHFRCGLDAWAEPIRQIACESTFRM * FCTTPRPIHKLTEMWGHMNGWTGAFCRQLECEWMMPPRHPHPCTSTFNNNKKRLIGQIPNEGKQLFINFQKPQHG * FSESDIWIWKDNPTAWHEGLTIAGIGDGQHCWNWMPMPWSGAPTSNALIEFWTWLGMIGTRCKTQGMWWDAMNHH * DQFELSANAHIAAHHMEKKMILKPDDRNLGDDTWMPPGKIWMRMFAKNTNACWPEGCRDDNEEDDCGTHNLHRMC * * >ntazzewyvv * CGCKIF D D NMKDNNRHG TDIKKHGFMH IRHPE KRDDC FDNHCIMPKHRRWGLWD * EASINM AQQWRSLPPSRIMKLNG HGCDCMHSHMEAD DTKQSGIKGTFWNG HDAQWLCRWG * EFITEA WWGRWGAITFFHAH ENKNEIQECSDQNLKE SRTTCEIID TCHLFTRHLDGW * RCEKCQANATHMTW ACTKSCAEQW FCAKELMMN * W KQMGWRCKIFRKLFRDNCWID FELPWWPICFCCKGLSTKSHSAHDGDQCRRW WPDCARDWLGPGIRGEF * FCTHICQQLQRNFWCGCFRWNIEKRMFEIFDDNMAAHWKKCMHFKFLIRIHRHGPITMKMTWCRSGCCFGKTRRLPDSSFISAFLDPKHHRDGSGMMMWSSEMRSCAIPDPQQAWNQGKWIGQIKDWNICFAWPIRENQQCWATPHEMPSGFHFILEKWDALAHPHMHIRQKKCWAWAFLSLMSSTHSDMATFQWAIPGHNIWSNWDNIICGWPRI * * > 12 d t y wi k jbke * KLSHHDCD * N * H * HSKCTEPHCGNSHQMLHRDP * CCDQCQSWEAENWCASMRKAILF * ** * @author Peter Troshin * @version 1.0 April 2011 * */ public class FastaReader implements Iterator