# Provenance: recovered from a gitweb "blobdiff_plain" patch that added this
# file to jabaws.git as
#   binaries/src/globplot/biopython-1.50/Bio/Fasta/__init__.py
# (new file mode 100644, index 0000000..c49b45b).
# X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=binaries%2Fsrc%2Fglobplot%2Fbiopython-1.50%2FBio%2FFasta%2F__init__.py;fp=binaries%2Fsrc%2Fglobplot%2Fbiopython-1.50%2FBio%2FFasta%2F__init__.py;h=c49b45be8c445aa01d2cc8f7737eaee5bc3c72b6;hb=119df1cedad3d4760e6fd458713da2488eff79cc;hp=0000000000000000000000000000000000000000;hpb=d3806a66f002b93f6dc03447b6628f943a3ba90c;p=jabaws.git
"""Utilities for working with FASTA-formatted sequences (OBSOLETE).

Classes:
Record             Holds FASTA sequence data.
Iterator           Iterates over sequence data in a FASTA file.
RecordParser       Parses FASTA sequence data into a Record object.
SequenceParser     Parses FASTA sequence data into a SeqRecord object.

For a long time this module was the most commonly used and best documented
FASTA parser in Biopython.  However, we now recommend using Bio.SeqIO instead.

In view of this, while you can continue to use Bio.Fasta for the moment, it is
considered to be a legacy module and should not be used if you are writing new
code.  At some point Bio.Fasta may be officially deprecated (with warning
messages when used) before finally being removed.

If you are already using Bio.Fasta with the SequenceParser to get SeqRecord
objects, then you should be able to switch to the more recent Bio.SeqIO module
very easily as that too uses SeqRecord objects.  For example,

from Bio import Fasta
handle = open("example.fas")
for seq_record in Fasta.Iterator(handle, Fasta.SequenceParser()) :
    print seq_record.description
    print seq_record.seq
handle.close()

Using Bio.SeqIO instead this becomes:

from Bio import SeqIO
handle = open("example.fas")
for seq_record in SeqIO.parse(handle, "fasta") :
    print seq_record.description
    print seq_record.seq
handle.close()

Converting existing code that uses the RecordParser is a little more
complicated as the Bio.Fasta.Record object differs from the SeqRecord.

from Bio import Fasta
handle = open("example.fas")
for record in Fasta.Iterator(handle, Fasta.RecordParser()) :
    #record is a Bio.Fasta.Record object
    print record.title #The full title line as a string
    print record.sequence #The sequence as a string
handle.close()

Using Bio.SeqIO instead this becomes:

from Bio import SeqIO
handle = open("example.fas")
for seq_record in SeqIO.parse(handle, "fasta") :
    print seq_record.description #The full title line as a string
    print seq_record.seq.tostring() #The sequence as a string
handle.close()

"""
from Bio import Seq
from Bio import SeqRecord
from Bio import Alphabet


def _split_record(text):
    """Split the text of one FASTA record into (title, sequence).

    Arguments:
    o text - A string starting with '>'.  If it holds more than one record,
    only the first one is used.

    Returns a tuple (title, sequence): title is the first line without the
    leading '>', sequence is the remaining lines joined with the newlines
    removed.  A record consisting of a title line only gives an empty
    sequence.

    Raises IndexError if text is empty, and AssertionError if it does not
    start with '>'.
    """
    text = text.replace("\r\n", "\n")  # Crude way of dealing with \r\n
    assert text[0] == ">", text
    text = text.split("\n>", 1)[0]  # Only do the first record if more than one
    parts = text.split("\n", 1)
    title = parts[0][1:]
    if len(parts) > 1:
        sequence = parts[1].replace("\n", "")
    else:
        # Title line with nothing after it: unpacking the split directly
        # would raise ValueError here, so report an empty sequence instead.
        sequence = ""
    return title, sequence


class Record:
    """Holds information from a FASTA record.

    Members:
    title       Title line ('>' character not included).
    sequence    The sequence.

    """
    def __init__(self, colwidth=60):
        """__init__(self, colwidth=60)

        Create a new Record.  colwidth specifies the number of residues
        to put on each line when generating FASTA format.

        """
        self.title = ''
        self.sequence = ''
        self._colwidth = colwidth

    def __str__(self):
        """Return the record as FASTA text, without a trailing newline.

        The first line is '>' followed by the title; the sequence follows,
        wrapped at the column width given to the constructor.  Lines are
        always joined with "\\n" (not os.linesep), which is what the tests
        expect on Windows too.
        """
        lines = ['>%s' % self.title]
        width = self._colwidth
        if width > 0:
            for start in range(0, len(self.sequence), width):
                lines.append(self.sequence[start:start + width])
        elif self.sequence:
            # A zero or negative width can never advance through the
            # sequence (it used to loop forever), so do not wrap at all.
            lines.append(self.sequence)
        return "\n".join(lines)


class Iterator:
    """Returns one record at a time from a FASTA file.

    Without a parser each record is returned as a string (the title line
    and sequence lines, trailing whitespace stripped, joined with "\\n").
    With a parser, the result of parser.parse_string() on that string is
    returned instead.
    """
    def __init__(self, handle, parser=None, debug=0):
        """Initialize a new iterator.

        Arguments:
        o handle - Object with a readline() method giving the FASTA text.
        o parser - Optional object with a parse_string(text) method, for
        example a RecordParser or SequenceParser.
        o debug - If true, print diagnostic messages while reading.
        """
        self.handle = handle
        self._parser = parser
        self._debug = debug

        # Skip any text before the first record (e.g. blank lines)
        line = handle.readline()
        while line and line[0] != ">":
            if debug:
                print("Skipping: " + line)
            line = handle.readline()
        # Either the first title line, or an empty string at end of file.
        self._lookahead = line

    def __iter__(self):
        """Iterate over the records, stopping when next() returns None."""
        return iter(self.next, None)

    def next(self):
        """Return the next record in the file, or None if there are no more.

        Lines starting with '#' inside a record are ignored.
        """
        line = self._lookahead
        if not line:
            return None
        assert line[0] == ">", line
        lines = [line.rstrip()]
        line = self.handle.readline()
        while line:
            if line[0] == ">":
                break
            if line[0] == "#":
                if self._debug:
                    print("Ignoring comment line")
            else:
                lines.append(line.rstrip())
            line = self.handle.readline()
        # Title line of the following record, or "" at end of file.
        self._lookahead = line
        if self._debug:
            # This message used to reference an undefined name "title" and
            # so raised NameError; take the title from the first line.
            print("Debug: '%s' and '%s'" % (lines[0][1:], "".join(lines[1:])))
        text = "\n".join(lines)
        if self._parser is None:
            return text
        return self._parser.parse_string(text)


class RecordParser:
    """Parses FASTA sequence data into a Fasta.Record object.
    """
    def __init__(self, debug=0):
        """Initialize the parser.  The debug argument is accepted but unused.
        """
        pass

    def parse_string(self, text):
        """Parse the first FASTA record in a string into a Record.

        The text must start with '>'.  A record with a title line only is
        returned with an empty sequence.
        """
        title, sequence = _split_record(text)
        rec = Record()
        rec.title = title
        rec.sequence = sequence
        return rec

    def parse(self, handle):
        """Read all of handle and parse the first record found in it."""
        return self.parse_string(handle.read())


class SequenceParser:
    """Parses FASTA sequence data into a SeqRecord object.
    """
    def __init__(self, alphabet=Alphabet.generic_alphabet, title2ids=None,
                 debug=0):
        """Initialize the parser.

        Arguments:
        o alphabet - The alphabet of the sequences to be parsed. If not
        passed, this will be set as generic_alphabet.
        o title2ids - A function that, when given the title of the FASTA
        file (without the beginning >), will return the id, name and
        description (in that order) for the record. If this is not given,
        then the entire title line will be used as the description.
        o debug - Accepted but unused.
        """
        self.alphabet = alphabet
        self.title2ids = title2ids

    def parse_string(self, text):
        """Parse the first FASTA record in a string into a SeqRecord.

        The text must start with '>'.  A record with a title line only is
        returned with an empty sequence.
        """
        title, sequence = _split_record(text)

        seq = Seq.Seq(sequence, self.alphabet)
        rec = SeqRecord.SeqRecord(seq)

        if self.title2ids:
            seq_id, name, descr = self.title2ids(title)
            rec.id = seq_id
            rec.name = name
            rec.description = descr
        else:
            rec.description = title

        return rec

    def parse(self, handle):
        """Read all of handle and parse the first record found in it."""
        return self.parse_string(handle.read())