"""Code to deal with alignments written in Fasta format (OBSOLETE).

This module is considered obsolete and is likely to be deprecated. Please use
Bio.AlignIO instead for reading and writing alignments in FASTA format.

This mostly just uses the regular Fasta parsing code written by Jeff
to deal with all of the input and output formats.
"""
import os

from Bio import Alphabet
from Bio import Fasta
from Bio.Align.Generic import Alignment
from Bio.Alphabet import IUPAC
def parse_file(file_name, type='DNA'):
    """Parse the given file into a FastaAlignment object.

    Arguments:
    o file_name - The location of the file to parse.
    o type - The type of information contained in the file. Must be one of
    'DNA', 'RNA' or 'PROTEIN'; the comparison is case-insensitive.

    Returns a FastaAlignment built on the gapped IUPAC alphabet selected by
    type, with one sequence added per record read from the file.

    Raises ValueError if type is not one of the accepted names.
    """
    # Normalise once rather than upper-casing again for every comparison.
    seq_type = type.upper()
    if seq_type == 'DNA':
        alphabet = IUPAC.ambiguous_dna
    elif seq_type == 'RNA':
        alphabet = IUPAC.ambiguous_rna
    elif seq_type == 'PROTEIN':
        alphabet = IUPAC.protein
    else:
        raise ValueError("Invalid type %s passed. Need DNA, RNA or PROTEIN"
                         % type)

    # create a new alignment object
    fasta_align = FastaAlignment(Alphabet.Gapped(alphabet))

    # now parse the file and fill up the alignment object
    align_file = open(file_name, 'r')
    try:
        parser = Fasta.RecordParser()
        iterator = Fasta.Iterator(align_file, parser)

        # NOTE(review): assumes iterator.next() yields a false value (not
        # StopIteration) once the records are exhausted -- confirm against
        # Fasta.Iterator.
        cur_align = iterator.next()
        while cur_align:
            fasta_align.add_sequence(cur_align.title, cur_align.sequence)
            cur_align = iterator.next()
    finally:
        # Always release the handle, even if parsing fails part-way through.
        align_file.close()

    return fasta_align
class FastaAlignment(Alignment):
    """Work with the Fasta Alignment format.

    The fasta alignment format is basically the same as the regular ol'
    Fasta format we know and love, except the sequences have gaps.
    """
    def __init__(self, alphabet = Alphabet.Gapped(IUPAC.ambiguous_dna)):
        """Create an empty alignment using the given (gapped) alphabet."""
        Alignment.__init__(self, alphabet)

    def __str__(self):
        """Print out a fasta version of the alignment info.

        Each record in the alignment is written as a Fasta record whose
        title is the record description. Records are separated by a blank
        line and the result ends with a single line separator.
        """
        fasta_blocks = []
        for item in self._records:
            new_f_record = Fasta.Record()
            new_f_record.title = item.description
            new_f_record.sequence = item.seq.data
            fasta_blocks.append(str(new_f_record))

        # Join with a blank line between records, then normalise the tail to
        # exactly one line separator (this also covers the empty alignment).
        record_gap = os.linesep + os.linesep
        return record_gap.join(fasta_blocks).rstrip() + os.linesep