Fasta/FastaAlign.py

   1 """
   2 Code to deal with alignments written in Fasta format (OBSOLETE).
   3
   4 This module is considered obsolete and likely to be deprecated.  Please use
   5 Bio.AlignIO instead for reading and writing alignments in FASTA format.
   6
   7 This mostly just uses the regular Fasta parsing stuff written by Jeff
   8 to deal with all of the input and output formats.
   9
  10 functions:
  11 o parse_file()
  12
  13 classes:
  14 FastaAlignment"""
  15 # standard library
  16 import os
  17
  18 # biopython
  19 from Bio.Align.Generic import Alignment
  20 from Bio import Alphabet
  21 from Bio.Alphabet import IUPAC
  22 from Bio import Fasta
  23
  24 def parse_file(file_name, type = 'DNA'):
  25     """Parse the given file into a FastaAlignment object.
  26
  27     Arguments:
  28     o file_name - The location of the file to parse.
  29     o type - The type of information contained in the file.
  30     """
  31     if type.upper() == 'DNA':
  32         alphabet = IUPAC.ambiguous_dna
  33     elif type.upper() == 'RNA':
  34         alphabet = IUPAC.ambiguous_rna
  35     elif type.upper() == 'PROTEIN':
  36         alphabet = IUPAC.protein
  37     else:
  38         raise ValueError("Invalid type %s passed. Need DNA, RNA or PROTEIN"
  39                          % type)
  40
  41     # create a new alignment object
  42     fasta_align = FastaAlignment(Alphabet.Gapped(alphabet))
  43
  44     # now parse the file and fill up the alignment object
  45     align_file = open(file_name, 'r')
  46
  47     parser = Fasta.RecordParser()
  48     iterator = Fasta.Iterator(align_file, parser)
  49
  50     cur_align = iterator.next()
  51     while cur_align:
  52         fasta_align.add_sequence(cur_align.title, cur_align.sequence)
  53
  54         cur_align = iterator.next()
  55
  56     return fasta_align
  57
  58 class FastaAlignment(Alignment):
  59     """Work with the Fasta Alignment format.
  60
  61     The fasta alignment format is basically the same as the regular ol'
  62     Fasta format we know and love, except the sequences have gaps
  63     (represented by -'s).
  64     """
  65     def __init__(self, alphabet = Alphabet.Gapped(IUPAC.ambiguous_dna)):
  66         Alignment.__init__(self, alphabet)
  67
  68     def __str__(self):
  69         """Print out a fasta version of the alignment info."""
  70         return_string = ''
  71         for item in self._records:
  72             new_f_record = Fasta.Record()
  73             new_f_record.title = item.description
  74             new_f_record.sequence = item.seq.data
  75
  76             return_string = return_string + str(new_f_record) + os.linesep + os.linesep
  77
  78         # have a extra newline, so strip two off and add one before returning
  79         return return_string.rstrip() + os.linesep
  80
  81
  82
  83
  84