Disembl binaries and its dependancies e.g. minimized BioPython distribution and sovgo...

[jabaws.git] / binaries / src / disembl / biopython-1.50 / Bio / Fasta / FastaAlign.py
diff --git a/binaries/src/disembl/biopython-1.50/Bio/Fasta/FastaAlign.py b/binaries/src/disembl/biopython-1.50/Bio/Fasta/FastaAlign.py

new file mode 100644 (file)

index 0000000..735b502
--- /dev/null
+++ b/binaries/src/disembl/biopython-1.50/Bio/Fasta/FastaAlign.py
@@ -0,0 +1,84 @@
+"""
+Code to deal with alignments written in Fasta format (OBSOLETE).
+
+This module is considered obsolete and likely to be deprecated.  Please use
+Bio.AlignIO instead for reading and writing alignments in FASTA format.
+
+This mostly just uses the regular Fasta parsing stuff written by Jeff
+to deal with all of the input and output formats.
+
+functions:
+o parse_file()
+
+classes:
+FastaAlignment"""
+# standard library
+import os
+
+# biopython
+from Bio.Align.Generic import Alignment
+from Bio import Alphabet
+from Bio.Alphabet import IUPAC
+from Bio import Fasta
+
+def parse_file(file_name, type = 'DNA'):
+    """Parse the given file into a FastaAlignment object.
+
+    Arguments:
+    o file_name - The location of the file to parse.
+    o type - The type of information contained in the file.
+    """
+    if type.upper() == 'DNA':
+        alphabet = IUPAC.ambiguous_dna
+    elif type.upper() == 'RNA':
+        alphabet = IUPAC.ambiguous_rna
+    elif type.upper() == 'PROTEIN':
+        alphabet = IUPAC.protein
+    else:
+        raise ValueError("Invalid type %s passed. Need DNA, RNA or PROTEIN"
+                         % type)
+
+    # create a new alignment object
+    fasta_align = FastaAlignment(Alphabet.Gapped(alphabet))
+
+    # now parse the file and fill up the alignment object
+    align_file = open(file_name, 'r')
+
+    parser = Fasta.RecordParser()
+    iterator = Fasta.Iterator(align_file, parser)
+
+    cur_align = iterator.next()
+    while cur_align:
+        fasta_align.add_sequence(cur_align.title, cur_align.sequence)
+
+        cur_align = iterator.next()
+
+    return fasta_align
+
+class FastaAlignment(Alignment):
+    """Work with the Fasta Alignment format.
+
+    The fasta alignment format is basically the same as the regular ol'
+    Fasta format we know and love, except the sequences have gaps
+    (represented by -'s).
+    """
+    def __init__(self, alphabet = Alphabet.Gapped(IUPAC.ambiguous_dna)):
+        Alignment.__init__(self, alphabet)
+
+    def __str__(self):
+        """Print out a fasta version of the alignment info."""
+        return_string = ''
+        for item in self._records:
+            new_f_record = Fasta.Record()
+            new_f_record.title = item.description
+            new_f_record.sequence = item.seq.data
+
+            return_string = return_string + str(new_f_record) + os.linesep + os.linesep
+
+        # have a extra newline, so strip two off and add one before returning
+        return return_string.rstrip() + os.linesep
+
+            
+            
+        
+