binaries/src/globplot/biopython-1.50/Bio/writers/SeqRecord/embl.py

   1 """Part of an old unused and undocumented sequence writing framework (DEPRECATED)."""
   2 # Not clear on the distinction, if any, between 'embl' and 'embl/65'.  This
   3 # code might apply to either or both.
   4
   5 # See 'http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html' for a
   6 # definition of this file format.
   7
   8 # This code only makes a best effort--the output may not be strictly valid.
   9 # So, for example, the EMBL ID is supposed to be alphanumeric, starting with a
  10 # letter, but we don't check for this, etc.
  11
  12
  13 # Example:
  14 # ID   AA03518    standard; DNA; FUN; 237 BP.
  15 # XX
  16 # AC   U03518;
  17 # XX
  18 # DE   Aspergillus awamori internal transcribed spacer 1 (ITS1) and 18S
  19 # DE   rRNA and 5.8S rRNA genes, partial sequence.
  20 # XX
  21 # SQ   Sequence 237 BP; 41 A; 77 C; 67 G; 52 T; 0 other;
  22 #      aacctgcgga aggatcatta ccgagtgcgg gtcctttggg cccaacctcc catccgtgtc        60
  23 #      tattgtaccc tgttgcttcg gcgggcccgc cgcttgtcgg ccgccggggg ggcgcctctg       120
  24 #      ccccccgggc ccgtgcccgc cggagacccc aacacgaaca ctgtctgaaa gcgtgcagtc       180
  25 #      tgagttgatt gaatgcaatc agttaaaact ttcaacaatg gatctcttgg ttccggc          237
  26 # //
  27
  28
  29 import textwrap
  30
  31 from Bio import Alphabet
  32 from Bio import Writer
  33
  34 class WriteEmbl(Writer.Writer):
  35     def __init__(self, outfile):
  36         Writer.Writer.__init__(self, outfile)
  37
  38     def write(self, record):
  39         seq = record.seq
  40         assert seq.alphabet.size == 1, "cannot handle alphabet of size %d" % \
  41                seq.alphabet.size
  42         data = seq.data
  43         upperdata = data.upper()
  44
  45 # It'd be nice if the alphabet was usefully set, but for many interesting
  46 # cases (e.g., reading from FASTA files), it's not.
  47
  48         if isinstance(seq.alphabet, Alphabet.RNAAlphabet):
  49             molecule = 'mRNA'
  50             letters = ['A', 'C', 'G', 'U']
  51         else:
  52             molecule = 'DNA'
  53             letters = ['A', 'C', 'G', 'T']
  54
  55         division = 'UNC'                # unknown
  56
  57         self.outfile.write("ID   %s  standard; %s; %s; %d BP.\n"
  58                            % (record.id, molecule, division, len(data)))
  59
  60         desclist = textwrap.wrap(record.description, 74)
  61         for l in desclist:
  62             self.outfile.write("DE   %s\n" % l)
  63
  64         counts = [ upperdata.count(l) for l in letters ]
  65         othercount = len(upperdata) - sum(counts)
  66
  67         countstring = ''.join([ " %d %s;" % p for p in zip(counts, letters) ])
  68
  69         self.outfile.write("SQ   Sequence %s BP;%s %d other;\n"
  70                            % (len(data), countstring, othercount))
  71
  72         rowlength = 60
  73         blocklength = 10
  74         for i in xrange(0, len(data), rowlength):
  75             self.outfile.write(" " * 5)
  76             row = data[i:i+rowlength]
  77             for b in xrange(0, rowlength, blocklength):
  78                 block = row[b:b+blocklength]
  79                 self.outfile.write("%-*s" % (blocklength+1, block))
  80             self.outfile.write("%9d\n" % min(i+rowlength, len(data)))
  81
  82         self.outfile.write("//\n")
  83
  84
# Module-level alias for the WriteEmbl class.
# NOTE(review): presumably the name the writer framework looks up to obtain
# this format's writer -- confirm against the code that loads this module.
make_writer = WriteEmbl