--- /dev/null
+# Copyright 2008 by Peter Cock. All rights reserved.
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+
+"""Bio.SeqIO support for the "tab" (simple tab separated) file format.
+
+You are expected to use this module via the Bio.SeqIO functions.
+
+The "tab" format is an ad-hoc plain text file format where each sequence is
+on one (long) line. Each line contains the identifier/description, followed
+by a tab, followed by the sequence. For example, consider the following
+short FASTA format file:
+
+>ID123456 possible binding site?
+CATCNAGATGACACTACGACTACGACTCAGACTAC
+>ID123457 random sequence
+ACACTACGACTACGACTCAGACTACAAN
+
+Apart from the descriptions, this can be represented in the simple two column
+tab separated format as follows:
+
+ID123456(tab)CATCNAGATGACACTACGACTACGACTCAGACTAC
+ID123457(tab)ACACTACGACTACGACTCAGACTACAAN
+
+When reading this file, "ID123456" or "ID123457" will be taken as the record's
+.id and .name properties. There is no other information to record.
+
+Similarly, when writing to this format, Biopython will ONLY record the record's
+.id and .seq (and not the description or any other information) as in the example
+above.
+"""
+
+from Bio.Alphabet import single_letter_alphabet
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+from Interfaces import SequentialSequenceWriter
+
+#This is a generator function!
def TabIterator(handle, alphabet = single_letter_alphabet) :
    """Iterate over tab separated lines, yielding SeqRecord objects.

    Each line of the file should contain exactly one tab, dividing the line
    into an identifier and the full sequence.

    handle - input file (iterated over line by line)
    alphabet - optional alphabet, passed to each Seq object created

    The first field is taken as the record's .id and .name (regardless of
    any spaces within the text) and the second field is the sequence.
    Leading and trailing whitespace (including the trailing new line) is
    stripped from both fields.

    Any blank lines are ignored.

    Raises ValueError for a non-blank line which does not split into
    exactly two fields on the tab character.

    This is a generator function.
    """
    for line in handle :
        fields = line.split("\t")
        if len(fields) != 2 :
            # Wrong number of tabs. The field count is tested explicitly
            # (rather than unpacking inside a bare except) so that unrelated
            # exceptions such as KeyboardInterrupt are never swallowed.
            if line.strip() == "" :
                #It's a blank line, ignore it
                continue
            raise ValueError("Each line should have one tab separating the"
                             " title and sequence, this line has %i tabs: %s"
                             % (line.count("\t"), repr(line)))
        title = fields[0].strip()
        seq = fields[1].strip() #removes the trailing new line
        yield SeqRecord(Seq(seq, alphabet), id = title, name = title)
+
class TabWriter(SequentialSequenceWriter):
    """Class to write simple tab separated format files.

    Each line consists of "id(tab)sequence" only.

    Any description, name or other annotation is not recorded.
    """
    def write_record(self, record):
        """Write a single tab line to the file.

        record - the record to write; only its .id and its sequence
                 (obtained via self._get_seq_string) are used.

        Raises ValueError if the cleaned identifier or the sequence string
        contains a tab, new line or carriage return, since any of these
        would corrupt the one-record-per-line, two column layout.
        """
        # Internal invariants of the sequential writer protocol.
        assert self._header_written
        assert not self._footer_written
        self._record_written = True

        title = self.clean(record.id)
        seq = self._get_seq_string(record) #Catches sequence being None

        # Validate the data explicitly rather than with assert statements,
        # which are removed under "python -O" and would then let a malformed
        # line be written silently.
        for label, text in (("identifier", title), ("sequence", seq)) :
            for char in ("\t", "\n", "\r") :
                if char in text :
                    raise ValueError("Cannot write record in tab format, "
                                     "the %s contains %s: %s"
                                     % (label, repr(char), repr(text)))
        self.handle.write("%s\t%s\n" % (title, seq))
+
+
if __name__ == "__main__" :
    # Quick self test of TabIterator, run only when this module is executed
    # directly. Written so that it parses under both Python 2 and Python 3.
    print("Running quick self test")
    try :
        from StringIO import StringIO # Python 2
    except ImportError :
        from io import StringIO # Python 3

    #This example has a trailing blank line which should be ignored
    handle = StringIO("Alpha\tAAAAAAA\nBeta\tCCCCCCC\n\n")
    records = list(TabIterator(handle))
    assert len(records) == 2
    # The first column should become both the .id and the .name
    assert [record.id for record in records] == ["Alpha", "Beta"]
    assert [record.name for record in records] == ["Alpha", "Beta"]

    #This example has an extra tab on the first line, so must be rejected
    handle = StringIO("Alpha\tAAAAAAA\tExtra\nBeta\tCCCCCCC\n")
    try :
        records = list(TabIterator(handle))
    except ValueError :
        #Good!
        pass
    else :
        assert False, "Should have rejected this invalid example!"

    print("Done")