+++ /dev/null
-# Copyright 2006-2008 by Peter Cock. All rights reserved.
-# This code is part of the Biopython distribution and governed by its
-# license. Please see the LICENSE file that should have been included
-# as part of this package.
-"""
-Bio.SeqIO support module (not for general use).
-
-Unless you are writing a new parser or writer for Bio.SeqIO, you should not
-use this module. It provides base classes to try and simplify things.
-"""
-
-from Bio.Alphabet import generic_alphabet
-
-class SequenceIterator :
- """Base class for building SeqRecord iterators.
-
- You should write a next() method to return SeqRecord
- objects. You may wish to redefine the __init__
- method as well.
- """
- def __init__(self, handle, alphabet=generic_alphabet) :
- """Create a SequenceIterator object.
-
- handle - input file
- alphabet - optional, e.g. Bio.Alphabet.generic_protein
-
- Note when subclassing:
- - there should be a single non-optional argument,
- the handle.
- - you do not have to require an alphabet.
- - you can add additional optional arguments."""
- self.handle = handle
- self.alphabet = alphabet
- #####################################################
- # You may want to subclass this, for example #
- # to read through the file to find the first record,#
- # or if additional arguments are required. #
- #####################################################
-
- def next(self) :
- """Return the next record in the file.
-
- This method should be replaced by any derived class to do something useful."""
- raise NotImplementedError("This object should be subclassed")
- #####################################################
- # You SHOULD subclass this, to split the file up #
- # into your individual records, and convert these #
- # into useful objects, e.g. return SeqRecord object #
- #####################################################
-
- def __iter__(self):
- """Iterate over the entries as a SeqRecord objects.
-
- Example usage for Fasta files:
-
- myFile = open("example.fasta","r")
- myFastaReader = FastaIterator(myFile)
- for record in myFastaReader :
- print record.id
- print record.seq
- myFile.close()"""
- return iter(self.next, None)
-
-class InterlacedSequenceIterator(SequenceIterator) :
- """Base class for any iterator of a non-sequential file type.
-
- This object is not intended for use directly.
-
- When writing a parser for any interlaced sequence file where the whole
- file must be read in order to extract any single record, then you should
- subclass this object.
-
- All you need to do is to define your own:
- (1) __init__ method to parse the file and call self.move_start()
- (2) __len__ method to return the number of records
- (3) __getitem__ to return any requested record.
-
- This class will then provide the iterator methods including next(), but relies
- on knowing the total number of records and tracking the pending record index in
- as self._n
-
- It is up to the subclassed object to decide if it wants to generate a cache of
- SeqRecords when initialised, or simply use its own lists and dicts and create
- SeqRecords on request.
- """
-
- def __init__(self) :
- """Create the object.
-
- This method should be replaced by any derived class to do something useful."""
- #We assume that your implementation of __init__ will ensure self._n=0
- self.move_start()
- raise NotImplementedError("This object method should be subclassed")
- #####################################################
- # You SHOULD subclass this #
- #####################################################
-
- def __len__(self) :
- """Return the number of records.
-
- This method should be replaced by any derived class to do something useful."""
- raise NotImplementedError("This object method should be subclassed")
- #####################################################
- # You SHOULD subclass this #
- #####################################################
-
- def __getitem__(self, i) :
- """Return the requested record.
-
- This method should be replaced by any derived class to do something
- useful.
-
- It should NOT touch the value of self._n"""
- raise NotImplementedError("This object method should be subclassed")
- #####################################################
- # You SHOULD subclass this #
- #####################################################
-
- def move_start(self) :
- self._n = 0
-
- def next(self) :
- next_record = self._n
- if next_record < len(self) :
- self._n = next_record+1
- return self[next_record]
- else :
- #StopIteration
- return None
-
- def __iter__(self):
- return iter(self.next, None)
-
-class SequenceWriter:
- """This class should be subclassed.
-
- Interlaced file formats (e.g. Clustal) should subclass directly.
-
- Sequential file formats (e.g. Fasta, GenBank) should subclass
- the SequentialSequenceWriter class instead.
- """
- def __init__(self, handle):
- """Creates the writer object.
-
- Use the method write_file() to actually record your sequence records."""
- self.handle = handle
-
- def _get_seq_string(self, record):
- """Use this to catch errors like the sequence being None."""
- try :
- #The tostring() method is part of the Seq API, we could instead
- #use str(record.seq) but that would give a string "None" if the
- #sequence was None, and unpredicatable output if an unexpected
- #object was present.
- return record.seq.tostring()
- except AttributeError :
- if record.seq is None :
- #We could silently treat this as an empty sequence, Seq(""),
- #but that would be an implict assumption we should avoid.
- raise TypeError("SeqRecord (id=%s) has None for its sequence." \
- % record.id)
- else :
- raise TypeError("SeqRecord (id=%s) has an invalid sequence." \
- % record.id)
-
- def clean(self, text) :
- """Use this to avoid getting newlines in the output."""
- answer = text
- for x in ["\n", "\r"] :
- answer = answer.replace(x, " ")
- return answer.replace(" ", " ")
-
- def write_file(self, records) :
- """Use this to write an entire file containing the given records.
-
- records - A list or iterator returning SeqRecord objects
-
- Should return the number of records (as an integer).
-
- This method can only be called once."""
- #Note when implementing this, you should close the file at the end.
- raise NotImplementedError("This object should be subclassed")
- #####################################################
- # You SHOULD subclass this #
- #####################################################
-
-class SequentialSequenceWriter(SequenceWriter):
- """This class should be subclassed.
-
- It is intended for sequential file formats with an (optional)
- header, repeated records, and an (optional) footer.
-
- In this case (as with interlaced file formats), the user may
- simply call the write_file() method and be done.
-
- However, they may also call the write_header(), followed
- by multiple calls to write_record() and/or write_records()
- followed finally by write_footer().
-
- Users must call write_header() and write_footer() even when
- the file format concerned doesn't have a header or footer.
- This is to try and make life as easy as possible when
- switching the output format.
-
- Note that write_header() cannot require any assumptions about
- the number of records.
- """
- def __init__(self, handle):
- self.handle = handle
- self._header_written = False
- self._record_written = False
- self._footer_written = False
-
- def write_header(self) :
- assert not self._header_written, "You have aleady called write_header()"
- assert not self._record_written, "You have aleady called write_record() or write_records()"
- assert not self._footer_written, "You have aleady called write_footer()"
- self._header_written = True
-
- def write_footer(self) :
- assert self._header_written, "You must call write_header() first"
- assert self._record_written, "You have not called write_record() or write_records() yet"
- assert not self._footer_written, "You have aleady called write_footer()"
- self._footer_written = True
-
- def write_record(self, record):
- """Write a single record to the output file.
-
- record - a SeqRecord object
-
- Once you have called write_header() you can call write_record()
- and/or write_records() as many times as needed. Then call
- write_footer() and close()."""
- assert self._header_written, "You must call write_header() first"
- assert not self._footer_written, "You have already called write_footer()"
- self._record_written = True
- raise NotImplementedError("This object should be subclassed")
- #####################################################
- # You SHOULD subclass this #
- #####################################################
-
- def write_records(self, records):
- """Write multiple record to the output file.
-
- records - A list or iterator returning SeqRecord objects
-
- Once you have called write_header() you can call write_record()
- and/or write_records() as many times as needed. Then call
- write_footer() and close().
-
- Returns the number of records written.
- """
- #Default implementation:
- assert self._header_written, "You must call write_header() first"
- assert not self._footer_written, "You have already called write_footer()"
- count = 0
- for record in records :
- self.write_record(record)
- count += 1
- #Mark as true, even if there where no records
- self._record_written = True
- return count
-
- def write_file(self, records) :
- """Use this to write an entire file containing the given records.
-
- records - A list or iterator returning SeqRecord objects
-
- This method can only be called once. Returns the number of records
- written.
- """
- self.write_header()
- count = self.write_records(records)
- self.write_footer()
- return count