--- /dev/null
+# Copyright 2000-2002 Andrew Dalke.
+# Copyright 2002-2004 Brad Chapman.
+# Copyright 2006-2009 by Peter Cock.
+# All rights reserved.
+# This code is part of the Biopython distribution and governed by its
+# license. Please see the LICENSE file that should have been included
+# as part of this package.
+"""Represent a Sequence Record, a sequence with annotation."""
+__docformat__ = "epytext en" #Simple markup to show doctests nicely
+
+# NEEDS TO BE SYNCH WITH THE REST OF BIOPYTHON AND BIOPERL
+# In particular, the SeqRecord and BioSQL.BioSeq.DBSeqRecord classes
+# need to be in sync (this is the BioSQL "Database SeqRecord", see
+# also BioSQL.BioSeq.DBSeq which is the "Database Seq" class)
+
+class _RestrictedDict(dict):
+ """Dict which only allows sequences of given length as values (PRIVATE).
+
+ This simple subclass of the python dictionary is used in the SeqRecord
+ object for holding per-letter-annotations. This class is intended to
+ prevent simple errors by only allowing python sequences (e.g. lists,
+ strings and tuples) to be stored, and only if their length matches that
+ expected (the length of the SeqRecord's seq object). It cannot however
+ prevent the entries being edited in situ (for example appending entries
+ to a list).
+ """
+ def __init__(self, length) :
+ """Create an EMPTY restricted dictionary."""
+ dict.__init__(self)
+ self._length = int(length)
+ def __setitem__(self, key, value) :
+ if not hasattr(value,"__len__") or not hasattr(value,"__getitem__") \
+ or len(value) != self._length :
+ raise TypeError("We only allow python sequences (lists, tuples or "
+ "strings) of length %i." % self._length)
+ dict.__setitem__(self, key, value)
+
+class SeqRecord(object):
+ """A SeqRecord object holds a sequence and information about it.
+
+ Main attributes:
+ - id - Identifier such as a locus tag (string)
+ - seq - The sequence itself (Seq object)
+
+ Additional attributes:
+ - name - Sequence name, e.g. gene name (string)
+ - description - Additional text (string)
+ - dbxrefs - List of database cross references (list of strings)
+ - features - Any (sub)features defined (list of SeqFeature objects)
+ - annotations - Further information about the whole sequence (dictionary)
+ Most entries are lists of strings.
+ - letter_annotations - Per letter/symbol annotation (restricted
+ dictionary). This holds python sequences (lists, strings
+ or tuples) whose length matches that of the sequence.
+ A typical use would be to hold a list of integers
+ representing sequencing quality scores, or a string
+ representing the secondary structure.
+
+ You will typically use Bio.SeqIO to read in sequences from files as
+ SeqRecord objects. However, you may want to create your own SeqRecord
+ objects directly (see the __init__ method for further details):
+
+ >>> from Bio.Seq import Seq
+ >>> from Bio.SeqRecord import SeqRecord
+ >>> from Bio.Alphabet import IUPAC
+ >>> record = SeqRecord(Seq("MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF",
+ ... IUPAC.protein),
+ ... id="YP_025292.1", name="HokC",
+ ... description="toxic membrane protein")
+ >>> print record
+ ID: YP_025292.1
+ Name: HokC
+ Description: toxic membrane protein
+ Number of features: 0
+ Seq('MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF', IUPACProtein())
+
+ If you want to save SeqRecord objects to a sequence file, use Bio.SeqIO
+ for this. For the special case where you want the SeqRecord turned into
+ a string in a particular file format there is a format method which uses
+ Bio.SeqIO internally:
+
+ >>> print record.format("fasta")
+ >YP_025292.1 toxic membrane protein
+ MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF
+ <BLANKLINE>
+ """
+ def __init__(self, seq, id = "<unknown id>", name = "<unknown name>",
+ description = "<unknown description>", dbxrefs = None,
+ features = None):
+ """Create a SeqRecord.
+
+ Arguments:
+ - seq - Sequence, required (Seq or Mutable object)
+ - id - Sequence identifier, recommended (string)
+ - name - Sequence name, optional (string)
+ - description - Sequence description, optional (string)
+ - dbxrefs - Database cross references, optional (list of strings)
+ - features - Any (sub)features, optional (list of SeqFeature objects)
+
+ You will typically use Bio.SeqIO to read in sequences from files as
+ SeqRecord objects. However, you may want to create your own SeqRecord
+ objects directly.
+
+ Note that while an id is optional, we strongly recommend you supply a
+ unique id string for each record. This is especially important
+ if you wish to write your sequences to a file.
+
+ If you don't have the actual sequence, but you do know its length,
+ then using the UnknownSeq object from Bio.Seq is appropriate.
+
+ You can create a 'blank' SeqRecord object, and then populate the
+ attributes later. Note that currently the annotations and the
+ letter_annotations dictionaries cannot be specified when creating
+ the SeqRecord.
+ """
+ if id is not None and not isinstance(id, basestring) :
+ #Lots of existing code uses id=None... this may be a bad idea.
+ raise TypeError("id argument should be a string")
+ if not isinstance(name, basestring) :
+ raise TypeError("name argument should be a string")
+ if not isinstance(description, basestring) :
+ raise TypeError("description argument should be a string")
+ if dbxrefs is not None and not isinstance(dbxrefs, list) :
+ raise TypeError("dbxrefs argument should be a list (of strings)")
+ if features is not None and not isinstance(features, list) :
+ raise TypeError("features argument should be a list (of SeqFeature objects)")
+ self._seq = seq
+ self.id = id
+ self.name = name
+ self.description = description
+ if dbxrefs is None:
+ dbxrefs = []
+ self.dbxrefs = dbxrefs
+ # annotations about the whole sequence
+ self.annotations = {}
+
+ # annotations about each letter in the sequence
+ if seq is None :
+ #Should we allow this and use a normal unrestricted dict?
+ self._per_letter_annotations = _RestrictedDict(length=0)
+ else :
+ try :
+ self._per_letter_annotations = _RestrictedDict(length=len(seq))
+ except :
+ raise TypeError("seq argument should be a Seq or MutableSeq")
+
+ # annotations about parts of the sequence
+ if features is None:
+ features = []
+ self.features = features
+
+ #TODO - Just make this a read only property?
+ def _set_per_letter_annotations(self, value) :
+ if not isinstance(value, dict) :
+ raise TypeError("The per-letter-annotations should be a "
+ "(restricted) dictionary.")
+ #Turn this into a restricted-dictionary (and check the entries)
+ try :
+ self._per_letter_annotations = _RestrictedDict(length=len(self.seq))
+ except AttributeError :
+ #e.g. seq is None
+ self._per_letter_annotations = _RestrictedDict(length=0)
+ self._per_letter_annotations.update(value)
+ letter_annotations = property( \
+ fget=lambda self : self._per_letter_annotations,
+ fset=_set_per_letter_annotations,
+ doc="""Dictionary of per-letter-annotation for the sequence.
+
+ For example, this can hold quality scores used in FASTQ or QUAL files.
+ Consider this example using Bio.SeqIO to read in an example Solexa
+ variant FASTQ file as a SeqRecord:
+
+ >>> from Bio import SeqIO
+ >>> handle = open("Quality/solexa.fastq", "rU")
+ >>> record = SeqIO.read(handle, "fastq-solexa")
+ >>> handle.close()
+ >>> print record.id, record.seq
+ slxa_0013_1_0001_24 ACAAAAATCACAAGCATTCTTATACACC
+ >>> print record.letter_annotations.keys()
+ ['solexa_quality']
+ >>> print record.letter_annotations["solexa_quality"]
+ [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -6, -1, -1, -4, -1, -4, -19, -10, -27, -18]
+
+ The per-letter-annotaions get sliced automatically if you slice the
+ parent SeqRecord, for example taking the last ten bases:
+
+ >>> sub_record = record[-10:]
+ >>> print sub_record.id, sub_record.seq
+ slxa_0013_1_0001_24 CTTATACACC
+ >>> print sub_record.letter_annotations["solexa_quality"]
+ [-6, -1, -1, -4, -1, -4, -19, -10, -27, -18]
+
+ Any python sequence (i.e. list, tuple or string) can be recorded in
+ the SeqRecord's letter_annotations dictionary as long as the length
+ matches that of the SeqRecord's sequence. e.g.
+
+ >>> len(sub_record.letter_annotations)
+ 1
+ >>> sub_record.letter_annotations["dummy"] = "abcdefghij"
+ >>> len(sub_record.letter_annotations)
+ 2
+
+ You can delete entries from the letter_annotations dictionary as usual:
+
+ >>> del sub_record.letter_annotations["solexa_quality"]
+ >>> sub_record.letter_annotations
+ {'dummy': 'abcdefghij'}
+
+ You can completely clear the dictionary easily as follows:
+
+ >>> sub_record.letter_annotations = {}
+ >>> sub_record.letter_annotations
+ {}
+ """)
+
+ def _set_seq(self, value) :
+ #TODO - Add a deprecation warning that the seq should be write only?
+ if self._per_letter_annotations :
+ #TODO - Make this a warning? Silently empty the dictionary?
+ raise ValueError("You must empty the letter annotations first!")
+ self._seq = value
+ try :
+ self._per_letter_annotations = _RestrictedDict(length=len(self.seq))
+ except AttributeError :
+ #e.g. seq is None
+ self._per_letter_annotations = _RestrictedDict(length=0)
+
+ seq = property(fget=lambda self : self._seq,
+ fset=_set_seq,
+ doc="The sequence itself, as a Seq or MutableSeq object.")
+
+ def __getitem__(self, index) :
+ """Returns a sub-sequence or an individual letter.
+
+ Splicing, e.g. my_record[5:10], returns a new SeqRecord for
+ that sub-sequence with approriate annotation preserved. The
+ name, id and description are kept.
+
+ Any per-letter-annotations are sliced to match the requested
+ sub-sequence. Unless a stride is used, all those features
+ which fall fully within the subsequence are included (with
+ their locations adjusted accordingly).
+
+ However, the annotations dictionary and the dbxrefs list are
+ not used for the new SeqRecord, as in general they may not
+ apply to the subsequence. If you want to preserve them, you
+ must explictly copy them to the new SeqRecord yourself.
+
+ Using an integer index, e.g. my_record[5] is shorthand for
+ extracting that letter from the sequence, my_record.seq[5].
+
+ For example, consider this short protein and its secondary
+ structure as encoded by the PDB (e.g. H for alpha helices),
+ plus a simple feature for its histidine self phosphorylation
+ site:
+
+ >>> from Bio.Seq import Seq
+ >>> from Bio.SeqRecord import SeqRecord
+ >>> from Bio.SeqFeature import SeqFeature, FeatureLocation
+ >>> from Bio.Alphabet import IUPAC
+ >>> rec = SeqRecord(Seq("MAAGVKQLADDRTLLMAGVSHDLRTPLTRIRLAT"
+ ... "EMMSEQDGYLAESINKDIEECNAIIEQFIDYLR",
+ ... IUPAC.protein),
+ ... id="1JOY", name="EnvZ",
+ ... description="Homodimeric domain of EnvZ from E. coli")
+ >>> rec.letter_annotations["secondary_structure"] = \
+ " S SSSSSSHHHHHTTTHHHHHHHHHHHHHHHHHHHHHHTHHHHHHHHHHHHHHHHHHHHHTT "
+ >>> rec.features.append(SeqFeature(FeatureLocation(20,21),
+ ... type = "Site"))
+
+ Now let's have a quick look at the full record,
+
+ >>> print rec
+ ID: 1JOY
+ Name: EnvZ
+ Description: Homodimeric domain of EnvZ from E. coli
+ Number of features: 1
+ Per letter annotation for: secondary_structure
+ Seq('MAAGVKQLADDRTLLMAGVSHDLRTPLTRIRLATEMMSEQDGYLAESINKDIEE...YLR', IUPACProtein())
+ >>> print rec.letter_annotations["secondary_structure"]
+ S SSSSSSHHHHHTTTHHHHHHHHHHHHHHHHHHHHHHTHHHHHHHHHHHHHHHHHHHHHTT
+ >>> print rec.features[0].location
+ [20:21]
+
+ Now let's take a sub sequence, here chosen as the first (fractured)
+ alpha helix which includes the histidine phosphorylation site:
+
+ >>> sub = rec[11:41]
+ >>> print sub
+ ID: 1JOY
+ Name: EnvZ
+ Description: Homodimeric domain of EnvZ from E. coli
+ Number of features: 1
+ Per letter annotation for: secondary_structure
+ Seq('RTLLMAGVSHDLRTPLTRIRLATEMMSEQD', IUPACProtein())
+ >>> print sub.letter_annotations["secondary_structure"]
+ HHHHHTTTHHHHHHHHHHHHHHHHHHHHHH
+ >>> print sub.features[0].location
+ [9:10]
+
+ You can also of course omit the start or end values, for
+ example to get the first ten letters only:
+
+ >>> print rec[:10]
+ ID: 1JOY
+ Name: EnvZ
+ Description: Homodimeric domain of EnvZ from E. coli
+ Number of features: 0
+ Per letter annotation for: secondary_structure
+ Seq('MAAGVKQLAD', IUPACProtein())
+
+ Or for the last ten letters:
+
+ >>> print rec[-10:]
+ ID: 1JOY
+ Name: EnvZ
+ Description: Homodimeric domain of EnvZ from E. coli
+ Number of features: 0
+ Per letter annotation for: secondary_structure
+ Seq('IIEQFIDYLR', IUPACProtein())
+
+ If you omit both, then you get a copy of the original record (although
+ lacking the annotations and dbxrefs):
+
+ >>> print rec[:]
+ ID: 1JOY
+ Name: EnvZ
+ Description: Homodimeric domain of EnvZ from E. coli
+ Number of features: 1
+ Per letter annotation for: secondary_structure
+ Seq('MAAGVKQLADDRTLLMAGVSHDLRTPLTRIRLATEMMSEQDGYLAESINKDIEE...YLR', IUPACProtein())
+
+ Finally, indexing with a simple integer is shorthand for pulling out
+ that letter from the sequence directly:
+
+ >>> rec[5]
+ 'K'
+ >>> rec.seq[5]
+ 'K'
+ """
+ if isinstance(index, int) :
+ #NOTE - The sequence level annotation like the id, name, etc
+ #do not really apply to a single character. However, should
+ #we try and expose any per-letter-annotation here? If so how?
+ return self.seq[index]
+ elif isinstance(index, slice) :
+ if self.seq is None :
+ raise ValueError("If the sequence is None, we cannot slice it.")
+ parent_length = len(self)
+ answer = self.__class__(self.seq[index],
+ id=self.id,
+ name=self.name,
+ description=self.description)
+ #TODO - The desription may no longer apply.
+ #It would be safer to change it to something
+ #generic like "edited" or the default value.
+
+ #Don't copy the annotation dict and dbxefs list,
+ #they may not apply to a subsequence.
+ #answer.annotations = dict(self.annotations.iteritems())
+ #answer.dbxrefs = self.dbxrefs[:]
+
+ #TODO - Cope with strides by generating ambiguous locations?
+ if index.step is None or index.step == 1 :
+ #Select relevant features, add them with shifted locations
+ if index.start is None :
+ start = 0
+ else :
+ start = index.start
+ if index.stop is None :
+ stop = -1
+ else :
+ stop = index.stop
+ if (start < 0 or stop < 0) and parent_length == 0 :
+ raise ValueError, \
+ "Cannot support negative indices without the sequence length"
+ if start < 0 :
+ start = parent_length - start
+ if stop < 0 :
+ stop = parent_length - stop + 1
+ #assert str(self.seq)[index] == str(self.seq)[start:stop]
+ for f in self.features :
+ if start <= f.location.start.position \
+ and f.location.end.position < stop :
+ answer.features.append(f._shift(-start))
+
+ #Slice all the values to match the sliced sequence
+ #(this should also work with strides, even negative strides):
+ for key, value in self.letter_annotations.iteritems() :
+ answer._per_letter_annotations[key] = value[index]
+
+ return answer
+ raise ValueError, "Invalid index"
+
+ def __iter__(self) :
+ """Iterate over the letters in the sequence.
+
+ For example, using Bio.SeqIO to read in a protein FASTA file:
+
+ >>> from Bio import SeqIO
+ >>> record = SeqIO.read(open("Amino/loveliesbleeding.pro"),"fasta")
+ >>> for amino in record :
+ ... print amino
+ ... if amino == "L" : break
+ X
+ A
+ G
+ L
+ >>> print record.seq[3]
+ L
+
+ This is just a shortcut for iterating over the sequence directly:
+
+ >>> for amino in record.seq :
+ ... print amino
+ ... if amino == "L" : break
+ X
+ A
+ G
+ L
+ >>> print record.seq[3]
+ L
+
+ Note that this does not facilitate iteration together with any
+ per-letter-annotation. However, you can achieve that using the
+ python zip function on the record (or its sequence) and the relevant
+ per-letter-annotation:
+
+ >>> from Bio import SeqIO
+ >>> rec = SeqIO.read(open("Quality/solexa.fastq", "rU"),
+ ... "fastq-solexa")
+ >>> print rec.id, rec.seq
+ slxa_0013_1_0001_24 ACAAAAATCACAAGCATTCTTATACACC
+ >>> print rec.letter_annotations.keys()
+ ['solexa_quality']
+ >>> for nuc, qual in zip(rec,rec.letter_annotations["solexa_quality"]) :
+ ... if qual < -10 :
+ ... print nuc, qual
+ C -19
+ C -27
+ C -18
+
+ You may agree that using zip(rec.seq, ...) is more explicit than using
+ zip(rec, ...) as shown above.
+ """
+ return iter(self.seq)
+
+ def __str__(self) :
+ """A human readable summary of the record and its annotation (string).
+
+ The python built in function str works by calling the object's ___str__
+ method. e.g.
+
+ >>> from Bio.Seq import Seq
+ >>> from Bio.SeqRecord import SeqRecord
+ >>> from Bio.Alphabet import IUPAC
+ >>> record = SeqRecord(Seq("MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF",
+ ... IUPAC.protein),
+ ... id="YP_025292.1", name="HokC",
+ ... description="toxic membrane protein, small")
+ >>> print str(record)
+ ID: YP_025292.1
+ Name: HokC
+ Description: toxic membrane protein, small
+ Number of features: 0
+ Seq('MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF', IUPACProtein())
+
+ In this example you don't actually need to call str explicity, as the
+ print command does this automatically:
+
+ >>> print record
+ ID: YP_025292.1
+ Name: HokC
+ Description: toxic membrane protein, small
+ Number of features: 0
+ Seq('MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF', IUPACProtein())
+
+ Note that long sequences are shown truncated.
+ """
+ lines = []
+ if self.id : lines.append("ID: %s" % self.id)
+ if self.name : lines.append("Name: %s" % self.name)
+ if self.description : lines.append("Description: %s" % self.description)
+ if self.dbxrefs : lines.append("Database cross-references: " \
+ + ", ".join(self.dbxrefs))
+ lines.append("Number of features: %i" % len(self.features))
+ for a in self.annotations:
+ lines.append("/%s=%s" % (a, str(self.annotations[a])))
+ if self.letter_annotations :
+ lines.append("Per letter annotation for: " \
+ + ", ".join(self.letter_annotations.keys()))
+ #Don't want to include the entire sequence,
+ #and showing the alphabet is useful:
+ lines.append(repr(self.seq))
+ return "\n".join(lines)
+
+ def __repr__(self) :
+ """A concise summary of the record for debugging (string).
+
+ The python built in function repr works by calling the object's ___repr__
+ method. e.g.
+
+ >>> from Bio.Seq import Seq
+ >>> from Bio.SeqRecord import SeqRecord
+ >>> from Bio.Alphabet import generic_protein
+ >>> rec = SeqRecord(Seq("MASRGVNKVILVGNLGQDPEVRYMPNGGAVANITLATSESWRDKAT"
+ ... +"GEMKEQTEWHRVVLFGKLAEVASEYLRKGSQVYIEGQLRTRKWTDQ"
+ ... +"SGQDRYTTEVVVNVGGTMQMLGGRQGGGAPAGGNIGGGQPQGGWGQ"
+ ... +"PQQPQGGNQFSGGAQSRPQQSAPAAPSNEPPMDFDDDIPF",
+ ... generic_protein),
+ ... id="NP_418483.1", name="b4059",
+ ... description="ssDNA-binding protein",
+ ... dbxrefs=["ASAP:13298", "GI:16131885", "GeneID:948570"])
+ >>> print repr(rec)
+ SeqRecord(seq=Seq('MASRGVNKVILVGNLGQDPEVRYMPNGGAVANITLATSESWRDKATGEMKEQTE...IPF', ProteinAlphabet()), id='NP_418483.1', name='b4059', description='ssDNA-binding protein', dbxrefs=['ASAP:13298', 'GI:16131885', 'GeneID:948570'])
+
+ At the python prompt you can also use this shorthand:
+
+ >>> rec
+ SeqRecord(seq=Seq('MASRGVNKVILVGNLGQDPEVRYMPNGGAVANITLATSESWRDKATGEMKEQTE...IPF', ProteinAlphabet()), id='NP_418483.1', name='b4059', description='ssDNA-binding protein', dbxrefs=['ASAP:13298', 'GI:16131885', 'GeneID:948570'])
+
+ Note that long sequences are shown truncated.
+ """
+ return self.__class__.__name__ \
+ + "(seq=%s, id=%s, name=%s, description=%s, dbxrefs=%s)" \
+ % tuple(map(repr, (self.seq, self.id, self.name,
+ self.description, self.dbxrefs)))
+
+ def format(self, format) :
+ r"""Returns the record as a string in the specified file format.
+
+ The format should be a lower case string supported as an output
+ format by Bio.SeqIO, which is used to turn the SeqRecord into a
+ string. e.g.
+
+ >>> from Bio.Seq import Seq
+ >>> from Bio.SeqRecord import SeqRecord
+ >>> from Bio.Alphabet import IUPAC
+ >>> record = SeqRecord(Seq("MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF",
+ ... IUPAC.protein),
+ ... id="YP_025292.1", name="HokC",
+ ... description="toxic membrane protein")
+ >>> record.format("fasta")
+ '>YP_025292.1 toxic membrane protein\nMKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF\n'
+ >>> print record.format("fasta")
+ >YP_025292.1 toxic membrane protein
+ MKQHKAMIVALIVICITAVVAALVTRKDLCEVHIRTGQTEVAVF
+ <BLANKLINE>
+
+ The python print command automatically appends a new line, meaning
+ in this example a blank line is shown. If you look at the string
+ representation you can see there is a trailing new line (shown as
+ slash n) which is important when writing to a file or if
+ concatenating mutliple sequence strings together.
+
+ Note that this method will NOT work on every possible file format
+ supported by Bio.SeqIO (e.g. some are for multiple sequences only).
+ """
+ #See also the __format__ added for Python 2.6 / 3.0, PEP 3101
+ #See also the Bio.Align.Generic.Alignment class and its format()
+ return self.__format__(format)
+
+ def __format__(self, format_spec) :
+ """Returns the record as a string in the specified file format.
+
+ This method supports the python format() function added in
+ Python 2.6/3.0. The format_spec should be a lower case
+ string supported by Bio.SeqIO as an output file format.
+ See also the SeqRecord's format() method.
+ """
+ if format_spec:
+ from StringIO import StringIO
+ from Bio import SeqIO
+ handle = StringIO()
+ SeqIO.write([self], handle, format_spec)
+ return handle.getvalue()
+ else :
+ #Follow python convention and default to using __str__
+ return str(self)
+
+ def __len__(self) :
+ """Returns the length of the sequence.
+
+ For example, using Bio.SeqIO to read in a FASTA nucleotide file:
+
+ >>> from Bio import SeqIO
+ >>> record = SeqIO.read(open("Nucleic/sweetpea.nu"),"fasta")
+ >>> len(record)
+ 309
+ >>> len(record.seq)
+ 309
+ """
+ return len(self.seq)
+
+ def __nonzero__(self) :
+ """Returns True regardless of the length of the sequence.
+
+ This behaviour is for backwards compatibility, since until the
+ __len__ method was added, a SeqRecord always evaluated as True.
+
+ Note that in comparison, a Seq object will evaluate to False if it
+ has a zero length sequence.
+
+ WARNING: The SeqRecord may in future evaluate to False when its
+ sequence is of zero length (in order to better match the Seq
+ object behaviour)!
+ """
+ return True
+
+def _test():
+ """Run the Bio.SeqRecord module's doctests (PRIVATE).
+
+ This will try and locate the unit tests directory, and run the doctests
+ from there in order that the relative paths used in the examples work.
+ """
+ import doctest
+ import os
+ if os.path.isdir(os.path.join("..","Tests")) :
+ print "Runing doctests..."
+ cur_dir = os.path.abspath(os.curdir)
+ os.chdir(os.path.join("..","Tests"))
+ doctest.testmod()
+ os.chdir(cur_dir)
+ del cur_dir
+ print "Done"
+
+if __name__ == "__main__":
+ _test()