1 # Copyright 1999 by Jeffrey Chang. All rights reserved.
2 # This code is part of the Biopython distribution and governed by its
3 # license. Please see the LICENSE file that should have been included
4 # as part of this package.
8 This module provides a way to create indexes to text files.
11 Index Dictionary-like class used to store index information.
13 _ShelveIndex An Index class based on the shelve module.
14 _InMemoryIndex An in-memory Index class.
# Dict subclass whose constructor opens a `shelve` database named after
# `indexname` and keeps the open shelf on `self.data`.
# NOTE(review): this listing is lossy -- the embedded line numbers jump
# (23 -> 26, 34 -> 38, 40 -> 45, ...), so the docstring terminator, the
# `__version` value, the rest of the `files` list and the control flow
# around the two shelve.open() calls are not visible here.  Confirm every
# statement below against the complete source before relying on it.
22 class _ShelveIndex(dict):
23 """An index file wrapped around shelve.
26 # Without a good dbm module installed, this is pretty slow and
27 # generates large files. When generating an index on a FASTA-
28 # formatted file with 82000 sequences (37Mb), the
29 # index 'dat' file is 42Mb and 'dir' file is 8Mb.
# Key under which the index format version is stored inside the shelf.
32 __version_key = '__version'
# Open the shelf for `indexname`.  Two paths are visible below:
#   * an existing shelf is opened read-only (flag='r'); its stored version
#     is then read back and IOError is raised when it is absent
#     ("Unrecognized index format") or differs from this class's version;
#   * a new, empty shelf is created (flag='n') and stamped with this
#     class's version.
# NOTE(review): how `truncate` and the `raise Exception("open a new
# shelf")` statement select between these paths is on lines missing from
# this listing; presumably the exception is raised on purpose to reach the
# flag='n' branch -- confirm.
34 def __init__(self, indexname, truncate=None):
38 # In python 1.52 and before, dumbdbm (under shelve)
39 # doesn't clear the old database.
40 files = [indexname + '.dir',
45 if os.path.exists(file):
47 raise Exception("open a new shelf")
48 self.data = shelve.open(indexname, flag='r')
51 self.data = shelve.open(indexname, flag='n')
52 self.data[self.__version_key] = self.__version
54 # Check to make sure the database is the correct version.
55 version = self.data.get(self.__version_key, None)
57 raise IOError("Unrecognized index format")
58 elif version != self.__version:
59 raise IOError("Version %s doesn't match my version %s" \
60 % (version, self.__version))
# Guard testing whether the instance attribute `data` was ever assigned.
# NOTE(review): the enclosing `def` and the guarded statement are missing
# from this listing.  `dict.has_key` exists only in Python 2; the
# portable spelling is `'data' in self.__dict__`.
63 if self.__dict__.has_key('data'):
# Dict subclass that is loaded from, and written back to, a plain-text
# index file whose path is kept in `self._indexname`.
# NOTE(review): this listing is lossy -- the embedded line numbers jump
# (67 -> 76, 79 -> 81, 84 -> 88, 94 -> 96, 109 -> 116, ...), so the
# docstring terminator, the `__version` value, a loop header and at least
# one `def` line are not visible here.  Confirm against the complete
# source before relying on any statement below.
66 class _InMemoryIndex(dict):
67 """This creates an in-memory index file.
# Name of the version key (the visible code in this class compares and
# writes `self.__version`; it never reads this key).
76 __version_key = '__version'
# Remember the index path and reset the "changed" flag; if the file
# exists, read it: the first line is the encoded format version, which
# must equal this class's version or IOError is raised.  Each entry line
# is split on whitespace into an encoded key and an encoded value, and
# both are decoded with `_toobj`.
# NOTE(review): what the `truncate` branch does to the file, the loop
# header that produces `line`, and what is done with the decoded pair are
# on lines missing from this listing.
78 def __init__(self, indexname, truncate=None):
79 self._indexname = indexname
81 self.__changed = 0 # the index hasn't changed
83 # Remove the database if truncate is true.
84 if truncate and os.path.exists(indexname):
88 # Load the database if it exists
89 if os.path.exists(indexname):
90 handle = open(indexname)
91 version = self._toobj(handle.readline().rstrip())
92 if version != self.__version:
93 raise IOError("Version %s doesn't match my version %s" \
94 % (version, self.__version))
96 key, value = line.split()
97 key, value = self._toobj(key), self._toobj(value)
# Bulk update.
# NOTE(review): likely bug -- inside this method the parameter name `dict`
# shadows the builtin type, so `dict.update(self, dict)` is looked up on
# the *argument* and called with two positional arguments rather than
# invoking the base-class implementation; with a real dict argument that
# raises TypeError.  One line between the `def` and this call is missing
# from the listing.
101 def update(self, dict):
103 dict.update(self, dict)
# Item assignment, delegated to the base dict implementation.
# NOTE(review): one line between the `def` and the call is missing here.
104 def __setitem__(self, key, value):
106 dict.__setitem__(self, key, value)
# Item deletion, delegated to the base dict implementation.
# NOTE(review): one line between the `def` and the call is missing here.
107 def __delitem__(self, key):
109 dict.__delitem__(self, key)
# Fragment of a method whose `def` line is not in this listing: it
# rewrites the file at `self._indexname` (mode 'w'), writing the encoded
# version on the first line and then one "key value" line per item, each
# field encoded with `_tostr`.
# NOTE(review): no close() of `handle` is visible -- confirm one exists on
# the missing lines.
116 handle = open(self._indexname, 'w')
117 handle.write("%s\n" % self._tostr(self.__version))
118 for key, value in self.items():
119 handle.write("%s %s\n" %
120 (self._tostr(key), self._tostr(value)))
def _tostr(self, obj):
    """Encode *obj* as a whitespace-free, comma-separated string.

    The index file uses whitespace as its field delimiter, so a pickle
    cannot be stored verbatim.  Instead the object is pickled and every
    byte of the pickle is written as a signed decimal integer (array
    typecode 'b', i.e. -128..127); the integers are joined with commas.
    This is not a compact representation, only a delimiter-safe one.
    """
    pickled = cPickle.dumps(obj)
    signed_bytes = array.array('b', pickled)
    return ','.join([str(byte) for byte in signed_bytes])
def _toobj(self, str):
    """Decode a string produced by ``_tostr`` back into the object.

    ``str`` is a comma-separated list of decimal integers, one per byte
    of a pickle, written as *signed* bytes (-128..127).

    Raises ValueError when a field is not an integer and OverflowError
    when a value does not fit in a signed byte.
    """
    # array('b') keeps the range check on each field.
    signed_bytes = array.array('b', [int(field) for field in str.split(',')])
    # Bytes >= 0x80 were written as negative numbers; map them back to
    # 0..255.  Calling chr() on the negative values raised ValueError, so
    # any pickle containing a high byte could not be read back.
    raw = bytearray(value & 0xFF for value in signed_bytes)
    # NOTE: unpickling can execute arbitrary code -- only load index
    # files that come from a trusted source.
    return cPickle.loads(bytes(raw))
# Public name `Index` is bound to the in-memory implementation.
142 Index = _InMemoryIndex