--- /dev/null
+#include <Python.h>
+#include <marshal.h>
+#include "trie.h"
+
+#if PY_VERSION_HEX < 0x02050000
+#define Py_ssize_t int
+#endif
+
+
+
+staticforward PyTypeObject Trie_Type;
+
+typedef struct {
+ PyObject_HEAD
+ Trie trie;
+} trieobject;
+
+static PyObject*
+trie_trie(PyObject* self, PyObject* args)
+{
+ trieobject* trieobj;
+ Trie trie;
+
+ if (!PyArg_ParseTuple(args,":trie"))
+ return NULL;
+ if(!(trie = Trie_new()))
+ return PyErr_NoMemory();
+ if(!(trieobj = PyObject_New(trieobject, &Trie_Type)))
+ return NULL;
+ trieobj->trie = trie;
+ return (PyObject*)trieobj;
+}
+
+static void
+_decref_objects(const unsigned char *key, const void *value, void *data)
+{
+ Py_DECREF((PyObject *)value);
+}
+
+static void
+trie_dealloc(PyObject* self)
+{
+ trieobject *mp = (trieobject *)self;
+ Trie_iterate(mp->trie, _decref_objects, NULL);
+ Trie_del(mp->trie);
+ PyObject_Del(self);
+}
+
+static Py_ssize_t
+trie_length(trieobject *mp)
+{
+ return Trie_len(mp->trie);
+}
+
+static PyObject *
+trie_subscript(trieobject *mp, PyObject *py_key)
+{
+ unsigned char *key;
+ PyObject *py_value;
+
+ /* Make sure key is a string. */
+ if(!PyString_Check(py_key)) {
+ PyErr_SetString(PyExc_TypeError, "key must be a string");
+ return NULL;
+ }
+ key = (unsigned char *)PyString_AS_STRING(py_key);
+ py_value = (PyObject *)Trie_get(mp->trie, key);
+ if(py_value == NULL)
+ PyErr_SetString(PyExc_KeyError, (char *)key);
+ else
+ Py_INCREF(py_value);
+ return py_value;
+}
+
+static int
+trie_ass_sub(trieobject *mp, PyObject *py_key, PyObject *py_value)
+{
+ unsigned char *key;
+ PyObject *py_prev;
+
+ /* Make sure key is a string. */
+ if(!PyString_Check(py_key)) {
+ PyErr_SetString(PyExc_TypeError, "key must be a string");
+ return -1;
+ }
+ key = (unsigned char *)PyString_AS_STRING((char *)py_key);
+
+ /* Check to see whether something already exists at that key. If
+ there's already an object there, then I will have to remove it.
+ */
+ py_prev = (PyObject *)Trie_get(mp->trie, key);
+ if(py_prev) {
+ Py_DECREF(py_prev);
+ }
+
+ /* The client wants to delete a key from a dictionary. The Trie
+ API doesn't support this, so I will just overwrite it with
+ NULL. */
+ if(!py_value) {
+ /* If the key doesn't exist, raise a KeyError. */
+ if(!py_prev) {
+ PyErr_SetString(PyExc_KeyError, (char *)key);
+ return -1;
+ }
+ Trie_set(mp->trie, key, NULL);
+ }
+ /* The client wants to set a key in the dictionary. */
+ else {
+ Py_INCREF(py_value);
+ if(Trie_set(mp->trie, key, py_value)) {
+ PyErr_SetString(PyExc_AssertionError, "error setting trie");
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static char has_key__doc__[] =
+"D.has_key(k) -> 1 if D has a key k, else 0";
+
+static PyObject *
+trie_has_key(trieobject *mp, PyObject *py_key)
+{
+ unsigned char *key;
+ int has_key;
+
+ /* Make sure key is a string. */
+ if(!PyString_Check(py_key)) {
+ PyErr_SetString(PyExc_TypeError, "key must be a string");
+ return NULL;
+ }
+ key = (unsigned char *)PyString_AS_STRING(py_key);
+ has_key = Trie_has_key(mp->trie, key);
+ return PyInt_FromLong((long)has_key);
+}
+
+static PyObject *
+trie_has_key_onearg(trieobject *mp, PyObject *py_args)
+{
+ PyObject *py_arg;
+ if(!PyArg_ParseTuple(py_args, "O", &py_arg))
+ return NULL;
+ return trie_has_key(mp, py_arg);
+}
+
+
+
+static char has_prefix__doc__[] =
+"D.has_prefix(k) -> 1 if D has a prefix k, else 0";
+
+static PyObject *
+trie_has_prefix(trieobject *mp, PyObject *py_prefix)
+{
+ unsigned char *prefix;
+ int has_prefix;
+
+ /* Make sure prefix is a string. */
+ if(!PyString_Check(py_prefix)) {
+ PyErr_SetString(PyExc_TypeError, "k must be a string");
+ return NULL;
+ }
+ prefix = (unsigned char *)PyString_AS_STRING(py_prefix);
+ has_prefix = Trie_has_prefix(mp->trie, prefix);
+ return PyInt_FromLong((long)has_prefix);
+}
+
+static PyObject *
+trie_has_prefix_onearg(trieobject *mp, PyObject *py_args)
+{
+ PyObject *py_arg;
+ if(!PyArg_ParseTuple(py_args, "O", &py_arg))
+ return NULL;
+ return trie_has_prefix(mp, py_arg);
+}
+
+static char with_prefix__doc__[] =
+"D.with_prefix(prefix) -> list of D's keys that begins with prefix";
+
+static void
+_trie_with_prefix_helper(const unsigned char *key, const void *value,
+ void *data)
+{
+ PyObject *py_list = (PyObject *)data;
+ PyObject *py_key;
+
+ if(PyErr_Occurred())
+ return;
+
+ if(!(py_key = PyString_FromString((const char *)key)))
+ return;
+ PyList_Append(py_list, py_key);
+ Py_DECREF(py_key);
+}
+
+static PyObject *
+trie_with_prefix(trieobject *mp, PyObject *py_prefix)
+{
+ unsigned char *prefix;
+ PyObject *py_list;
+
+ /* Make sure prefix is a string. */
+ if(!PyString_Check(py_prefix)) {
+ PyErr_SetString(PyExc_TypeError, "k must be a string");
+ return NULL;
+ }
+ prefix = (unsigned char *)PyString_AS_STRING(py_prefix);
+
+ if(!(py_list = PyList_New(0)))
+ return NULL;
+ Trie_with_prefix(mp->trie, prefix,
+ _trie_with_prefix_helper, (void *)py_list);
+ if(PyErr_Occurred()) {
+ Py_DECREF(py_list);
+ return NULL;
+ }
+ return py_list;
+}
+
+static PyObject *
+trie_with_prefix_onearg(trieobject *mp, PyObject *py_args)
+{
+ PyObject *py_arg;
+ if(!PyArg_ParseTuple(py_args, "O", &py_arg))
+ return NULL;
+ return trie_with_prefix(mp, py_arg);
+}
+
+
+static char keys__doc__[] =
+"D.keys() -> list of D's keys";
+
+static void
+_trie_keys_helper(const unsigned char *key, const void *value, void *data)
+{
+ PyObject *py_list = (PyObject *)data;
+ PyObject *py_key;
+
+ if(PyErr_Occurred())
+ return;
+
+ if(!(py_key = PyString_FromString((char *)key)))
+ return;
+ PyList_Append(py_list, py_key);
+ Py_DECREF(py_key);
+}
+
+static PyObject *
+trie_keys(trieobject *mp)
+{
+ PyObject *py_list;
+
+ if(!(py_list = PyList_New(0)))
+ return NULL;
+ Trie_iterate(mp->trie, _trie_keys_helper, (void *)py_list);
+ if(PyErr_Occurred()) {
+ Py_DECREF(py_list);
+ return NULL;
+ }
+ return py_list;
+}
+
+static PyObject *
+trie_keys_noargs(trieobject *mp, PyObject *py_args)
+{
+ if(PyTuple_Size(py_args) != 0) {
+ PyErr_SetString(PyExc_ValueError, "no args expected");
+ return NULL;
+ }
+ return trie_keys(mp);
+}
+
+static char values__doc__[] =
+"D.values() -> list of D's values";
+
+static void
+_trie_values_helper(const unsigned char *key, const void *value, void *data)
+{
+ PyObject *py_list = (PyObject *)data;
+ if(PyErr_Occurred())
+ return;
+ PyList_Append(py_list, (PyObject *)value);
+}
+
+static PyObject *
+trie_values(trieobject *mp)
+{
+ PyObject *py_list;
+
+ if(!(py_list = PyList_New(0)))
+ return NULL;
+ Trie_iterate(mp->trie, _trie_values_helper, (void *)py_list);
+ if(PyErr_Occurred()) {
+ Py_DECREF(py_list);
+ return NULL;
+ }
+ return py_list;
+}
+
+static PyObject *
+trie_values_noargs(trieobject *mp, PyObject *py_args)
+{
+ if(PyTuple_Size(py_args) != 0) {
+ PyErr_SetString(PyExc_ValueError, "no args expected");
+ return NULL;
+ }
+ return trie_values(mp);
+}
+
+static char get__doc__[] =
+"D.get(k[,d]) -> D[k] if D.has_key(k), else d. d defaults to None.";
+
+static PyObject *
+trie_get(trieobject *mp, PyObject *args)
+{
+ unsigned char *key;
+ PyObject *py_value;
+ PyObject *py_failobj = Py_None;
+
+ if (!PyArg_ParseTuple(args, "s|O:get", &key, &py_failobj))
+ return NULL;
+ py_value = (PyObject *)Trie_get(mp->trie, key);
+ if(!py_value)
+ py_value = py_failobj;
+ Py_INCREF(py_value);
+ return py_value;
+}
+
+static char get_approximate__doc__[] =
+"D.get_approximate(key, k) -> List of (key, value, mismatches) in D, allowing up to k mismatches in key.";
+
+void
+_trie_get_approximate_helper(const unsigned char *key, const void *value,
+ const int mismatches, void *data)
+{
+ /* Append a tuple of (key, value) to data, which is a PyList. */
+ PyObject *py_list = (PyObject *)data,
+ *py_value = (PyObject *)value,
+ *py_key,
+ *py_tuple,
+ *py_mismatches;
+
+ if(PyErr_Occurred())
+ return;
+
+ if(!(py_key = PyString_FromString((const char *)key)))
+ return;
+ if(!(py_mismatches = PyInt_FromLong(mismatches))) {
+ Py_DECREF(py_key);
+ return;
+ }
+ Py_INCREF(py_value);
+
+ if(!(py_tuple = PyTuple_New(3))) {
+ Py_DECREF(py_key);
+ Py_DECREF(py_mismatches);
+ Py_DECREF(py_value);
+ return;
+ }
+ PyTuple_SetItem(py_tuple, 0, py_key);
+ PyTuple_SetItem(py_tuple, 1, py_value);
+ PyTuple_SetItem(py_tuple, 2, py_mismatches);
+ PyList_Append(py_list, py_tuple);
+ Py_DECREF(py_tuple);
+}
+
+static PyObject *
+trie_get_approximate(trieobject *mp, PyObject *args)
+{
+ unsigned char *key;
+ int k;
+ PyObject *py_list;
+
+ if (!PyArg_ParseTuple(args, "si:get_approximate", &key, &k))
+ return NULL;
+
+ if(!(py_list = PyList_New(0)))
+ return NULL;
+ Trie_get_approximate(mp->trie, key, k,
+ _trie_get_approximate_helper, (void *)py_list);
+ if(PyErr_Occurred()) {
+ Py_DECREF(py_list);
+ return NULL;
+ }
+ return py_list;
+}
+
+static long
+trie_nohash(PyObject *self)
+{
+ PyErr_SetString(PyExc_TypeError, "trie objects are unhashable");
+ return -1;
+}
+
+static PyMappingMethods trie_as_mapping = {
+/* The first member of PyMappingMethods was redefined in Python 2.5. */
+#if PY_VERSION_HEX < 0x02050000
+ (inquiry)trie_length, /*mp_length*/
+#else
+ (lenfunc)trie_length, /*mp_length*/
+#endif
+ (binaryfunc)trie_subscript, /*mp_subscript*/
+ (objobjargproc)trie_ass_sub /*mp_ass_subscript*/
+};
+
+static PyMethodDef trieobj_methods[] = {
+ /* METH_O and METH_NOARGS require Python 2.2.
+ {"has_key", (PyCFunction)trie_has_key, METH_O,
+ has_key__doc__},
+ {"has_prefix", (PyCFunction)trie_has_prefix, METH_O,
+ has_prefix__doc__},
+ {"with_prefix", (PyCFunction)trie_with_prefix, METH_O,
+ with_prefix__doc__},
+ {"keys", (PyCFunction)trie_keys, METH_NOARGS,
+ keys__doc__},
+ {"values", (PyCFunction)trie_values, METH_NOARGS,
+ values__doc__},
+ */
+
+ {"has_key", (PyCFunction)trie_has_key_onearg, METH_VARARGS,
+ has_key__doc__},
+ {"has_prefix", (PyCFunction)trie_has_prefix_onearg, METH_VARARGS,
+ has_prefix__doc__},
+ {"with_prefix", (PyCFunction)trie_with_prefix_onearg, METH_VARARGS,
+ with_prefix__doc__},
+ {"keys", (PyCFunction)trie_keys_noargs, METH_VARARGS,
+ keys__doc__},
+ {"values", (PyCFunction)trie_values_noargs, METH_VARARGS,
+ values__doc__},
+
+ {"get", (PyCFunction)trie_get, METH_VARARGS,
+ get__doc__},
+ {"get_approximate", (PyCFunction)trie_get_approximate, METH_VARARGS,
+ get_approximate__doc__},
+ {NULL, NULL} /* sentinel */
+};
+
+static PyObject *trie_getattr(PyObject *obj, char *name)
+{
+ return Py_FindMethod(trieobj_methods, (PyObject *)obj, name);
+
+}
+
+static PyTypeObject Trie_Type = {
+ PyObject_HEAD_INIT(NULL)
+ 0,
+ "trie",
+ sizeof(trieobject),
+ 0,
+ trie_dealloc, /*tp_dealloc*/
+ 0, /*tp_print*/
+ trie_getattr, /*tp_getattr*/
+ 0, /*tp_setattr*/
+ 0, /*tp_compare*/
+ 0, /*tp_repr*/
+ 0, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ &trie_as_mapping, /*tp_as_mapping*/
+ trie_nohash, /*tp_hash */
+};
+
+static int
+_write_to_handle(const void *towrite, const int length, void *handle)
+{
+ PyObject *py_handle = (PyObject *)handle,
+ *py_retval = NULL;
+ int success = 0;
+
+ if(!length)
+ return 1;
+
+ if(!(py_retval = PyObject_CallMethod(py_handle, "write", "s#",
+ towrite, length)))
+ goto _write_to_handle_cleanup;
+ success = 1;
+
+ _write_to_handle_cleanup:
+ if(py_retval) {
+ Py_DECREF(py_retval);
+ }
+ return success;
+}
+
+int _write_value_to_handle(const void *value, void *handle)
+{
+ PyObject *py_value = (PyObject *)value,
+ *py_marshalled = NULL;
+ char *marshalled;
+ Py_ssize_t length;
+ int success = 0;
+
+#ifdef Py_MARSHAL_VERSION
+ if(!(py_marshalled =
+ PyMarshal_WriteObjectToString(py_value, Py_MARSHAL_VERSION)))
+ goto _write_value_to_handle_cleanup;
+#else
+ if(!(py_marshalled = PyMarshal_WriteObjectToString(py_value)))
+ goto _write_value_to_handle_cleanup;
+#endif
+ if(PyString_AsStringAndSize(py_marshalled, &marshalled, &length) == -1)
+ goto _write_value_to_handle_cleanup;
+ if(!_write_to_handle(&length, sizeof(length), handle))
+ goto _write_value_to_handle_cleanup;
+ if (length != (int)length)
+ goto _write_value_to_handle_cleanup;
+ if(!_write_to_handle(marshalled, (int)length, handle))
+ goto _write_value_to_handle_cleanup;
+ success = 1;
+
+ _write_value_to_handle_cleanup:
+ if(py_marshalled) {
+ Py_DECREF(py_marshalled);
+ }
+
+ return success;
+}
+
+static PyObject *
+trie_save(PyObject *self, PyObject *args)
+{
+ PyObject *py_handle,
+ *py_trie;
+ trieobject *mp;
+
+ if(!PyArg_ParseTuple(args, "OO:save", &py_handle, &py_trie))
+ return NULL;
+ mp = (trieobject *)py_trie;
+ if(!Trie_serialize(mp->trie, _write_to_handle, _write_value_to_handle,
+ (void *)py_handle)) {
+ if(!PyErr_Occurred())
+ PyErr_SetString(PyExc_RuntimeError,
+ "saving failed for some reason");
+ return NULL;
+ }
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static int
+_read_from_handle(void *wasread, const int length, void *handle)
+{
+ PyObject *py_handle = (PyObject *)handle,
+ *py_retval = NULL;
+ void *retval;
+ int success = 0;
+ PyBufferProcs *buffer;
+ int segment;
+ int bytes_read, bytes_left;
+
+ if(!length)
+ return 1;
+
+ if(!(py_retval = PyObject_CallMethod(py_handle, "read", "i", length)))
+ goto _read_from_handle_cleanup;
+ if(!py_retval->ob_type->tp_as_buffer) {
+ PyErr_SetString(PyExc_ValueError, "read method should return buffer");
+ goto _read_from_handle_cleanup;
+ }
+ if(!(py_retval->ob_type->tp_flags & Py_TPFLAGS_DEFAULT)) {
+ PyErr_SetString(PyExc_ValueError, "no bf_getcharbuffer slot");
+ goto _read_from_handle_cleanup;
+ }
+ buffer = py_retval->ob_type->tp_as_buffer;
+ if(!buffer->bf_getreadbuffer) {
+ PyErr_SetString(PyExc_ValueError, "no bf_getreadbuffer");
+ goto _read_from_handle_cleanup;
+ }
+
+ bytes_left = length;
+ segment = 0;
+ while(bytes_left > 0) {
+ if((bytes_read = buffer->bf_getreadbuffer(py_retval,
+ segment, &retval)) == -1)
+ goto _read_from_handle_cleanup;
+ memcpy(wasread, retval, bytes_read);
+ wasread = (void *)((char *)wasread + bytes_read);
+ bytes_left -= bytes_read;
+ segment += 1;
+ }
+
+ success = 1;
+
+ _read_from_handle_cleanup:
+ if(py_retval) {
+ Py_DECREF(py_retval);
+ }
+ return success;
+}
+
+#define MAX_KEY_LENGTH 2000
+static void *
+_read_value_from_handle(void *handle)
+{
+ Py_ssize_t length;
+ char KEY[MAX_KEY_LENGTH];
+
+ if(!_read_from_handle((void *)&length, sizeof(length), (void *)handle))
+ return NULL;
+ if(length < 0 || length >= MAX_KEY_LENGTH)
+ return NULL;
+ if(!_read_from_handle((void *)KEY, length, (void *)handle))
+ return NULL;
+ return PyMarshal_ReadObjectFromString(KEY, length);
+}
+
+
+static PyObject *
+trie_load(PyObject *self, PyObject *args)
+{
+ PyObject *py_handle;
+ Trie trie;
+ trieobject *trieobj;
+
+ if(!PyArg_ParseTuple(args, "O:load", &py_handle))
+ return NULL;
+
+ if(!(trie = Trie_deserialize(_read_from_handle, _read_value_from_handle,
+ py_handle))) {
+ if(!PyErr_Occurred())
+ PyErr_SetString(PyExc_RuntimeError,
+ "loading failed for some reason");
+ return NULL;
+ }
+
+ if(!(trieobj = PyObject_New(trieobject, &Trie_Type))) {
+ Trie_del(trie);
+ return NULL;
+ }
+ trieobj->trie = trie;
+ return (PyObject *)trieobj;
+}
+
+static PyMethodDef trie_methods[] = {
+ {"trie", trie_trie, METH_VARARGS,
+ "trie() -> new Trie object."},
+ {"load", trie_load, METH_VARARGS,
+ "load(handle) -> trie object"},
+ {"save", trie_save, METH_VARARGS,
+ "save(handle, trie), save a trie object to a handle"},
+ {NULL, NULL, 0, NULL}
+};
+
+static char trie__doc__[] =
+"\
+This module implements a trie data structure. This allows an O(M)\n\
+lookup of a string in a dictionary, where M is the length of the\n\
+string. It also supports approximate matches.\n\
+\n\
+Functions:\n\
+trie Create a new trie object.\n\
+save Save a trie to a handle.\n\
+load Load a trie from a handle.\n\
+\n\
+";
+
+DL_EXPORT(void)
+inittrie(void)
+{
+ Trie_Type.ob_type = &PyType_Type;
+
+ (void) Py_InitModule3("trie", trie_methods, trie__doc__);
+}