diff --git a/Lexicon.py b/Lexicon.py index fec281b..1c79b19 100644 --- a/Lexicon.py +++ b/Lexicon.py @@ -152,17 +152,12 @@ class StopWordRemover: for c in range(255): dict[chr(c)] = None - def process(self, lst): - has_key = self.dict.has_key - return [w for w in lst if not has_key(w)] - -try: - from Products.ZCTextIndex import stopper as _stopper -except ImportError: - pass -else: - _stopwords = StopWordRemover.dict - def StopWordRemover(): - swr = _stopper.new() - swr.dict.update(_stopwords) - return swr + try: + from Products.ZCTextIndex.stopper import process as _process + except ImportError: + def process(self, lst): + has_key = self.dict.has_key + return [w for w in lst if not has_key(w)] + else: + def process(self, lst): + return self._process(self.dict, lst) diff --git a/stopper.c b/stopper.c index 130b511..f59f14a 100644 --- a/stopper.c +++ b/stopper.c @@ -18,24 +18,19 @@ */ #include "Python.h" -#include "structmember.h" - -typedef struct { - PyObject_HEAD - PyObject *swr_dict; -} StopWordRemover; static PyObject * -swr_process(StopWordRemover *self, PyObject *args) +stopper_process(PyObject *unused, PyObject *args) { PyObject *result = NULL; + PyObject *dict; PyObject *seq; int len, i; - if (!PyArg_ParseTuple(args, "O:process", &seq)) + if (!PyArg_ParseTuple(args, "O!O:process", &PyDict_Type, &dict, &seq)) return NULL; seq = PySequence_Fast(seq, - "process() requires a sequence as the argument"); + "process() requires a sequence as argument 2"); if (seq == NULL) return NULL; result = PyList_New(0); @@ -54,143 +49,30 @@ swr_process(StopWordRemover *self, PyObject *args) * item, but without setting an exception, so this does what * we want. */ - if (PyDict_GetItem(self->swr_dict, s) == NULL) + if (PyDict_GetItem(dict, s) == NULL) { if (PyList_Append(result, s) < 0) { Py_DECREF(result); result = NULL; goto finally; } - } - finally: - Py_XDECREF(seq); - return result; -} - -static struct memberlist swr_members[] = { - {"dict", T_OBJECT, offsetof(StopWordRemover, swr_dict), READONLY}, - {NULL} -}; - -static PyMethodDef swr_methods[] = { - {"process", (PyCFunction)swr_process, METH_VARARGS, - "process([str, ...]) --> [str, ...]\n" - "Remove stop words from the input list of strings to create a new list."}, - {NULL} -}; - -static PyObject * -swr_getattr(PyObject *self, char *name) -{ - PyObject *res; - - res = Py_FindMethod(swr_methods, self, name); - if (res != NULL) - return res; - PyErr_Clear(); - return PyMember_Get((char *)self, swr_members, name); -} - -static void -swr_dealloc(StopWordRemover *self) -{ - Py_XDECREF(self->swr_dict); - PyObject_Del(self); -} - -static PyTypeObject StopWordRemover_Type = { - PyObject_HEAD_INIT(NULL) /* ob_type */ - 0, /* ob_size */ - "stopper.StopWordRemover", /* tp_name */ - sizeof(StopWordRemover), /* tp_basicsize */ - 0, /* tp_itemsize */ - (destructor)swr_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - (getattrfunc)swr_getattr, /* tp_getattr */ - 0, /* tp_setattr */ -}; - -static PyObject * -swr_new(PyObject *notused, PyObject *args) -{ - StopWordRemover *swr = NULL; - PyObject *dict = NULL; - - if (PyArg_ParseTuple(args, "|O!:new", &PyDict_Type, &dict)) { - swr = PyObject_New(StopWordRemover, &StopWordRemover_Type); - if (swr != NULL) { - if (dict != NULL) { - Py_INCREF(dict); - swr->swr_dict = dict; - } - else { - swr->swr_dict = PyDict_New(); - if (swr->swr_dict == NULL) { - Py_DECREF(swr); - swr = NULL; - } - } } } - return (PyObject *) swr; -} - -static PyObject* -pickle_constructor = NULL; - -PyObject * -swr_pickler(PyObject *unused, PyObject *args) -{ - StopWordRemover *swr; - PyObject *result = NULL; - - if (PyArg_ParseTuple(args, "O!:_pickler", &StopWordRemover_Type, &swr)) { - result = Py_BuildValue("O(O)", pickle_constructor, swr->swr_dict); - } + finally: + Py_DECREF(seq); return result; } static PyMethodDef stopper_functions[] = { - {"new", swr_new, METH_VARARGS, - "new() -> StopWordRemover instance\n" - "Create & return a new stop-word remover."}, - {"_pickler", swr_pickler, METH_VARARGS, - "_pickler(StopWordRemover instance) -> pickle magic\n" - "Internal magic used to make stop-word removers picklable."}, + {"process", stopper_process, METH_VARARGS, + "process(dict, [str, ...]) --> [str, ...]\n" + "Remove stop words (the keys of dict) from the input list of strings\n" + " to create a new list."}, {NULL} }; void initstopper(void) { - PyObject *m, *copy_reg; - - StopWordRemover_Type.ob_type = &PyType_Type; - m = Py_InitModule3("stopper", stopper_functions, - "Fast StopWordRemover implementation."); - if (m == NULL) - return; - if (PyObject_SetAttrString(m, "StopWordRemoverType", - (PyObject *) &StopWordRemover_Type) < 0) - return; - - /* register to support pickling */ - copy_reg = PyImport_ImportModule("copy_reg"); - if (copy_reg != NULL) { - PyObject *pickler; - - if (pickle_constructor == NULL) { - pickle_constructor = PyObject_GetAttrString(m, "new"); - Py_XINCREF(pickle_constructor); - } - pickler = PyObject_GetAttrString(m, "_pickler"); - if ((pickle_constructor != NULL) && (pickler != NULL)) { - PyObject *res; - - res = PyObject_CallMethod( - copy_reg, "pickle", "OOO", &StopWordRemover_Type, - pickler, pickle_constructor); - Py_XDECREF(res); - } - Py_DECREF(copy_reg); - } + Py_InitModule3("stopper", stopper_functions, + "Fast StopWordRemover implementation."); } diff --git a/tests/testStopper.py b/tests/testStopper.py index eea13b4..991dde3 100644 --- a/tests/testStopper.py +++ b/tests/testStopper.py @@ -6,33 +6,25 @@ class StopperTest(unittest.TestCase): - def test_constructor_empty(self): - s = stopper.new() - self.assertEqual(s.dict, {}) - - def test_constructor_dict(self): - d = {} - s = stopper.new(d) - self.assert_(s.dict is d) - - def test_constructor_error(self): - self.assertRaises(TypeError, stopper.new, []) - self.assertRaises(TypeError, stopper.new, {}, 'extra arg') + def test_process_typeerror(self): + self.assertRaises(TypeError, stopper.process, 42, []) + self.assertRaises(TypeError, stopper.process, {}, 42) + self.assertRaises(TypeError, stopper.process, {}) + self.assertRaises(TypeError, stopper.process, {}, [], 'extra arg') def test_process_nostops(self): - s = stopper.new() words = ['a', 'b', 'c', 'splat!'] - self.assertEqual(words, s.process(words)) + self.assertEqual(words, stopper.process({}, words)) def test_process_somestops(self): - s = stopper.new({'b':1, 'splat!':1}) + d = {'b':1, 'splat!':1} words = ['a', 'b', 'c', 'splat!'] - self.assertEqual(['a', 'c'], s.process(words)) + self.assertEqual(['a', 'c'], stopper.process(d, words)) def test_process_allstops(self): - s = stopper.new({'a':1, 'b':1, 'c':1, 'splat!':1}) + d = {'a':1, 'b':1, 'c':1, 'splat!':1} words = ['a', 'b', 'c', 'splat!'] - self.assertEqual([], s.process(words)) + self.assertEqual([], stopper.process(d, words)) def test_suite():