Skip to content
This repository has been archived by the owner on May 13, 2020. It is now read-only.

Commit

Permalink
Simplify the "stopper" helper module -- just define a simple function
instead of an extension type, and let StopWordRemover be a Python class
that uses the helper if available.
Browse files Browse the repository at this point in the history
  • Loading branch information
freddrake committed May 22, 2002
1 parent 5d5e178 commit 039ad57
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 163 deletions.
23 changes: 9 additions & 14 deletions Lexicon.py
Expand Up @@ -152,17 +152,12 @@ class StopWordRemover:
for c in range(255):
dict[chr(c)] = None

def process(self, lst):
has_key = self.dict.has_key
return [w for w in lst if not has_key(w)]

try:
from Products.ZCTextIndex import stopper as _stopper
except ImportError:
pass
else:
_stopwords = StopWordRemover.dict
def StopWordRemover():
swr = _stopper.new()
swr.dict.update(_stopwords)
return swr
try:
from Products.ZCTextIndex.stopper import process as _process
except ImportError:
def process(self, lst):
has_key = self.dict.has_key
return [w for w in lst if not has_key(w)]
else:
def process(self, lst):
return self._process(self.dict, lst)
144 changes: 13 additions & 131 deletions stopper.c
Expand Up @@ -18,24 +18,19 @@
*/

#include "Python.h"
#include "structmember.h"

typedef struct {
PyObject_HEAD
PyObject *swr_dict;
} StopWordRemover;

static PyObject *
swr_process(StopWordRemover *self, PyObject *args)
stopper_process(PyObject *unused, PyObject *args)
{
PyObject *result = NULL;
PyObject *dict;
PyObject *seq;
int len, i;

if (!PyArg_ParseTuple(args, "O:process", &seq))
if (!PyArg_ParseTuple(args, "O!O:process", &PyDict_Type, &dict, &seq))
return NULL;
seq = PySequence_Fast(seq,
"process() requires a sequence as the argument");
"process() requires a sequence as argument 2");
if (seq == NULL)
return NULL;
result = PyList_New(0);
Expand All @@ -54,143 +49,30 @@ swr_process(StopWordRemover *self, PyObject *args)
* item, but without setting an exception, so this does what
* we want.
*/
if (PyDict_GetItem(self->swr_dict, s) == NULL)
if (PyDict_GetItem(dict, s) == NULL) {
if (PyList_Append(result, s) < 0) {
Py_DECREF(result);
result = NULL;
goto finally;
}
}
finally:
Py_XDECREF(seq);
return result;
}

/* Attribute table for StopWordRemover instances: exposes the swr_dict
 * field as a read-only attribute named "dict".  swr_getattr() passes this
 * table to PyMember_Get(); the {NULL} entry is the end-of-table sentinel.
 */
static struct memberlist swr_members[] = {
{"dict", T_OBJECT, offsetof(StopWordRemover, swr_dict), READONLY},
{NULL}
};

/* Method table for StopWordRemover instances: a single "process" method,
 * implemented by swr_process() and called with a positional-argument tuple
 * (METH_VARARGS).  swr_getattr() searches this table with Py_FindMethod();
 * the {NULL} entry is the end-of-table sentinel.
 */
static PyMethodDef swr_methods[] = {
{"process", (PyCFunction)swr_process, METH_VARARGS,
"process([str, ...]) --> [str, ...]\n"
"Remove stop words from the input list of strings to create a new list."},
{NULL}
};

static PyObject *
swr_getattr(PyObject *self, char *name)
{
    /* Attribute lookup for StopWordRemover instances.
     *
     * Methods listed in swr_methods take precedence; anything that is not
     * a method is then looked up as a data member in swr_members.
     * Returns whatever the successful lookup produced, or the result of
     * PyMember_Get() when the name is not a method.
     */
    PyObject *method = Py_FindMethod(swr_methods, self, name);

    if (method == NULL) {
        /* Not a method: drop the error left by the failed method lookup
         * before falling back to the member table. */
        PyErr_Clear();
        return PyMember_Get((char *)self, swr_members, name);
    }
    return method;
}

static void
swr_dealloc(StopWordRemover *self)
{
    /* Destructor for StopWordRemover instances: release the reference to
     * the stop-word dictionary, then free the object itself.
     *
     * The dictionary pointer can be NULL here (swr_new() destroys the
     * object after a failed PyDict_New()), hence the NULL-tolerant
     * Py_XDECREF.
     */
    PyObject *stop_words = self->swr_dict;

    Py_XDECREF(stop_words);
    PyObject_Del(self);
}

/* Type object for StopWordRemover instances.
 *
 * ob_type is initialised to NULL here and is assigned &PyType_Type at
 * module-initialisation time in initstopper().  Only the tp_dealloc and
 * tp_getattr slots are filled in; every slot after tp_setattr is omitted
 * from the initializer and is therefore zero-initialised.
 */
static PyTypeObject StopWordRemover_Type = {
PyObject_HEAD_INIT(NULL) /* ob_type */
0, /* ob_size */
"stopper.StopWordRemover", /* tp_name */
sizeof(StopWordRemover), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)swr_dealloc, /* tp_dealloc */
0, /* tp_print */
(getattrfunc)swr_getattr, /* tp_getattr */
0, /* tp_setattr */
};

static PyObject *
swr_new(PyObject *notused, PyObject *args)
{
    /* Module-level constructor: new([dict]) -> StopWordRemover instance.
     *
     * args holds at most one argument, which must be a dict.  When it is
     * supplied the new object shares that dict (taking its own reference);
     * otherwise a fresh empty dict is created.  Returns a new reference,
     * or NULL when argument parsing or an allocation fails.
     */
    PyObject *stop_words = NULL;
    StopWordRemover *remover;

    if (!PyArg_ParseTuple(args, "|O!:new", &PyDict_Type, &stop_words))
        return NULL;

    remover = PyObject_New(StopWordRemover, &StopWordRemover_Type);
    if (remover == NULL)
        return NULL;

    if (stop_words != NULL) {
        /* Caller supplied the dictionary: share it rather than copy it. */
        Py_INCREF(stop_words);
        remover->swr_dict = stop_words;
        return (PyObject *) remover;
    }

    remover->swr_dict = PyDict_New();
    if (remover->swr_dict == NULL) {
        /* swr_dict is NULL at this point; swr_dealloc() tolerates that. */
        Py_DECREF(remover);
        return NULL;
    }
    return (PyObject *) remover;
}

static PyObject*
pickle_constructor = NULL;

/* _pickler(StopWordRemover instance) -> (constructor, (dict,))
 *
 * Builds the two-item tuple "O(O)" from the module-level pickle_constructor
 * and the instance's swr_dict.  Returns NULL if the argument is not a
 * StopWordRemover instance or if building the tuple fails.
 */
PyObject *
swr_pickler(PyObject *unused, PyObject *args)
{
StopWordRemover *swr;
PyObject *result = NULL;

if (PyArg_ParseTuple(args, "O!:_pickler", &StopWordRemover_Type, &swr)) {
result = Py_BuildValue("O(O)", pickle_constructor, swr->swr_dict);
}
/* NOTE(review): nothing in this function jumps to `finally`, and `seq` is
 * not declared here -- these two lines look like they belong to the
 * process() function rather than to swr_pickler(); confirm against the
 * actual file. */
finally:
Py_DECREF(seq);
return result;
}

/* Module-level function table for the "stopper" module; initstopper()
 * hands it to Py_InitModule3().  Every entry takes a positional-argument
 * tuple (METH_VARARGS), and the {NULL} entry is the end-of-table sentinel.
 */
static PyMethodDef stopper_functions[] = {
{"new", swr_new, METH_VARARGS,
"new() -> StopWordRemover instance\n"
"Create & return a new stop-word remover."},
{"_pickler", swr_pickler, METH_VARARGS,
"_pickler(StopWordRemover instance) -> pickle magic\n"
"Internal magic used to make stop-word removers picklable."},
{"process", stopper_process, METH_VARARGS,
"process(dict, [str, ...]) --> [str, ...]\n"
"Remove stop words (the keys of dict) from the input list of strings\n"
" to create a new list."},
{NULL}
};

/* Module initialisation function for the "stopper" extension module.
 *
 * Creates the module from stopper_functions, publishes the type object as
 * "StopWordRemoverType", and registers the type with copy_reg.pickle() so
 * that instances can be pickled.  Returns early, without further setup, if
 * creating the module or setting the attribute fails.
 */
void
initstopper(void)
{
PyObject *m, *copy_reg;

/* The static initializer leaves ob_type NULL; fill it in at run time. */
StopWordRemover_Type.ob_type = &PyType_Type;
m = Py_InitModule3("stopper", stopper_functions,
"Fast StopWordRemover implementation.");
if (m == NULL)
return;
if (PyObject_SetAttrString(m, "StopWordRemoverType",
(PyObject *) &StopWordRemover_Type) < 0)
return;

/* register to support pickling */
copy_reg = PyImport_ImportModule("copy_reg");
if (copy_reg != NULL) {
PyObject *pickler;

/* Look up the constructor only once; it is cached in a file-level
 * variable that swr_pickler() also reads.  The Py_XINCREF takes a second
 * reference on top of the one PyObject_GetAttrString() returned. */
if (pickle_constructor == NULL) {
pickle_constructor = PyObject_GetAttrString(m, "new");
Py_XINCREF(pickle_constructor);
}
/* NOTE(review): the reference held in `pickler` is never released in the
 * code visible here -- confirm whether that is intentional. */
pickler = PyObject_GetAttrString(m, "_pickler");
if ((pickle_constructor != NULL) && (pickler != NULL)) {
PyObject *res;

/* Equivalent to copy_reg.pickle(type, pickler, constructor); only the
 * side effect matters, so the result is dropped. */
res = PyObject_CallMethod(
copy_reg, "pickle", "OOO", &StopWordRemover_Type,
pickler, pickle_constructor);
Py_XDECREF(res);
}
Py_DECREF(copy_reg);
}
/* NOTE(review): this repeats the Py_InitModule3() call made at the top of
 * the function and ignores its result -- it looks like two versions of the
 * function body merged together; confirm which one is intended. */
Py_InitModule3("stopper", stopper_functions,
"Fast StopWordRemover implementation.");
}
28 changes: 10 additions & 18 deletions tests/testStopper.py
Expand Up @@ -6,33 +6,25 @@


class StopperTest(unittest.TestCase):
def test_constructor_empty(self):
s = stopper.new()
self.assertEqual(s.dict, {})

def test_constructor_dict(self):
d = {}
s = stopper.new(d)
self.assert_(s.dict is d)

def test_constructor_error(self):
self.assertRaises(TypeError, stopper.new, [])
self.assertRaises(TypeError, stopper.new, {}, 'extra arg')
def test_process_typeerror(self):
self.assertRaises(TypeError, stopper.process, 42, [])
self.assertRaises(TypeError, stopper.process, {}, 42)
self.assertRaises(TypeError, stopper.process, {})
self.assertRaises(TypeError, stopper.process, {}, [], 'extra arg')

def test_process_nostops(self):
s = stopper.new()
words = ['a', 'b', 'c', 'splat!']
self.assertEqual(words, s.process(words))
self.assertEqual(words, stopper.process({}, words))

def test_process_somestops(self):
s = stopper.new({'b':1, 'splat!':1})
d = {'b':1, 'splat!':1}
words = ['a', 'b', 'c', 'splat!']
self.assertEqual(['a', 'c'], s.process(words))
self.assertEqual(['a', 'c'], stopper.process(d, words))

def test_process_allstops(self):
s = stopper.new({'a':1, 'b':1, 'c':1, 'splat!':1})
d = {'a':1, 'b':1, 'c':1, 'splat!':1}
words = ['a', 'b', 'c', 'splat!']
self.assertEqual([], s.process(words))
self.assertEqual([], stopper.process(d, words))


def test_suite():
Expand Down

0 comments on commit 039ad57

Please sign in to comment.