diff --git a/slugify/slugify.py b/slugify/slugify.py index 82f67fe..5951eb3 100644 --- a/slugify/slugify.py +++ b/slugify/slugify.py @@ -74,7 +74,7 @@ def smart_truncate(string, max_length=0, word_boundaries=False, separator=' ', s def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, word_boundary=False, - separator='-', save_order=False): + separator='-', save_order=False, stopwords=()): """Make a slug from the given text. :param text (str): initial text :param entities (bool): @@ -84,6 +84,7 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w :param word_boundary (bool): :param save_order (bool): if parameter is True and max_length > 0 return whole words in the initial order :param separator (str): separator between words + :param stopwords (iterable): words to drop from the slug, matched case-insensitively :return (str): """ @@ -128,6 +129,12 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w # remove redundant - text = REMOVE_REXP.sub('-', text).strip('-') + # remove stopwords (text is still '-'-delimited here, whatever separator is) + if stopwords: + stopwords_lower = set(s.lower() for s in stopwords) + words = [w for w in text.split('-') if w not in stopwords_lower] + text = '-'.join(words) + # smart truncate if requested if max_length > 0: text = smart_truncate(text, max_length, word_boundary, '-', save_order) diff --git a/test.py b/test.py index 88027ee..ee1a04c 100644 --- a/test.py +++ b/test.py @@ -4,22 +4,33 @@ from slugify import slugify -class TestSequenceFunctions(unittest.TestCase): +class TestSlugification(unittest.TestCase): - def test_manager(self): + def test_extraneous_seperators(self): txt = "This is a test ---" r = slugify(txt) self.assertEqual(r, "this-is-a-test") + txt = "___This is a test ---" + r = slugify(txt) + self.assertEqual(r, "this-is-a-test") + + txt = "___This is a test___" + r = slugify(txt) + self.assertEqual(r, "this-is-a-test") + + def test_non_word_characters(self): txt = "This -- is a ## test ---" r = slugify(txt) 
self.assertEqual(r, "this-is-a-test") + def test_phonetic_conversion_of_eastern_scripts(self): txt = '影師嗎' r = slugify(txt) self.assertEqual(r, "ying-shi-ma") + def test_accented_text(self): txt = 'C\'est déjà l\'été.' r = slugify(txt) self.assertEqual(r, "cest-deja-lete") @@ -28,14 +39,17 @@ def test_manager(self): r = slugify(txt) self.assertEqual(r, "nin-hao-wo-shi-zhong-guo-ren") - txt = 'Компьютер' - r = slugify(txt) - self.assertEqual(r, "kompiuter") - + def test_accented_text_with_non_word_characters(self): txt = 'jaja---lol-méméméoo--a' r = slugify(txt) self.assertEqual(r, "jaja-lol-mememeoo-a") + def test_cyrillic_text(self): + txt = 'Компьютер' + r = slugify(txt) + self.assertEqual(r, "kompiuter") + + def test_max_length(self): txt = 'jaja---lol-méméméoo--a' r = slugify(txt, max_length=9) self.assertEqual(r, "jaja-lol") @@ -44,10 +58,12 @@ def test_manager(self): r = slugify(txt, max_length=15) self.assertEqual(r, "jaja-lol-mememe") + def test_max_length_cutoff_not_required(self): txt = 'jaja---lol-méméméoo--a' r = slugify(txt, max_length=50) self.assertEqual(r, "jaja-lol-mememeoo-a") + def test_word_boundary(self): txt = 'jaja---lol-méméméoo--a' r = slugify(txt, max_length=15, word_boundary=True) self.assertEqual(r, "jaja-lol-a") @@ -64,22 +80,17 @@ def test_manager(self): r = slugify(txt, max_length=19, word_boundary=True) self.assertEqual(r, "jaja-lol-mememeoo-a") + def test_custom_separator(self): txt = 'jaja---lol-méméméoo--a' r = slugify(txt, max_length=20, word_boundary=True, separator=".") self.assertEqual(r, "jaja.lol.mememeoo.a") + def test_multi_character_separator(self): txt = 'jaja---lol-méméméoo--a' r = slugify(txt, max_length=20, word_boundary=True, separator="ZZZZZZ") self.assertEqual(r, "jajaZZZZZZlolZZZZZZmememeooZZZZZZa") - txt = "___This is a test ---" - r = slugify(txt) - self.assertEqual(r, "this-is-a-test") - - txt = "___This is a test___" - r = slugify(txt) - self.assertEqual(r, "this-is-a-test") - + def test_save_order(self): txt 
= 'one two three four five' r = slugify(txt, max_length=13, word_boundary=True, save_order=True) self.assertEqual(r, "one-two-three") @@ -96,5 +107,25 @@ def test_manager(self): r = slugify(txt, max_length=12, word_boundary=True, save_order=True) self.assertEqual(r, "one-two") + def test_stopword_removal(self): + txt = 'this has a stopword' + r = slugify(txt, stopwords=['stopword']) + self.assertEqual(r, 'this-has-a') + + def test_multiple_stopword_occurances(self): + txt = 'the quick brown fox jumps over the lazy dog' + r = slugify(txt, stopwords=['the']) + self.assertEqual(r, 'quick-brown-fox-jumps-over-lazy-dog') + + def test_differently_cased_stopword_match(self): + txt = 'Foo A FOO B foo C' + r = slugify(txt, stopwords=['foo']) + self.assertEqual(r, 'a-b-c') + + txt = 'Foo A FOO B foo C' + r = slugify(txt, stopwords=['FOO']) + self.assertEqual(r, 'a-b-c') + + if __name__ == '__main__': unittest.main()