diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..4148354 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,44 @@ +name: CI + +# Run on push only for dev/sandbox +# Otherwise it may trigger concurrently `push & pull_request` on PRs. +on: + push: + branches: + - ci + - staging + +jobs: + build: + name: Python ${{ matrix.python }} + runs-on: ubuntu-latest + strategy: + matrix: + python: [3.6, 3.7, 3.8, 3.9, "3.10", pypy3] + + steps: + - uses: actions/checkout@v2 + - name: setup python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e . + pip install coveralls --upgrade + - name: Run flake8 + run: | + pip install flake8 --upgrade + flake8 --exclude=build --ignore=E501,F403,F401,E241,E225,E128 . + - name: Run pycodestyle + run: | + pip install pycodestyle --upgrade + pycodestyle --ignore=E128,E261,E225,E501,W605 slugify test.py setup.py + - name: Run test + run: | + coverage run --source=slugify test.py + - name: Coveralls + run: coveralls --service=github + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml new file mode 100644 index 0000000..d0cb401 --- /dev/null +++ b/.github/workflows/dev.yml @@ -0,0 +1,44 @@ +name: DEV + +# Run on push only for dev/sandbox +# Otherwise it may trigger concurrently `push & pull_request` on PRs. +on: + push: + branches: + - sandbox + - dev + +jobs: + build: + name: Python ${{ matrix.python }} + runs-on: ubuntu-latest + strategy: + matrix: + python: [3.6, 3.7, 3.8, 3.9, "3.10", pypy3] + + steps: + - uses: actions/checkout@v2 + - name: setup python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e . + pip install coveralls --upgrade + - name: Run flake8 + run: | + pip install flake8 --upgrade + flake8 --exclude=build --ignore=E501,F403,F401,E241,E225,E128 . + - name: Run pycodestyle + run: | + pip install pycodestyle --upgrade + pycodestyle --ignore=E128,E261,E225,E501,W605 slugify test.py setup.py + - name: Run test + run: | + coverage run --source=slugify test.py + - name: Coveralls + run: coveralls --service=github + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..f1e75b7 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,43 @@ +name: Main + +# Run on push only for dev/sandbox +# Otherwise it may trigger concurrently `push & pull_request` on PRs. +on: + push: + branches: + - master + +jobs: + build: + name: Python ${{ matrix.python }} + runs-on: ubuntu-latest + strategy: + matrix: + python: [3.6, 3.7, 3.8, 3.9, "3.10", pypy3] + + steps: + - uses: actions/checkout@v2 + - name: setup python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e . + pip install coveralls --upgrade + - name: Run flake8 + run: | + pip install flake8 --upgrade + flake8 --exclude=build --ignore=E501,F403,F401,E241,E225,E128 . + - name: Run pycodestyle + run: | + pip install pycodestyle --upgrade + pycodestyle --ignore=E128,E261,E225,E501,W605 slugify test.py setup.py + - name: Run test + run: | + coverage run --source=slugify test.py + - name: Coveralls + run: coveralls --service=github + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.python-version b/.python-version deleted file mode 100644 index a8733ab..0000000 --- a/.python-version +++ /dev/null @@ -1,5 +0,0 @@ -3.9.2 -3.8.8 -3.7.10 -3.6.13 -pypy3.7-7.3.3 diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 2f8b3ce..0000000 --- a/.travis.yml +++ /dev/null @@ -1,22 +0,0 @@ -language: python -dist: xenial - -python: - - "3.6" - - "3.7" - - "3.8" - - "3.9" - - "pypy3" - -install: - - pip install pip -U - - pip install -e . - - pip install pycodestyle - - pip install coveralls - -before_script: - - "bash format.sh" - -script: coverage run --source=slugify test.py - -after_success: coveralls diff --git a/.vscode/settings.json b/.vscode/settings.json index 2ab09c1..ecfbb80 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,4 +1,5 @@ { "python.linting.pylintEnabled": false, "python.pythonPath": "/usr/bin/python3", -} \ No newline at end of file + "cSpell.words": ["Neekman", "shch", "xlate"] +} diff --git a/CHANGELOG.md b/CHANGELOG.md index 777f6dc..2ba0bb3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,25 @@ +## 6.1.2 + +- Reintroduce the cli options + +## 6.1.1 + +- Remove type hinting (temporarily) + +## 6.1.0 + +- Add `allow_unicode` flag to allow unicode characters in the slug + +## 6.0.1 + +- Rework regex_pattern to mean the opposite (disallowed chars instead of allowed) +- Thanks to @yyyyyyyan for the initial PR followed by the final PR by @mrezzamoradi + +## 6.0.0 + +- Enable github action +- Remove tox, as we run the test on github action, the end users can refer to those test + ## 5.0.2 - Enable twine publish diff --git a/MANIFEST.in b/MANIFEST.in index 0c78f18..373701c 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,3 @@ -include CHANGELOG.md include LICENSE include README.md -include test.py +include CHANGELOG.md diff --git a/README.md b/README.md index 2305794..f93afee 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,8 @@ def slugify( stopwords=(), regex_pattern=None, lowercase=True, - replacements=() + replacements=(), + allow_unicode=False ): """ Make a slug from the given text. @@ -55,9 +56,10 @@ def slugify( :param save_order (bool): if parameter is True and max_length > 0 return whole words in the initial order :param separator (str): separator between words :param stopwords (iterable): words to discount - :param regex_pattern (str): regex pattern for allowed characters + :param regex_pattern (str): regex pattern for disallowed characters :param lowercase (bool): activate case sensitivity by setting it to False :param replacements (iterable): list of replacement rules e.g. [['|', 'or'], ['%', 'percent']] + :param allow_unicode (bool): allow unicode characters :return (str): slugify text """ ``` @@ -75,6 +77,10 @@ txt = '影師嗎' r = slugify(txt) self.assertEqual(r, "ying-shi-ma") +txt = '影師嗎' +r = slugify(txt, allow_unicode=True) +self.assertEqual(r, "影師嗎") + txt = 'C\'est déjà l\'été.' r = slugify(txt) self.assertEqual(r, "c-est-deja-l-ete") @@ -133,6 +139,14 @@ txt = 'ÜBER Über German Umlaut' r = slugify(txt, replacements=[['Ü', 'UE'], ['ü', 'ue']]) self.assertEqual(r, "ueber-ueber-german-umlaut") +txt = 'i love 🦄' +r = slugify(txt, allow_unicode=True) +self.assertEqual(r, "i-love") + +txt = 'i love 🦄' +r = slugify(txt, allow_unicode=True, regex_pattern=r'[^🦄]+') +self.assertEqual(r, "🦄") + ``` For more examples, have a look at the [test.py](test.py) file. @@ -164,10 +178,6 @@ quick-brown-fox-jumps-over-lazy-dog # Running the tests -To run the tests against all environments: - - tox - To run the tests against the current environment: python test.py @@ -188,8 +198,8 @@ X.Y.Z Version `MINOR` version -- when you add functionality in a backwards-compatible manner, and `PATCH` version -- when you make backwards-compatible bug fixes. -[status-image]: https://travis-ci.org/un33k/python-slugify.svg?branch=master -[status-link]: https://travis-ci.org/un33k/python-slugify +[status-image]: https://github.com/un33k/python-slugify/actions/workflows/ci.yml/badge.svg +[status-link]: https://github.com/un33k/python-slugify/actions/workflows/ci.yml [version-image]: https://img.shields.io/pypi/v/python-slugify.svg [version-link]: https://pypi.python.org/pypi/python-slugify [coverage-image]: https://coveralls.io/repos/un33k/python-slugify/badge.svg diff --git a/dev.requirements.txt b/dev.requirements.txt index 337aa36..5f94d7b 100644 --- a/dev.requirements.txt +++ b/dev.requirements.txt @@ -1,2 +1,3 @@ -pycodestyle==2.7.0 -twine==3.4.1 \ No newline at end of file +pycodestyle==2.8.0 +twine==3.4.1 +flake8==4.0.1 \ No newline at end of file diff --git a/setup.py b/setup.py index 51b267f..c3c4b3b 100755 --- a/setup.py +++ b/setup.py @@ -1,70 +1,85 @@ #!/usr/bin/env python - -# -*- coding: utf-8 -*- -from setuptools import setup, find_packages -import re +# Learn more: https://github.com/un33k/setup.py import os import sys -import codecs -name = 'python-slugify' +from codecs import open +from shutil import rmtree +from setuptools import setup + + package = 'slugify' -description = 'A Python Slugify application that handles Unicode' -url = 'https://github.com/un33k/python-slugify' -author = 'Val Neekman' -author_email = 'info@neekware.com' -license = 'MIT' +python_requires = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +here = os.path.abspath(os.path.dirname(__file__)) + install_requires = ['text-unidecode>=1.3'] -extras_require = {'unidecode': ['Unidecode>=1.1.1']} +extras_requires = {'unidecode': ['Unidecode>=1.1.1']} +test_requires = [] -classifiers = [ - 'Development Status :: 5 - Production/Stable', - 'Intended Audience :: Developers', - 'Topic :: Software Development :: Build Tools', - 'License :: OSI Approved :: MIT License', - 'Operating System :: OS Independent', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', -] +about = {} +with open(os.path.join(here, package, '__version__.py'), 'r', 'utf-8') as f: + exec(f.read(), about) +with open('README.md', 'r', 'utf-8') as f: + readme = f.read() -def get_version(package): - """ - Return package version as listed in `__version__` in `init.py`. - """ - init_py = codecs.open(os.path.join(package, '__init__.py'), encoding='utf-8').read() - return re.search("^__version__ = ['\"]([^'\"]+)['\"]", init_py, re.MULTILINE).group(1) +def status(s): + print('\033[1m{0}\033[0m'.format(s)) -if sys.argv[-1] == 'build': - os.system("python setup.py sdist bdist_wheel") +# 'setup.py publish' shortcut. if sys.argv[-1] == 'publish': - os.system("python setup.py build && twine upload dist/*") - args = {'version': get_version(package)} - print("You probably want to also tag the version now:") - print(" git tag -a %(version)s -m 'version %(version)s' && git push --tags" % args) - sys.exit() + try: + status('Removing previous builds…') + rmtree(os.path.join(here, 'dist')) + except OSError: + pass + + status('Building Source and Wheel (universal) distribution…') + os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable)) -EXCLUDE_FROM_PACKAGES = [] + status('Uploading the package to PyPI via Twine…') + os.system('twine upload dist/*') + + status('Pushing git tags…') + os.system('git tag v{0}'.format(about['__version__'])) + os.system('git push --tags') + sys.exit() setup( - name=name, - version=get_version(package), - url=url, - license=license, - description=description, - long_description=description, - author=author, - author_email=author_email, - packages=find_packages(exclude=EXCLUDE_FROM_PACKAGES), + name=about['__title__'], + version=about['__version__'], + description=about['__description__'], + long_description=readme, + long_description_content_type='text/markdown', + author=about['__author__'], + author_email=about['__author_email__'], + url=about['__url__'], + license=about['__license__'], + packages=[package], + package_data={'': ['LICENSE']}, + package_dir={'slugify': 'slugify'}, + include_package_data=True, + python_requires=python_requires, install_requires=install_requires, - extras_require=extras_require, - python_requires='>=3.6', - classifiers=classifiers, + tests_require=test_requires, + extras_require=extras_requires, + zip_safe=False, + cmdclass={}, + project_urls={}, + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'Natural Language :: English', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + ], entry_points={'console_scripts': ['slugify=slugify.__main__:main']}, ) diff --git a/slugify/__init__.py b/slugify/__init__.py index 6c59f4e..ac21492 100644 --- a/slugify/__init__.py +++ b/slugify/__init__.py @@ -1,7 +1,2 @@ from .special import * from .slugify import * - - -__author__ = 'Val Neekman @ Neekware Inc. [@vneekman]' -__description__ = 'A Python slugify application that also handles Unicode' -__version__ = '5.0.2' diff --git a/slugify/__main__.py b/slugify/__main__.py index f815206..7dd6b01 100644 --- a/slugify/__main__.py +++ b/slugify/__main__.py @@ -31,11 +31,13 @@ def parse_args(argv): parser.add_argument("--stopwords", nargs='+', help="Words to discount") parser.add_argument("--regex-pattern", - help="Python regex pattern for allowed characters") + help="Python regex pattern for disallowed characters") parser.add_argument("--no-lowercase", action='store_false', dest='lowercase', default=True, help="Activate case sensitivity") parser.add_argument("--replacements", nargs='+', help="""Additional replacement rules e.g. "|->or", "%%->percent".""") + parser.add_argument("--allow-unicode", action='store_true', default=False, + help="Allow unicode characters") args = parser.parse_args(argv[1:]) @@ -73,11 +75,12 @@ def slugify_params(args): separator=args.separator, stopwords=args.stopwords, lowercase=args.lowercase, - replacements=args.replacements + replacements=args.replacements, + allow_unicode=args.allow_unicode ) -def main(argv=None): # pragma: no cover +def main(argv=None): # pragma: no cover """ Run this program """ if argv is None: argv = sys.argv @@ -89,5 +92,5 @@ def main(argv=None): # pragma: no cover sys.exit(-1) -if __name__ == '__main__': # pragma: no cover +if __name__ == '__main__': # pragma: no cover main() diff --git a/slugify/__version__.py b/slugify/__version__.py new file mode 100644 index 0000000..55abc97 --- /dev/null +++ b/slugify/__version__.py @@ -0,0 +1,8 @@ +__title__ = 'python-slugify' +__author__ = 'Val Neekman' +__author_email__ = 'info@neekware.com' +__description__ = 'A Python slugify application that also handles Unicode' +__url__ = 'https://github.com/un33k/python-slugify' +__license__ = 'MIT' +__copyright__ = 'Copyright 2022 Val Neekman @ Neekware Inc.' +__version__ = '6.1.2' diff --git a/slugify/slugify.py b/slugify/slugify.py index bb3aa95..b8c02ad 100644 --- a/slugify/slugify.py +++ b/slugify/slugify.py @@ -1,17 +1,7 @@ import re -import unicodedata -import types import sys - -try: - from htmlentitydefs import name2codepoint - _unicode = unicode - _unicode_type = types.UnicodeType -except ImportError: - from html.entities import name2codepoint - _unicode = str - _unicode_type = str - unichr = chr +import unicodedata +from html.entities import name2codepoint try: import text_unidecode as unidecode @@ -25,8 +15,8 @@ DECIMAL_PATTERN = re.compile(r'&#(\d+);') HEX_PATTERN = re.compile(r'&#x([\da-fA-F]+);') QUOTE_PATTERN = re.compile(r'[\']+') -ALLOWED_CHARS_PATTERN = re.compile(r'[^-a-z0-9]+') -ALLOWED_CHARS_PATTERN_WITH_UPPERCASE = re.compile(r'[^-a-zA-Z0-9]+') +DISALLOWED_CHARS_PATTERN = re.compile(r'[^-a-zA-Z0-9]+') +DISALLOWED_UNICODE_CHARS_PATTERN = re.compile(r'[\W_]+') DUPLICATE_DASH_PATTERN = re.compile(r'-{2,}') NUMBERS_PATTERN = re.compile(r'(?<=\d),(?=\d)') DEFAULT_SEPARATOR = '-' @@ -69,14 +59,14 @@ def smart_truncate(string, max_length=0, word_boundary=False, separator=' ', sav else: if save_order: break - if not truncated: # pragma: no cover + if not truncated: # pragma: no cover truncated = string[:max_length] return truncated.strip(separator) def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, word_boundary=False, separator=DEFAULT_SEPARATOR, save_order=False, stopwords=(), regex_pattern=None, lowercase=True, - replacements=()): + replacements=(), allow_unicode=False): """ Make a slug from the given text. :param text (str): initial text @@ -88,9 +78,10 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w :param save_order (bool): if parameter is True and max_length > 0 return whole words in the initial order :param separator (str): separator between words :param stopwords (iterable): words to discount - :param regex_pattern (str): regex pattern for allowed characters + :param regex_pattern (str): regex pattern for disallowed characters :param lowercase (bool): activate case sensitivity by setting it to False :param replacements (iterable): list of replacement rules e.g. [['|', 'or'], ['%', 'percent']] + :param allow_unicode (bool): allow unicode characters :return (str): """ @@ -100,39 +91,44 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w text = text.replace(old, new) # ensure text is unicode - if not isinstance(text, _unicode_type): - text = _unicode(text, 'utf-8', 'ignore') + if not isinstance(text, str): + text = str(text, 'utf-8', 'ignore') # replace quotes with dashes - pre-process text = QUOTE_PATTERN.sub(DEFAULT_SEPARATOR, text) # decode unicode - text = unidecode.unidecode(text) + if not allow_unicode: + text = unidecode.unidecode(text) # ensure text is still in unicode - if not isinstance(text, _unicode_type): - text = _unicode(text, 'utf-8', 'ignore') + if not isinstance(text, str): + text = str(text, 'utf-8', 'ignore') # character entity reference if entities: - text = CHAR_ENTITY_PATTERN.sub(lambda m: unichr(name2codepoint[m.group(1)]), text) + text = CHAR_ENTITY_PATTERN.sub(lambda m: chr(name2codepoint[m.group(1)]), text) # decimal character reference if decimal: try: - text = DECIMAL_PATTERN.sub(lambda m: unichr(int(m.group(1))), text) + text = DECIMAL_PATTERN.sub(lambda m: chr(int(m.group(1))), text) except Exception: pass # hexadecimal character reference if hexadecimal: try: - text = HEX_PATTERN.sub(lambda m: unichr(int(m.group(1), 16)), text) + text = HEX_PATTERN.sub(lambda m: chr(int(m.group(1), 16)), text) except Exception: pass # translate - text = unicodedata.normalize('NFKD', text) + if allow_unicode: + text = unicodedata.normalize('NFKC', text) + else: + text = unicodedata.normalize('NFKD', text) + if sys.version_info < (3,): text = text.encode('ascii', 'ignore') @@ -147,10 +143,11 @@ def slugify(text, entities=True, decimal=True, hexadecimal=True, max_length=0, w text = NUMBERS_PATTERN.sub('', text) # replace all other unwanted characters - if lowercase: - pattern = regex_pattern or ALLOWED_CHARS_PATTERN + if allow_unicode: + pattern = regex_pattern or DISALLOWED_UNICODE_CHARS_PATTERN else: - pattern = regex_pattern or ALLOWED_CHARS_PATTERN_WITH_UPPERCASE + pattern = regex_pattern or DISALLOWED_CHARS_PATTERN + text = re.sub(pattern, DEFAULT_SEPARATOR, text) # remove redundant diff --git a/slugify/special.py b/slugify/special.py index d3478d5..54eb85c 100644 --- a/slugify/special.py +++ b/slugify/special.py @@ -20,7 +20,7 @@ def add_uppercase_char(char_list): (u'я', u'ya'), # ia (u'х', u'h'), # kh (u'у', u'y'), # u - (u'щ', u'sch'), # shch + (u'щ', u'sch'), # sch (u'ю', u'u'), # iu / yu ] CYRILLIC = add_uppercase_char(_CYRILLIC) diff --git a/test.py b/test.py index ddf1bf4..931f38f 100644 --- a/test.py +++ b/test.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- import io -import os import sys import unittest from contextlib import contextmanager @@ -10,7 +9,7 @@ from slugify.__main__ import slugify_params, parse_args -class TestSlugification(unittest.TestCase): +class TestSlugify(unittest.TestCase): def test_extraneous_seperators(self): @@ -234,6 +233,294 @@ def test_replacements_german_umlaut_custom(self): self.assertEqual(r, "ueber-ueber-german-umlaut") +class TestSlugifyUnicode(unittest.TestCase): + + def test_extraneous_seperators(self): + + txt = "This is a test ---" + r = slugify(txt, allow_unicode=True) + self.assertEqual(r, "this-is-a-test") + + txt = "___This is a test ---" + r = slugify(txt, allow_unicode=True) + self.assertEqual(r, "this-is-a-test") + + txt = "___This is a test___" + r = slugify(txt, allow_unicode=True) + self.assertEqual(r, "this-is-a-test") + + def test_non_word_characters(self): + txt = "This -- is a ## test ---" + r = slugify(txt, allow_unicode=True) + self.assertEqual(r, "this-is-a-test") + + def test_phonetic_conversion_of_eastern_scripts(self): + txt = '影師嗎' + r = slugify(txt, allow_unicode=True) + self.assertEqual(r, txt) + + def test_accented_text(self): + txt = 'C\'est déjà l\'été.' + r = slugify(txt, allow_unicode=True) + self.assertEqual(r, "c-est-déjà-l-été") + + txt = 'Nín hǎo. Wǒ shì zhōng guó rén' + r = slugify(txt, allow_unicode=True) + self.assertEqual(r, "nín-hǎo-wǒ-shì-zhōng-guó-rén") + + def test_accented_text_with_non_word_characters(self): + txt = 'jaja---lol-méméméoo--a' + r = slugify(txt, allow_unicode=True) + self.assertEqual(r, "jaja-lol-méméméoo-a") + + def test_cyrillic_text(self): + txt = 'Компьютер' + r = slugify(txt, allow_unicode=True) + self.assertEqual(r, "компьютер") + + def test_max_length(self): + txt = 'jaja---lol-méméméoo--a' + r = slugify(txt, allow_unicode=True, max_length=9) + self.assertEqual(r, "jaja-lol") + + txt = 'jaja---lol-méméméoo--a' + r = slugify(txt, allow_unicode=True, max_length=15) + self.assertEqual(r, "jaja-lol-mémémé") + + def test_max_length_cutoff_not_required(self): + txt = 'jaja---lol-méméméoo--a' + r = slugify(txt, allow_unicode=True, max_length=50) + self.assertEqual(r, "jaja-lol-méméméoo-a") + + def test_word_boundary(self): + txt = 'jaja---lol-méméméoo--a' + r = slugify(txt, allow_unicode=True, max_length=15, word_boundary=True) + self.assertEqual(r, "jaja-lol-a") + + txt = 'jaja---lol-méméméoo--a' + r = slugify(txt, allow_unicode=True, max_length=17, word_boundary=True) + self.assertEqual(r, "jaja-lol-méméméoo") + + txt = 'jaja---lol-méméméoo--a' + r = slugify(txt, allow_unicode=True, max_length=18, word_boundary=True) + self.assertEqual(r, "jaja-lol-méméméoo") + + txt = 'jaja---lol-méméméoo--a' + r = slugify(txt, allow_unicode=True, max_length=19, word_boundary=True) + self.assertEqual(r, "jaja-lol-méméméoo-a") + + def test_custom_separator(self): + txt = 'jaja---lol-méméméoo--a' + r = slugify(txt, allow_unicode=True, max_length=20, word_boundary=True, separator=".") + self.assertEqual(r, "jaja.lol.méméméoo.a") + + def test_multi_character_separator(self): + txt = 'jaja---lol-méméméoo--a' + r = slugify(txt, allow_unicode=True, max_length=20, word_boundary=True, separator="ZZZZZZ") + self.assertEqual(r, "jajaZZZZZZlolZZZZZZméméméooZZZZZZa") + + def test_save_order(self): + txt = 'one two three four five' + r = slugify(txt, allow_unicode=True, max_length=13, word_boundary=True, save_order=True) + self.assertEqual(r, "one-two-three") + + txt = 'one two three four five' + r = slugify(txt, allow_unicode=True, max_length=13, word_boundary=True, save_order=False) + self.assertEqual(r, "one-two-three") + + txt = 'one two three four five' + r = slugify(txt, allow_unicode=True, max_length=12, word_boundary=True, save_order=False) + self.assertEqual(r, "one-two-four") + + txt = 'one two three four five' + r = slugify(txt, allow_unicode=True, max_length=12, word_boundary=True, save_order=True) + self.assertEqual(r, "one-two") + + def test_save_order_rtl(self): + """For right-to-left unicode languages""" + txt = 'دو سه چهار پنج' + r = slugify(txt, allow_unicode=True, max_length=10, word_boundary=True, save_order=True) + self.assertEqual(r, "دو-سه-چهار") + + txt = 'دو سه چهار پنج' + r = slugify(txt, allow_unicode=True, max_length=10, word_boundary=True, save_order=False) + self.assertEqual(r, "دو-سه-چهار") + + txt = 'دو سه چهار پنج' + r = slugify(txt, allow_unicode=True, max_length=9, word_boundary=True, save_order=False) + self.assertEqual(r, "دو-سه-پنج") + + txt = 'دو سه چهار پنج' + r = slugify(txt, allow_unicode=True, max_length=9, word_boundary=True, save_order=True) + self.assertEqual(r, "دو-سه") + + def test_stopword_removal(self): + txt = 'this has a stopword' + r = slugify(txt, allow_unicode=True, stopwords=['stopword']) + self.assertEqual(r, 'this-has-a') + + txt = 'this has a Öländ' + r = slugify(txt, allow_unicode=True, stopwords=['Öländ']) + self.assertEqual(r, 'this-has-a') + + def test_stopword_removal_casesensitive(self): + txt = 'thIs Has a stopword Stopword' + r = slugify(txt, allow_unicode=True, stopwords=['Stopword'], lowercase=False) + self.assertEqual(r, 'thIs-Has-a-stopword') + + txt = 'thIs Has a öländ Öländ' + r = slugify(txt, allow_unicode=True, stopwords=['Öländ'], lowercase=False) + self.assertEqual(r, 'thIs-Has-a-öländ') + + def test_multiple_stopword_occurances(self): + txt = 'the quick brown fox jumps over the lazy dog' + r = slugify(txt, allow_unicode=True, stopwords=['the']) + self.assertEqual(r, 'quick-brown-fox-jumps-over-lazy-dog') + + def test_differently_cased_stopword_match(self): + txt = 'Foo A FOO B foo C' + r = slugify(txt, allow_unicode=True, stopwords=['foo']) + self.assertEqual(r, 'a-b-c') + + txt = 'Foo A FOO B foo C' + r = slugify(txt, allow_unicode=True, stopwords=['FOO']) + self.assertEqual(r, 'a-b-c') + + def test_multiple_stopwords(self): + txt = 'the quick brown fox jumps over the lazy dog in a hurry' + r = slugify(txt, allow_unicode=True, stopwords=['the', 'in', 'a', 'hurry']) + self.assertEqual(r, 'quick-brown-fox-jumps-over-lazy-dog') + + def test_stopwords_with_different_separator(self): + txt = 'the quick brown fox jumps over the lazy dog' + r = slugify(txt, allow_unicode=True, stopwords=['the'], separator=' ') + self.assertEqual(r, 'quick brown fox jumps over lazy dog') + + def test_html_entities_on(self): + txt = 'foo & bar' + r = slugify(txt, allow_unicode=True) + self.assertEqual(r, 'foo-bar') + + def test_html_entities_off(self): + txt = 'foo & bår' + r = slugify(txt, allow_unicode=True, entities=False) + self.assertEqual(r, 'foo-amp-bår') + + def test_html_decimal_on(self): + txt = 'Ž' + r = slugify(txt, allow_unicode=True, decimal=True) + self.assertEqual(r, 'ž') + + def test_html_decimal_off(self): + txt = 'Ž' + r = slugify(txt, allow_unicode=True, entities=False, decimal=False) + self.assertEqual(r, '381') + + def test_html_hexadecimal_on(self): + txt = 'Ž' + r = slugify(txt, allow_unicode=True, hexadecimal=True) + self.assertEqual(r, 'ž') + + def test_html_hexadecimal_off(self): + txt = 'Ž' + r = slugify(txt, allow_unicode=True, hexadecimal=False) + self.assertEqual(r, 'x17d') + + def test_starts_with_number(self): + txt = '10 amazing secrets' + r = slugify(txt, allow_unicode=True) + self.assertEqual(r, '10-amazing-secrets') + + def test_contains_numbers(self): + txt = 'buildings with 1000 windows' + r = slugify(txt, allow_unicode=True) + self.assertEqual(r, 'buildings-with-1000-windows') + + def test_ends_with_number(self): + txt = 'recipe number 3' + r = slugify(txt, allow_unicode=True) + self.assertEqual(r, 'recipe-number-3') + + def test_numbers_only(self): + txt = '404' + r = slugify(txt, allow_unicode=True) + self.assertEqual(r, '404') + + def test_numbers_and_symbols(self): + txt = '1,000 reasons you are #1' + r = slugify(txt, allow_unicode=True) + self.assertEqual(r, '1000-reasons-you-are-1') + + txt = '۱,۰۰۰ reasons you are #۱' + r = slugify(txt, allow_unicode=True) + self.assertEqual(r, '۱۰۰۰-reasons-you-are-۱') + + def test_regex_pattern_keep_underscore(self): + """allowing unicode should not overrule the passed regex_pattern""" + txt = "___This is a test___" + regex_pattern = r'[^-a-z0-9_]+' + r = slugify(txt, allow_unicode=True, regex_pattern=regex_pattern) + self.assertEqual(r, "___this-is-a-test___") + + def test_regex_pattern_keep_underscore_with_underscore_as_separator(self): + """ + The regex_pattern turns the power to the caller. + Hence, the caller must ensure that a custom separator doesn't clash + with the regex_pattern. + """ + txt = "___This is a test___" + regex_pattern = r'[^-a-z0-9_]+' + r = slugify(txt, allow_unicode=True, separator='_', regex_pattern=regex_pattern) + self.assertNotEqual(r, "_this_is_a_test_") + + def test_replacements(self): + txt = '10 | 20 %' + r = slugify(txt, allow_unicode=True, replacements=[['|', 'or'], ['%', 'percent']]) + self.assertEqual(r, "10-or-20-percent") + + txt = 'I ♥ 🦄' + r = slugify(txt, allow_unicode=True, replacements=[['♥', 'amour'], ['🦄', 'licorne']]) + self.assertEqual(r, "i-amour-licorne") + + txt = 'I ♥ 🦄' + r = slugify(txt, allow_unicode=True, replacements=[['♥', 'სიყვარული'], ['🦄', 'licorne']]) + self.assertEqual(r, "i-სიყვარული-licorne") + + def test_replacements_german_umlaut_custom(self): + txt = 'ÜBER Über German Umlaut' + r = slugify(txt, allow_unicode=True, replacements=[['Ü', 'UE'], ['ü', 'ue']]) + self.assertEqual(r, "ueber-ueber-german-umlaut") + + def test_emojis(self): + """ + allowing unicode shouldn't allow emojis, even in replacements. + the only exception is when it is allowed by the regex_pattern. regex_pattern overrules all + """ + txt = 'i love 🦄' + r = slugify(txt, allow_unicode=True) + self.assertEqual(r, "i-love") + + txt = 'i love 🦄' + r = slugify(txt, allow_unicode=True, decimal=True) + self.assertEqual(r, "i-love") + + txt = 'i love 🦄' + r = slugify(txt, allow_unicode=True, hexadecimal=True) + self.assertEqual(r, "i-love") + + txt = 'i love 🦄' + r = slugify(txt, allow_unicode=True, entities=True) + self.assertEqual(r, "i-love") + + txt = 'i love you' + r = slugify(txt, allow_unicode=True, replacements=[['you', '🦄']]) + self.assertEqual(r, "i-love") + + txt = 'i love 🦄' + r = slugify(txt, allow_unicode=True, regex_pattern=r'[^🦄]+') + self.assertEqual(r, "🦄") + + class TestUtils(unittest.TestCase): def test_smart_truncate_no_max_length(self): diff --git a/tox.ini b/tox.ini deleted file mode 100644 index a4bee82..0000000 --- a/tox.ini +++ /dev/null @@ -1,18 +0,0 @@ -[tox] -envlist = py{39,38,37,36},pypy3 - -[testenv] -deps= - -e . -commands = - python -m unittest test - -[testenv:format] -deps = pycodestyle -allowlist_externals = sh -commands = sh format.sh - -[testenv:coverage] -deps = coverage -commands = - coverage run --source=slugify test.py