From c92ec436a5489ab2043e15e1f9e6d205dc2de861 Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 27 Aug 2022 12:59:45 +0200 Subject: [PATCH 1/3] Allow CI to run on maintenance branches --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8eff3a62..515007e4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,9 +2,9 @@ name: CI on: push: - branches: [ master ] + branches: [ '*' ] pull_request: - branches: [ master ] + branches: [ '*' ] jobs: checks: From 0939d42b9130e8e4c216f60e7c699b7133f67e73 Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 27 Aug 2022 12:28:27 +0200 Subject: [PATCH 2/3] Revert cache FIFO replacement policy to reset/clear Amongst other changes, #113 switched the cache to a FIFO inspired by the standard library's re module, however it didn't really take concurrency into account, so it didn't consider that double-pops are possible (probably why the stdlib ignores a bunch of errors), which can cause KeyError during lookup (as two workers try to clear the first key, one succeeds, and the other doesn't find the key and fails). It also has a few other, less serious issues: - double-inserts are possible, which can cause the cache to exceed set capacity permanently by the number of concurrent workers - the stdlib's method only works properly with Python 3.6's naturally ordered `dict`, but I'd rather not drop 2.7 compatibility from 0.x unless there are very good reasons to, as, despite 2.7 having been EOL'd in 2020, it still accounts for more downloads than 3.10 (according to pypistats) Using an ordered dict would solve the last issue (dict ordering), and allow using an LRU rather than a FIFO, but it would not actually prevent double-pops or double-inserts; that would require a proper lock on lookup. 
Such a lock might not be that expensive, but given the lack of a good dataset to benchmark with, it seems like a lot of additional complexity for something we've got no visibility on. But that can be considered if someone reports a serious performance regression from this. So for now just revert to a "reset" cache replacement policy. If / when we drop older versions we can switch to `functools.lru_cache` and let the stdlib take care of this (and possibly have cache stats). Alternatively, if we get a good testing dataset one day, we can benchmark cache replacement policies or even provide pluggable policies. Anyway fixes #132, closes #133 --- ua_parser/user_agent_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ua_parser/user_agent_parser.py b/ua_parser/user_agent_parser.py index 192d25ab..4989336f 100644 --- a/ua_parser/user_agent_parser.py +++ b/ua_parser/user_agent_parser.py @@ -234,7 +234,7 @@ def _lookup(ua, args): return entry if len(_PARSE_CACHE) >= MAX_CACHE_SIZE: - _PARSE_CACHE.pop(next(iter(_PARSE_CACHE))) + _PARSE_CACHE.clear() v = _PARSE_CACHE[key] = {"string": ua} return v From 05be64dcb97ca84423078622bf9cd929344b3027 Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 27 Aug 2022 13:53:08 +0200 Subject: [PATCH 3/3] Release 0.15.2 Fix for #132 on 0.15 --- setup.py | 2 +- ua_parser/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index f47e9da3..67f614d4 100644 --- a/setup.py +++ b/setup.py @@ -200,7 +200,7 @@ class sdist(_sdist): setup( name="ua-parser", - version="0.15.1", + version="0.15.2", description="Python port of Browserscope's user agent parser", author="PBS", author_email="no-reply@pbs.org", diff --git a/ua_parser/__init__.py b/ua_parser/__init__.py index 8bdf7118..7923db2a 100644 --- a/ua_parser/__init__.py +++ b/ua_parser/__init__.py @@ -1 +1 @@ -VERSION = (0, 15, 1) +VERSION = (0, 15, 2)