diff --git a/jobrunner/test/utils_test.py b/jobrunner/test/utils_test.py
new file mode 100644
index 0000000..148a489
--- /dev/null
+++ b/jobrunner/test/utils_test.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# Copyright (c) 2021 Arista Networks, Inc. All rights reserved.
+# Arista Networks, Inc. Confidential and Proprietary.
+
+from __future__ import absolute_import, division, print_function
+
+import pytest
+
+from jobrunner.utils import autoDecode
+
+
+@pytest.mark.parametrize(("value", "encoding"), [
+    (b"Waiting for '\xe2\x9d\xaf|[Pp]db' in session "
+     b"routing-enabled-structure_0_64\n(Pdb++)\n", "utf-8"),
+    (b"hi there", "ascii"),
+    # chardet reports no encoding for empty input; expect the UTF-8 fallback.
+    (b"", "utf-8"),
+])
+def testAutoDecode(value, encoding):
+    """autoDecode must agree with decoding via the known-correct codec."""
+    assert value.decode(encoding) == autoDecode(value)
diff --git a/jobrunner/utils.py b/jobrunner/utils.py
index d87f778..9e6c9d6 100644
--- a/jobrunner/utils.py
+++ b/jobrunner/utils.py
@@ -351,6 +351,18 @@ def sudoKillProcGroup(pgrp):
 def autoDecode(byteArray):
+    """Decode a byte string to text, guessing its character encoding.
+
+    chardet's guess is used only when it names an encoding and reports
+    sufficient confidence; otherwise the bytes are decoded as UTF-8.
+    Errors raised by the decode call itself propagate to the caller.
+    """
     detected = chardet.detect(byteArray)
     encoding = detected['encoding']
-    if detected['confidence'] < 0.5:  # very arbitrary
+    minConfidence = 0.8  # very arbitrary
+    # chardet reports encoding=None when it cannot guess (e.g. empty input),
+    # so check for that explicitly instead of relying on the confidence.
+    if encoding is None or detected['confidence'] < minConfidence:
+        LOG.debug("char encoding below confidence level %s (%r). "
+                  "Fall back to UTF-8.", minConfidence, detected)
         encoding = 'utf-8'
     return byteArray.decode(encoding)