From 12e5949d2ae0ec830e46331e6ae12601c086af37 Mon Sep 17 00:00:00 2001
From: wwade <wwade@users.noreply.github.com>
Date: Thu, 27 May 2021 16:34:24 -0700
Subject: [PATCH] utils: fix autoDecode error for specific sequence

Since we're probably going to be seeing either ASCII or UTF-8 input anyway, bump the
detection confidence requirement from 0.5 to 0.8. For the first test input in
utils_test.py, chardet reported a confidence of 0.559 for Windows-1254 and just 0.505
for UTF-8, even though the input is in fact UTF-8.

This also makes me question using chardet at all, but it probably won't hurt at the
new confidence threshold.
---
 jobrunner/test/utils_test.py | 18 ++++++++++++++++++
 jobrunner/utils.py           |  4 +++-
 2 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 jobrunner/test/utils_test.py

diff --git a/jobrunner/test/utils_test.py b/jobrunner/test/utils_test.py
new file mode 100644
index 0000000..148a489
--- /dev/null
+++ b/jobrunner/test/utils_test.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python
+# Copyright (c) 2021 Arista Networks, Inc.  All rights reserved.
+# Arista Networks, Inc. Confidential and Proprietary.
+
+from __future__ import absolute_import, division, print_function
+
+import pytest
+
+from jobrunner.utils import autoDecode
+
+
+@pytest.mark.parametrize(("value", "encoding"), [  # (input bytes, expected codec)
+    (b"Waiting for '\xe2\x9d\xaf|[Pp]db' in session "  # chardet guessed Windows-1254
+     b"routing-enabled-structure_0_64\n(Pdb++)\n", "utf-8"),
+    (b"hi there", "ascii"),  # pure-ASCII input
+])
+def testAutoDecode(value, encoding):
+    assert value.decode(encoding) == autoDecode(value)
diff --git a/jobrunner/utils.py b/jobrunner/utils.py
index d87f778..9e6c9d6 100644
--- a/jobrunner/utils.py
+++ b/jobrunner/utils.py
@@ -351,6 +351,8 @@ def sudoKillProcGroup(pgrp):
 def autoDecode(byteArray):
     detected = chardet.detect(byteArray)
     encoding = detected['encoding']
-    if detected['confidence'] < 0.5:  # very arbitrary
+    if detected['confidence'] < 0.8:  # arbitrary cutoff; below it, assume UTF-8
+        LOG.debug("char encoding below confidence level 0.8 (%r). "
+                  "Fall back to UTF-8.", detected)
         encoding = 'utf-8'
     return byteArray.decode(encoding)