From dfaa9e060bf6d69cb862a2ac140b8fccbebf3000 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Tue, 12 Dec 2023 16:17:08 -0500 Subject: [PATCH 1/2] gh-113010: Don't decrement deferred in pystats (#113032) This fixes a recently introduced bug where the deferred count is being unnecessarily decremented to counteract an increment elsewhere that is no longer happening. This caused the values to flip around to "very large" 64-bit numbers. --- Python/ceval_macros.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index f298c602b1042b..ac44aecae046d8 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -258,10 +258,6 @@ GETITEM(PyObject *v, Py_ssize_t i) { if (ADAPTIVE_COUNTER_IS_ZERO(next_instr->cache)) { \ STAT_INC((INSTNAME), deopt); \ } \ - else { \ - /* This is about to be (incorrectly) incremented: */ \ - STAT_DEC((INSTNAME), deferred); \ - } \ } while (0) #else #define UPDATE_MISS_STATS(INSTNAME) ((void)0) From 7316dfb0ebc46aedf484c1f15f03a0a309d12a42 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 12 Dec 2023 13:43:08 -0800 Subject: [PATCH 2/2] gh-112320: Implement on-trace confidence tracking for branches (#112321) We track the confidence as a scaled int. 
--- Include/cpython/pystats.h | 1 + Lib/test/test_capi/test_misc.py | 31 +++++++++++++++++++ ...-11-22-13-17-54.gh-issue-112320.EddM51.rst | 4 +++ Python/optimizer.c | 20 ++++++++++-- Python/specialize.c | 1 + Tools/scripts/summarize_stats.py | 2 ++ 6 files changed, 56 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-11-22-13-17-54.gh-issue-112320.EddM51.rst diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index 294bf1505f0115..ba67eefef3e37a 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -114,6 +114,7 @@ typedef struct _optimization_stats { uint64_t trace_too_short; uint64_t inner_loop; uint64_t recursive_call; + uint64_t low_confidence; UOpStats opcode[512]; uint64_t unsupported_opcode[256]; uint64_t trace_length_hist[_Py_UOP_HIST_SIZE]; diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index e6b532e858c8f9..776ee913a02216 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2985,6 +2985,37 @@ def testfunc(n, m): uops = {opname for opname, _, _ in ex} self.assertIn("_FOR_ITER_TIER_TWO", uops) + def test_confidence_score(self): + def testfunc(n): + bits = 0 + for i in range(n): + if i & 0x01: + bits += 1 + if i & 0x02: + bits += 1 + if i&0x04: + bits += 1 + if i&0x08: + bits += 1 + if i&0x10: + bits += 1 + if i&0x20: + bits += 1 + return bits + + opt = _testinternalcapi.get_uop_optimizer() + with temporary_optimizer(opt): + x = testfunc(20) + + self.assertEqual(x, 40) + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + ops = [opname for opname, _, _ in ex] + count = ops.count("_GUARD_IS_TRUE_POP") + # Because each 'if' halves the score, the second branch is + # too much already. 
+ self.assertEqual(count, 1) + @unittest.skipUnless(support.Py_GIL_DISABLED, 'need Py_GIL_DISABLED') class TestPyThreadId(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-11-22-13-17-54.gh-issue-112320.EddM51.rst b/Misc/NEWS.d/next/Core and Builtins/2023-11-22-13-17-54.gh-issue-112320.EddM51.rst new file mode 100644 index 00000000000000..0da2fd33b0ea52 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-11-22-13-17-54.gh-issue-112320.EddM51.rst @@ -0,0 +1,4 @@ +The Tier 2 translator now tracks the confidence level for staying "on trace" +(i.e. not exiting back to the Tier 1 interpreter) for branch instructions +based on the number of bits set in the branch "counter". Trace translation +ends when the confidence drops below 1/3rd. diff --git a/Python/optimizer.c b/Python/optimizer.c index 7c46bd69157170..d44e733bc346fa 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -409,6 +409,9 @@ BRANCH_TO_GUARD[4][2] = { #define TRACE_STACK_SIZE 5 +#define CONFIDENCE_RANGE 1000 +#define CONFIDENCE_CUTOFF 333 + /* Returns 1 on success, * 0 if it failed to produce a worthwhile trace, * and -1 on an error. 
@@ -431,6 +434,7 @@ translate_bytecode_to_trace( _Py_CODEUNIT *instr; } trace_stack[TRACE_STACK_SIZE]; int trace_stack_depth = 0; + int confidence = CONFIDENCE_RANGE; // Adjusted by branch instructions #ifdef Py_DEBUG char *python_lltrace = Py_GETENV("PYTHON_LLTRACE"); @@ -513,7 +517,6 @@ translate_bytecode_to_trace( uint32_t oparg = instr->op.arg; uint32_t extras = 0; - if (opcode == EXTENDED_ARG) { instr++; extras += 1; @@ -543,11 +546,22 @@ translate_bytecode_to_trace( int counter = instr[1].cache; int bitcount = _Py_popcount32(counter); int jump_likely = bitcount > 8; + if (jump_likely) { + confidence = confidence * bitcount / 16; + } + else { + confidence = confidence * (16 - bitcount) / 16; + } + if (confidence < CONFIDENCE_CUTOFF) { + DPRINTF(2, "Confidence too low (%d)\n", confidence); + OPT_STAT_INC(low_confidence); + goto done; + } uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_likely]; _Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]; - DPRINTF(4, "%s(%d): counter=%x, bitcount=%d, likely=%d, uopcode=%s\n", + DPRINTF(2, "%s(%d): counter=%x, bitcount=%d, likely=%d, confidence=%d, uopcode=%s\n", _PyUOpName(opcode), oparg, - counter, bitcount, jump_likely, _PyUOpName(uopcode)); + counter, bitcount, jump_likely, confidence, _PyUOpName(uopcode)); ADD_TO_TRACE(uopcode, max_length, 0, target); if (jump_likely) { _Py_CODEUNIT *target_instr = next_instr + oparg; diff --git a/Python/specialize.c b/Python/specialize.c index ba704cbbb464d7..7c2a4a42b1dcc3 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -233,6 +233,7 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) fprintf(out, "Optimization trace too short: %" PRIu64 "\n", stats->trace_too_short); fprintf(out, "Optimization inner loop: %" PRIu64 "\n", stats->inner_loop); fprintf(out, "Optimization recursive call: %" PRIu64 "\n", stats->recursive_call); + fprintf(out, "Optimization low confidence: %" PRIu64 "\n", stats->low_confidence); 
print_histogram(out, "Trace length", stats->trace_length_hist); print_histogram(out, "Trace run length", stats->trace_run_length_hist); diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index 360b7c720bd1f0..80a1280c025aca 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -386,6 +386,7 @@ def get_optimization_stats(self) -> dict[str, tuple[int, int | None]]: trace_too_short = self._data["Optimization trace too short"] inner_loop = self._data["Optimization inner loop"] recursive_call = self._data["Optimization recursive call"] + low_confidence = self._data["Optimization low confidence"] return { "Optimization attempts": (attempts, None), @@ -396,6 +397,7 @@ def get_optimization_stats(self) -> dict[str, tuple[int, int | None]]: "Trace too short": (trace_too_short, attempts), "Inner loop found": (inner_loop, attempts), "Recursive call": (recursive_call, attempts), + "Low confidence": (low_confidence, attempts), "Traces executed": (executed, None), "Uops executed": (uops, executed), }