diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h
index 294bf1505f0115..ba67eefef3e37a 100644
--- a/Include/cpython/pystats.h
+++ b/Include/cpython/pystats.h
@@ -114,6 +114,7 @@ typedef struct _optimization_stats {
     uint64_t trace_too_short;
     uint64_t inner_loop;
     uint64_t recursive_call;
+    uint64_t low_confidence;
     UOpStats opcode[512];
     uint64_t unsupported_opcode[256];
     uint64_t trace_length_hist[_Py_UOP_HIST_SIZE];
diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py
index e6b532e858c8f9..776ee913a02216 100644
--- a/Lib/test/test_capi/test_misc.py
+++ b/Lib/test/test_capi/test_misc.py
@@ -2985,6 +2985,37 @@ def testfunc(n, m):
         uops = {opname for opname, _, _ in ex}
         self.assertIn("_FOR_ITER_TIER_TWO", uops)
 
+    def test_confidence_score(self):
+        def testfunc(n):
+            bits = 0
+            for i in range(n):
+                if i & 0x01:
+                    bits += 1
+                if i & 0x02:
+                    bits += 1
+                if i & 0x04:
+                    bits += 1
+                if i & 0x08:
+                    bits += 1
+                if i & 0x10:
+                    bits += 1
+                if i & 0x20:
+                    bits += 1
+            return bits
+
+        opt = _testinternalcapi.get_uop_optimizer()
+        with temporary_optimizer(opt):
+            x = testfunc(20)
+
+        self.assertEqual(x, 40)
+        ex = get_first_executor(testfunc)
+        self.assertIsNotNone(ex)
+        ops = [opname for opname, _, _ in ex]
+        count = ops.count("_GUARD_IS_TRUE_POP")
+        # Because each 'if' halves the score, the second branch
+        # already drops confidence below the cutoff.
+        self.assertEqual(count, 1)
+
 
 @unittest.skipUnless(support.Py_GIL_DISABLED, 'need Py_GIL_DISABLED')
 class TestPyThreadId(unittest.TestCase):
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-11-22-13-17-54.gh-issue-112320.EddM51.rst b/Misc/NEWS.d/next/Core and Builtins/2023-11-22-13-17-54.gh-issue-112320.EddM51.rst
new file mode 100644
index 00000000000000..0da2fd33b0ea52
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-11-22-13-17-54.gh-issue-112320.EddM51.rst
@@ -0,0 +1,4 @@
+The Tier 2 translator now tracks the confidence level for staying "on trace"
+(i.e. not exiting back to the Tier 1 interpreter) for branch instructions
+based on the number of bits set in the branch "counter". Trace translation
+ends when the confidence drops below 1/3rd.
diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h
index f298c602b1042b..ac44aecae046d8 100644
--- a/Python/ceval_macros.h
+++ b/Python/ceval_macros.h
@@ -258,10 +258,6 @@ GETITEM(PyObject *v, Py_ssize_t i) {
         if (ADAPTIVE_COUNTER_IS_ZERO(next_instr->cache)) {       \
             STAT_INC((INSTNAME), deopt);                          \
         }                                                         \
-        else {                                                    \
-            /* This is about to be (incorrectly) incremented: */  \
-            STAT_DEC((INSTNAME), deferred);                       \
-        }                                                         \
     } while (0)
 #else
 #define UPDATE_MISS_STATS(INSTNAME) ((void)0)
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 7c46bd69157170..d44e733bc346fa 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -409,6 +409,9 @@ BRANCH_TO_GUARD[4][2] = {
 
 #define TRACE_STACK_SIZE 5
 
+#define CONFIDENCE_RANGE 1000
+#define CONFIDENCE_CUTOFF 333
+
 /* Returns 1 on success,
  * 0 if it failed to produce a worthwhile trace,
  * and -1 on an error.
@@ -431,6 +434,7 @@ translate_bytecode_to_trace(
         _Py_CODEUNIT *instr;
     } trace_stack[TRACE_STACK_SIZE];
     int trace_stack_depth = 0;
+    int confidence = CONFIDENCE_RANGE;  // Adjusted by branch instructions
 
 #ifdef Py_DEBUG
     char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
@@ -513,7 +517,6 @@ translate_bytecode_to_trace(
         uint32_t oparg = instr->op.arg;
         uint32_t extras = 0;
 
-
         if (opcode == EXTENDED_ARG) {
             instr++;
             extras += 1;
@@ -543,11 +546,22 @@ translate_bytecode_to_trace(
                 int counter = instr[1].cache;
                 int bitcount = _Py_popcount32(counter);
                 int jump_likely = bitcount > 8;
+                if (jump_likely) {
+                    confidence = confidence * bitcount / 16;
+                }
+                else {
+                    confidence = confidence * (16 - bitcount) / 16;
+                }
+                if (confidence < CONFIDENCE_CUTOFF) {
+                    DPRINTF(2, "Confidence too low (%d)\n", confidence);
+                    OPT_STAT_INC(low_confidence);
+                    goto done;
+                }
                 uint32_t uopcode = BRANCH_TO_GUARD[opcode - POP_JUMP_IF_FALSE][jump_likely];
                 _Py_CODEUNIT *next_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]];
-                DPRINTF(4, "%s(%d): counter=%x, bitcount=%d, likely=%d, uopcode=%s\n",
+                DPRINTF(2, "%s(%d): counter=%x, bitcount=%d, likely=%d, confidence=%d, uopcode=%s\n",
                         _PyUOpName(opcode), oparg,
-                        counter, bitcount, jump_likely, _PyUOpName(uopcode));
+                        counter, bitcount, jump_likely, confidence, _PyUOpName(uopcode));
                 ADD_TO_TRACE(uopcode, max_length, 0, target);
                 if (jump_likely) {
                     _Py_CODEUNIT *target_instr = next_instr + oparg;
diff --git a/Python/specialize.c b/Python/specialize.c
index ba704cbbb464d7..7c2a4a42b1dcc3 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -233,6 +233,7 @@ print_optimization_stats(FILE *out, OptimizationStats *stats)
     fprintf(out, "Optimization trace too short: %" PRIu64 "\n", stats->trace_too_short);
     fprintf(out, "Optimization inner loop: %" PRIu64 "\n", stats->inner_loop);
     fprintf(out, "Optimization recursive call: %" PRIu64 "\n", stats->recursive_call);
+    fprintf(out, "Optimization low confidence: %" PRIu64 "\n", stats->low_confidence);
 
     print_histogram(out, "Trace length", stats->trace_length_hist);
     print_histogram(out, "Trace run length", stats->trace_run_length_hist);
diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py
index 360b7c720bd1f0..80a1280c025aca 100644
--- a/Tools/scripts/summarize_stats.py
+++ b/Tools/scripts/summarize_stats.py
@@ -386,6 +386,7 @@ def get_optimization_stats(self) -> dict[str, tuple[int, int | None]]:
         trace_too_short = self._data["Optimization trace too short"]
         inner_loop = self._data["Optimization inner loop"]
         recursive_call = self._data["Optimization recursive call"]
+        low_confidence = self._data["Optimization low confidence"]
 
         return {
             "Optimization attempts": (attempts, None),
@@ -396,6 +397,7 @@ def get_optimization_stats(self) -> dict[str, tuple[int, int | None]]:
             "Trace too short": (trace_too_short, attempts),
             "Inner loop found": (inner_loop, attempts),
             "Recursive call": (recursive_call, attempts),
+            "Low confidence": (low_confidence, attempts),
             "Traces executed": (executed, None),
             "Uops executed": (uops, executed),
         }
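
For readers who want to follow the arithmetic, here is a minimal standalone Python sketch of the confidence logic added to `translate_bytecode_to_trace()`. None of these names exist in the patch; `branch_confidence()` and `simulate_trace()` are hypothetical helpers used only to model the integer math in the C code: confidence starts at `CONFIDENCE_RANGE`, each conditional branch scales it by the fraction of the 16-bit branch counter that agrees with the projected direction, and translation stops once it falls below `CONFIDENCE_CUTOFF`.

```python
# Sketch only -- models the confidence arithmetic from Python/optimizer.c;
# branch_confidence() and simulate_trace() are illustrative, not part of CPython.

CONFIDENCE_RANGE = 1000   # starting confidence for a fresh trace
CONFIDENCE_CUTOFF = 333   # stop translating once confidence drops below ~1/3rd

def branch_confidence(confidence: int, bitcount: int) -> int:
    """Scale confidence by the likelihood of staying on trace.

    bitcount is the number of set bits in the 16-bit branch counter; the
    translator follows the more likely direction, so confidence is scaled
    by max(bitcount, 16 - bitcount) / 16 using integer division.
    """
    jump_likely = bitcount > 8
    if jump_likely:
        return confidence * bitcount // 16
    return confidence * (16 - bitcount) // 16

def simulate_trace(bitcounts: list[int]) -> int:
    """Return how many branch guards would be emitted before translation stops."""
    confidence = CONFIDENCE_RANGE
    guards = 0
    for bits in bitcounts:
        confidence = branch_confidence(confidence, bits)
        if confidence < CONFIDENCE_CUTOFF:
            break   # corresponds to OPT_STAT_INC(low_confidence); goto done
        guards += 1
    return guards

# A perfectly unpredictable branch (8 of 16 bits set) halves confidence each
# time: 1000 -> 500 -> 250. Only the first guard survives, which is what
# test_confidence_score checks via ops.count("_GUARD_IS_TRUE_POP") == 1.
assert simulate_trace([8, 8, 8]) == 1
```

Since the scale factor `max(bitcount, 16 - bitcount) / 16` never exceeds 1, confidence can only decrease along a trace, and the 1000/333 range keeps the whole estimate in cheap integer arithmetic.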