Skip to content

gh-135474: Specialize arithmetic only on compact ints #135479

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Doc/howto/perf_profiling.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ Then we can use ``perf report`` to analyze the data:
| | | |
| | | |--51.67%--_PyEval_EvalFrameDefault
| | | | |
| | | | |--11.52%--_PyLong_Add
| | | | |--11.52%--_PyCompactLong_Add
| | | | | |
| | | | | |--2.97%--_PyObject_Malloc
...
Expand Down Expand Up @@ -142,7 +142,7 @@ Instead, if we run the same experiment with ``perf`` support enabled we get:
| | | |
| | | |--51.81%--_PyEval_EvalFrameDefault
| | | | |
| | | | |--13.77%--_PyLong_Add
| | | | |--13.77%--_PyCompactLong_Add
| | | | | |
| | | | | |--3.26%--_PyObject_Malloc

Expand Down
6 changes: 3 additions & 3 deletions Include/internal/pycore_long.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,9 @@ PyAPI_DATA(PyObject*) _PyLong_Rshift(PyObject *, int64_t);
// Export for 'math' shared extension
PyAPI_DATA(PyObject*) _PyLong_Lshift(PyObject *, int64_t);

PyAPI_FUNC(PyObject*) _PyLong_Add(PyLongObject *left, PyLongObject *right);
PyAPI_FUNC(PyObject*) _PyLong_Multiply(PyLongObject *left, PyLongObject *right);
PyAPI_FUNC(PyObject*) _PyLong_Subtract(PyLongObject *left, PyLongObject *right);
PyAPI_FUNC(PyObject*) _PyCompactLong_Add(PyLongObject *left, PyLongObject *right);
PyAPI_FUNC(PyObject*) _PyCompactLong_Multiply(PyLongObject *left, PyLongObject *right);
PyAPI_FUNC(PyObject*) _PyCompactLong_Subtract(PyLongObject *left, PyLongObject *right);

// Export for 'binascii' shared extension.
PyAPI_DATA(unsigned char) _PyLong_DigitValue[256];
Expand Down
6 changes: 3 additions & 3 deletions Include/internal/pycore_opcode_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions Include/internal/pycore_uop_metadata.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Specialize integer operations only on compact integers. This is a CPython internal change.
17 changes: 11 additions & 6 deletions Objects/longobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -3772,9 +3772,11 @@ long_add(PyLongObject *a, PyLongObject *b)
}

PyObject *
_PyLong_Add(PyLongObject *a, PyLongObject *b)
_PyCompactLong_Add(PyLongObject *a, PyLongObject *b)
{
return (PyObject*)long_add(a, b);
assert(_PyLong_BothAreCompact(a, b));
stwodigits z = medium_value(a) + medium_value(b);
return (PyObject *)_PyLong_FromSTwoDigits(z);
}

static PyObject *
Expand Down Expand Up @@ -3815,9 +3817,10 @@ long_sub(PyLongObject *a, PyLongObject *b)
}

PyObject *
_PyLong_Subtract(PyLongObject *a, PyLongObject *b)
_PyCompactLong_Subtract(PyLongObject *a, PyLongObject *b)
{
return (PyObject*)long_sub(a, b);
assert(_PyLong_BothAreCompact(a, b));
return (PyObject *)_PyLong_FromSTwoDigits(medium_value(a) - medium_value(b));
}

static PyObject *
Expand Down Expand Up @@ -4262,9 +4265,11 @@ long_mul(PyLongObject *a, PyLongObject *b)
}

PyObject *
_PyLong_Multiply(PyLongObject *a, PyLongObject *b)
_PyCompactLong_Multiply(PyLongObject *a, PyLongObject *b)
{
return (PyObject*)long_mul(a, b);
assert(_PyLong_BothAreCompact(a, b));
stwodigits v = medium_value(a) * medium_value(b);
return (PyObject *)_PyLong_FromSTwoDigits(v);
}

static PyObject *
Expand Down
9 changes: 6 additions & 3 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -582,9 +582,10 @@ dummy_func(
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
assert(PyLong_CheckExact(left_o));
assert(PyLong_CheckExact(right_o));
DEOPT_IF(!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you narrow the checks in _GUARD_TOS_INT and _GUARD_NOS_INT to check for compact ints, we can drop the check here. That way we keep the guard and action in separate micro-ops.

Likewise for the + and - specializations (_BINARY_OP_ADD_INT and _BINARY_OP_SUBTRACT_INT). The only complexity is that optimizer_bytecodes.c will need updating to handle compact ints differently.

This change is fine for now though, if you want to do the more sophisticated change in another PR.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I can do that.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On second thought, it's best to leave this to another PR. We need to introduce a new type altogether (one separate from the one set by sym_set_type(&PyLong_Type)), and I want to keep this PR's changes confined to longobject.c.


STAT_INC(BINARY_OP, hit);
PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o);
PyObject *res_o = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o);
PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc);
PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
INPUTS_DEAD();
Expand All @@ -597,9 +598,10 @@ dummy_func(
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
assert(PyLong_CheckExact(left_o));
assert(PyLong_CheckExact(right_o));
DEOPT_IF(!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o));

STAT_INC(BINARY_OP, hit);
PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o);
PyObject *res_o = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o);
PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc);
PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
INPUTS_DEAD();
Expand All @@ -612,9 +614,10 @@ dummy_func(
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
assert(PyLong_CheckExact(left_o));
assert(PyLong_CheckExact(right_o));
DEOPT_IF(!_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o));

STAT_INC(BINARY_OP, hit);
PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o);
PyObject *res_o = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o);
PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc);
PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc);
INPUTS_DEAD();
Expand Down
24 changes: 15 additions & 9 deletions Python/executor_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 18 additions & 9 deletions Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

51 changes: 3 additions & 48 deletions Python/optimizer_bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -222,60 +222,15 @@ dummy_func(void) {
}

op(_BINARY_OP_ADD_INT, (left, right -- res)) {
if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
assert(PyLong_CheckExact(sym_get_const(ctx, left)));
assert(PyLong_CheckExact(sym_get_const(ctx, right)));
PyObject *temp = _PyLong_Add((PyLongObject *)sym_get_const(ctx, left),
(PyLongObject *)sym_get_const(ctx, right));
if (temp == NULL) {
goto error;
}
res = sym_new_const(ctx, temp);
Py_DECREF(temp);
// TODO gh-115506:
// replace opcode with constant propagated one and add tests!
}
else {
res = sym_new_type(ctx, &PyLong_Type);
}
res = sym_new_type(ctx, &PyLong_Type);
}

op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) {
if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
assert(PyLong_CheckExact(sym_get_const(ctx, left)));
assert(PyLong_CheckExact(sym_get_const(ctx, right)));
PyObject *temp = _PyLong_Subtract((PyLongObject *)sym_get_const(ctx, left),
(PyLongObject *)sym_get_const(ctx, right));
if (temp == NULL) {
goto error;
}
res = sym_new_const(ctx, temp);
Py_DECREF(temp);
// TODO gh-115506:
// replace opcode with constant propagated one and add tests!
}
else {
res = sym_new_type(ctx, &PyLong_Type);
}
res = sym_new_type(ctx, &PyLong_Type);
}

op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) {
if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
assert(PyLong_CheckExact(sym_get_const(ctx, left)));
assert(PyLong_CheckExact(sym_get_const(ctx, right)));
PyObject *temp = _PyLong_Multiply((PyLongObject *)sym_get_const(ctx, left),
(PyLongObject *)sym_get_const(ctx, right));
if (temp == NULL) {
goto error;
}
res = sym_new_const(ctx, temp);
Py_DECREF(temp);
// TODO gh-115506:
// replace opcode with constant propagated one and add tests!
}
else {
res = sym_new_type(ctx, &PyLong_Type);
}
res = sym_new_type(ctx, &PyLong_Type);
}

op(_BINARY_OP_ADD_FLOAT, (left, right -- res)) {
Expand Down
Loading
Loading