Skip to content

Commit

Permalink
macOS linking: re-enable chained fixups using a linker response file
Browse files Browse the repository at this point in the history
Following a high-bandwidth exchange with the Apple team in charge of the
macOS linker, this commit adjusts macOS extension linking so that it
retains the "chained fixups" optimization to produce smaller binaries.

This is done by informing the linker about every individual CPython (or
PyPy) API symbol that we expect to import dynamically at runtime. This
can actually be a relatively long list, and the commit uses a so-called
"linker response file" to specify them more efficiently.

Further technical details on chained fixups and the reasons for making
this change can be found in this issue tracker post:

python/cpython#97524 (comment)
  • Loading branch information
wjakob committed Mar 8, 2023
1 parent bc7aa4a commit 2f29ec7
Show file tree
Hide file tree
Showing 6 changed files with 1,930 additions and 1 deletion.
28 changes: 28 additions & 0 deletions cmake/collect-symbols-pypy.py
@@ -0,0 +1,28 @@
from urllib.request import urlopen
import tarfile
import subprocess

# Each entry is (archive URL, path of the shared library inside that archive).
files = [
    ('https://downloads.python.org/pypy/pypy3.9-v7.3.11-macos_arm64.tar.bz2',
     'pypy3.9-v7.3.11-macos_arm64/bin/libpypy3.9-c.dylib'),
]


def extract_member(url, member_name, dest='tmp'):
    """Download the bz2-compressed tarball at *url* and extract one member.

    The archive is read as a forward-only stream (mode ``r|bz2``), so the
    members are visited in order and iteration stops as soon as the requested
    one has been written below *dest*.

    Raises:
        RuntimeError: if the archive has no member named *member_name*.
    """
    with urlopen(url) as response:
        with tarfile.open(fileobj=response, mode="r|bz2") as archive:
            for member in archive:
                if member.name == member_name:
                    archive.extract(member, path=dest)
                    return
    # An explicit exception instead of ``assert``: asserts vanish under -O.
    raise RuntimeError(f'{member_name!r} not found in {url}')


def select_api_symbols(nm_output):
    """Return the set of lines in *nm_output* starting with ``_Py``/``__Py``.

    *nm_output* is the decoded text printed by ``nm -gjU``, one symbol name
    per line.
    """
    return {
        symbol
        for symbol in (line.strip() for line in nm_output.splitlines())
        if symbol.startswith(('_Py', '__Py'))
    }


def format_linker_args(symbols):
    """Return the response-file text: one ``-U <symbol>`` line per symbol.

    The names collected from ``nm`` already begin with an underscore (that is
    exactly what the ``_Py``/``__Py`` filter matches), so they are written
    verbatim. Prefixing another ``_`` would name symbols that don't exist.
    """
    return ''.join(f'-U {symbol}\n' for symbol in sorted(symbols))


def main():
    """Collect the API symbols of every listed library and write the result."""
    funcs = set()

    for url, member_name in files:
        extract_member(url, member_name)
        out = subprocess.check_output(['nm', '-gjU', 'tmp/' + member_name])
        funcs |= select_api_symbols(out.decode())

    with open("darwin-ld-pypy.sym", "w") as sym_file:
        sym_file.write(format_linker_args(funcs))


if __name__ == "__main__":
    main()
42 changes: 42 additions & 0 deletions cmake/collect-symbols.py
@@ -0,0 +1,42 @@
#!/usr/bin/env python3
#
# This script collects a list of symbols that are considered to be part of the
# CPython API. The result is used to inform the macOS linker that it's fine for
# those symbols to be undefined when an extension module is linked, as they
# will be provided when the extension module is loaded into the interpreter.

from urllib.request import urlopen
import re

# Branches whose export list is read from PC/python3.def
DEF_VERSIONS = ['3.7', '3.8', '3.9']

# Branches whose export list is read from PC/python3dll.c
DLL_VERSIONS = ['3.10', '3.11', 'main']

# A few more functions that nanobind uses and which aren't in the above lists
EXTRA_FUNCS = {
    'PyFrame_GetBack',
    'PyGILState_Check',
    'PyObject_LengthHint',
    'Py_CompileStringExFlags',
    '_PyInterpreterState_Get',
    '_PyObject_MakeTpCall',
    '_PyObject_NextNotImplemented',
    '_Py_CheckFunctionResult',
    '_Py_RefTotal',
}


def fetch_text(url):
    """Download *url* and return its body decoded as UTF-8."""
    with urlopen(url) as response:
        return response.read().decode('utf-8')


def parse_def_exports(text):
    """Return the names on the left of ``=`` on indented lines of *text*.

    Anchoring on the line start and matching ``\\w+`` keeps the result free of
    leading whitespace no matter how deeply the entries are indented.
    """
    return set(re.findall(r"^[ \t]+(\w+)=", text, re.MULTILINE))


def parse_dll_exports(text):
    """Return every identifier wrapped in ``EXPORT_FUNC(...)`` in *text*.

    The placeholder ``name`` is dropped: it is what the pattern captures from
    a ``#define EXPORT_FUNC(name)`` line rather than a real symbol.
    ``discard`` is used so that a text without such a line is not an error.

    NOTE(review): only ``EXPORT_FUNC`` entries are collected; if the file also
    declares exported data through another macro, those are not picked up
    here -- confirm whether that is intended.
    """
    names = set(re.findall(r"EXPORT_FUNC\((\w+)\)", text))
    names.discard('name')
    return names


def format_linker_args(funcs):
    """Return the response-file text: one ``-U _<name>`` line per function.

    The leading underscore is added here; the collected names do not carry it.
    """
    return ''.join(f'-U _{func}\n' for func in sorted(funcs))


def main():
    """Gather the symbol names from all listed branches and write them out."""
    base = 'https://raw.githubusercontent.com/python/cpython'
    funcs = set(EXTRA_FUNCS)

    for ver in DEF_VERSIONS:
        funcs |= parse_def_exports(fetch_text(f'{base}/{ver}/PC/python3.def'))

    for ver in DLL_VERSIONS:
        funcs |= parse_dll_exports(fetch_text(f'{base}/{ver}/PC/python3dll.c'))

    with open("darwin-ld-cpython.sym", "w") as sym_file:
        sym_file.write(format_linker_args(funcs))


if __name__ == "__main__":
    main()

0 comments on commit 2f29ec7

Please sign in to comment.