Skip to content

Allow ObjectCode to have a name #682

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 6, 2025
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions cuda_core/cuda/core/experimental/_linker.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ class LinkerOptions:

Attributes
----------
name : str, optional
Name of the linker. If the linking succeeds, the name is passed down to the generated `ObjectCode`.
Default: ``"<default linker>"``.
arch : str, optional
Pass the SM architecture value, such as ``sm_<CC>`` (for generating CUBIN) or
``compute_<CC>`` (for generating PTX). If not provided, the current device's architecture
Expand Down Expand Up @@ -161,6 +163,7 @@ class LinkerOptions:
Default: False.
"""

name: Optional[str] = "<default linker>"
arch: Optional[str] = None
max_register_count: Optional[int] = None
time: Optional[bool] = None
Expand All @@ -184,6 +187,7 @@ class LinkerOptions:

def __post_init__(self):
_lazy_init()
self._name = self.name.encode()
self.formatted_options = []
if _nvjitlink:
self._init_nvjitlink()
Expand Down Expand Up @@ -393,7 +397,7 @@ def _add_code_object(self, object_code: ObjectCode):
data = object_code._module
assert_type(data, bytes)
with _exception_manager(self):
name_str = f"{object_code._handle}_{object_code._code_type}"
name_str = f"{object_code.name}"
if _nvjitlink:
_nvjitlink.add_data(
self._mnff.handle,
Expand Down Expand Up @@ -455,7 +459,7 @@ def link(self, target_type) -> ObjectCode:
addr, size = handle_return(_driver.cuLinkComplete(self._mnff.handle))
code = (ctypes.c_char * size).from_address(addr)

return ObjectCode._init(bytes(code), target_type)
return ObjectCode._init(bytes(code), target_type, name=self._options.name)

def get_error_log(self) -> str:
"""Get the error log generated by the linker.
Expand Down
58 changes: 41 additions & 17 deletions cuda_core/cuda/core/experimental/_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ class ObjectCode:
context.
"""

__slots__ = ("_handle", "_backend_version", "_code_type", "_module", "_loader", "_sym_map")
__slots__ = ("_handle", "_backend_version", "_code_type", "_module", "_loader", "_sym_map", "_name")
_supported_code_type = ("cubin", "ptx", "ltoir", "fatbin", "object", "library")

def __new__(self, *args, **kwargs):
Expand All @@ -459,7 +459,7 @@ def __new__(self, *args, **kwargs):
)

@classmethod
def _init(cls, module, code_type, *, symbol_mapping: Optional[dict] = None):
def _init(cls, module, code_type, *, name: str = "", symbol_mapping: Optional[dict] = None):
self = super().__new__(cls)
assert code_type in self._supported_code_type, f"{code_type=} is not supported"
_lazy_init()
Expand All @@ -473,112 +473,131 @@ def _init(cls, module, code_type, *, symbol_mapping: Optional[dict] = None):
self._code_type = code_type
self._module = module
self._sym_map = {} if symbol_mapping is None else symbol_mapping
self._name = name

return self

@classmethod
def _reduce_helper(self, module, code_type, symbol_mapping):
def _reduce_helper(self, module, code_type, name, symbol_mapping):
# just for forwarding kwargs
return ObjectCode._init(module, code_type, symbol_mapping=symbol_mapping)
return ObjectCode._init(module, code_type, name=name, symbol_mapping=symbol_mapping)

def __reduce__(self):
return ObjectCode._reduce_helper, (self._module, self._code_type, self._sym_map)
return ObjectCode._reduce_helper, (self._module, self._code_type, self._name, self._sym_map)

@staticmethod
def from_cubin(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode":
def from_cubin(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing cubin.

Parameters
----------
module : Union[bytes, str]
Either a bytes object containing the in-memory cubin to load, or
a file path string pointing to the on-disk cubin to load.
name : Optional[str]
A human-readable identifier representing this code object.
symbol_mapping : Optional[dict]
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
"""
return ObjectCode._init(module, "cubin", symbol_mapping=symbol_mapping)
return ObjectCode._init(module, "cubin", name=name, symbol_mapping=symbol_mapping)

@staticmethod
def from_ptx(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode":
def from_ptx(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing PTX.

Parameters
----------
module : Union[bytes, str]
Either a bytes object containing the in-memory ptx code to load, or
a file path string pointing to the on-disk ptx file to load.
name : Optional[str]
A human-readable identifier representing this code object.
symbol_mapping : Optional[dict]
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
"""
return ObjectCode._init(module, "ptx", symbol_mapping=symbol_mapping)
return ObjectCode._init(module, "ptx", name=name, symbol_mapping=symbol_mapping)

@staticmethod
def from_ltoir(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode":
def from_ltoir(module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing LTOIR.

Parameters
----------
module : Union[bytes, str]
Either a bytes object containing the in-memory ltoir code to load, or
a file path string pointing to the on-disk ltoir file to load.
name : Optional[str]
A human-readable identifier representing this code object.
symbol_mapping : Optional[dict]
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
"""
return ObjectCode._init(module, "ltoir", symbol_mapping=symbol_mapping)
return ObjectCode._init(module, "ltoir", name=name, symbol_mapping=symbol_mapping)

@staticmethod
def from_fatbin(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode":
def from_fatbin(
module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None
) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing fatbin.

Parameters
----------
module : Union[bytes, str]
Either a bytes object containing the in-memory fatbin to load, or
a file path string pointing to the on-disk fatbin to load.
name : Optional[str]
A human-readable identifier representing this code object.
symbol_mapping : Optional[dict]
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
"""
return ObjectCode._init(module, "fatbin", symbol_mapping=symbol_mapping)
return ObjectCode._init(module, "fatbin", name=name, symbol_mapping=symbol_mapping)

@staticmethod
def from_object(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode":
def from_object(
module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None
) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing object code.

Parameters
----------
module : Union[bytes, str]
Either a bytes object containing the in-memory object code to load, or
a file path string pointing to the on-disk object code to load.
name : Optional[str]
A human-readable identifier representing this code object.
symbol_mapping : Optional[dict]
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
"""
return ObjectCode._init(module, "object", symbol_mapping=symbol_mapping)
return ObjectCode._init(module, "object", name=name, symbol_mapping=symbol_mapping)

@staticmethod
def from_library(module: Union[bytes, str], *, symbol_mapping: Optional[dict] = None) -> "ObjectCode":
def from_library(
module: Union[bytes, str], *, name: str = "", symbol_mapping: Optional[dict] = None
) -> "ObjectCode":
"""Create an :class:`ObjectCode` instance from an existing library.

Parameters
----------
module : Union[bytes, str]
Either a bytes object containing the in-memory library to load, or
a file path string pointing to the on-disk library to load.
name : Optional[str]
A human-readable identifier representing this code object.
symbol_mapping : Optional[dict]
A dictionary specifying how the unmangled symbol names (as keys)
should be mapped to the mangled names before trying to retrieve
them (default to no mappings).
"""
return ObjectCode._init(module, "library", symbol_mapping=symbol_mapping)
return ObjectCode._init(module, "library", name=name, symbol_mapping=symbol_mapping)

# TODO: do we want to unload in a finalizer? Probably not.

Expand Down Expand Up @@ -632,6 +651,11 @@ def code(self) -> CodeTypeT:
"""Return the underlying code object."""
return self._module

@property
def name(self) -> str:
"""Return a human-readable name of this code object."""
return self._name

@property
@precondition(_lazy_load_module)
def handle(self):
Expand Down
10 changes: 8 additions & 2 deletions cuda_core/cuda/core/experimental/_program.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ class ProgramOptions:

Attributes
----------
name : str, optional
Name of the program. If the compilation succeeds, the name is passed down to the generated `ObjectCode`.
Default: ``"<default program>"``.
arch : str, optional
Pass the SM architecture value, such as ``sm_<CC>`` (for generating CUBIN) or
``compute_<CC>`` (for generating PTX). If not provided, the current device's architecture
Expand Down Expand Up @@ -180,6 +182,7 @@ class ProgramOptions:
Default: False
"""

name: Optional[str] = "<default program>"
arch: Optional[str] = None
relocatable_device_code: Optional[bool] = None
extensible_whole_program: Optional[bool] = None
Expand Down Expand Up @@ -222,6 +225,8 @@ class ProgramOptions:
minimal: Optional[bool] = None

def __post_init__(self):
self._name = self.name.encode()

self._formatted_options = []
if self.arch is not None:
self._formatted_options.append(f"--gpu-architecture={self.arch}")
Expand Down Expand Up @@ -396,7 +401,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None):
# TODO: support pre-loaded headers & include names
# TODO: allow tuples once NVIDIA/cuda-python#72 is resolved

self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), b"", 0, [], []))
self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), options._name, 0, [], []))
self._backend = "NVRTC"
self._linker = None

Expand All @@ -413,6 +418,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None):

def _translate_program_options(self, options: ProgramOptions) -> LinkerOptions:
return LinkerOptions(
name=options.name,
arch=options.arch,
max_register_count=options.max_register_count,
time=options.time,
Expand Down Expand Up @@ -505,7 +511,7 @@ def compile(self, target_type, name_expressions=(), logs=None):
handle_return(nvrtc.nvrtcGetProgramLog(self._mnff.handle, log), handle=self._mnff.handle)
logs.write(log.decode("utf-8", errors="backslashreplace"))

return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping)
return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping, name=self._options.name)

supported_backends = ("nvJitLink", "driver")
if self._backend not in supported_backends:
Expand Down
2 changes: 2 additions & 0 deletions cuda_core/docs/source/release/0.3.0-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ New features

- :class:`Kernel` adds :property:`Kernel.num_arguments` and :property:`Kernel.arguments_info` for introspection of kernel arguments. (#612)
- Add pythonic access to kernel occupancy calculation functions via :property:`Kernel.occupancy`. (#648)
- A name can be assigned to :class:`ObjectCode` instances generated by both :class:`Program` and :class:`Linker` through their respective
  options. (#682)

New examples
------------
Expand Down
13 changes: 10 additions & 3 deletions cuda_core/tests/test_linker.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ def test_linker_link_ptx_nvjitlink(compile_ltoir_functions):
linker = Linker(*compile_ltoir_functions, options=options)
linked_code = linker.link("ptx")
assert isinstance(linked_code, ObjectCode)
assert linked_code.name == options.name


@pytest.mark.skipif(not is_culink_backend, reason="nvjitlink requires lto for ptx linking")
Expand All @@ -117,13 +118,15 @@ def test_linker_link_ptx_culink(compile_ptx_functions):
linker = Linker(*compile_ptx_functions, options=options)
linked_code = linker.link("ptx")
assert isinstance(linked_code, ObjectCode)
assert linked_code.name == options.name


def test_linker_link_cubin(compile_ptx_functions):
options = LinkerOptions(arch=ARCH)
linker = Linker(*compile_ptx_functions, options=options)
linked_code = linker.link("cubin")
assert isinstance(linked_code, ObjectCode)
assert linked_code.name == options.name


def test_linker_link_ptx_multiple(compile_ptx_functions):
Expand All @@ -132,6 +135,7 @@ def test_linker_link_ptx_multiple(compile_ptx_functions):
linker = Linker(*ptxes, options=options)
linked_code = linker.link("cubin")
assert isinstance(linked_code, ObjectCode)
assert linked_code.name == options.name


def test_linker_link_invalid_target_type(compile_ptx_functions):
Expand All @@ -144,14 +148,16 @@ def test_linker_link_invalid_target_type(compile_ptx_functions):
# this test causes an API error when using the culink API
@skipif_testing_with_compute_sanitizer
def test_linker_get_error_log(compile_ptx_functions):
options = LinkerOptions(arch=ARCH)
options = LinkerOptions(name="ABC", arch=ARCH)

replacement_kernel = """
extern __device__ int Z();
extern __device__ int C(int a, int b);
__global__ void A() { int result = C(Z(), 1);}
"""
dummy_program = Program(replacement_kernel, "c++", ProgramOptions(relocatable_device_code=True)).compile("ptx")
dummy_program = Program(
replacement_kernel, "c++", ProgramOptions(name="CBA", relocatable_device_code=True)
).compile("ptx")
linker = Linker(dummy_program, *(compile_ptx_functions[1:]), options=options)
try:
linker.link("cubin")
Expand All @@ -160,8 +166,9 @@ def test_linker_get_error_log(compile_ptx_functions):
log = linker.get_error_log()
assert isinstance(log, str)
# TODO when 4902246 is addressed, we can update this to cover nvjitlink as well
# The error is coming from the input object that's being linked (CBA), not the output object (ABC).
if is_culink_backend:
assert log.rstrip("\x00") == "error : Undefined reference to '_Z1Zv' in 'None_ptx'"
assert log.rstrip("\x00") == "error : Undefined reference to '_Z1Zv' in 'CBA'"


def test_linker_get_info_log(compile_ptx_functions):
Expand Down
15 changes: 9 additions & 6 deletions cuda_core/tests/test_program.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def ptx_code_object():
@pytest.mark.parametrize(
"options",
[
ProgramOptions(name="abc"),
ProgramOptions(device_code_optimize=True, debug=True),
ProgramOptions(relocatable_device_code=True, max_register_count=32),
ProgramOptions(ftz=True, prec_sqrt=False, prec_div=False),
Expand Down Expand Up @@ -105,21 +106,23 @@ def test_program_init_invalid_code_format():
# This is tested against the current device's arch
def test_program_compile_valid_target_type(init_cuda):
code = 'extern "C" __global__ void my_kernel() {}'
program = Program(code, "c++")
program = Program(code, "c++", options={"name": "42"})

with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
ptx_object_code = program.compile("ptx")
assert isinstance(ptx_object_code, ObjectCode)
assert ptx_object_code.name == "42"
if any("The CUDA driver version is older than the backend version" in str(warning.message) for warning in w):
pytest.skip("PTX version too new for current driver")
ptx_kernel = ptx_object_code.get_kernel("my_kernel")
assert isinstance(ptx_kernel, Kernel)

program = Program(ptx_object_code._module.decode(), "ptx")
program = Program(ptx_object_code._module.decode(), "ptx", options={"name": "24"})
cubin_object_code = program.compile("cubin")
ptx_kernel = ptx_object_code.get_kernel("my_kernel")
cubin_kernel = cubin_object_code.get_kernel("my_kernel")
assert isinstance(ptx_object_code, ObjectCode)
assert isinstance(cubin_object_code, ObjectCode)
assert isinstance(ptx_kernel, Kernel)
assert cubin_object_code.name == "24"
cubin_kernel = cubin_object_code.get_kernel("my_kernel")
assert isinstance(cubin_kernel, Kernel)


Expand Down
Loading