Provide our own Cython declaration for make_unique
make_unique in Cython's libcpp headers is not annotated with `except
+`. As a consequence, if the constructor throws, we do not catch it in
Python. To work around this (see cython/cython#5560 for details),
provide our own implementation.

Because assignments go through temporaries, we now need to explicitly
wrap all calls to `make_unique` in `move`. That is arguably preferable
to not being able to catch exceptions, and it will no longer be
necessary once we move to Cython 3.

- Closes rapidsai#13743
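To make the pattern concrete, here is a minimal, self-contained sketch (not cudf code: the `build()` helper and the `vector[int]` payload are invented for illustration). It mirrors the declaration added in the new `memory.pxd` below and shows the `move()` wrapping that call sites now need:

# distutils: language = c++
# Minimal sketch of the pattern in this commit (illustrative only).
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from libcpp.vector cimport vector

cdef extern from "<memory>" namespace "std" nogil:
    # Same declaration as the new memory.pxd: `except +` lets Cython
    # translate a throwing C++ constructor into a Python exception.
    unique_ptr[T] make_unique[T](...) except +

def build(size_t n):
    cdef unique_ptr[vector[int]] vec
    # Under Cython 0.29 the result of an `except +` call is assigned
    # through a temporary, so it must be wrapped in move(); this is
    # the extra ceremony the commit message refers to, and it goes
    # away with Cython 3.
    vec = move(make_unique[vector[int]](n))
    return vec.get().size()

With Cython's bundled libcpp declaration (which lacks `except +`), an exception thrown by the constructed object's constructor would not be translated into a Python exception, which is the failure mode described above.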
wence- committed Jul 26, 2023
1 parent 7dcf052 commit ab77c9f
Showing 9 changed files with 66 additions and 45 deletions.
5 changes: 3 additions & 2 deletions python/cudf/cudf/_lib/column.pyx
@@ -27,7 +27,7 @@ from cudf.utils.dtypes import _get_base_dtype

from cpython.buffer cimport PyObject_CheckBuffer
from libc.stdint cimport uintptr_t
-from libcpp.memory cimport make_unique, unique_ptr
+from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from libcpp.vector cimport vector

@@ -49,6 +49,7 @@ from cudf._lib.cpp.column.column_factories cimport (
make_numeric_column,
)
from cudf._lib.cpp.column.column_view cimport column_view
+from cudf._lib.cpp.libcpp.memory cimport make_unique
from cudf._lib.cpp.null_mask cimport null_count as cpp_null_count
from cudf._lib.cpp.scalar.scalar cimport scalar
from cudf._lib.scalar cimport DeviceScalar
@@ -275,7 +276,7 @@ cdef class Column:
self._children = ()
else:
children = Column.from_unique_ptr(
-make_unique[column](self.view())
+move(make_unique[column](self.view()))
).base_children
dtypes = [
base_child.dtype for base_child in self.base_children
7 changes: 4 additions & 3 deletions python/cudf/cudf/_lib/concat.pyx
@@ -1,7 +1,7 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.

from libcpp cimport bool
-from libcpp.memory cimport make_unique, unique_ptr
+from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from libcpp.vector cimport vector

@@ -12,6 +12,7 @@ from cudf._lib.cpp.concatenate cimport (
concatenate_masks as libcudf_concatenate_masks,
concatenate_tables as libcudf_concatenate_tables,
)
+from cudf._lib.cpp.libcpp.memory cimport make_unique
from cudf._lib.cpp.table.table cimport table, table_view
from cudf._lib.utils cimport (
data_from_unique_ptr,
@@ -30,7 +31,7 @@ cpdef concat_masks(object columns):
cdef vector[column_view] c_views = make_column_views(columns)
with nogil:
c_result = move(libcudf_concatenate_masks(c_views))
-c_unique_result = make_unique[device_buffer](move(c_result))
+c_unique_result = move(make_unique[device_buffer](move(c_result)))
return as_buffer(
DeviceBuffer.c_from_unique_ptr(move(c_unique_result))
)
4 changes: 2 additions & 2 deletions python/cudf/cudf/_lib/copying.pyx
@@ -4,7 +4,7 @@ import pickle

from libc.stdint cimport int32_t, uint8_t, uintptr_t
from libcpp cimport bool
-from libcpp.memory cimport make_shared, make_unique, shared_ptr, unique_ptr
+from libcpp.memory cimport make_shared, shared_ptr, unique_ptr
from libcpp.utility cimport move
from libcpp.vector cimport vector

@@ -29,6 +29,7 @@ cimport cudf._lib.cpp.copying as cpp_copying
from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.column.column_view cimport column_view, mutable_column_view
from cudf._lib.cpp.libcpp.functional cimport reference_wrapper
+from cudf._lib.cpp.libcpp.memory cimport make_unique
from cudf._lib.cpp.lists.gather cimport (
segmented_gather as cpp_segmented_gather,
)
@@ -80,7 +81,6 @@ def copy_column(Column input_column):
Deep copied column
"""
cdef unique_ptr[column] c_result

cdef column_view input_column_view = input_column.view()
with nogil:
c_result = move(make_unique[column](input_column_view))
12 changes: 12 additions & 0 deletions python/cudf/cudf/_lib/cpp/libcpp/memory.pxd
@@ -0,0 +1,12 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+
+from libcpp.memory cimport unique_ptr
+
+
+cdef extern from "<memory>" namespace "std" nogil:
+    # The Cython standard header does not have except +, so C++
+    # exceptions from make_unique are not caught and translated to
+    # Python ones. This is not perfectly ergonomic, we always have to
+    # wrap make_unique in move, but at least we can catch exceptions.
+    # See https://github.com/cython/cython/issues/5560
+    unique_ptr[T] make_unique[T](...) except +
28 changes: 15 additions & 13 deletions python/cudf/cudf/_lib/expressions.pyx
@@ -4,9 +4,11 @@ from enum import Enum

from cython.operator cimport dereference
from libc.stdint cimport int64_t
-from libcpp.memory cimport make_unique, unique_ptr
+from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

from cudf._lib.cpp cimport expressions as libcudf_exp
+from cudf._lib.cpp.libcpp.memory cimport make_unique
from cudf._lib.cpp.types cimport size_type

# Necessary for proper casting, see below.
Expand Down Expand Up @@ -80,26 +82,26 @@ cdef class Literal(Expression):
def __cinit__(self, value):
if isinstance(value, int):
self.c_scalar.reset(new numeric_scalar[int64_t](value, True))
-self.c_obj = <expression_ptr> make_unique[libcudf_exp.literal](
+self.c_obj = <expression_ptr> move(make_unique[libcudf_exp.literal](
<numeric_scalar[int64_t] &>dereference(self.c_scalar)
-)
+))
elif isinstance(value, float):
self.c_scalar.reset(new numeric_scalar[double](value, True))
-self.c_obj = <expression_ptr> make_unique[libcudf_exp.literal](
+self.c_obj = <expression_ptr> move(make_unique[libcudf_exp.literal](
<numeric_scalar[double] &>dereference(self.c_scalar)
-)
+))
elif isinstance(value, str):
self.c_scalar.reset(new string_scalar(value.encode(), True))
-self.c_obj = <expression_ptr> make_unique[libcudf_exp.literal](
+self.c_obj = <expression_ptr> move(make_unique[libcudf_exp.literal](
<string_scalar &>dereference(self.c_scalar)
-)
+))


cdef class ColumnReference(Expression):
def __cinit__(self, size_type index):
-self.c_obj = <expression_ptr>make_unique[libcudf_exp.column_reference](
+self.c_obj = <expression_ptr>move(make_unique[libcudf_exp.column_reference](
index
-)
+))


cdef class Operation(Expression):
@@ -109,10 +111,10 @@ cdef class Operation(Expression):
)

if right is None:
-self.c_obj = <expression_ptr> make_unique[libcudf_exp.operation](
+self.c_obj = <expression_ptr> move(make_unique[libcudf_exp.operation](
op_value, dereference(left.c_obj)
-)
+))
else:
-self.c_obj = <expression_ptr> make_unique[libcudf_exp.operation](
+self.c_obj = <expression_ptr> move(make_unique[libcudf_exp.operation](
op_value, dereference(left.c_obj), dereference(right.c_obj)
-)
+))
7 changes: 4 additions & 3 deletions python/cudf/cudf/_lib/join.pyx
@@ -2,7 +2,7 @@

from cudf.core.buffer import acquire_spill_lock

-from libcpp.memory cimport make_unique, unique_ptr
+from libcpp.memory cimport unique_ptr
from libcpp.pair cimport pair
from libcpp.utility cimport move

@@ -11,6 +11,7 @@ from rmm._lib.device_buffer cimport device_buffer
cimport cudf._lib.cpp.join as cpp_join
from cudf._lib.column cimport Column
from cudf._lib.cpp.column.column cimport column
+from cudf._lib.cpp.libcpp.memory cimport make_unique
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.cpp.types cimport data_type, size_type, type_id
from cudf._lib.utils cimport table_view_from_columns
@@ -66,8 +67,8 @@ cdef Column _gather_map_as_column(cpp_join.gather_map_type gather_map):
# help to convert a gather map to a Column
cdef device_buffer c_empty
cdef size_type size = gather_map.get()[0].size()
-cdef unique_ptr[column] c_col = make_unique[column](
+cdef unique_ptr[column] c_col = move(make_unique[column](
data_type(type_id.INT32),
size,
-gather_map.get()[0].release(), move(c_empty), 0)
+gather_map.get()[0].release(), move(c_empty), 0))
return Column.from_unique_ptr(move(c_col))
13 changes: 7 additions & 6 deletions python/cudf/cudf/_lib/null_mask.pyx
@@ -1,17 +1,18 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.

from enum import Enum

from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer

from cudf.core.buffer import acquire_spill_lock, as_buffer

-from libcpp.memory cimport make_unique, unique_ptr
+from libcpp.memory cimport unique_ptr
from libcpp.pair cimport pair
from libcpp.utility cimport move

from cudf._lib.column cimport Column
from cudf._lib.cpp.column.column_view cimport column_view
+from cudf._lib.cpp.libcpp.memory cimport make_unique
from cudf._lib.cpp.null_mask cimport (
bitmask_allocation_size_bytes as cpp_bitmask_allocation_size_bytes,
bitmask_and as cpp_bitmask_and,
@@ -50,7 +51,7 @@ def copy_bitmask(Column col):

with nogil:
db = move(cpp_copy_bitmask(col_view))
-up_db = make_unique[device_buffer](move(db))
+up_db = move(make_unique[device_buffer](move(db)))

rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
buf = as_buffer(rmm_db)
@@ -96,7 +97,7 @@ def create_null_mask(size_type size, state=MaskState.UNINITIALIZED):

with nogil:
db = move(cpp_create_null_mask(size, c_mask_state))
-up_db = make_unique[device_buffer](move(db))
+up_db = move(make_unique[device_buffer](move(db)))

rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
buf = as_buffer(rmm_db)
@@ -110,7 +111,7 @@ def bitmask_and(columns: list):
cdef unique_ptr[device_buffer] up_db
with nogil:
c_result = move(cpp_bitmask_and(c_view))
-up_db = make_unique[device_buffer](move(c_result.first))
+up_db = move(make_unique[device_buffer](move(c_result.first)))
dbuf = DeviceBuffer.c_from_unique_ptr(move(up_db))
buf = as_buffer(dbuf)
return buf, c_result.second
@@ -123,7 +124,7 @@ def bitmask_or(columns: list):
cdef unique_ptr[device_buffer] up_db
with nogil:
c_result = move(cpp_bitmask_or(c_view))
-up_db = make_unique[device_buffer](move(c_result.first))
+up_db = move(make_unique[device_buffer](move(c_result.first)))
dbuf = DeviceBuffer.c_from_unique_ptr(move(up_db))
buf = as_buffer(dbuf)
return buf, c_result.second
5 changes: 3 additions & 2 deletions python/cudf/cudf/_lib/parquet.pyx
@@ -32,7 +32,7 @@ from cudf._lib.utils import _index_level_name, generate_pandas_metadata
from libc.stdint cimport uint8_t
from libcpp cimport bool
from libcpp.map cimport map
-from libcpp.memory cimport make_unique, unique_ptr
+from libcpp.memory cimport unique_ptr
from libcpp.string cimport string
from libcpp.unordered_map cimport unordered_map
from libcpp.utility cimport move
@@ -51,6 +51,7 @@ from cudf._lib.cpp.io.parquet cimport (
write_parquet as parquet_writer,
)
from cudf._lib.cpp.io.types cimport column_in_metadata, table_input_metadata
+from cudf._lib.cpp.libcpp.memory cimport make_unique
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.cpp.types cimport data_type, size_type
from cudf._lib.io.datasource cimport NativeFileDatasource
@@ -641,7 +642,7 @@ cpdef merge_filemetadata(object filemetadata_list):

for blob_py in filemetadata_list:
blob_c = blob_py
-list_c.push_back(make_unique[vector[uint8_t]](blob_c))
+list_c.push_back(move(make_unique[vector[uint8_t]](blob_c)))

with nogil:
output_c = move(parquet_merge_metadata(list_c))
30 changes: 16 additions & 14 deletions python/cudf_kafka/cudf_kafka/_lib/kafka.pyx
@@ -1,12 +1,14 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.

from libc.stdint cimport int32_t, int64_t
from libcpp cimport bool, nullptr
from libcpp.map cimport map
-from libcpp.memory cimport make_unique, unique_ptr
+from libcpp.memory cimport unique_ptr
from libcpp.string cimport string
from libcpp.utility cimport move

from cudf._lib.cpp.io.types cimport datasource
+from cudf._lib.cpp.libcpp.memory cimport make_unique

from cudf_kafka._lib.kafka cimport kafka_consumer

@@ -50,20 +52,20 @@ cdef class KafkaDatasource(Datasource):

if topic != b"" and partition != -1:
self.c_datasource = <unique_ptr[datasource]> \
-make_unique[kafka_consumer](configs,
-python_callable,
-python_callable_wrapper,
-topic,
-partition,
-start_offset,
-end_offset,
-batch_timeout,
-delimiter)
+move(make_unique[kafka_consumer](configs,
+python_callable,
+python_callable_wrapper,
+topic,
+partition,
+start_offset,
+end_offset,
+batch_timeout,
+delimiter))
else:
self.c_datasource = <unique_ptr[datasource]> \
-make_unique[kafka_consumer](configs,
-python_callable,
-python_callable_wrapper)
+move(make_unique[kafka_consumer](configs,
+python_callable,
+python_callable_wrapper))

cdef datasource* get_datasource(self) nogil:
return <datasource *> self.c_datasource.get()
