-
Notifications
You must be signed in to change notification settings - Fork 77
/
python_bindings.cc
194 lines (158 loc) · 5.95 KB
/
python_bindings.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
/* Copyright (c) 2020 UATC, LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "neuropod/bindings/python_bindings.hh"
#include "neuropod/internal/error_utils.hh"
#include "neuropod/internal/neuropod_tensor.hh"
#include "neuropod/internal/neuropod_tensor_raw_data_access.hh"
#include "neuropod/neuropod.hh"
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
namespace neuropod
{
namespace py = pybind11;
namespace
{
// Determine the neuropod `TensorType` that corresponds to the dtype of `array`.
//
// Every non-string mapping listed by `FOR_EACH_TYPE_MAPPING_EXCEPT_STRING` is tried
// first; arrays whose dtype kind is 'S' are reported as `STRING_TENSOR`. Anything
// else is reported through `NEUROPOD_ERROR`.
TensorType get_array_type(py::array &array)
{
    // Expands to one `isinstance` test per (C++ type, TensorType) pair
#define RETURN_IF_ARRAY_OF(CPP_TYPE, NEUROPOD_TYPE)   \
    if (py::isinstance<py::array_t<CPP_TYPE>>(array)) \
    {                                                 \
        return NEUROPOD_TYPE;                         \
    }

    FOR_EACH_TYPE_MAPPING_EXCEPT_STRING(RETURN_IF_ARRAY_OF)

#undef RETURN_IF_ARRAY_OF

    // `py::isinstance` does not do what we want for strings, so they are
    // detected by looking at the dtype kind instead.
    const char dtype_kind = array.dtype().kind();
    if (dtype_kind == 'S')
    {
        return STRING_TENSOR;
    }

    NEUROPOD_ERROR("Unsupported array type in python bindings: {}", dtype_kind);
}
// Look up the pybind11 dtype that matches the tensor type of `tensor`.
//
// Only the mappings listed by `FOR_EACH_TYPE_MAPPING_EXCEPT_STRING` are handled;
// any other tensor type is reported through `NEUROPOD_ERROR`.
pybind11::dtype get_py_type(const NeuropodTensor &tensor)
{
    const auto tensor_type = tensor.get_tensor_type();

    // Expands to one `case` label per (C++ type, TensorType) pair
#define DTYPE_CASE(CPP_TYPE, NEUROPOD_TYPE) \
    case NEUROPOD_TYPE:                     \
        return py::dtype::of<CPP_TYPE>();

    switch (tensor_type)
    {
        FOR_EACH_TYPE_MAPPING_EXCEPT_STRING(DTYPE_CASE)
    default:
        NEUROPOD_ERROR("Unsupported array type in python bindings: {}", tensor_type);
    }

#undef DTYPE_CASE
}
// Build a string tensor from a numpy array by copying every element.
//
// allocator: used to allocate the resulting `std::string` tensor
// array:     the numpy array to read from. Each element is read as `array.itemsize()`
//            bytes, and trailing '\0' padding is removed from each element.
// shape:     the shape of the tensor to allocate
//
// Returns the newly allocated tensor with the strings copied in.
//
// NOTE(review): elements are read back-to-back from the start of the buffer, so this
// assumes `array` is C-contiguous; that is not checked here.
std::shared_ptr<NeuropodTensor> tensor_from_string_numpy(NeuropodTensorAllocator &allocator,
                                                         py::array &              array,
                                                         std::vector<int64_t> &   shape)
{
    // Unfortunately, for strings, we need to copy all the data in the tensor
    auto tensor = allocator.allocate_tensor<std::string>(shape);

    // Sizes and offsets are kept in `size_t` so that `index * item_size` cannot
    // overflow a 32-bit int for large arrays.
    const size_t item_size = static_cast<size_t>(array.itemsize());
    const size_t numel     = static_cast<size_t>(tensor->get_num_elements());

    // The buffer is only read, so read-only access to the underlying data is enough
    const char *data = static_cast<const char *>(array.data());

    std::vector<std::string> out;
    out.reserve(numel);

    for (size_t index = 0; index < numel; ++index)
    {
        const char *item = data + index * item_size;

        // Remove null padding at the end
        size_t length = item_size;
        while (length > 0 && item[length - 1] == '\0')
        {
            --length;
        }

        // Construct the string in place from the trimmed range
        out.emplace_back(item, length);
    }

    // This potentially does another copy (depending on the backend)
    tensor->copy_from(out);
    return tensor;
}
} // namespace
// Convert a numpy array into a `NeuropodTensor`.
//
// allocator: used to create the resulting tensor
// array:     the numpy array to convert
//
// For string arrays the data is copied into a newly allocated tensor. For every other
// supported dtype the tensor wraps the array's memory directly, and the array is kept
// alive until the tensor's deleter runs.
//
// Unsupported dtypes are reported by `get_array_type`.
std::shared_ptr<NeuropodTensor> tensor_from_numpy(NeuropodTensorAllocator &allocator, py::array array)
{
    // TODO(vip): Make sure it's contiguous and aligned
    const auto  ndims = array.ndim();
    const auto *dims  = array.shape();
    const auto  dtype = get_array_type(array);

    // Create a vector with the shape info. Plain pointer arithmetic is used for the
    // end of the range so that no element past the end is ever indexed.
    std::vector<int64_t> shape(dims, dims + ndims);

    if (dtype == STRING_TENSOR)
    {
        // Strings are copied, so there is no need for a writable pointer or for
        // keeping the array alive.
        return tensor_from_string_numpy(allocator, array, shape);
    }

    // The tensor wraps the numpy memory directly, so get a writable pointer to it
    auto data = array.mutable_data();

    // Capture the array in our deleter so it doesn't get deallocated
    // until we're done
    auto to_delete = std::make_shared<py::array>(array);
    auto deleter   = [to_delete](void * /*unused*/) mutable {
        // Releasing the array touches python reference counts, so hold the GIL
        py::gil_scoped_acquire gil;
        to_delete.reset();
    };

    // Wrap the data from the numpy array
    return allocator.tensor_from_memory(shape, dtype, data, deleter);
}
// Convert a `NeuropodTensor` into a numpy array.
//
// value: the tensor to convert. A null pointer is reported through `NEUROPOD_ERROR`.
//
// String tensors are copied into a new array. For all other types the returned array
// points at the tensor's data, and `value` is kept alive until the array's capsule is
// destroyed.
py::array tensor_to_numpy(std::shared_ptr<NeuropodTensor> value)
{
    // Callers may pass the result of a `std::dynamic_pointer_cast`, which is null
    // when the cast fails. Report that instead of dereferencing a null pointer.
    if (value == nullptr)
    {
        NEUROPOD_ERROR("Error converting value to tensor");
    }

    auto tensor = value->as_tensor();

    // This isn't going to be null, but we do a null check to keep
    // static analyzers happy
    if (tensor == nullptr)
    {
        NEUROPOD_ERROR("Error converting value to tensor");
    }

    if (tensor->get_tensor_type() == STRING_TENSOR)
    {
        // Special case for empty string tensors because the pybind functions below don't correctly set the
        // type of the resulting array in this case
        if (tensor->get_num_elements() == 0)
        {
            return py::array_t<std::array<char, 1>>(tensor->get_dims());
        }

        // This makes a copy
        auto arr = py::array(py::cast(tensor->as_typed_tensor<std::string>()->get_data_as_vector()));
        arr.resize(tensor->get_dims());
        return arr;
    }

    auto dims = tensor->get_dims();
    auto data = internal::NeuropodTensorRawDataAccess::get_untyped_data_ptr(*tensor);

    // Make sure we don't deallocate the tensor until the numpy array is deallocated.
    // The lambda does nothing when called; it only holds a copy of `value`.
    auto deleter        = [value](void * /*unused*/) {};
    auto deleter_handle = register_deleter(deleter, nullptr);
    auto capsule        = py::capsule(deleter_handle, [](void *handle) { run_deleter(handle); });

    return py::array(get_py_type(*tensor), dims, data, capsule);
}
// Convert a python dict of numpy arrays into a `NeuropodValueMap`.
//
// allocator: used to create a tensor for each array
// items:     dict whose keys are cast to `std::string` and whose values are cast to `py::array`
//
// Returns a map from each key to the tensor created by `tensor_from_numpy`.
NeuropodValueMap from_numpy_dict(NeuropodTensorAllocator &allocator, py::dict &items)
{
    NeuropodValueMap converted;

    for (const auto &entry : items)
    {
        // Build the tensor before touching the map entry
        auto tensor = tensor_from_numpy(allocator, entry.second.cast<py::array>());

        converted[entry.first.cast<std::string>()] = std::move(tensor);
    }

    return converted;
}
// Convert a `NeuropodValueMap` into a python dict of numpy arrays.
//
// items: map from name to value. Each value is cast to a `NeuropodTensor` and converted
//        with `tensor_to_numpy`.
//
// Returns a dict with one numpy array per entry of `items`, under the same name.
py::dict to_numpy_dict(NeuropodValueMap &items)
{
    py::dict converted;

    for (auto &entry : items)
    {
        auto as_tensor = std::dynamic_pointer_cast<NeuropodTensor>(entry.second);

        converted[entry.first.c_str()] = tensor_to_numpy(as_tensor);
    }

    return converted;
}
} // namespace neuropod