In [1]:
# %pip installs into the environment of the running kernel; the version is
# pinned to the release this notebook was executed with for reproducibility.
%pip install pycuda==2022.2.2

Collecting pycuda
  Downloading pycuda-2022.2.2.tar.gz (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m22.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pytools>=2011.2 (from pycuda)
  Downloading pytools-2023.1.1-py2.py3-none-any.whl (70 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.6/70.6 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
Collecting mako (from pycuda)
  Downloading Mako-1.2.4-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: pycuda
  Building wheel for pycuda (pyproject.toml) ... [?25l[?25hdone
  Created wheel for pycuda: filename=pycuda-2022.2.2-cp310-cp310-linux_x86_64.whl size=661265 sha256=f95d

In [23]:
%%writefile Minmax_array.cu

#include <thrust/pair.h>
#include <thrust/device_vector.h>
#include <thrust/extrema.h>

// Find the smallest and largest value of a float array stored in device memory.
//
//   d_a    device pointer to the first of N floats
//   minel  receives the minimum value
//   maxel  receives the maximum value
//   N      number of elements reachable through d_a
//
// When d_a is null or N <= 0 there is nothing to scan: the function returns
// immediately and leaves minel / maxel untouched.
extern "C" void my_cuda_function(float *d_a, float& minel, float& maxel, const int N) {

	// For an empty range the iterators returned by thrust::minmax_element do not
	// refer to a valid element, so they must not be dereferenced.
	if (d_a == nullptr || N <= 0) {
		return;
	}

	// Wrap the raw pointer once so Thrust dispatches to its device backend.
	const thrust::device_ptr<float> first = thrust::device_pointer_cast(d_a);
	const thrust::pair<thrust::device_ptr<float>, thrust::device_ptr<float>> extrema =
		thrust::minmax_element(first, first + N);

	// Reading through a device_ptr fetches that single element for the host.
	minel = *extrema.first;
	maxel = *extrema.second;
}

Overwriting Minmax_array.cu


In [24]:
# Compile Minmax_array.cu into the shared library Minmax_array.so (-shared);
# -Xcompiler -fPIC forwards -fPIC to the host compiler so the code is
# position-independent and can be loaded dynamically.
# NOTE(review): -arch=sm_75 hard-codes the target GPU architecture — confirm it
# matches the GPU of the machine running this notebook.
!nvcc -arch=sm_75 -o Minmax_array.so -shared -Xcompiler -fPIC Minmax_array.cu

In [28]:
import ctypes
import pycuda.gpuarray as gpuarray
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
import pycuda.autoinit

import numpy as np

# Load the shared library that was built from Minmax_array.cu.
cuda_lib = ctypes.CDLL('./Minmax_array.so')  # Update with the correct path

# Declare the C signature so ctypes converts every argument explicitly:
#   void my_cuda_function(float *d_a, float &minel, float &maxel, const int N)
# The two reference parameters are declared as float pointers here and are
# supplied with ctypes.byref() at the call site.
float_ptr = ctypes.POINTER(ctypes.c_float)
cuda_lib.my_cuda_function.argtypes = [float_ptr, float_ptr, float_ptr, ctypes.c_int]
cuda_lib.my_cuda_function.restype = None

# Host-side slots the wrapper writes its two results into.
minel = ctypes.c_float(0.)
maxel = ctypes.c_float(0.)

# Prepare data (built directly as float32 to match the float* parameter).
input_data = np.array([3, 1, 33, -4], dtype=np.float32)
size = input_data.size

# Use PyCUDA to allocate GPU memory and copy the input into it.
input_gpu = gpuarray.to_gpu(input_data)

# Call the CUDA function. It returns nothing (restype is None); the results
# arrive through minel / maxel.
cuda_lib.my_cuda_function(
    ctypes.cast(input_gpu.ptr, float_ptr),
    ctypes.byref(minel),
    ctypes.byref(maxel),
    size,
)

# Cross-check the GPU result against NumPy on the host copy of the data.
assert minel.value == input_data.min(), "GPU minimum disagrees with NumPy"
assert maxel.value == input_data.max(), "GPU maximum disagrees with NumPy"

print(input_gpu)
# .value is the documented way to read a ctypes scalar; wrapping it in
# np.float32 keeps single-precision formatting when printed.
print(np.float32(minel.value))
print(np.float32(maxel.value))

[ 3.  1. 33. -4.]
-4.0
33.0
