In [None]:
import numpy as np
from PIL import Image
import cupy as cp
import time
import pandas as pd
from scipy.interpolate import RegularGridInterpolator

In [None]:
# Open the source bitmap from the current working directory.
img = Image.open('ris.bmp')
# Convert the image's pixel data into a CuPy array (stored on the current GPU device).
arr = cp.array(img)
# Show the array dimensions; later cells pass this shape on to the interpolator.
print(arr.shape)

(813, 751)


In [None]:
class CUDABilinearInterp:
    """Upscale a single-channel 8-bit image by 2x with bilinear interpolation.

    Two implementations are provided so they can be timed against each other:
    a CUDA kernel launched through CuPy and a CPU reference built on SciPy's
    ``RegularGridInterpolator``.

    Attributes:
        add_kernel: Compiled-on-first-use ``cp.RawKernel`` running the GPU path.
        matrix: The input image exactly as passed by the caller.
        flattened_matrix: Row-major uint8 copy of ``matrix`` living on the GPU.
        shape: ``(height, width, ...)`` of the input image; only the first two
            entries are used.
        params: Launch configuration with the keys ``"gridX"``, ``"gridY"``,
            ``"blockX"`` and ``"blockY"``.
        result: float64 GPU buffer of shape ``(2 * height, 2 * width)`` that the
            kernel writes into.
    """

    def __init__(self, shape: tuple, matrix: cp.ndarray, params: dict):
        """Compile-prepare the kernel and allocate the GPU buffers.

        Args:
            shape: ``(height, width)`` of ``matrix``.
            matrix: Image data; it is cast to uint8 because the kernel reads
                its input as ``unsigned char``.
            params: Dict with ``"gridX"``, ``"gridY"``, ``"blockX"``, ``"blockY"``.

        Raises:
            ValueError: If ``matrix`` holds fewer than ``height * width``
                elements, which would make the kernel read past the buffer.
        """
        self.add_kernel = cp.RawKernel(r'''
        extern "C"
        __global__ void bilinearInterp(unsigned char* input, double* output, int width, int height)
        {
            int x = blockIdx.x * blockDim.x + threadIdx.x;
            int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (x < (width * 2) && y < (height * 2))
            {
                double px = x / 2.0;
                double py = y / 2.0;

                double fx = px - floor(px);
                double fy = py - floor(py);
                double fx1 = 1.0 - fx;
                double fy1 = 1.0 - fy;

                int x0 = (int)px;
                int y0 = (int)py;

                // Clamp the right/bottom neighbours to the image edge. Without
                // this the last output column/row would read index `width`
                // (the next row's first pixel) or row `height` (past the end
                // of the buffer).
                int x1 = (x0 + 1 < width) ? (x0 + 1) : x0;
                int y1 = (y0 + 1 < height) ? (y0 + 1) : y0;

                double result = input[y0 * width + x0] * fx1 * fy1 +
                                input[y0 * width + x1] * fx * fy1 +
                                input[y1 * width + x0] * fx1 * fy +
                                input[y1 * width + x1] * fx * fy;

                output[y * (width * 2) + x] = result;
            }
        }
        ''',
        "bilinearInterp")
        self.matrix = matrix
        # The kernel indexes a flat `unsigned char*`, so hand it a contiguous
        # uint8 copy; flatten() always copies, so later edits to `matrix` do
        # not leak into the kernel input.
        self.flattened_matrix = cp.asarray(matrix, dtype=cp.uint8).flatten()
        self.shape = shape
        self.params = params

        required = int(shape[0]) * int(shape[1])
        if self.flattened_matrix.size < required:
            raise ValueError(
                f"matrix has {self.flattened_matrix.size} elements but shape "
                f"{shape} requires at least {required}"
            )

        self.result = cp.zeros((self.shape[0] * 2, self.shape[1] * 2), dtype=cp.float64)

    def bilinear_interp(self):
        """CPU reference: 2x linear upscaling via ``RegularGridInterpolator``.

        The sample positions are ``2 * size`` evenly spaced points on
        ``[0, size - 1)`` per axis, so this grid is not identical to the one
        the CUDA kernel uses (which steps by exactly 0.5 pixel).

        Returns:
            numpy.ndarray of shape ``(2 * height, 2 * width)`` with the
            interpolated values.
        """
        height, width = self.shape[0], self.shape[1]

        x = np.arange(0, width)
        y = np.arange(0, height)

        new_x = np.linspace(0, width - 1, int(width * 2), endpoint=False)
        new_y = np.linspace(0, height - 1, int(height * 2), endpoint=False)

        # SciPy works on host memory; asnumpy transfers without first making
        # an extra device-side copy.
        host_matrix = cp.asnumpy(self.matrix)
        interpolating_function = RegularGridInterpolator((y, x), host_matrix, method='linear')

        points = np.meshgrid(new_y, new_x, indexing='ij')
        points = np.stack(points, axis=-1)

        new_image = interpolating_function(points)

        return new_image

    def get_result(self):
        """Run both implementations, save their images and report timings.

        Side effects:
            Writes ``out_gpu.bmp`` and ``out_scipy.bmp`` to the current
            working directory.

        Returns:
            dict with the keys ``"matrix size"``, ``"parameters"``,
            ``"gpu time"``, ``"scipy time"`` (both in seconds),
            ``"gpu result"`` and ``"scipy result"`` (markdown image links).
        """
        scipystart = time.perf_counter()
        result_scipy = self.bilinear_interp()
        scipyend = time.perf_counter()

        # Kernel launches are asynchronous: drain pending work before starting
        # the clock and wait for the kernel to finish before stopping it,
        # otherwise only the launch overhead would be measured.
        # NOTE: the first launch also includes CuPy's on-demand kernel
        # compilation.
        cp.cuda.Stream.null.synchronize()
        gstart = time.perf_counter()
        self.add_kernel((self.params["gridX"], self.params["gridY"]),
                        (self.params["blockX"], self.params["blockY"]),
                        (self.flattened_matrix, self.result,
                         # Python ints are passed as 64-bit values; the kernel
                         # declares 32-bit `int`, so pass explicit int32.
                         np.int32(self.shape[1]), np.int32(self.shape[0])))
        cp.cuda.Stream.null.synchronize()
        gend = time.perf_counter()

        img_gpu = Image.fromarray(self.result.get())
        img_scipy = Image.fromarray(result_scipy)
        img_gpu = img_gpu.convert("RGB")
        img_scipy = img_scipy.convert("RGB")

        img_gpu.save('out_gpu.bmp')
        img_scipy.save('out_scipy.bmp')

        return {
            "matrix size": str(self.shape),
            "parameters": str(self.params),
            "gpu time": (gend - gstart),
            "scipy time": (scipyend - scipystart),
            "gpu result": "![Alt Text](out_gpu.bmp)",
            "scipy result": "![Alt Text](out_scipy.bmp)",
        }


In [None]:
size = arr.shape
block = (32, 32)
# The output image is twice the input size along each axis. Integer ceil
# division yields the smallest grid that covers it; the previous
# round((n + b - 1) / b) rounded an already padded quotient and launched an
# extra, fully idle block per axis.
grid = (
    (size[1] * 2 + block[0] - 1) // block[0],
    (size[0] * 2 + block[1] - 1) // block[1],
)

# Create the object and get the result
obj = CUDABilinearInterp(size,
                        arr,
                        {
                            "blockX": block[0],
                            "blockY": block[1],
                            "gridX": grid[0],
                            "gridY": grid[1],
                        })

result = obj.get_result()

# Print the result as a markdown table
df1 = pd.DataFrame(result, index=[0])
print(df1.to_markdown(index=False))

| matrix size   | parameters                                             |   gpu time |   scipy time | gpu result               | scipy result               |
|:--------------|:-------------------------------------------------------|-----------:|-------------:|:-------------------------|:---------------------------|
| (813, 751)    | {'blockX': 32, 'blockY': 32, 'gridX': 48, 'gridY': 52} |  0.0920388 |      0.27111 | ![Alt Text](out_gpu.bmp) | ![Alt Text](out_scipy.bmp) |
