In [1]:
from numba import cuda
import numpy as np

In [7]:
@cuda.jit
def add_arrays_kernel(a, b, result):
    """
    CUDA kernel: store the element-wise sum of ``a`` and ``b`` in ``result``.

    Each thread handles at most one element, chosen by the thread's
    absolute 1-D position in the launch grid.

    Args:
    a (cuda.devicearray): Left-hand operand.
    b (cuda.devicearray): Right-hand operand.
    result (cuda.devicearray): Output buffer that receives ``a[i] + b[i]``.
    """
    pos = cuda.grid(1)

    # Threads positioned at or beyond the end of ``a`` have no element to process.
    if pos >= a.size:
        return

    result[pos] = a[pos] + b[pos]

def add_arrays(a, b, threads_per_block=8):
    """
    Add two arrays element-wise using GPU acceleration.

    The inputs are flattened before being sent to the device because
    ``add_arrays_kernel`` indexes its arguments with a single 1-D index;
    the result is reshaped back to the input shape before it is returned.

    Args:
    a (np.ndarray): First input array.
    b (np.ndarray): Second input array, same shape as ``a``.
    threads_per_block (int): Threads per CUDA block used for the launch.
        Defaults to 8, the value this function has always used; for large
        arrays a multiple of 32 is usually a better choice.

    Returns:
    np.ndarray: Array with the shape of the inputs holding ``a + b``. Its
    dtype is the NumPy promotion of the two input dtypes, so mixed inputs
    (e.g. int + float) are not cast down to ``a``'s dtype.

    Raises:
    AssertionError: If ``a`` and ``b`` do not have the same shape.
    ValueError: If ``threads_per_block`` is not positive.
    """
    # Raised explicitly (rather than via ``assert``) so the check still runs
    # under ``python -O``; the exception type and message are unchanged.
    if a.shape != b.shape:
        raise AssertionError("Arrays must be of the same size")
    if threads_per_block <= 0:
        raise ValueError("threads_per_block must be a positive integer")

    out_dtype = np.result_type(a.dtype, b.dtype)

    # Nothing to compute: skip the device entirely instead of attempting a
    # launch with zero blocks.
    if a.size == 0:
        return np.empty(a.shape, dtype=out_dtype)

    # The kernel uses 1-D indexing, so hand it contiguous 1-D views/copies.
    flat_a = np.ascontiguousarray(a).reshape(-1)
    flat_b = np.ascontiguousarray(b).reshape(-1)

    # Create device arrays and copy data
    d_a = cuda.to_device(flat_a)
    d_b = cuda.to_device(flat_b)
    d_result = cuda.device_array(flat_a.shape, dtype=out_dtype)

    # Ceiling division: enough blocks to give every element its own thread.
    blocks_per_grid = (flat_a.size + threads_per_block - 1) // threads_per_block

    # Launch the kernel
    add_arrays_kernel[blocks_per_grid, threads_per_block](d_a, d_b, d_result)

    # Copy the result back to the host and restore the caller's shape.
    return d_result.copy_to_host().reshape(a.shape)

In [8]:
# Seeded generator so the demo inputs (and therefore the printed sums) are
# reproducible across Restart & Run All.
rng = np.random.default_rng(0)
a = rng.random(1024)
b = rng.random(1024)
result = add_arrays(a, b)

# Cross-check the GPU result against NumPy's element-wise sum on the host.
np.testing.assert_allclose(result, a + b)
print("Result of addition:", result)

Result of addition: [0.30520261 0.3265152  1.17173019 ... 1.06224932 1.16175546 0.9117495 ]


In [13]:
# NOTE(review): this rebinds `a`, which the earlier demo cell bound to a NumPy array, to a str.
a = "EEFd"

In [14]:
# Length of `a`; the recorded output 4 matches the 4-character string "EEFd".
len(a)

4

In [16]:
# Lower-case the first two characters of `a`.
a[:2].lower()

'ee'