In [None]:
!nvidia-smi

Wed Oct 23 18:01:37 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   45C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [None]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0


In [None]:
import numpy as np
import time

def cpu_vector_addition(a, b):
    return a + b

def measure_cpu_time(size):
    a = np.random.rand(size)
    b = np.random.rand(size)

    start_time = time.time()
    result = cpu_vector_addition(a, b)
    end_time = time.time()

    return end_time - start_time

In [None]:
pip install pycuda

Collecting pycuda
  Downloading pycuda-2024.1.2.tar.gz (1.7 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.7 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m60.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pytools>=2011.2 (from pycuda)
  Downloading pytools-2024.1.14-py3-none-any.whl.metadata (3.0 kB)
Collecting mako (from pycuda)
  Downloading Mako-1.3.6-py3-none-any.whl.metadata (2.9 kB)
Downloading pytools-2024.1.14-py3-none-any.whl (89 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading Mako-1.3.6-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m6.1 MB/s[0m eta 

In [47]:
import pycuda.driver as cuda
import pycuda.autoinit
from matplotlib import pyplot as plt
from pycuda.compiler import SourceModule
import numpy as np
import cupy as cp
import time


def vector_addition_cpu(vector1, vector2):
    if len(vector1) != len(vector2):
        raise ValueError("Vectors must be of the same length")

    result = np.zeros_like(vector1)
    start = time.time()
    for i in range(len(vector1)):
        result[i] = vector1[i] + vector2[i]
    end = time.time()
    return result, end - start


vector_sum_kernel = ("""
__global__ void vectorAdditionKernel(float* res, float* a, float* b, unsigned int size) {
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i += blockDim.x * gridDim.x)
        res[i] = a[i] + b[i];
}
""")


def vector_addition_gpu(vector1, vector2):
    if len(vector1) != len(vector2):
        raise ValueError("Vectors must be of the same length")

    start_time = time.time()

    block_size = 256
    grid_size = (len(vector1) + block_size - 1) // block_size

    res = np.zeros_like(vector1, dtype=np.float32)
    vec1_gpu = cuda.mem_alloc(vector1.nbytes)
    vec2_gpu = cuda.mem_alloc(vector2.nbytes)
    result_gpu = cuda.mem_alloc(res.nbytes)

    cuda.memcpy_htod(vec1_gpu, vector1)
    cuda.memcpy_htod(vec2_gpu, vector2)
    cuda.memcpy_htod(result_gpu, res)

    mod = SourceModule(vector_sum_kernel)
    vector_addition = mod.get_function("vectorAdditionKernel")
    vector_addition(result_gpu, vec1_gpu, vec2_gpu, np.uint32(len(vector1)), block=(block_size, 1, 1), grid=(grid_size, 1))
    cuda.Context.get_current().synchronize()

    cuda.memcpy_dtoh(res, result_gpu)
    end_time = time.time()

    return res, end_time - start_time


def compare_vector_addition_times(vector_lengths):
    results = []
    for length in vector_lengths:
        vector1 = np.random.rand(length).astype(np.float32)
        vector2 = np.random.rand(length).astype(np.float32)

        cpu_result, cpu_time = vector_addition_cpu(vector1, vector2)
        gpu_result, gpu_time = vector_addition_gpu(vector1, vector2)

        # Проверка результатов
        if not np.allclose(cpu_result, gpu_result):
            raise ValueError("Results do not match!")

        speedup = cpu_time / gpu_time
        results.append((length, cpu_time, gpu_time, speedup))

    return results


def print_results_table(results):
    print("Vector Length | CPU Time (s) | GPU Time (s) | Speedup")
    print("-" * 65)
    for length, cpu_time, gpu_time, speedup in results:
        print(f"{length:<13} | {cpu_time:.6f} | {gpu_time:.6f} | {speedup:.2f}x")


if __name__ == "__main__":
    vector_lengths = [1000, 10000, 50000, 100000]
    results = compare_vector_addition_times(vector_lengths)
    print_results_table(results)

Vector Length | CPU Time (s) | GPU Time (s) | Speedup
-----------------------------------------------------------------
1000          | 0.000763 | 0.000895 | 0.85x
10000         | 0.009667 | 0.000836 | 11.56x
50000         | 0.034792 | 0.000895 | 38.89x
100000        | 0.068200 | 0.001446 | 47.15x
