# Setup

Checking CUDA Installation



In [None]:
!nvcc --version

Install Extension to Enable NVCC in Notebook Cells

In [None]:
!pip install git+https://github.com/andreinechaev/nvcc4jupyter.git

Verifying That the Installation Succeeded


In [None]:
!pip show nvcc4jupyter

# CUDA Code

In [None]:
%%writefile cuda.cu

#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#define EPSILON 1e-9

// Number of blocks of the current normalize_row launch that have finished
// their non-pivot columns. atomicInc wraps it back to 0 when the last block
// arrives, so it is ready for the next launch.
__device__ unsigned int normalize_row_blocks_done = 0;

/*
 * Divides every element of row `row` by that row's diagonal (pivot) element.
 *
 * Launch layout: 1-D grid of 1-D blocks with at least `stride` threads in
 * total; thread `idx` handles column `idx`.
 *
 * matrix : device pointer to a row-major matrix with `stride` doubles per row
 * row    : index of the row to normalize (also the pivot column)
 * N      : unused, kept for interface compatibility
 * stride : number of columns per row
 *
 * If |pivot| <= EPSILON the row is left untouched and a single warning is
 * printed.
 *
 * The pivot element is both the divisor read by every thread and one of the
 * elements being rewritten. Writing it while other warps/blocks may still be
 * reading it is a data race, so its update is deferred: each block takes a
 * ticket once all of its threads are done, and only the block holding the
 * last ticket stores the normalized pivot (pivot / pivot == 1.0).
 *
 * Launches of this kernel must not overlap one another (e.g. on different
 * streams), because they share one completion counter.
 */
__global__ void normalize_row(double *matrix, int row, int N, int stride) {
    int idx = threadIdx.x + blockIdx.x * blockDim.x;

    // Every thread takes its own copy of the still-unmodified pivot.
    double pivot_value = matrix[row * stride + row];
    bool pivot_usable = fabs(pivot_value) > EPSILON;

    // The pivot column itself is skipped here; see the deferred write below.
    if (idx < stride && idx != row && pivot_usable) {
        matrix[row * stride + idx] /= pivot_value;
    }

    // All threads of this block have read the pivot once they pass this barrier.
    __syncthreads();

    if (threadIdx.x == 0) {
        __threadfence();  // order this block's accesses before taking a ticket
        unsigned int ticket = atomicInc(&normalize_row_blocks_done, gridDim.x - 1);
        if (ticket == gridDim.x - 1) {
            // Last block to finish: nobody reads the pivot any more.
            __threadfence();
            if (pivot_usable) {
                matrix[row * stride + row] = 1.0;
            } else {
                printf("Pivot too small or zero at row %d, cannot normalize.\n", row);
            }
        }
    }
}

// Number of blocks of the current eliminate_row launch that have finished
// their non-pivot columns. atomicInc wraps it back to 0 when the last block
// arrives, so it is ready for the next launch.
__device__ unsigned int eliminate_row_blocks_done = 0;

/*
 * Subtracts `factor * pivot_row` from `curr_row`, where `factor` is the
 * element of `curr_row` in column `pivot_row`.
 *
 * Launch layout: 1-D grid of 1-D blocks with at least `stride` threads in
 * total; thread `idx` handles column `idx`.
 *
 * matrix    : device pointer to a row-major matrix with `stride` doubles per row
 * pivot_row : row being subtracted (also the column that holds the factor)
 * curr_row  : row being updated
 * N         : unused, kept for interface compatibility
 * stride    : number of columns per row
 *
 * Rows whose factor satisfies |factor| <= EPSILON are left untouched.
 *
 * The factor element is read by every thread and is also one of the elements
 * being rewritten. Updating it while other warps/blocks may still be reading
 * it is a data race, so its update is deferred: each block takes a ticket
 * once all of its threads are done, and only the block holding the last
 * ticket updates the factor column.
 *
 * Launches of this kernel must not overlap one another (e.g. on different
 * streams), because they share one completion counter.
 */
__global__ void eliminate_row(double *matrix, int pivot_row, int curr_row, int N, int stride) {
    int idx = threadIdx.x + blockIdx.x * blockDim.x;

    // Every thread takes its own copy of the still-unmodified factor.
    double factor = matrix[curr_row * stride + pivot_row];
    bool needs_elimination = fabs(factor) > EPSILON;

    // The factor column itself is skipped here; see the deferred write below.
    if (idx < stride && idx != pivot_row && needs_elimination) {
        matrix[curr_row * stride + idx] -= factor * matrix[pivot_row * stride + idx];
    }

    // All threads of this block have read the factor once they pass this barrier.
    __syncthreads();

    if (threadIdx.x == 0) {
        __threadfence();  // order this block's accesses before taking a ticket
        unsigned int ticket = atomicInc(&eliminate_row_blocks_done, gridDim.x - 1);
        if (ticket == gridDim.x - 1) {
            // Last block to finish: nobody reads the factor any more.
            __threadfence();
            if (needs_elimination) {
                matrix[curr_row * stride + pivot_row] =
                    factor - factor * matrix[pivot_row * stride + pivot_row];
            }
        }
    }
}

/*
 * Reads N*N doubles from stdin, row by row, into the left N columns of a
 * row-major buffer whose rows are 2*N doubles wide. The right N columns of
 * each row are not touched.
 *
 * Prints an error naming the failing (row, column) to stderr and exits with
 * status 1 if a value cannot be parsed.
 */
void read_matrix(double *matrix, int N)
{
    const int row_width = 2 * N;

    for (int r = 0; r < N; ++r)
    {
        double *row_start = matrix + r * row_width;

        for (int c = 0; c < N; ++c)
        {
            int parsed = scanf("%lf", row_start + c);
            if (parsed == 1)
            {
                continue;
            }

            fprintf(stderr, "Error reading matrix element at position (%d, %d).\n", r, c);
            exit(1);
        }
    }
}

/*
 * Writes the N x N identity matrix into the right N columns of a row-major
 * buffer whose rows are 2*N doubles wide. The left N columns are not touched.
 */
void initialize_identity(double *matrix, int N) {
    const int row_width = 2 * N;

    for (int r = 0; r < N; r++) {
        double *right_half = matrix + r * row_width + N;

        for (int c = 0; c < N; c++) {
            if (c == r) {
                right_half[c] = 1.0;
            } else {
                right_half[c] = 0.0;
            }
        }
    }
}

/* Aborts with a diagnostic on stderr if a CUDA runtime call or launch failed. */
static void check_cuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(1);
    }
}

/*
 * Runs Gauss-Jordan elimination (without row exchanges) on the GPU, in place
 * on an augmented matrix.
 *
 * h_matrix : host pointer to N rows of 2*N doubles, row-major. On entry the
 *            left half holds the matrix to invert and the right half the
 *            identity; on return the buffer holds the reduced result, whose
 *            right half is the inverse when every pivot was usable.
 * N        : matrix dimension. Nothing is done if N <= 0 or h_matrix is NULL.
 *
 * Exits with status 1 if any CUDA call or kernel launch fails.
 */
void invert_matrix(double *h_matrix, int N) {
    if (h_matrix == NULL || N <= 0) {
        return;
    }

    double *d_matrix = NULL;
    int stride = 2 * N;
    size_t bytes = sizeof(double) * (size_t)N * (size_t)stride;

    check_cuda(cudaMalloc(&d_matrix, bytes), "cudaMalloc");
    check_cuda(cudaMemcpy(d_matrix, h_matrix, bytes, cudaMemcpyHostToDevice),
               "host-to-device copy");

    // One thread per column of the augmented row.
    dim3 threads(256);
    dim3 blocks((stride + 255) / 256);

    for (int i = 0; i < N; i++) {
        normalize_row<<<blocks, threads>>>(d_matrix, i, N, stride);
        check_cuda(cudaGetLastError(), "normalize_row launch");

        // Kernels issued to the same stream run in order, so no host-side
        // synchronization is needed between these launches.
        for (int j = 0; j < N; j++) {
            if (i != j) {
                eliminate_row<<<blocks, threads>>>(d_matrix, i, j, N, stride);
                check_cuda(cudaGetLastError(), "eliminate_row launch");
            }
        }

        // One sync per pivot row: surfaces execution errors early and flushes
        // any device-side printf output.
        check_cuda(cudaDeviceSynchronize(), "elimination step");
    }

    check_cuda(cudaMemcpy(h_matrix, d_matrix, bytes, cudaMemcpyDeviceToHost),
               "device-to-host copy");
    check_cuda(cudaFree(d_matrix), "cudaFree");
}

/*
 * Writes N followed by the right N columns of each row of a row-major buffer
 * (rows are 2*N doubles wide) to the text file "mp_inverse_<N>.txt", one
 * matrix row per line, and reports the outcome on stdout.
 */
void save_file(double *matrix, int N)
{
    char filename[100]; // ample room for the prefix plus any int
    snprintf(filename, sizeof(filename), "mp_inverse_%d.txt", N);

    FILE *out = fopen(filename, "w");
    if (out == NULL)
    {
        printf("Failed to open file for logging.\n");
        return;
    }

    fprintf(out, "%d\n", N);

    for (int r = 0; r < N; r++)
    {
        // Start of the right half of row r.
        const double *right_half = matrix + r * 2 * N + N;

        for (int c = 0; c < N; c++)
        {
            fprintf(out, "%f ", right_half[c]);
        }
        fprintf(out, "\n");
    }

    fclose(out);
    printf("Result matrix logged to file named mp_inverse_%d.txt.\n", N);
}

/*
 * Reads the dimension N and an N x N matrix from stdin, inverts it on the GPU
 * and writes the inverse to "mp_inverse_<N>.txt".
 *
 * Returns 0 on success and 1 if the dimension cannot be read or is not
 * positive; exits with status 1 on allocation failure.
 */
int main(void) {
    int N;
    double *matrix = NULL;

    // Without a valid dimension there is nothing to allocate or invert;
    // continuing would use an uninitialized N and a NULL buffer.
    if (scanf("%d\n", &N) != 1)
    {
        fprintf(stderr, "Error reading matrix dimension.\n");
        return 1;
    }
    if (N <= 0)
    {
        fprintf(stderr, "Matrix dimension must be positive, got %d.\n", N);
        return 1;
    }

    // Augmented layout: N rows of 2*N doubles (input | identity).
    matrix = (double *)malloc(sizeof(double) * (size_t)N * 2 * (size_t)N);
    if (matrix == NULL)
    {
        fprintf(stderr, "Malloc error for matrix! Aborting.. \n");
        exit(1);
    }
    read_matrix(matrix, N);
    initialize_identity(matrix, N);

    invert_matrix(matrix, N);

    save_file(matrix, N);

    free(matrix);
    return 0;
}

Compiling the CUDA code

In [None]:
!nvcc cuda.cu -o cuda

# Get Testcase

In [None]:
#!gsutil cp <Link testcase> .


# Execution

In [None]:
!./cuda < 64.txt

In [None]:
!./cuda < 1024.txt