# Raw code

In [None]:
%%writefile raw_add.cu
#include <stdio.h>
#include <stdlib.h>

// Kernel: stores the sum a + b into the int that `res` points to.
// main() below launches it with a single block of a single thread.
__global__ void add(int a, int b, int *res) {
  res[0] = a + b;
}


// Host entry point: launches add(2, 2) on the GPU, copies the result back
// and prints it.
// NOTE(review): the device pointer stays NULL (there is no cudaMalloc) and no
// CUDA return code is inspected. This looks intentional — it is the unchecked
// baseline that the following sections of the notebook diagnose — confirm.
int main() {
  int *dev_sum = NULL;
  int host_sum = 0;

  // Launch add() kernel on GPU
  add<<<1, 1>>>(2, 2, dev_sum);

  // Copy the single int device -> host, then report it
  cudaMemcpy(&host_sum, dev_sum, sizeof host_sum, cudaMemcpyDeviceToHost);
  printf("2 + 2 = %d\n", host_sum);

  return EXIT_SUCCESS;
}

Writing raw_add.cu


In [None]:
# Compile raw_add.cu with nvcc into the executable ./raw_add.
!nvcc raw_add.cu -o raw_add

In [None]:
# Run the unchecked version. Nothing in the program reports a CUDA failure,
# so it simply prints whatever `res` holds after the (unchecked) copy.
!./raw_add

2 + 2 = 0


# Debugging

In [None]:
%%writefile add.cu
#include <stdio.h>
#include <stdlib.h>

__global__ void add(int a, int b, int *res) {  // kernel: writes a + b through the pointer res
  *res = a + b;
}  // keep this kernel's line layout: the recorded cuda-gdb output below quotes add.cu:5
// Host entry point of the build that is run under cuda-gdb: launches
// add(2, 2), copies the result back and prints it, checking no return codes.
// NOTE(review): the device pointer is never allocated (no cudaMalloc); the
// debugger session below shows the kernel receiving res=0x0. Presumably
// deliberate for the demonstration — confirm.
int main() {
  int *dev_sum = NULL;
  int host_sum = 0;

  // Launch add() kernel on GPU
  add<<<1, 1>>>(2, 2, dev_sum);

  // Copy the single int device -> host, then report it
  cudaMemcpy(&host_sum, dev_sum, sizeof host_sum, cudaMemcpyDeviceToHost);
  printf("2 + 2 = %d\n", host_sum);

  return EXIT_SUCCESS;
}

Writing add.cu


In [None]:
# Build add.cu with debug information: -g for host code, -G for device code,
# so cuda-gdb can map the kernel back to source lines.
! nvcc -g -G add.cu -o add

In [None]:
%%writefile debug_instructions.txt
# Turn on cuda-gdb's memory checking for device code.
# NOTE(review): this setting may be deprecated or absent in recent CUDA
# toolkits — confirm against the installed cuda-gdb version.
set cuda memcheck on
# Stop execution when a CUDA API call reports a failure.
set cuda api_failures stop
# Also stop whenever a C++ exception is thrown.
catch throw
# Run the program until it stops.
r
# Show the backtrace and the local variables at the stop location.
bt
info locals
# Switch to thread 1 and show its backtrace as well.
thread 1
bt

Overwriting debug_instructions.txt


In [None]:
# Run ./add under cuda-gdb non-interactively (-batch), executing the commands
# stored in debug_instructions.txt (-x).
! cuda-gdb -batch -x debug_instructions.txt ./add

Catchpoint 1 (throw)
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
[Detaching after fork from child process 616]
[New Thread 0x7fb3efb1f000 (LWP 624)]
[New Thread 0x7fb3ef31e000 (LWP 625)]

Thread 1 "add" received signal CUDA_EXCEPTION_1, Lane Illegal Address.
[Switching focus to CUDA kernel 0, grid 1, block (0,0,0), thread (0,0,0), device 0, sm 0, warp 0, lane 0]
0x0000557882dee160 in add (a=2, b=2, res=0x0) at add.cu:5
5	  *res = a + b;
#0  0x0000557882dee160 in add (a=2, b=2, res=0x0) at add.cu:5
#1  0x0000557882dee160 in add<<<(1,1,1),(1,1,1)>>> (a=2, b=2, res=0x0) at add.cu:5
No locals.
[Switching to thread 1 (Thread 0x7fb3f720c000 (LWP 611))]
#0  0x00007fb3f5cad430 in ?? () from /usr/lib64-nvidia/libcuda.so.1
#0  0x00007fb3f5cad430 in ?? () from /usr/lib64-nvidia/libcuda.so.1
#1  0x00007fb3f5f1fa96 in ?? () from /usr/lib64-nvidia/libcuda.so.1
#2  0x00007fb3f5f20581 in ?? () from /usr/lib64-nvidia/libcuda.s

# Code with error management

In [None]:
%%writefile add.cu
#include <stdio.h>
#include <stdlib.h>

// Kernel: stores the sum a + b into the int that `res` points to.
// main() below launches it with a single block of a single thread.
__global__ void add(int a, int b, int *res) {
  res[0] = a + b;
}


// Host entry point: launches add(2, 2) and checks the CUDA status after the
// launch, after the synchronisation and after the copy back to the host.
// On the first failure it prints the CUDA error string to stderr and exits
// with the error code as the process status.
// NOTE(review): the device pointer is never allocated (no cudaMalloc) —
// presumably on purpose, so that the checks have an error to report; confirm.
int main() {
  int host_sum = 0;
  int *dev_sum = NULL;

  // Launch add() kernel on GPU and query the status right after the launch
  add<<<1, 1>>>(2, 2, dev_sum);
  const cudaError_t launch_status = cudaPeekAtLastError();
  if (launch_status != cudaSuccess) {
    fprintf(stderr, "GPUassert: add launch failed with the error : %s \n",
            cudaGetErrorString(launch_status));
    exit(launch_status);
  }

  // Synchronise with the device and check the status it returns
  const cudaError_t exec_status = cudaDeviceSynchronize();
  if (exec_status != cudaSuccess) {
    fprintf(stderr, "GPUassert: add execution failed with the error : %s \n",
            cudaGetErrorString(exec_status));
    exit(exec_status);
  }

  // Copy the single int device -> host
  const cudaError_t copy_status =
      cudaMemcpy(&host_sum, dev_sum, sizeof host_sum, cudaMemcpyDeviceToHost);
  if (copy_status != cudaSuccess) {
    fprintf(stderr, "GPUassert: cudaMemcpy failed with the error : %s \n",
            cudaGetErrorString(copy_status));
    exit(copy_status);
  }

  printf("2 + 2 = %d\n", host_sum);

  return EXIT_SUCCESS;
}

Overwriting add.cu


In [None]:
# Rebuild ./add from the error-checked add.cu (no debug flags this time).
! nvcc add.cu -o add

In [None]:
# Run the checked version: a failing CUDA call now prints its error string
# to stderr and the program exits instead of printing a result.
!./add

GPUassert: add execution failed with the error : an illegal memory access was encountered 


# CUDA error management utilities in a separate header file


In [None]:
%%writefile cuda_stuff.cuh
/*
 * cuda_stuff.cuh — small CUDA error-checking helpers.
 *
 * The include guard wraps the whole header (including its own #includes) so
 * that a second inclusion is skipped entirely.
 */
#ifndef cuda_stuff_H
#define cuda_stuff_H

#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime.h>

//MACRO TO DEBUG CUDA FUNCTIONS
/** Error checking,
 *  taken from https://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api
 *
 *  gpuErrchk(ans) evaluates `ans` (an expression of type cudaError_t) once and
 *  forwards it to gpuAssert together with the file and line of the call site.
 *
 *  The body is wrapped in do { ... } while (0) so the macro behaves like a
 *  single statement: `gpuErrchk(x);` is safe inside an unbraced if/else.
 *  Always terminate the invocation with a semicolon.
 */
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

/**
 * Reports a failed CUDA call.
 *
 * If `code` is not cudaSuccess, prints "GPUassert: <error string> <file> <line>"
 * to stderr and, when `abort` is true, terminates the process with `code` as
 * the exit status. Does nothing when `code` is cudaSuccess.
 *
 * @param code   status returned by a CUDA runtime call
 * @param file   source file name to show in the message
 * @param line   source line number to show in the message
 * @param abort  exit the process on error (default: true)
 */
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
   if (code != cudaSuccess)
   {
      fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
      if (abort) exit(code);
   }
}

#endif


Writing cuda_stuff.cuh


In [None]:
%%writefile addition.cu
#include <stdio.h>
#include <stdlib.h>

#include "cuda_stuff.cuh"

// Kernel: stores the sum a + b into the int that `res` points to.
__global__ void add(int a, int b, int *res) {
  res[0] = a + b;
}

int main() {  // Host entry point: run add(2, 2) on the GPU with every CUDA status passed through gpuErrchk.
  int res=0;
  int *d_res = NULL;  // NOTE(review): never allocated (no cudaMalloc) — looks deliberate, to trigger the error shown below; confirm
  // gpuErrchk reports __LINE__: keep the calls below on their current lines or the recorded "addition.cu 18" output goes stale.
  // Launch add() kernel on GPU
  add<<<1,1>>>(2, 2, d_res);
  gpuErrchk( cudaPeekAtLastError() );  // check the status right after the launch
  gpuErrchk( cudaDeviceSynchronize() );  // check the status returned by the sync; this is the line the recorded output cites
  // Copy the single int back to the host (also checked), then print it.
  gpuErrchk(cudaMemcpy(&res, d_res, sizeof(int), cudaMemcpyDeviceToHost));
  printf("2 + 2 = %d\n", res);

  return EXIT_SUCCESS;
}

Writing addition.cu


In [None]:
# Compile addition.cu (which includes cuda_stuff.cuh from the current directory).
!nvcc addition.cu -o addition

In [None]:
# Run it: on a CUDA error gpuAssert prints the error string followed by the
# source file and line of the failing gpuErrchk call.
! ./addition

GPUassert: an illegal memory access was encountered addition.cu 18
