<a href="https://colab.research.google.com/github/trungvinhbui/cudaGoogleColab/blob/master/tetsCuda.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Install CUDA Toolkit**

In [0]:

# Install the CUDA 10.0 toolkit from NVIDIA's local-repo package for
# Ubuntu 18.04, expose the system gcc/g++ inside the CUDA bin directory,
# and load the nvcc4jupyter plugin that provides the %%cu cell magic.
# NOTE(review): the `cuda` meta-package may also pull in driver packages;
# if only the compiler is needed, `cuda-toolkit-10-0` could be enough - confirm.
!apt-get update -qq
# -nc: skip the download when the file is already present (cell re-run).
!wget -nc https://developer.nvidia.com/compute/cuda/10.0/Prod/local_installers/cuda-repo-ubuntu1804-10-0-local-10.0.130-410.48_1.0-1_amd64
!dpkg -i cuda-repo-ubuntu1804-10-0-local-10.0.130-410.48_1.0-1_amd64
!apt-key add /var/cuda-repo-10-0-local-10.0.130-410.48/7fa2af80.pub
!apt-get update -qq
# -y: answer apt's confirmation prompt so the cell runs unattended.
!apt-get install -y cuda
# -f: replace an existing link so a re-run does not fail with "File exists".
!ln -sf /usr/bin/gcc /usr/local/cuda/bin/gcc
!ln -sf /usr/bin/g++ /usr/local/cuda/bin/g++
!/usr/local/cuda/bin/nvcc --version
# GitHub no longer serves the unauthenticated git:// protocol; install over https.
!pip install git+https://github.com/trungvinhbui/nvcc4jupyter.git
%load_ext nvcc_plugin

**Test CUDA Cell**

In [0]:
%%cu
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

// SAXPY kernel: for every index below n, overwrite y[idx] with
// a*x[idx] + y[idx]. Each thread handles at most one element.
__global__
void saxpy(int n, float a, float *x, float *y)
{
  // Flat index of this thread across the whole launch.
  const int idx = threadIdx.x + blockDim.x*blockIdx.x;

  // Threads whose index falls at or beyond n have nothing to do.
  if (idx >= n) return;

  y[idx] = a*x[idx] + y[idx];
}

// Evaluate a CUDA runtime call and abort with the file, line and CUDA error
// string when it does not return cudaSuccess. Without this, a failed
// allocation, copy or launch goes unnoticed and the program still prints a
// "Max error" value computed from data the GPU never touched.
#define CUDA_CHECK(call)                                                \
  do {                                                                  \
    cudaError_t cuda_status_ = (call);                                  \
    if (cuda_status_ != cudaSuccess) {                                  \
      fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__, __LINE__,  \
              cudaGetErrorString(cuda_status_));                        \
      exit(EXIT_FAILURE);                                               \
    }                                                                   \
  } while (0)

// Host driver: fills x with 1.0 and y with 2.0 (1M elements each), runs
// saxpy with a = 2.0 on the device, copies y back and prints the largest
// absolute deviation from the expected value 4.0.
// Returns 0 on success; exits with EXIT_FAILURE on any host or CUDA error.
int main(void)
{
  const int N = 1<<20;
  const size_t bytes = N*sizeof(float);
  float *x, *y, *d_x, *d_y;

  x = (float*)malloc(bytes);
  y = (float*)malloc(bytes);
  if (x == NULL || y == NULL) {
    fprintf(stderr, "Host allocation of %zu bytes failed\n", bytes);
    free(x);  // free(NULL) is a no-op, so this is safe for either pointer
    free(y);
    return EXIT_FAILURE;
  }

  CUDA_CHECK(cudaMalloc(&d_x, bytes));
  CUDA_CHECK(cudaMalloc(&d_y, bytes));

  for (int i = 0; i < N; i++) {
    x[i] = 1.0f;
    y[i] = 2.0f;
  }

  CUDA_CHECK(cudaMemcpy(d_x, x, bytes, cudaMemcpyHostToDevice));
  CUDA_CHECK(cudaMemcpy(d_y, y, bytes, cudaMemcpyHostToDevice));

  // Perform SAXPY on 1M elements; (N+255)/256 rounds the block count up so
  // every element is covered by a thread.
  saxpy<<<(N+255)/256, 256>>>(N, 2.0f, d_x, d_y);
  CUDA_CHECK(cudaGetLastError());       // errors raised by the launch itself
  CUDA_CHECK(cudaDeviceSynchronize());  // errors raised while the kernel ran

  CUDA_CHECK(cudaMemcpy(y, d_y, bytes, cudaMemcpyDeviceToHost));

  // Expected result per element: 2.0*1.0 + 2.0 = 4.0.
  // fabsf/fmaxf are the explicit single-precision functions, so no integer
  // abs() overload can be picked by accident.
  float maxError = 0.0f;
  for (int i = 0; i < N; i++)
    maxError = fmaxf(maxError, fabsf(y[i]-4.0f));
  printf("Max error: %f\n", maxError);

  CUDA_CHECK(cudaFree(d_x));
  CUDA_CHECK(cudaFree(d_y));
  free(x);
  free(y);
  return 0;
}

**Install CUDA Python**

In [0]:
# Install Anaconda 5.3.0 into /usr/local, then add numba and cudatoolkit
# through conda.
# -nc: skip the download when the installer is already present (cell re-run).
!wget -nc https://repo.anaconda.com/archive/Anaconda3-5.3.0-Linux-x86_64.sh
!chmod +x Anaconda3-5.3.0-Linux-x86_64.sh
# -b: batch (non-interactive) install, -f: do not fail if the prefix already
# exists, -p: install prefix.
!bash ./Anaconda3-5.3.0-Linux-x86_64.sh -b -f -p /usr/local
# -y: conda otherwise stops at its "Proceed ([y]/n)?" prompt.
!conda install -y numba
!conda install -y cudatoolkit

**Connect Google Drive**

In [0]:
# Mount Google Drive at /content/drive using google-drive-ocamlfuse.
# Steps: install the FUSE client, authenticate this Colab session, run the
# headless OAuth flow (prints a URL, then asks for the verification code),
# and finally mount the drive.
import getpass

from google.colab import auth
from oauth2client.client import GoogleCredentials

# NOTE(review): python-software-properties may not exist on newer Ubuntu
# releases, which would make this install line fail - confirm it is needed.
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
# Only stdout is discarded; error messages stay visible in the cell output.
# (The former `2>&1 > /dev/null` had the same effect: placed before the
# stdout redirect, `2>&1` does not send stderr to /dev/null.)
!add-apt-repository -y ppa:alessandro-strada/ppa > /dev/null
!apt-get update -qq > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse

auth.authenticate_user()
creds = GoogleCredentials.get_application_default()

# First pass prints the authorization URL to open in a browser.
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
# getpass keeps the pasted verification code out of the notebook output.
vcode = getpass.getpass()
# Quote the interpolated code so the shell treats it as plain text.
!echo '{vcode}' | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

# Use the absolute path that the rest of the notebook refers to, and skip the
# mount when the directory is already a mount point (cell re-run).
!mkdir -p /content/drive
!mountpoint -q /content/drive || google-drive-ocamlfuse /content/drive

**Working folders**
*  Drive mount point (newly created directory): /content/drive/
*  Data directory: /content/drive/GoogleColabData/






**Test CUDA Python and C**

In [99]:
# Run the Python test script from Drive, then build and run the CUDA C test.
!echo Test Python
!python /content/drive/GoogleColabData/testcudapython.py
!echo Test C
# Chain with && so a failed compile does not go on to run a stale ./testcudac
# binary left over from an earlier build.
!nvcc -o testcudac /content/drive/GoogleColabData/testcudac.cu && ./testcudac

Test Python
C[:5] = [2. 2. 2. 2. 2.]
C[-5:] = [2. 2. 2. 2. 2.]
VectorAdd took for 0.3741447925567627econds
[0mTest C
Max error: 2.000000
