# Mount Drive

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

**Before running the next cell, create a shortcut to the shared directory in the root of your Drive (My Drive) and make sure it is named `cuda_src`.**

In [None]:
# Make the cuda_src folder on the mounted Drive the notebook's working directory.
%cd /content/drive/MyDrive/cuda_src/

# GPU TYPE

In [None]:
# Print the CUDA compiler version, then the GPU / driver status reported by nvidia-smi.
!nvcc --version
!nvidia-smi


# Plugin

In [None]:
!pip install git+git://github.com/andreinechaev/nvcc4jupyter.git

In [None]:
# Load the nvcc_plugin IPython extension (from the nvcc4jupyter install above).
# NOTE(review): presumably this is what registers the %%cu cell magic used below — confirm.
%load_ext nvcc_plugin

# GPU INFO

In [None]:
%%cu

#include <stdio.h>
#include <stdlib.h>

/*
 * Print the hardware properties of every CUDA device visible to the runtime.
 *
 * Queries the device count, then for each device fetches its cudaDeviceProp
 * and prints identification, per-SM resources, memory figures and the
 * block / grid launch limits to stdout. Takes no arguments, returns nothing.
 *
 * CUDA runtime errors are reported on stdout. If the device count cannot be
 * obtained the function returns immediately; a device whose properties cannot
 * be read is skipped instead of being printed from an unfilled struct.
 */
void deviceQuery()
{
  cudaDeviceProp prop;
  int nDevices = 0;
  cudaError_t ierr;

  ierr = cudaGetDeviceCount(&nDevices);
  if (ierr != cudaSuccess)
  {
    printf("cudaGetDeviceCount failed: %s\n", cudaGetErrorString(ierr));
    return;
  }

  if (nDevices == 0)
  {
    printf("No CUDA devices found\n");
    return;
  }

  for (int i = 0; i < nDevices; ++i)
  {
    ierr = cudaGetDeviceProperties(&prop, i);
    if (ierr != cudaSuccess)
    {
      /* prop is not valid after a failed query, so report and move on. */
      printf("Device number: %d\n", i);
      printf("  cudaGetDeviceProperties failed: %s\n\n", cudaGetErrorString(ierr));
      continue;
    }

    /* Identification */
    printf("Device number: %d\n", i);
    printf("  Device name: %s\n", prop.name);
    printf("  Compute capability: %d.%d\n\n", prop.major, prop.minor);

    /* Per-SM resources and memory figures (size_t fields use %zu) */
    printf("  Clock Rate: %d kHz\n", prop.clockRate);
    printf("  Total SMs: %d \n", prop.multiProcessorCount);
    printf("  Shared Memory Per SM: %zu bytes\n", prop.sharedMemPerMultiprocessor);
    printf("  Registers Per SM: %d 32-bit\n", prop.regsPerMultiprocessor);
    printf("  Max threads per SM: %d\n", prop.maxThreadsPerMultiProcessor);
    printf("  L2 Cache Size: %d bytes\n", prop.l2CacheSize);
    printf("  Total Global Memory: %zu bytes\n", prop.totalGlobalMem);
    printf("  Memory Clock Rate: %d kHz\n\n", prop.memoryClockRate);

    /* Block limits */
    printf("  Max threads per block: %d\n", prop.maxThreadsPerBlock);
    printf("  Max threads in X-dimension of block: %d\n", prop.maxThreadsDim[0]);
    printf("  Max threads in Y-dimension of block: %d\n", prop.maxThreadsDim[1]);
    printf("  Max threads in Z-dimension of block: %d\n\n", prop.maxThreadsDim[2]);

    /* Grid limits */
    printf("  Max blocks in X-dimension of grid: %d\n", prop.maxGridSize[0]);
    printf("  Max blocks in Y-dimension of grid: %d\n", prop.maxGridSize[1]);
    printf("  Max blocks in Z-dimension of grid: %d\n\n", prop.maxGridSize[2]);

    /* Per-block resources */
    printf("  Shared Memory Per Block: %zu bytes\n", prop.sharedMemPerBlock);
    printf("  Registers Per Block: %d 32-bit\n", prop.regsPerBlock);
    printf("  Warp size: %d\n\n", prop.warpSize);
  }
}

/* Program entry point: print the properties of all CUDA devices, then exit with status 0. */
int main()
{
    deviceQuery();
    return 0;
}

# CPU INFO

In [None]:
# Dump the kernel's CPU description for the host running this notebook.
!cat /proc/cpuinfo

# MEMORY INFO

In [None]:
# Dump the kernel's memory statistics for the host running this notebook.
!cat /proc/meminfo

# DISK INFO

In [None]:
# Show mounted filesystems and their usage in human-readable units.
!df -h

# K80

In [None]:
!apt-get --purge remove cuda nvidia* libnvidia-*
!dpkg -l | grep cuda- | awk '{print $2}' | xargs -n1 dpkg --purge
!apt-get remove cuda-*
!apt autoremove
!apt-get update

In [None]:
!wget https://developer.nvidia.com/compute/cuda/9.2/Prod/local_installers/cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64 -O cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64.deb
!dpkg -i cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64.deb
!apt-key add /var/cuda-repo-9-2-local/7fa2af80.pub
!apt-get update
!apt-get install cuda-9.2

# Bandwidth

In [None]:
%cd /usr/local/cuda/samples
%cd 1_Utilities/bandwidthTest/
!make
!./bandwidthTest --mode-range --start=1000 --end=1000000 --increment=100000

# Counting Sort - Global memory


In [None]:
%cd /content/drive/MyDrive/cuda_src/
!nvcc -o counting_sort_global counting_sort_global.cu
!nvprof --print-gpu-trace ./counting_sort_global 33554432  --benchmark

# Counting Sort - Shared memory

## Single Kernel


In [None]:
%cd /content/drive/MyDrive/cuda_src/
!nvcc -o counting_sort_maxminShared counting_sort_maxminShared.cu
!nvprof --print-gpu-trace ./counting_sort_maxminShared 33554432  --benchmark

## Double Kernel

In [None]:
%cd /content/drive/MyDrive/cuda_src/
!nvcc -o counting_sort_streamed counting_sort_streamed.cu
!nvprof --print-gpu-trace ./counting_sort_streamed 33554432  --benchmark

# Counting Sort - Texture Memory


In [None]:
%cd /content/drive/MyDrive/cuda_src/
!nvcc -o counting_sort_texture counting_sort_texture.cu
!nvprof --print-gpu-trace ./counting_sort_texture 33554432  --benchmark