In [3]:
#!/usr/bin/env python


import pycuda.autoinit
from pycuda.compiler import SourceModule
import pycuda.driver as cuda
import pycuda.gpuarray as gpuarray
import numpy as np
import time
import matplotlib.pyplot as plt
from scipy.signal import convolve2d


In [4]:
class Convolution:
	def __init__(self):
		# """
		# Attributes for instance of EncoderDecoder module
		# """
		self.mod = None
		pass
	
	def getSourceModule(self, method):
		# kernel code wrapper
		kernelwrapper_naive = """
			#include <stdio.h>
			__global__ 
			void conv_gpu_naive(float *N, float *P, float *M, int height, int width, int mask_width){

				// the coordinate of thread (also coordinate in N or P)
				int col = blockDim.x * blockIdx.x + threadIdx.x;
				int row = blockDim.y * blockIdx.y + threadIdx.y;

				// copy to register
				int mask_w = mask_width;
				int n_w = width;
				int n_h = height;
				// start point of the kernel
				int col_start = col - mask_w/2;
				int row_start = row - mask_w/2;

				float p_value = 0.0f;

				// for every pixel in mask
				for(int i=0; i<mask_w; i++){
					// x coordinate in N
					int row_i = row_start + i;
					// if in the range of N
					if(row_i>=0 && row_i<n_h){
						for(int j=0; j<mask_w; j++){
							// y coordinate in N
							int col_i = col_start + j;
							// if in the range of N
							if(col_i>=0 && col_i<n_w){
								p_value += N[row_i*n_w+col_i] * M[mask_w*mask_w-(i*mask_w+j)-1];
								//int a = col_i*n_w+row_i;
								//printf("%d", a);
							}
						}
					}
				}
				//P[row*n_w+col] = N[row*n_w+col];
				P[row*n_w+col] = p_value;
			}
		
		""" # you can either use a string or save the kernel in kernel.cu file and reference it here.
		# Compile the kernel code when an instance
		# of this class is made. 
		if method == 'naive':
			mod = SourceModule(kernelwrapper_naive)
		else:
			print("Please enter the correct method name! -naive")
			
		self.mod = mod

	def getBlockGridDim(self, N, blocksize=32):
		BlockDim = (blocksize, blocksize,1)
		GridDim = (N.shape[0]//blocksize+1, N.shape[1]//blocksize+1,1)
		return BlockDim, GridDim

	def conv_gpu_naive(self, N, M):
		"""
		convolution with global memory
		:param N: input matrix
		:param M: mask
		:return:
		- out: a tensor with the same shape as x
		- cache: (train phase) cache a random dropout mask used in feedforward process
				(test phase) None
		"""
		# implement this, note you can change the function signature (arguments and return type)
		# convert the datatype
		N = N.astype(np.float32)
		M = M.astype(np.float32)

		self.getSourceModule('naive')
		func_conv = self.mod.get_function("conv_gpu_naive")
		height, width = N.shape
		# print(height,width)
		mask_width = M.shape[0]
		# print(mask_width)
		# the result matrix
		P = np.empty_like(N)
		# copy to device global memory
		N_d = gpuarray.to_gpu(N)
		M_d = gpuarray.to_gpu(M)
		P_d = gpuarray.to_gpu(P)
		# block and grid size
		BlockDim, GridDim = self.getBlockGridDim(N)

		func_conv(N_d, P_d, M_d, np.int32(height), np.int32(width), np.int32(mask_width), block=BlockDim, grid = GridDim)

		P = P_d.get()

		return P


	def conv_gpu_shared_mem(self):
		# implement this, note you can change the function signature (arguments and return type)
		pass

	def conv_gpu_shared_and_constant_mem(self):
		# implement this, note you can change the function signature (arguments and return type)
		pass

	def test_conv_pycuda(self):
		# implement this, note you can change the function signature (arguments and return type)
		pass

In [10]:
N = np.random.rand(6,5)
M = np.random.rand(3,3)
M = np.array([[1,0,0],[0,0,0],[0,0,0]])
# print(M)
conver = Convolution()
P_cu = conver.conv_gpu_naive(N,M)
P_sp = convolve2d(N.astype(np.float32), M.astype(np.float32), mode='same')
print(np.allclose(P_cu, P_sp))
print(N,'\n')
print(P_cu,'\n')
print(P_sp)

False
[[0.27530279 0.31653434 0.7534699  0.67114368 0.99024104]
 [0.00622994 0.72482204 0.3604693  0.78444885 0.45419031]
 [0.62593506 0.49758604 0.03948458 0.73805377 0.45162348]
 [0.93877677 0.01413113 0.31624175 0.16355324 0.04810857]
 [0.76496508 0.23980418 0.88955593 0.94045016 0.38946281]
 [0.07466562 0.33541515 0.28967001 0.03664556 0.65185675]] 

[[0.72482204 0.3604693  0.78444886 0.4541903  0.        ]
 [0.49758604 0.03948458 0.7380538  0.45162347 0.        ]
 [0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.        ]] 

[[0.72482204 0.3604693  0.78444886 0.4541903  0.        ]
 [0.49758604 0.03948458 0.7380538  0.45162347 0.        ]
 [0.01413113 0.31624174 0.16355324 0.04810857 0.        ]
 [0.23980418 0.88955593 0.94045013 0.3894628  0.        ]
 [0.33541515 0.28967002 0.03664556 0.6518567  0.        ]
 [

[[0.25858879 0.76644024 0.23690224 ... 0.10312783 0.802767   0.2767137 ]
 [0.48213143 0.43717487 0.24105782 ... 0.42669798 0.75363248 0.88913923]
 [0.96097627 0.33534357 0.14011016 ... 0.31211257 0.05761873 0.35872807]
 ...
 [0.94446634 0.94470696 0.26177127 ... 0.25845082 0.92723354 0.9162604 ]
 [0.95204508 0.47338191 0.06090975 ... 0.23428814 0.87261467 0.15879089]
 [0.05189116 0.74409996 0.25270494 ... 0.71068001 0.58741064 0.44194069]]
[[4.37174857e-01 2.41057813e-01 7.93456316e-01 ... 7.53632486e-01
  8.89139235e-01 0.00000000e+00]
 [3.35343570e-01 1.40110165e-01 6.23291790e-01 ... 5.76187335e-02
  3.58728081e-01 0.00000000e+00]
 [5.04994333e-01 8.71350057e-04 1.13818854e-01 ... 6.85880005e-01
  8.24093997e-01 0.00000000e+00]
 ...
 [4.73381907e-01 6.09097518e-02 2.18991861e-02 ... 8.72614682e-01
  1.58790886e-01 0.00000000e+00]
 [7.44099975e-01 2.52704948e-01 7.44353354e-01 ... 5.87410629e-01
  4.41940695e-01 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.000

In [10]:
N = np.random.rand(5,6)
height, width = N.shape
print(height, width)
BlockDim = (*N.shape,1)
print(BlockDim)
M = np.array(([0,0,0],[0,1,0],[0,0,0]))
print(M)

5 6
(5, 6, 1)
[[0 0 0]
 [0 1 0]
 [0 0 0]]
