In [63]:
from time import time
import numpy as np
from tqdm import tqdm
import torch


In [56]:
# Random fixtures for a quick sanity run of both implementations below:
# a 32x32 "image" and a single 3x3 kernel, both drawn from N(0, 1).
test_image = np.random.standard_normal((32, 32))
test_kernels = np.random.standard_normal((3, 3))

def naive_convolution(image, kernel):
	"""Slide ``kernel`` over ``image`` with explicit Python loops.

	Computes the "valid" (no padding, stride 1) sliding dot product: the
	kernel is applied as-is, without flipping, at every position where it
	fits entirely inside the image.

	Args:
		image: 2-D array of shape (height, width).
		kernel: 2-D array of shape (kernel_height, kernel_width).

	Returns:
		Float array of shape
		(height - kernel_height + 1, width - kernel_width + 1).
	"""
	img_rows, img_cols = image.shape
	k_rows, k_cols = kernel.shape
	out_shape = (img_rows - k_rows + 1, img_cols - k_cols + 1)

	result = np.zeros(out_shape)
	# Stride is fixed at 1: every valid top-left corner is visited.
	for top in range(out_shape[0]):
		bottom = top + k_rows
		for left in range(out_shape[1]):
			patch = image[top:bottom, left:left + k_cols]
			result[top, left] = (patch * kernel).sum()
	return result

def optimized_convolution(image, kernel):
	"""Vectorised counterpart of the loop-based sliding dot product.

	Builds a view holding one kernel-sized window per valid output
	position, flattens each window into a vector, multiplies it by the
	flattened kernel and sums along the last axis. No padding, stride 1,
	and the kernel is not flipped.

	Args:
		image: 2-D array of shape (height, width).
		kernel: 2-D array of shape (kernel_height, kernel_width).

	Returns:
		Array of shape
		(height - kernel_height + 1, width - kernel_width + 1).
	"""
	k_rows, k_cols = kernel.shape
	img_rows, img_cols = image.shape
	out_rows = img_rows - k_rows + 1
	out_cols = img_cols - k_cols + 1

	# Shape (out_rows, out_cols, k_rows, k_cols): one window per output cell.
	windows = np.lib.stride_tricks.sliding_window_view(image, (k_rows, k_cols))
	# Collapse each window to a vector so it lines up with the flat kernel.
	flat_windows = windows.reshape(out_rows, out_cols, -1)
	weights = kernel.flatten()
	return (flat_windows * weights).sum(axis=-1)


# Run both implementations on the same random fixture and check that they
# agree; the loop-based version serves as the reference for the vectorised one.
conv = naive_convolution(test_image, test_kernels)
optimized_conv = optimized_convolution(test_image, test_kernels)
np.testing.assert_allclose(conv, optimized_conv)

In [59]:
# Scratch check: a length-32 vector of ones (matches the output shown below).
np.ones(32)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [62]:
# Benchmark: for each square image size, record the mean wall-clock time over
# N_RUNS calls of the loop-based, the vectorised and the torch implementation.
# Each entry appended to `results` is
# (size, mean_naive_seconds, mean_optimized_seconds, mean_torch_seconds).
image_sizes = list(range(16, 257, 2))
kernel_size = (3, 3)
results = []
# Use the functional conv2d rather than an nn.Conv2d module: the module owns
# its own (float32) weights and its forward() accepts only the input tensor, so
# calling it as (image, kernel) fails. The functional form takes the kernel
# explicitly, and its defaults (stride=1, padding=0) match the numpy versions.
torch_convolution = torch.nn.functional.conv2d
N_RUNS = 10
for size in tqdm(image_sizes):
	image = np.random.randn(size, size)
	kernel = np.random.randn(*kernel_size)
	naive_times = []
	optimized_times = []
	torch_times = []
	for _ in range(N_RUNS):
		start_time = time()
		naive_convolution(image, kernel)
		naive_time = time() - start_time
		naive_times.append(naive_time)
	for _ in range(N_RUNS):
		start_time = time()
		optimized_convolution(image, kernel)
		optimized_time = time() - start_time
		optimized_times.append(optimized_time)

	# conv2d expects (batch, channels, H, W) input and
	# (out_channels, in_channels, kH, kW) weights; add the two leading axes.
	# Conversion happens outside the timed region so only the op is measured.
	torch_image = torch.from_numpy(image).unsqueeze(0).unsqueeze(0)
	torch_kernel = torch.from_numpy(kernel).unsqueeze(0).unsqueeze(0)
	for _ in range(N_RUNS):
		start_time = time()
		torch_convolution(torch_image, torch_kernel)
		torch_time = time() - start_time
		torch_times.append(torch_time)
	results.append((size, np.mean(naive_times), np.mean(optimized_times), np.mean(torch_times)))

100%|██████████| 121/121 [00:54<00:00,  2.23it/s]
