## Задание 1.
Реализовать функцию свертки по формуле (*), позволяющую задавать размер и количество фильтров, входной тензор, stride и т. д.
Придумать тест для проверки.

$O[m][x][y] = \sum_{i=0}^{R-1}\sum_{j=0}^{S-1}\sum_{k=0}^{C-1} I[k][x+i][y+j] \times W[m][k][i][j] \qquad (*)$

In [1]:
import numpy as np

def convolution(input_tensor, weights, stride=1):
    """
        Direct multi-channel 2D convolution without padding.

        Evaluates, for every filter m and output position (x, y):
            O[m][x][y] = sum over i, j, k of
                         I[k][stride*x + i][stride*y + j] * W[m][k][i][j]
        (the kernel is not flipped, i.e. this is a cross-correlation).

        Parameters:
        - input_tensor: array of shape (C, H_in, W_in).
        - weights: array of shape (M, C, R, S).
        - stride: step between neighbouring windows along both spatial axes.

        Returns:
        - output_tensor: float array of shape (M, H_out, W_out), where
          H_out = (H_in - R) // stride + 1 and W_out = (W_in - S) // stride + 1.

        Raises:
        - ValueError: if input_tensor and weights disagree on the number
          of channels.
    """
    C, H_in, W_in = input_tensor.shape
    M, C_w, R, S = weights.shape
    if C_w != C:
        raise ValueError(
            f"channel mismatch: input has {C} channels, weights expect {C_w}"
        )

    H_out = (H_in - R) // stride + 1
    W_out = (W_in - S) // stride + 1

    output_tensor = np.zeros((M, H_out, W_out))

    # x walks the output rows (height axis), y the output columns (width
    # axis); mixing the two up breaks every non-square input.
    for x in range(H_out):
        top = stride * x
        for y in range(W_out):
            left = stride * y
            window = input_tensor[:, top:top + R, left:left + S]
            # Contract the (C, R, S) axes of every filter with the window,
            # which yields one value per filter.
            output_tensor[:, x, y] = np.tensordot(weights, window, axes=3)

    return output_tensor

# Demo run: two 3x3 filters slide over a 3-channel 5x5 input with step 2.
stride = 2
input_tensor = np.random.random_sample((3, 5, 5))  # (channels, height, width)
weights = np.random.random_sample((2, 3, 3, 3))    # (filters, channels, kernel height, kernel width)

output_tensor = convolution(input_tensor, weights, stride=stride)

print("Размер выходного тензора:", output_tensor.shape)

Размер выходного тензора: (2, 2, 2)


In [2]:
from scipy.signal import convolve

# Build a reference result with scipy's convolve and compare against it.
# scipy performs a true convolution (it flips the kernel), so the kernel is
# pre-flipped along both axes to obtain the cross-correlation that
# `convolution` computes; the stride is emulated by subsampling the
# stride-1 'valid' output.
expected_output = np.zeros_like(output_tensor)
n_filters = output_tensor.shape[0]
n_channels = input_tensor.shape[0]
for m in range(n_filters):
    for k in range(n_channels):
        flipped_kernel = weights[m, k, ::-1, ::-1]
        dense_response = convolve(input_tensor[k], flipped_kernel, mode='valid')
        expected_output[m] += dense_response[::stride, ::stride]

# Both implementations must agree up to floating-point tolerance.
assert np.allclose(output_tensor, expected_output)
print("Тест пройден успешно!")

Тест пройден успешно!


## Задание 2.

Написать функцию, реализующую сверточный слой через im2col. Сделать проверку результата с помощью прямой реализации свертки.

Источник: https://hackmd.io/@machine-learning/blog-post-cnnumpy-fast

In [3]:
def get_indices(X_shape, HF, WF, stride, pad):
    """
        Returns index matrices in order to transform our input image into a matrix.

        Each returned matrix has one row per element of a flattened
        (channel, filter row, filter column) patch; i and j have one column
        per output position (row-major over the output grid), d has a
        single column and is broadcast against them when indexing.

        Parameters:
        -X_shape: Input image shape, (batch, channels, height, width).
        -HF: filter height.
        -WF: filter width.
        -stride: stride value.
        -pad: padding value.

        Returns:
        -i: matrix of row indices into the padded image.
        -j: matrix of column indices into the padded image.
        -d: matrix of channel indices.
            (Use to mark delimitation for each channel
            during multi-dimensional arrays indexing).
    """
    # The batch size does not influence the index matrices.
    _, n_C, n_H, n_W = X_shape

    # Output size. Floor division (not truncation toward zero) so that a
    # filter larger than the padded image yields zero windows instead of one
    # out-of-bounds window; clamped at zero so the empty case stays valid.
    out_h = max(int((n_H + 2 * pad - HF) // stride) + 1, 0)
    out_w = max(int((n_W + 2 * pad - WF) // stride) + 1, 0)

    # ----Compute matrix of index i----

    # Row offset of every element inside one filter window.
    level1 = np.repeat(np.arange(HF), WF)
    # Duplicate for the other channels.
    level1 = np.tile(level1, n_C)
    # Top row of every window: grows by `stride` with each output row.
    everyLevels = stride * np.repeat(np.arange(out_h), out_w)
    # Row index of every patch element for every window.
    i = level1.reshape(-1, 1) + everyLevels.reshape(1, -1)

    # ----Compute matrix of index j----

    # Column offset of every element inside one filter window.
    slide1 = np.tile(np.arange(WF), HF)
    # Duplicate for the other channels.
    slide1 = np.tile(slide1, n_C)
    # Left column of every window: grows by `stride` with each output column.
    everySlides = stride * np.tile(np.arange(out_w), out_h)
    # Column index of every patch element for every window.
    j = slide1.reshape(-1, 1) + everySlides.reshape(1, -1)

    # ----Compute matrix of index d----

    # This is to mark delimitation for each channel
    # during multi-dimensional arrays indexing.
    d = np.repeat(np.arange(n_C), HF * WF).reshape(-1, 1)

    return i, j, d

def im2col(X, HF, WF, stride, pad):
    """
        Unrolls every filter-sized window of a batch of images into a column.

        Parameters:
        - X: input images, 4-D array (batch, channels, height, width).
        - HF: filter height.
        - WF: filter width.
        - stride: stride value.
        - pad: number of zeros added on each side of both spatial axes.

        Returns:
        - 2-D matrix whose columns are the flattened windows; the windows
          of the individual batch items are placed side by side.
    """
    # Only the two spatial axes are padded; batch and channel axes stay as is.
    untouched = (0, 0)
    spatial = (pad, pad)
    X_padded = np.pad(X, (untouched, untouched, spatial, spatial), mode='constant')

    row_idx, col_idx, channel_idx = get_indices(X.shape, HF, WF, stride, pad)

    # Fancy indexing gathers all windows at once: one 2-D block per image.
    per_image_blocks = X_padded[:, channel_idx, row_idx, col_idx]

    # Iterating the 3-D result yields the per-image blocks, which are then
    # joined along their column axis.
    return np.concatenate(per_image_blocks, axis=-1)

def im2col_convolution(input_tensor, weights, stride=1, padding=0):
    """
        Convolutional layer computed as a single matrix product via im2col.

        Parameters:
        - input_tensor: array of shape (C, H_in, W_in).
        - weights: array of shape (M, C, R, S).
        - stride: stride value.
        - padding: number of zeros added on each side of the input's
          spatial axes.

        Returns:
        - array of shape (M, n_H, n_W), where
          n_H = (H_in + 2 * padding - R) // stride + 1 and
          n_W = (W_in + 2 * padding - S) // stride + 1.

        Raises:
        - ValueError: if input_tensor and weights disagree on the number
          of channels.
    """
    C, H_in, W_in = input_tensor.shape
    M, C_w, R, S = weights.shape
    if C_w != C:
        raise ValueError(
            f"channel mismatch: input has {C} channels, weights expect {C_w}"
        )

    n_H = (H_in + 2 * padding - R) // stride + 1
    n_W = (W_in + 2 * padding - S) // stride + 1

    # Flatten each filter in (channel, row, column) order, which is the row
    # order of the im2col matrix. The filters must NOT be sent through
    # im2col with the layer's stride/padding: that would zero-pad the
    # kernels themselves and break the result for any padding > 0.
    kernel_matrix = weights.reshape(M, C * R * S)

    # A leading batch axis of size 1 is added because im2col expects 4-D input.
    patch_matrix = im2col(input_tensor[None], R, S, stride, padding)

    return (kernel_matrix @ patch_matrix).reshape(M, n_H, n_W)

In [4]:
# Cross-check: the im2col-based layer must reproduce the direct convolution
# result up to floating-point tolerance.
output_im2col = im2col_convolution(input_tensor, weights, stride=stride)
assert np.allclose(output_tensor, output_im2col)
print("Тест пройден успешно!")

Тест пройден успешно!
