In [1]:
import torch
import torch.nn as nn
from torch.nn.functional import pad
import numpy as np

# MaxPool1d

In [2]:
# torch.nn.MaxPool1d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)

Applies a 1D max pooling over an input signal composed of several input planes.

- Input: $(N, C, L_{in})$ or $(C, L_{in})$, where N is the batch size, C is the number of features or channels, and L is the sequence length.

- Output: $(N, C, L_{out})$ or $(C, L_{out})$, where $L_{out}$ depends on $L_{in}$, padding, dilation, kernel_size and stride.

## 2D input data

In [3]:
number_feature = 4
sequence_length = 6

x  = torch.arange(number_feature * sequence_length, dtype=torch.double).reshape(number_feature, sequence_length)
print(x)

tensor([[ 0.,  1.,  2.,  3.,  4.,  5.],
        [ 6.,  7.,  8.,  9., 10., 11.],
        [12., 13., 14., 15., 16., 17.],
        [18., 19., 20., 21., 22., 23.]], dtype=torch.float64)


In [4]:
# kernel_size=3, stride=1, padding=0, dilation=1
max_pool_1d = nn.MaxPool1d(kernel_size=3, stride=1)
output = max_pool_1d(x)
print(output)  # for feature 0: [0,1,2] -> 2, [1,2,3] -> 3, [2,3,4] -> 4, [3,4,5] -> 5

tensor([[ 2.,  3.,  4.,  5.],
        [ 8.,  9., 10., 11.],
        [14., 15., 16., 17.],
        [20., 21., 22., 23.]], dtype=torch.float64)


In [5]:
# kernel_size=3, stride=2, padding=0, dilation=1
max_pool_1d_with_stride = nn.MaxPool1d(kernel_size=3, stride=2)
output_with_stride = max_pool_1d_with_stride(x)
print('stride=1: \n', output)
print('stride=2: \n', output_with_stride)  # for feature 0: [0,1,2] -> 2, [2,3,4] -> 4

stride=1: 
 tensor([[ 2.,  3.,  4.,  5.],
        [ 8.,  9., 10., 11.],
        [14., 15., 16., 17.],
        [20., 21., 22., 23.]], dtype=torch.float64)
stride=2: 
 tensor([[ 2.,  4.],
        [ 8., 10.],
        [14., 16.],
        [20., 22.]], dtype=torch.float64)


In [6]:
# kernel_size=3, stride=1, padding=1, dilation=1
max_pool_1d_with_padding = nn.MaxPool1d(kernel_size=3, stride=1, padding=1)
output_with_padding = max_pool_1d_with_padding(x)
print('padding=0: \n', output)
print('padding=1: \n', output_with_padding)  # for feature 0: [0,0,1] -> 1, [0,1,2] -> 2, [1,2,3] -> 3, [2,3,4] -> 4, [3,4,5] -> 5, [4,5,0] -> 5

padding=0: 
 tensor([[ 2.,  3.,  4.,  5.],
        [ 8.,  9., 10., 11.],
        [14., 15., 16., 17.],
        [20., 21., 22., 23.]], dtype=torch.float64)
padding=1: 
 tensor([[ 1.,  2.,  3.,  4.,  5.,  5.],
        [ 7.,  8.,  9., 10., 11., 11.],
        [13., 14., 15., 16., 17., 17.],
        [19., 20., 21., 22., 23., 23.]], dtype=torch.float64)


In [7]:
# kernel_size=3, stride=1, padding=0, dilation=2
max_pool_1d_with_dilation = nn.MaxPool1d(kernel_size=3, stride=1, padding=0, dilation=2)
output_with_dilation = max_pool_1d_with_dilation(x)
print('dilation=1: \n', output) # for feature 0 (dilation=1): [0,1,2] -> 2, [1,2,3] -> 3, [2,3,4] -> 4, [3,4,5] -> 5
print('dilation=2: \n', output_with_dilation)  # for feature 0 (dilation=2, two steps between elements, eg.[0,2,4]): [0,2,4] -> 4, [1,3,5] -> 5

dilation=1: 
 tensor([[ 2.,  3.,  4.,  5.],
        [ 8.,  9., 10., 11.],
        [14., 15., 16., 17.],
        [20., 21., 22., 23.]], dtype=torch.float64)
dilation=2: 
 tensor([[ 4.,  5.],
        [10., 11.],
        [16., 17.],
        [22., 23.]], dtype=torch.float64)


In [8]:
# kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True
max_pool_1d_ceil_mode = nn.MaxPool1d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
output_ceil_mode = max_pool_1d_ceil_mode(x)
print('stride=2 & ceil_mode=False: \n', output_with_stride)  # for feature 0: [0,1,2] -> 2, [2,3,4] -> 4)
print('stride=2 & ceil_mode=True: \n', output_ceil_mode)  # for feature 0: [0,1,2] -> 2, [2,3,4] -> 4, [4,5] -> 5

stride=2 & ceil_mode=False: 
 tensor([[ 2.,  4.],
        [ 8., 10.],
        [14., 16.],
        [20., 22.]], dtype=torch.float64)
stride=2 & ceil_mode=True: 
 tensor([[ 2.,  4.,  5.],
        [ 8., 10., 11.],
        [14., 16., 17.],
        [20., 22., 23.]], dtype=torch.float64)




## 3D input data

use nn.MaxPool1d on each 2D tensor.

In [9]:
sample_size = 3
x_3d  = torch.arange(sample_size * number_feature * sequence_length, 
                     dtype=torch.double).reshape(sample_size, number_feature, sequence_length)
print(x_3d)

tensor([[[ 0.,  1.,  2.,  3.,  4.,  5.],
         [ 6.,  7.,  8.,  9., 10., 11.],
         [12., 13., 14., 15., 16., 17.],
         [18., 19., 20., 21., 22., 23.]],

        [[24., 25., 26., 27., 28., 29.],
         [30., 31., 32., 33., 34., 35.],
         [36., 37., 38., 39., 40., 41.],
         [42., 43., 44., 45., 46., 47.]],

        [[48., 49., 50., 51., 52., 53.],
         [54., 55., 56., 57., 58., 59.],
         [60., 61., 62., 63., 64., 65.],
         [66., 67., 68., 69., 70., 71.]]], dtype=torch.float64)


In [10]:
# kernel_size=3, stride=1, padding=0, dilation=1
output_3d = max_pool_1d(x_3d)
print(output_3d)

tensor([[[ 2.,  3.,  4.,  5.],
         [ 8.,  9., 10., 11.],
         [14., 15., 16., 17.],
         [20., 21., 22., 23.]],

        [[26., 27., 28., 29.],
         [32., 33., 34., 35.],
         [38., 39., 40., 41.],
         [44., 45., 46., 47.]],

        [[50., 51., 52., 53.],
         [56., 57., 58., 59.],
         [62., 63., 64., 65.],
         [68., 69., 70., 71.]]], dtype=torch.float64)


In [11]:
# kernel_size=3, stride=2, padding=0, dilation=1
output_with_stride_3d = max_pool_1d_with_stride(x_3d)
print('stride=1: \n', output_3d)
print('stride=2: \n', output_with_stride_3d)

stride=1: 
 tensor([[[ 2.,  3.,  4.,  5.],
         [ 8.,  9., 10., 11.],
         [14., 15., 16., 17.],
         [20., 21., 22., 23.]],

        [[26., 27., 28., 29.],
         [32., 33., 34., 35.],
         [38., 39., 40., 41.],
         [44., 45., 46., 47.]],

        [[50., 51., 52., 53.],
         [56., 57., 58., 59.],
         [62., 63., 64., 65.],
         [68., 69., 70., 71.]]], dtype=torch.float64)
stride=2: 
 tensor([[[ 2.,  4.],
         [ 8., 10.],
         [14., 16.],
         [20., 22.]],

        [[26., 28.],
         [32., 34.],
         [38., 40.],
         [44., 46.]],

        [[50., 52.],
         [56., 58.],
         [62., 64.],
         [68., 70.]]], dtype=torch.float64)


In [12]:
# kernel_size=3, stride=1, padding=1, dilation=1
output_with_padding_3d = max_pool_1d_with_padding(x_3d)
print('padding=1: \n', output_3d)
print('padding=2: \n', output_with_padding_3d)

padding=1: 
 tensor([[[ 2.,  3.,  4.,  5.],
         [ 8.,  9., 10., 11.],
         [14., 15., 16., 17.],
         [20., 21., 22., 23.]],

        [[26., 27., 28., 29.],
         [32., 33., 34., 35.],
         [38., 39., 40., 41.],
         [44., 45., 46., 47.]],

        [[50., 51., 52., 53.],
         [56., 57., 58., 59.],
         [62., 63., 64., 65.],
         [68., 69., 70., 71.]]], dtype=torch.float64)
padding=2: 
 tensor([[[ 1.,  2.,  3.,  4.,  5.,  5.],
         [ 7.,  8.,  9., 10., 11., 11.],
         [13., 14., 15., 16., 17., 17.],
         [19., 20., 21., 22., 23., 23.]],

        [[25., 26., 27., 28., 29., 29.],
         [31., 32., 33., 34., 35., 35.],
         [37., 38., 39., 40., 41., 41.],
         [43., 44., 45., 46., 47., 47.]],

        [[49., 50., 51., 52., 53., 53.],
         [55., 56., 57., 58., 59., 59.],
         [61., 62., 63., 64., 65., 65.],
         [67., 68., 69., 70., 71., 71.]]], dtype=torch.float64)


In [13]:
# kernel_size=3, stride=1, padding=0, dilation=2
output_with_dilation_3d = max_pool_1d_with_dilation(x_3d)
print('dilation=1: \n', output_3d)
print('dilation=2: \n', output_with_dilation_3d)

dilation=1: 
 tensor([[[ 2.,  3.,  4.,  5.],
         [ 8.,  9., 10., 11.],
         [14., 15., 16., 17.],
         [20., 21., 22., 23.]],

        [[26., 27., 28., 29.],
         [32., 33., 34., 35.],
         [38., 39., 40., 41.],
         [44., 45., 46., 47.]],

        [[50., 51., 52., 53.],
         [56., 57., 58., 59.],
         [62., 63., 64., 65.],
         [68., 69., 70., 71.]]], dtype=torch.float64)
dilation=2: 
 tensor([[[ 4.,  5.],
         [10., 11.],
         [16., 17.],
         [22., 23.]],

        [[28., 29.],
         [34., 35.],
         [40., 41.],
         [46., 47.]],

        [[52., 53.],
         [58., 59.],
         [64., 65.],
         [70., 71.]]], dtype=torch.float64)


In [14]:
# kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True
output_ceil_mode_3d = max_pool_1d_ceil_mode(x_3d)
print('stride=2 & ceil_mode=False: \n', output_with_stride_3d)
print('stride=2 & ceil_mode=True: \n', output_ceil_mode_3d)

stride=2 & ceil_mode=False: 
 tensor([[[ 2.,  4.],
         [ 8., 10.],
         [14., 16.],
         [20., 22.]],

        [[26., 28.],
         [32., 34.],
         [38., 40.],
         [44., 46.]],

        [[50., 52.],
         [56., 58.],
         [62., 64.],
         [68., 70.]]], dtype=torch.float64)
stride=2 & ceil_mode=True: 
 tensor([[[ 2.,  4.,  5.],
         [ 8., 10., 11.],
         [14., 16., 17.],
         [20., 22., 23.]],

        [[26., 28., 29.],
         [32., 34., 35.],
         [38., 40., 41.],
         [44., 46., 47.]],

        [[50., 52., 53.],
         [56., 58., 59.],
         [62., 64., 65.],
         [68., 70., 71.]]], dtype=torch.float64)


## how does `MaxPool1d` work with 2D input

In [15]:
def pool1d_max(X: torch.tensor, kernel_size: int, stride: int=1, padding: int=0):
    """max pooling with 2D input"""
    # output shape
    h, w = X.shape[0], np.ceil((X.shape[-1] - kernel_size + 2 * padding + 1) / stride)
    Y = torch.zeros(size=(h, int(w)))
    # X with padding    
    x = pad(X, pad=(padding, padding, 0, 0), mode='constant', value=0)
    # get output
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = x[i, stride * j: stride * j + kernel_size].max()
    return Y

In [16]:
print('MaxPool1d output(kernek_size=3): \n', output)
print('pool1d_max output(kernek_size=3): \n', pool1d_max(x, kernel_size=3))

MaxPool1d output(kernek_size=3): 
 tensor([[ 2.,  3.,  4.,  5.],
        [ 8.,  9., 10., 11.],
        [14., 15., 16., 17.],
        [20., 21., 22., 23.]], dtype=torch.float64)
pool1d_max output(kernek_size=3): 
 tensor([[ 2.,  3.,  4.,  5.,  5.],
        [ 8.,  9., 10., 11., 11.],
        [14., 15., 16., 17., 17.],
        [20., 21., 22., 23., 23.]])


In [17]:
print('MaxPool1d output(stride=2): \n', output_with_stride)
print('pool1d_max output(stride=2): \n', pool1d_max(x, kernel_size=3, stride=2))

MaxPool1d output(stride=2): 
 tensor([[ 2.,  4.],
        [ 8., 10.],
        [14., 16.],
        [20., 22.]], dtype=torch.float64)
pool1d_max output(stride=2): 
 tensor([[ 2.,  4.,  5.],
        [ 8., 10., 11.],
        [14., 16., 17.],
        [20., 22., 23.]])


In [18]:
print('MaxPool1d output(padding=1): \n', output_with_padding)
print('pool1d_max output(padding=1): \n', pool1d_max(x, kernel_size=3, stride=1, padding=1))

MaxPool1d output(padding=1): 
 tensor([[ 1.,  2.,  3.,  4.,  5.,  5.],
        [ 7.,  8.,  9., 10., 11., 11.],
        [13., 14., 15., 16., 17., 17.],
        [19., 20., 21., 22., 23., 23.]], dtype=torch.float64)
pool1d_max output(padding=1): 
 tensor([[ 1.,  2.,  3.,  4.,  5.,  5.,  5.],
        [ 7.,  8.,  9., 10., 11., 11., 11.],
        [13., 14., 15., 16., 17., 17., 17.],
        [19., 20., 21., 22., 23., 23., 23.]])


# MaxPool2d

In [19]:
# torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)

Applies a 2D max pooling over an input signal composed of several input planes.

- Input: $ (N, C, H_{in}, W_{in}) $ or $ (C, H_{in}, W_{in}) $
- Output: $ (N, C, H_{out}, W_{out}) $ or $ (C, H_{out}, W_{out}) $, where $H_{out}$ and $W_{out}$ depends on $H_{in}$, $W_{in}$, padding, dilation, kernel_size and stride.

In [20]:
print('input: \n', x_3d)

input: 
 tensor([[[ 0.,  1.,  2.,  3.,  4.,  5.],
         [ 6.,  7.,  8.,  9., 10., 11.],
         [12., 13., 14., 15., 16., 17.],
         [18., 19., 20., 21., 22., 23.]],

        [[24., 25., 26., 27., 28., 29.],
         [30., 31., 32., 33., 34., 35.],
         [36., 37., 38., 39., 40., 41.],
         [42., 43., 44., 45., 46., 47.]],

        [[48., 49., 50., 51., 52., 53.],
         [54., 55., 56., 57., 58., 59.],
         [60., 61., 62., 63., 64., 65.],
         [66., 67., 68., 69., 70., 71.]]], dtype=torch.float64)


In [21]:
# kernel_size=3 (same as kernel_size=(3,3))
max_pool_2d = nn.MaxPool2d(kernel_size=3, stride=1)
output_2d_pool = max_pool_2d(x_3d)
print(output_2d_pool) # [[0,1,2],[6,7,8],[12,13,14]] -> 14, [[1,2,3],[7,8,9],[13,14,15]] -> 15, ...

tensor([[[14., 15., 16., 17.],
         [20., 21., 22., 23.]],

        [[38., 39., 40., 41.],
         [44., 45., 46., 47.]],

        [[62., 63., 64., 65.],
         [68., 69., 70., 71.]]], dtype=torch.float64)


In [22]:
# kernel_size=(3,2)
max_pool_2d_with_size = nn.MaxPool2d(kernel_size=(3, 2), stride=1)
output_2d_pool_with_size = max_pool_2d_with_size(x_3d)
print(output_2d_pool_with_size) # [[0,1],[6,7],[12,13]] -> 13, [[1,2],[7,8],[13,14]] -> 14, ...

tensor([[[13., 14., 15., 16., 17.],
         [19., 20., 21., 22., 23.]],

        [[37., 38., 39., 40., 41.],
         [43., 44., 45., 46., 47.]],

        [[61., 62., 63., 64., 65.],
         [67., 68., 69., 70., 71.]]], dtype=torch.float64)


In [23]:
# stride=(2,1), which means take two steps to the bottom and one step to the left each time)
max_pool_2d_with_stride = nn.MaxPool2d(kernel_size=3, stride=(1,2))
output_2d_pool_with_stride = max_pool_2d_with_stride(x_3d)
print(output_2d_pool_with_stride) # [[0,1,2],[6,7,8],[12,13,14]] -> 14, [[2,3,4],[8,9,10],[14,15,16]] -> 16, ...

tensor([[[14., 16.],
         [20., 22.]],

        [[38., 40.],
         [44., 46.]],

        [[62., 64.],
         [68., 70.]]], dtype=torch.float64)


In [24]:
# padding=(1,0), which means add one column at left and right edge, and add zero column at top and bottom edge
max_pool_2d_with_padding = nn.MaxPool2d(kernel_size=3, stride=1, padding=(1,0))
output_2d_pool_with_padding = max_pool_2d_with_padding(x_3d)
print(output_2d_pool_with_padding) # [[0,0,0],[0,0,1],[0,6,7]] -> 7, [[0,0,0],[0,1,2],[6,7,8]] -> 8, ...

tensor([[[ 8.,  9., 10., 11.],
         [14., 15., 16., 17.],
         [20., 21., 22., 23.],
         [20., 21., 22., 23.]],

        [[32., 33., 34., 35.],
         [38., 39., 40., 41.],
         [44., 45., 46., 47.],
         [44., 45., 46., 47.]],

        [[56., 57., 58., 59.],
         [62., 63., 64., 65.],
         [68., 69., 70., 71.],
         [68., 69., 70., 71.]]], dtype=torch.float64)


In [25]:
# dilation=(1,2), which means one step between horizontal elements and two steps between vertical elements
max_pool_2d_with_dilation = nn.MaxPool2d(kernel_size=3, stride=1, dilation=(1,2))
output_2d_pool_with_dilation = max_pool_2d_with_dilation(x_3d)
print(output_2d_pool_with_dilation) # [[0,2,4],[6,8,10],[12,14,16]] -> 16, [[1,3,5],[7,9,11],[13,15,17]] -> 17, ...

tensor([[[16., 17.],
         [22., 23.]],

        [[40., 41.],
         [46., 47.]],

        [[64., 65.],
         [70., 71.]]], dtype=torch.float64)


In [26]:
# ceil_mode=True, which means use ceil instead of floor to compute the output shape
max_pool_2d_with_ceil_mode = nn.MaxPool2d(kernel_size=3, stride=(1,2), ceil_mode=True)
output_2d_pool_with_ceil_mode = max_pool_2d_with_ceil_mode(x_3d)
print('stride=(1,2) & cei_mode=False: \n', output_2d_pool_with_stride) # [[0,1,2],[6,7,8],[12,13,14]] -> 14, [[2,3,4],[8,9,10],[14,15,16]] -> 16, ...
print('stride=(1,2) & cei_mode=True: \n', output_2d_pool_with_ceil_mode) # [[0,1,2],[6,7,8],[12,13,14]] -> 14, [[2,3,4],[8,9,10],[14,15,16]] -> 16, [[4,5],[10,11],[16,17]] -> 17, ...

stride=(1,2) & cei_mode=False: 
 tensor([[[14., 16.],
         [20., 22.]],

        [[38., 40.],
         [44., 46.]],

        [[62., 64.],
         [68., 70.]]], dtype=torch.float64)
stride=(1,2) & cei_mode=True: 
 tensor([[[14., 16., 17.],
         [20., 22., 23.]],

        [[38., 40., 41.],
         [44., 46., 47.]],

        [[62., 64., 65.],
         [68., 70., 71.]]], dtype=torch.float64)


