In [1]:
// Point cling at the LibTorch headers. The paths are relative (presumably to the
// notebook's working directory - adjust them to wherever libtorch is unpacked).
#pragma cling add_include_path("../../libtorch/include")
#pragma cling add_include_path("../../libtorch/include/torch/csrc/api/include")
// Register the directory holding the LibTorch shared libraries, then load libtorch.
#pragma cling add_library_path("../../libtorch/lib")
#pragma cling load("libtorch")

In [2]:
#include <cstdint>
#include <iostream>
#include <tuple>
#include <vector>

#include <ATen/ATen.h>
#include <torch/torch.h>
namespace nn = torch::nn;

# BatchNorm1d

Applies Batch Normalization over a 2D or 3D input, as described in the paper [*Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift*](https://arxiv.org/abs/1502.03167).

* Input: (N, C) or (N, C, L), where N is the batch size, C is the number of features or channels, and L is the sequence length

* Output: (N, C) or (N, C, L) (same shape as input)

In [3]:
// Reference: BatchNorm module header of the LibTorch C++ frontend
// https://github.com/pytorch/pytorch/blob/master/torch/csrc/api/include/torch/nn/modules/batchnorm.h

// Reference: the matching BatchNorm options header
// https://github.com/pytorch/pytorch/blob/master/torch/csrc/api/include/torch/nn/options/batchnorm.h

## a) the input data is 2D

In [4]:
// Toy batch: 3 samples (rows) with 4 features (columns) each.
int sample_size = 3;
int number_features = 4;

// arange() over an integer count yields integer values, so convert to float
// before feeding the tensor to the layer.
torch::Tensor x = torch::arange(sample_size * number_features)
                      .reshape({sample_size, number_features})
                      .to(torch::kFloat);

In [5]:
// Show the (sample_size x number_features) input: one row per sample, one column per feature.
std::cout << x << std::endl;

  0   1   2   3
  4   5   6   7
  8   9  10  11
[ CPUFloatType{3,4} ]


In [6]:
auto options = nn::BatchNorm1dOptions(/*num_features*/4).eps(0.0).momentum(1.0).affine(true).track_running_stats(true);

In [7]:
// Build the layer from the options above. Its members (forward, running_mean, ...)
// are reached with `->` in the cells that follow.
nn::BatchNorm1d  batch_norm_1d(options);

In [8]:
// Not executed: construction without options.
// NOTE(review): presumably this does not compile because num_features is required - confirm, or delete this cell.
//nn::BatchNorm1d batch_norm_1d{}

In [9]:
// Forward pass before eval() has been called: the layer normalizes with the
// statistics of this batch and updates running_mean / running_var as a side effect.
torch::Tensor output = batch_norm_1d->forward(x);

In [10]:
// Every feature column is now centred on zero; the same normalized values appear
// in each column because all columns of x have the same spread.
std::cout << output << std::endl;

-1.2247 -1.2247 -1.2247 -1.2247
 0.0000  0.0000  0.0000  0.0000
 1.2247  1.2247  1.2247  1.2247
[ CPUFloatType{3,4} ]


In [11]:
// Inspect the statistics the layer stored while processing the 2D batch.
std::cout << "after process data " << std::endl
          << batch_norm_1d->running_mean << std::endl
          << batch_norm_1d->running_var << std::endl;

after process data 
 4
 5
 6
 7
[ CPUFloatType{4} ]
 16
 16
 16
 16
[ CPUFloatType{4} ]


## how to reimplement it?

In [12]:
// Per-feature mean over the batch axis; keepdim leaves a (1, C) row that broadcasts against x.
std::cout << x.mean(/*dim=*/0, /*keepdim=*/true) << std::endl;

 4  5  6  7
[ CPUFloatType{1,4} ]


In [13]:
// Per-feature unbiased variance over the batch axis (divides by N - 1).
// This is the value the layer stored in running_var.
std::cout << x.var(/*dim=*/0, /*unbiased=*/true, /*keepdim=*/false) << std::endl;

 16
 16
 16
 16
[ CPUFloatType{4} ]


In [14]:
// The layer normalizes with the *biased* standard deviation (divide by N, not N - 1).
// Passing unbiased=false is what makes this manual version reproduce the layer's
// forward output; the unbiased estimate only matches the stored running_var.
std::cout << (x - x.mean(/*dim=*/0, /*keepdim=*/true))
                 / x.std(/*dim=*/0, /*unbiased=*/false, /*keepdim=*/true)
          << std::endl;

-1.2247 -1.2247 -1.2247 -1.2247
 0.0000  0.0000  0.0000  0.0000
 1.2247  1.2247  1.2247  1.2247
[ CPUFloatType{3,4} ]


## b) the input is 3d data

In [15]:
int sample_size = 3;
int number_features = 4;
int sequence_length = 5;

torch::Tensor x = torch::arange(sample_size*number_features*sequence_length).reshape({sample_size,number_features,sequence_length}) * 1.0;

In [16]:
// Show the (N, C, L) input: one slice per sample, one row per channel.
std::cout << x << std::endl;

(1,.,.) = 
   0   1   2   3   4
   5   6   7   8   9
  10  11  12  13  14
  15  16  17  18  19

(2,.,.) = 
  20  21  22  23  24
  25  26  27  28  29
  30  31  32  33  34
  35  36  37  38  39

(3,.,.) = 
  40  41  42  43  44
  45  46  47  48  49
  50  51  52  53  54
  55  56  57  58  59
[ CPUFloatType{3,4,5} ]


In [17]:
// Run the same layer on the 3D batch. eval() has not been called yet, so the
// running statistics are refreshed again from this batch.
output = batch_norm_1d->forward(x);

In [18]:
// Normalized 3D result; it has the same shape as the input.
std::cout << output << std::endl;

(1,.,.) = 
 -1.3422 -1.2812 -1.2202 -1.1592 -1.0982
 -1.3422 -1.2812 -1.2202 -1.1592 -1.0982
 -1.3422 -1.2812 -1.2202 -1.1592 -1.0982
 -1.3422 -1.2812 -1.2202 -1.1592 -1.0982

(2,.,.) = 
 -0.1220 -0.0610  0.0000  0.0610  0.1220
 -0.1220 -0.0610 -0.0000  0.0610  0.1220
 -0.1220 -0.0610  0.0000  0.0610  0.1220
 -0.1220 -0.0610 -0.0000  0.0610  0.1220

(3,.,.) = 
  1.0982  1.1592  1.2202  1.2812  1.3422
  1.0982  1.1592  1.2202  1.2812  1.3422
  1.0982  1.1592  1.2202  1.2812  1.3422
  1.0982  1.1592  1.2202  1.2812  1.3422
[ CPUFloatType{3,4,5} ]


In [19]:
// Inspect the statistics the layer stored while processing the 3D batch.
std::cout << "after process data " << std::endl
          << batch_norm_1d->running_mean << std::endl
          << batch_norm_1d->running_var << std::endl;

after process data 
 22
 27
 32
 37
[ CPUFloatType{4} ]
 287.8571
 287.8571
 287.8571
 287.8571
[ CPUFloatType{4} ]


## how to reimplement it?

In [20]:
// Print the dimensions of x as reported by sizes().
at::IntArrayRef shape = x.sizes();
std::cout << shape;

[3, 4, 5]

In [21]:
// Reduce over the batch axis (0) and the sequence axis (2), leaving one statistic per channel.
// An ArrayRef does not own its elements: building it straight from a braced list leaves
// it pointing at a temporary that is gone once the declaration ends, yet `dim` is used
// by later cells. Keep the indices in a vector that lives as long as the view does.
std::vector<int64_t> dim_storage{0, 2};
at::IntArrayRef dim(dim_storage);
std::cout << dim;

[0, 2]

In [22]:
// Per-channel mean over batch and sequence; keepdim yields a (1, C, 1) tensor that broadcasts against x.
std::cout << x.mean(dim, /*keepdim=*/true) << std::endl;

(1,.,.) = 
  22
  27
  32
  37
[ CPUFloatType{1,4,1} ]


In [23]:
// Per-channel unbiased variance. The second argument of var() is `unbiased`, not
// `keepdim`, which is why the result is a flat tensor with one entry per channel.
std::cout << x.var(dim, /*unbiased=*/true) << std::endl;

 287.8571
 287.8571
 287.8571
 287.8571
[ CPUFloatType{4} ]


In [24]:
// Same unbiased variance, kept in (1, C, 1) form so it can broadcast against x.
std::cout << x.var(dim, /*unbiased=*/true, /*keepdim=*/true);

(1,.,.) = 
  287.8571
  287.8571
  287.8571
  287.8571
[ CPUFloatType{1,4,1} ]

In [25]:
// The layer normalizes with the *biased* per-channel standard deviation (divide by
// N * L, not N * L - 1). Passing unbiased=false is what makes this manual version
// reproduce the layer's forward output; keepdim keeps the (1, C, 1) shape needed
// for broadcasting, so no unsqueeze calls are required.
std::cout << (x - x.mean(dim, /*keepdim=*/true))
                 / x.std(dim, /*unbiased=*/false, /*keepdim=*/true)
          << std::endl;

(1,.,.) = 
 -1.3422 -1.2812 -1.2202 -1.1592 -1.0982
 -1.3422 -1.2812 -1.2202 -1.1592 -1.0982
 -1.3422 -1.2812 -1.2202 -1.1592 -1.0982
 -1.3422 -1.2812 -1.2202 -1.1592 -1.0982

(2,.,.) = 
 -0.1220 -0.0610  0.0000  0.0610  0.1220
 -0.1220 -0.0610  0.0000  0.0610  0.1220
 -0.1220 -0.0610  0.0000  0.0610  0.1220
 -0.1220 -0.0610  0.0000  0.0610  0.1220

(3,.,.) = 
  1.0982  1.1592  1.2202  1.2812  1.3422
  1.0982  1.1592  1.2202  1.2812  1.3422
  1.0982  1.1592  1.2202  1.2812  1.3422
  1.0982  1.1592  1.2202  1.2812  1.3422
[ CPUFloatType{3,4,5} ]


# what happens in eval mode?

In [26]:
// Overwrite the stored mean with a known constant so the eval-mode result is easy to check.
batch_norm_1d->running_mean.data().fill_(10.0);
std::cout << batch_norm_1d->running_mean << std::endl;

 10
 10
 10
 10
[ CPUFloatType{4} ]


In [27]:
// Overwrite the stored variance with 1 so eval-mode normalization reduces to a plain shift.
batch_norm_1d->running_var.data().fill_(1.0);
std::cout << batch_norm_1d->running_var << std::endl;

 1
 1
 1
 1
[ CPUFloatType{4} ]


In [28]:
// Switch the layer to evaluation mode before the next forward pass.
batch_norm_1d->eval();

In [29]:
// In eval mode the result below equals x - 10: the layer applies the stored
// running_mean (10) and running_var (1) instead of the statistics of this batch.
output = batch_norm_1d->forward(x);
std::cout << output << std::endl;

(1,.,.) = 
 -10  -9  -8  -7  -6
  -5  -4  -3  -2  -1
   0   1   2   3   4
   5   6   7   8   9

(2,.,.) = 
  10  11  12  13  14
  15  16  17  18  19
  20  21  22  23  24
  25  26  27  28  29

(3,.,.) = 
  30  31  32  33  34
  35  36  37  38  39
  40  41  42  43  44
  45  46  47  48  49
[ CPUFloatType{3,4,5} ]
