In [8]:
import numpy as np


class LayerNormalization:
    """Layer normalization over the last (feature) axis of the input.

    Each feature vector is standardized to zero mean and unit variance and
    then passed through an element-wise affine transform
    ``gamma * x_hat + beta``.

    ``gamma`` and ``beta`` are created lazily on the first ``forward`` call
    with shape ``(input_size,)``, so the same layer can be reused for inputs
    with different batch sizes or sequence lengths. Either attribute may be
    assigned beforehand with any array that broadcasts against the input.
    """

    def __init__(self, epsilon = 1e-9):
        """Create the layer.

        Args:
            epsilon: Small constant added to the variance before the square
                root, to avoid division by zero for constant feature vectors.
        """
        self.epsilon = epsilon
        # Scale / shift parameters; initialized on the first forward pass,
        # once the feature dimension is known.
        self.gamma = None
        self.beta = None

    def forward(self, X):
        """Normalize ``X`` along its last axis and apply scale and shift.

        Args:
            X: Array-like whose last axis is the feature axis, e.g. shape
                ``[batch, seq_len, input_size]``.

        Returns:
            ``numpy.ndarray`` with the same shape as ``X``.
        """
        X = np.asarray(X)
        mean = np.mean(X, axis=-1, keepdims=True)
        var = np.var(X, axis=-1, keepdims=True)

        # Normalize
        X_hat = (X - mean) / np.sqrt(var + self.epsilon)

        # Scale and shift. The parameters are per-feature (last axis only):
        # tying them to the full shape of the first input would make every
        # later call with a different batch/sequence size fail to broadcast.
        feature_shape = X.shape[-1:]
        if self.gamma is None:
            self.gamma = np.ones(feature_shape, dtype=X_hat.dtype)
        if self.beta is None:
            self.beta = np.zeros(feature_shape, dtype=X_hat.dtype)

        return self.gamma * X_hat + self.beta


# Smoke test: push a random [batch=5, seq_len=20, input_size=100] tensor
# through the layer and confirm the shape is preserved.
LN = LayerNormalization()
X = np.random.randn(5, 20, 100)
print(LN.forward(X).shape)

# The first token's normalized features should have mean ~0 and variance ~1.
first_token = LN.forward(X)[0, 0]
np.mean(first_token), np.var(first_token)

(5, 20, 100)


(-8.881784197001253e-18, 0.9999999989043468)