In [1]:
import torch
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l

## Layers without Parameters

In [2]:
class CenteredLayer(nn.Module):
    """Parameter-free layer that subtracts the mean from its input.

    The mean is a single scalar computed over every element of the input,
    not a per-row or per-feature mean.
    """

    def __init__(self):
        super().__init__()

    def forward(self, X):
        """Return ``X`` shifted so that its overall mean is zero."""
        overall_mean = X.mean()
        return X - overall_mean

In [3]:
# Sanity check on a small float vector whose mean is 3.
layer = CenteredLayer()
layer(torch.tensor([1, 2, 3, 4, 5], dtype=torch.float32))

tensor([-2., -1.,  0.,  1.,  2.])

In [5]:
# Use the parameter-free layer as a component of a larger model.
net = nn.Sequential(
    nn.LazyLinear(128),
    CenteredLayer(),
)

In [6]:
# Feed a random batch through the network; because the last layer subtracts
# the overall mean, the output mean should be zero up to rounding error.
Y = net(torch.rand((4, 8)))
Y.mean()

tensor(9.3132e-09, grad_fn=<MeanBackward0>)

## Layers with Parameters

Implementation of a custom fully connected layer:
* The layer has two learnable parameters: the weight `W` and the bias `b`.
* The ReLU activation is baked in as a default. The constructor takes two input arguments: `in_units` and `units` (the number of input and output units, respectively).

In [7]:
class MyLinear(nn.Module):
    """Fully connected layer with a built-in ReLU activation.

    Args:
        in_units: Number of input units (size of the last input dimension).
        units: Number of output units.

    Attributes:
        weight: Learnable ``(in_units, units)`` matrix, initialized with
            ``torch.randn``.
        bias: Learnable ``(units,)`` vector, initialized with ``torch.randn``.
    """

    def __init__(self, in_units, units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units, units))
        self.bias = nn.Parameter(torch.randn(units,))

    def forward(self, X):
        """Return ``relu(X @ weight + bias)``.

        Args:
            X: Input tensor whose last dimension has size ``in_units``.

        Returns:
            Tensor with last dimension ``units``, attached to the autograd
            graph so gradients flow back to ``weight`` and ``bias``.
        """
        # Use the Parameters themselves, not ``.data``: ``.data`` bypasses
        # autograd tracking, so no gradient would ever reach ``weight`` or
        # ``bias`` and the layer could not be trained.
        linear = torch.matmul(X, self.weight) + self.bias
        return F.relu(linear)

In [8]:
# Build a MyLinear layer with 5 input units and 3 output units, then
# display its weight parameter.
linear = MyLinear(in_units=5, units=3)
linear.weight

Parameter containing:
tensor([[-1.4732,  1.5295,  2.7014],
        [ 0.9858,  0.6644, -0.0225],
        [-2.3179, -1.4872, -0.1630],
        [ 0.4626,  0.8520,  0.1964],
        [-0.7388, -1.1762, -0.8762]], requires_grad=True)

In [9]:
# Forward pass on a random batch of 2 examples with 5 features each.
linear(torch.rand((2, 5)))

tensor([[0.0000, 0.1634, 1.2390],
        [0.0000, 0.5700, 0.0341]])

In [10]:
# Stack two custom layers (64 -> 8 -> 1) and run a random batch of 2 examples.
net = nn.Sequential(
    MyLinear(in_units=64, units=8),
    MyLinear(in_units=8, units=1),
)
net(torch.rand((2, 64)))

tensor([[3.2815],
        [1.9035]])