In [None]:
import torch
from torch import nn

! pip install pytorch-model-summary
from pytorch_model_summary import summary

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# Hyperparameters (not trainable, and NOT registered in `model.parameters()`)

Hyperparameters such as the learning rate, momentum, regularization strength, and network structure are not trainable, and they are not registered in `model.parameters()` in PyTorch.

In [None]:
# Hand-picked settings; these are plain Python numbers, not tensors.
learning_rate = 0.001  # step size passed to the optimizer as `lr`

# Layer widths: input features -> hidden units -> output values
input_dim, n_hidden_units, output_dim = 10, 128, 1

# Trainable parameters (registered in `model.parameters()`)

Weights created by modules such as `nn.Linear()` and `nn.Conv2d()` are automatically trainable parameters: they have gradient tracking (`requires_grad=True`) enabled by default upon initialization, so they will be updated during the batch optimization.

In [None]:
# Two fully connected layers with a ReLU between them:
# input_dim -> n_hidden_units -> output_dim.
hidden_layer = nn.Linear(input_dim, n_hidden_units)
output_layer = nn.Linear(n_hidden_units, output_dim)
model = nn.Sequential(hidden_layer, nn.ReLU(), output_layer)

# Plain SGD over every parameter registered on the model.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [None]:
# Model summary: feed a dummy batch of one all-zero sample through the model
# and print the resulting per-layer table.
dummy_input = torch.zeros((1, input_dim))
summary_table = summary(model, dummy_input, show_input=True)
print(summary_table)

-----------------------------------------------------------------------
      Layer (type)         Input Shape         Param #     Tr. Param #
          Linear-1             [1, 10]           1,408           1,408
            ReLU-2            [1, 128]               0               0
          Linear-3            [1, 128]             129             129
Total params: 1,537
Trainable params: 1,537
Non-trainable params: 0
-----------------------------------------------------------------------


# Non-trainable parameters (not trainable, registered in `model.parameters()`)

If we disable gradient tracking for some weights (by setting `requires_grad = False`, which is usually done manually), they become non-trainable parameters: they are "frozen", meaning that they will not be updated during the batch optimization. This is useful if, for example, you want to freeze only the embedding part of your network and train the rest of it.

In [None]:
# Freeze all weights (and the bias) of the first linear layer.
# The layer is selected by its position in the nn.Sequential (model[0]) instead of
# by searching each parameter name for the character '0': a substring match would
# also hit names such as '10.weight' or '20.bias' in a deeper model and silently
# freeze the wrong layers.
for param in model[0].parameters():
    # Parameters with requires_grad=False get no gradient, so the optimizer
    # leaves them unchanged.
    param.requires_grad = False

# model summary: the first layer's parameters are now reported as non-trainable
print(summary(model, torch.zeros((1, input_dim)), show_input=True))

-----------------------------------------------------------------------
      Layer (type)         Input Shape         Param #     Tr. Param #
          Linear-1             [1, 10]           1,408               0
            ReLU-2            [1, 128]               0               0
          Linear-3            [1, 128]             129             129
Total params: 1,537
Trainable params: 129
Non-trainable params: 1,408
-----------------------------------------------------------------------
