# Parameter Management
1. Accessing parameters for debugging, diagnostics and visualizations
2. Sharing parameters across different model components

In [1]:
import torch
from torch import nn

# Seed the global RNG so the random input and the lazily initialized weights
# are the same on every Restart & Run All.
torch.manual_seed(0)

# Small MLP. LazyLinear leaves the input width unspecified; it is inferred
# from the data on the first forward pass.
net = nn.Sequential(nn.LazyLinear(8), nn.ReLU(), nn.LazyLinear(1))
X = torch.rand(size=(2, 4))  # 2 examples with 4 features each
net(X).shape  # first forward pass materializes the lazy parameters



torch.Size([2, 1])

## 1. Parameter Access

In [7]:
# net[2] is the final LazyLinear(1) layer of the Sequential. state_dict()
# returns an ordered mapping from parameter name to tensor (here 'weight'
# and 'bias').
net[2].state_dict()

OrderedDict([('weight',
              tensor([[ 0.1270, -0.2636, -0.2054,  0.1804, -0.3342, -0.2389,  0.0804,  0.3002]])),
             ('bias', tensor([-0.0708]))])

- Each parameter is represented as an instance of the `nn.Parameter` class.
- Use the `.data` attribute to access a parameter's underlying numeric values.

### 1.1 Targeted parameters

In [9]:
# The bias of the last layer is an nn.Parameter; its .data attribute exposes
# the underlying tensor of values.
type(net[2].bias), net[2].bias.data

(torch.nn.parameter.Parameter, tensor([-0.0708]))

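Parameters are complex objects, containing values, gradients, and additional information. Besides the value, we can also access the gradient; because backpropagation has not been run on this network yet, the gradient is still `None`.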

In [11]:
# No backward pass has been run yet, so the gradient is still unset.
# Use an identity check: `is None` is the idiomatic test and does not invoke
# the tensor's overloaded `==` once a gradient exists.
net[2].weight.grad is None

True

### 1.2 All Parameters at Once

In [13]:
# List every parameter in the network as (qualified name, shape).
[
    (qualified_name, tensor.shape)
    for qualified_name, tensor in net.named_parameters()
]

[('0.weight', torch.Size([8, 4])),
 ('0.bias', torch.Size([8])),
 ('2.weight', torch.Size([1, 8])),
 ('2.bias', torch.Size([1]))]

## 2. Tied parameters
Often we want to share parameters across multiple layers.

In [16]:
# A single LazyLinear instance is placed at two positions of the Sequential
# (indices 2 and 4), so both positions refer to the same module object and
# therefore to the same weight and bias.
shared = nn.LazyLinear(8)
net = nn.Sequential(
    nn.LazyLinear(8),
    nn.ReLU(),
    shared,
    nn.ReLU(),
    shared,
    nn.ReLU(),
    nn.LazyLinear(1),
)



In [18]:
# Lazy layers have no materialized weights until the first forward pass:
# the first print shows an empty weight, the second shows the weight after
# net(X) has run.
print(net[2].weight.data)
net(X)  # first forward pass through the newly built network
print(net[2].weight.data)

tensor([])
tensor([[ 0.2657,  0.0210,  0.2408, -0.1471, -0.3233, -0.1848, -0.3084,  0.1845],
        [ 0.2217,  0.1290,  0.0114, -0.0651,  0.2876,  0.2639,  0.0158, -0.0181],
        [ 0.1363, -0.2174, -0.3226, -0.3349,  0.1392, -0.0387,  0.1256, -0.1311],
        [ 0.0778, -0.3477,  0.0612,  0.1634, -0.0767,  0.0536,  0.1505,  0.1924],
        [ 0.1036, -0.2272, -0.2156, -0.3019,  0.0740,  0.3483, -0.2545, -0.3050],
        [-0.1476, -0.2929, -0.0332,  0.1034,  0.0217, -0.3399,  0.3334,  0.1892],
        [ 0.1214, -0.0436, -0.2163, -0.0149,  0.1078,  0.1173,  0.0555,  0.0730],
        [-0.2708,  0.0182,  0.2909,  0.0490, -0.3031, -0.0181, -0.2348,  0.2634]])


In [19]:
# Positions 2 and 4 both hold the `shared` layer, so their first weight rows match.
print(net[2].weight.data[0] == net[4].weight.data[0])
# Overwrite one entry in place, going through position 2 only.
net[2].weight.data[0, 0] = 100
# The rows still match: both positions refer to one parameter tensor rather
# than to two copies that merely started with equal values.
print(net[2].weight.data[0] == net[4].weight.data[0])

tensor([True, True, True, True, True, True, True, True])
tensor([True, True, True, True, True, True, True, True])
