In [1]:
import torch
import torchvision
from d2l import torch as d2l

In [2]:
from torch import nn

In [3]:
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1)) # 参数？

In [4]:
X = torch.rand(size=(2, 4))

In [5]:
net(X)

tensor([[-0.2217],
        [-0.1875]], grad_fn=<AddmmBackward0>)

In [7]:
# 可访问任意层的参数
net[0].state_dict()

OrderedDict([('weight',
              tensor([[ 0.1339, -0.0014,  0.1429, -0.1044],
                      [ 0.0603,  0.1294,  0.3417, -0.4180],
                      [-0.4866, -0.3153, -0.4087,  0.1704],
                      [-0.4508, -0.1809,  0.4891,  0.1798],
                      [ 0.3002,  0.1484,  0.0029,  0.4295],
                      [-0.1176, -0.1249,  0.2617, -0.2530],
                      [ 0.0758, -0.3189,  0.0635,  0.2394],
                      [-0.2162,  0.0585, -0.4415, -0.0682]])),
             ('bias',
              tensor([-0.2347, -0.3134,  0.4886,  0.3523,  0.4174, -0.4156,  0.3170,  0.2178]))])

In [9]:
net[0].weight.grad == None

True

In [11]:
*net[0].named_parameters()

SyntaxError: can't use starred expression here (3865494094.py, line 1)

In [12]:
print(*[(name, param.shape) for name, param in net[0].named_parameters()])

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))


In [13]:
print(*[(name, param.shape) for name, param in net.named_parameters()])

('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [14]:
def block1():
    return nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 4), nn.ReLU())

In [16]:
def block2():
    net = nn.Sequential()
    for i in range(4):
        net.add_module(f'block1{i}', block1())
    return net

In [17]:
rgnet = nn.Sequential(block2(), nn.Linear(4, 1))

In [18]:
rgnet(X)

tensor([[0.0049],
        [0.0049]], grad_fn=<AddmmBackward0>)

In [19]:
rgnet

Sequential(
  (0): Sequential(
    (block10): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block11): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block12): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block13): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)

In [20]:
rgnet[0][1][0].bias.data

tensor([-0.0616,  0.2016, -0.3916, -0.4937,  0.4943, -0.1244,  0.2536, -0.2556])

In [21]:
def init_normal(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, mean=0, std=0.01)
        nn.init.zeros_(m.bias)

In [22]:
net.apply(init_normal)

Sequential(
  (0): Linear(in_features=4, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=1, bias=True)
)

In [24]:
net[0].weight.data[0], net[0].bias.data[0]

(tensor([0.0081, 0.0133, 0.0218, 0.0088]), tensor(0.))

In [25]:
def xavier(m):
    if type(m) == nn.Linear:
        nn.init.xavier_uniform_(m.weight)
def init_42(m):
    if type(m) == nn.Linear:
        nn.init.constant_(m.weight, 42)

In [26]:
net[0].apply(xavier)
net[2].apply(init_42)
print(net[0].weight.data[0])
print(net[2].weight.data)

tensor([ 0.3411,  0.2851, -0.5460, -0.5519])
tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])


In [27]:
def my_init(m):
    if type(m) == nn.Linear:
        print("Init", *[(name, param.shape)
    for name, param in m.named_parameters()][0])
        nn.init.uniform_(m.weight, -10, 10)
        m.weight.data *= m.weight.data.abs() >= 5

In [31]:
print(*[(i, i+1) for i in range(4)])

(0, 1) (1, 2) (2, 3) (3, 4)


In [46]:
a = torch.tensor([1, 1, 3], dtype=float)

In [47]:
nn.init.uniform_(a, -10, 10)

tensor([1.4018, 6.4197, 2.8025], dtype=torch.float64)

In [48]:
!nvidia-smi

/bin/bash: nvidia-smi: command not found


In [49]:
torch.cuda.device_count()

0