net(X)实际上是net.__call__(X)的简写

In [2]:
import torch
from torch import nn
from torch.nn import functional as F


In [4]:
# Sequential block
X = torch.rand(2, 20)  # random input of shape (2, 20)
class MySequntial(nn.Module):
    """Minimal sequential container that applies its modules in order.

    Args:
        *args: Modules to chain. Each one is registered under its positional
            index converted to a string ("0", "1", ...).

    Raises:
        TypeError: If an argument is neither an ``nn.Module`` nor ``None``
            (raised by ``add_module``).
    """

    def __init__(self, *args):
        super().__init__()
        # enumerate yields each module together with its index; the index
        # becomes the name the child module is registered under.
        for idx, module in enumerate(args):
            # add_module is the public registration API: it stores the child
            # in the module dict and rejects objects that are not Modules,
            # instead of writing to the private _modules dict directly.
            self.add_module(str(idx), module)

    def forward(self, X):
        """Pass X through every registered module in registration order."""
        # Iterate the raw dict values (not children()) so that a module
        # passed more than once is also applied more than once.
        for block in self._modules.values():
            X = block(X)
        return X
# Chain Linear -> ReLU -> Linear with the custom container and run a forward pass on X.
net = MySequntial(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))
net(X)

tensor([[-0.1990,  0.0691,  0.2007, -0.1092, -0.0011, -0.1867,  0.2401, -0.0573,
          0.1281, -0.1466],
        [-0.1344,  0.0036,  0.1457, -0.0506, -0.0832, -0.0617,  0.2054, -0.0391,
          0.1036,  0.0527]], grad_fn=<AddmmBackward0>)

In [5]:
# How to run custom math in the forward pass instead of relying only on predefined layers
class FixedHiddenMLP(nn.Module):
    """MLP with a constant random weight matrix, a reused linear layer and
    data-dependent control flow in its forward pass.

    The forward pass returns a scalar tensor: the sum of the final activations.
    """

    def __init__(self):
        super().__init__()
        # Constant random weights that are never trained. Registered as a
        # non-persistent buffer so that .to()/.double()/.cuda() cast or move
        # it together with the parameters, while state_dict() still contains
        # only the linear layer's entries (as with a plain attribute).
        self.register_buffer("rand_weight", torch.rand((20, 20)), persistent=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Return the scalar sum of the rescaled output for input X."""
        X = self.linear(X)
        # Hand-written hidden step that mixes in the constant weights.
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Reuse the same linear layer, i.e. share its parameters.
        X = self.linear(X)
        # Control flow: keep halving X until its L1 norm is at most 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()
# Instantiate the module and run a forward pass on X.
net = FixedHiddenMLP()
net(X)

tensor(0.0578, grad_fn=<SumBackward0>)

In [20]:
# How to access parameters
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
X = torch.rand(2, 4)
# List every parameter of the whole network first, then only those of the
# first layer, printing each name with its shape.
for name, param in [*net.named_parameters(), *net[0].named_parameters()]:
    print(name, param.shape)
net.state_dict()
# Bias of the last layer, looked up through its state_dict key.
net.state_dict()['2.bias'].data

0.weight torch.Size([8, 4])
0.bias torch.Size([8])
2.weight torch.Size([1, 8])
2.bias torch.Size([1])
weight torch.Size([8, 4])
bias torch.Size([8])


tensor([0.1925])

In [8]:
# Parameter initialization
def init_normal(m):
    """Initialize an ``nn.Linear`` layer in place.

    Weights are drawn from a normal distribution with mean 0 and standard
    deviation 0.01; the bias, when the layer has one, is set to zero.
    Modules whose type is not exactly ``nn.Linear`` are left untouched, so
    the function can be handed to ``Module.apply``.

    Args:
        m: The module to initialize.
    """
    # Exact type match, as in the original check: subclasses are skipped.
    if type(m) is not nn.Linear:
        return
    nn.init.normal_(m.weight, mean=0, std=0.01)
    # A layer built with bias=False has m.bias set to None; skip it rather
    # than passing None to the initializer.
    if m.bias is not None:
        nn.init.zeros_(m.bias)
# Run init_normal over net via apply, then show the first layer's weight and bias.
net.apply(init_normal)
net[0].weight.data, net[0].bias.data

(tensor([[-0.0084, -0.0050,  0.0035,  0.0119],
         [ 0.0010,  0.0081, -0.0020,  0.0146],
         [-0.0099,  0.0115,  0.0028, -0.0037],
         [ 0.0111,  0.0037,  0.0121,  0.0138],
         [-0.0072,  0.0109, -0.0252,  0.0259],
         [ 0.0242, -0.0144,  0.0026, -0.0021],
         [ 0.0185,  0.0030, -0.0139,  0.0078],
         [ 0.0006, -0.0005,  0.0020, -0.0025]]),
 tensor([0., 0., 0., 0., 0., 0., 0., 0.]))

In [12]:
# Custom initial parameter distribution
# Initialize the weight of each nn.Linear layer from a uniform distribution on
# [-10, 10], then set every weight whose absolute value is below 5 to 0
# (which makes the weights sparse).
def my_init(m):
    """Initialize an ``nn.Linear`` layer's weight with a sparse uniform scheme.

    Prints the name and shape of the layer's first parameter, fills the
    weight from U[-10, 10] and zeroes the entries with ``|w| < 5``. Modules
    whose type is not exactly ``nn.Linear`` are left untouched.

    Args:
        m: The module to initialize.
    """
    if type(m) is not nn.Linear:
        return
    # Report only the first registered parameter.
    first = next(iter(m.named_parameters()), None)
    if first is not None:
        name, param = first
        print("Init", name, param.shape)
    nn.init.uniform_(m.weight, -10, 10)
    # The comparison yields a boolean mask that acts as 0/1 in the product.
    # The in-place update runs under no_grad instead of going through .data,
    # so autograd does not record it and the parameter stays a leaf.
    with torch.no_grad():
        m.weight.mul_(m.weight.abs() >= 5)
# Run my_init over net via apply, then show the first layer's weight.
net.apply(my_init)
net[0].weight

Init weight torch.Size([8, 4])
Init weight torch.Size([1, 8])


Parameter containing:
tensor([[-0.0000,  0.0000,  0.0000, -5.6434],
        [ 7.6030,  0.0000,  0.0000, -0.0000],
        [-5.9312, -0.0000, -5.0934,  7.8821],
        [-0.0000,  0.0000,  0.0000, -6.0444],
        [ 0.0000, -8.0181,  5.1119, -0.0000],
        [-0.0000, -0.0000,  0.0000,  6.1335],
        [ 0.0000,  0.0000,  0.0000,  8.8631],
        [ 5.2614, -0.0000, -0.0000, -0.0000]], requires_grad=True)

In [13]:
# Saving and loading model parameters
class MLP(nn.Module):
    """Perceptron with one hidden layer: Linear(20, 256) -> ReLU -> Linear(256, 10)."""

    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.output = nn.Linear(256, 10)

    def forward(self, x):
        """Return the output layer's result for input x."""
        activated = F.relu(self.hidden(x))
        return self.output(activated)
# Run the model once, save its parameters to disk, then restore them into a
# freshly constructed MLP.
net = MLP()
X = torch.randn(size=(2, 20))
Y = net(X)
torch.save(net.state_dict(), 'mlp.params')
clone = MLP()
# weights_only=True limits unpickling to tensors and plain containers, which
# is all a state_dict holds, so loading the file cannot run arbitrary
# pickled code.
clone.load_state_dict(torch.load('mlp.params', weights_only=True))
clone.eval()

MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
)

In [15]:
!nvidia-smi

Mon Jul 28 16:32:16 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   40C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [16]:
torch.cuda.device_count()  # number of available GPUs

1

In [18]:
def try_gpu(i=0):
    """Return ``cuda:i`` if that GPU exists, otherwise the CPU device.

    Args:
        i: Zero-based GPU index.

    Returns:
        ``torch.device(f'cuda:{i}')`` when ``0 <= i < torch.cuda.device_count()``,
        else ``torch.device('cpu')``.
    """
    # The lower bound matters: a negative index satisfies a plain
    # "count >= i + 1" check and would build a device string like 'cuda:-1'.
    if 0 <= i < torch.cuda.device_count():
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')
def try_all_gpus():
    """Return all available GPU devices, or ``[cpu]`` if there is no GPU."""
    gpu_count = torch.cuda.device_count()
    found = [torch.device(f'cuda:{idx}') for idx in range(gpu_count)]
    if not found:
        return [torch.device('cpu')]
    return found
# Query the default GPU, GPU index 10, and the list of all devices.
try_gpu(), try_gpu(10), try_all_gpus()

(device(type='cuda', index=0),
 device(type='cpu'),
 [device(type='cuda', index=0)])