In [2]:
import torch
from torch import nn
from torch.nn import functional as F

'''模型构造'''
# Build a two-layer MLP from stock layers: Linear(20, 256) -> ReLU -> Linear(256, 10).
net = nn.Sequential(
    nn.Linear(20,256),
    nn.ReLU(),
    nn.Linear(256,10)
)

# Random input of shape (2, 20), then one forward pass through the network.
X = torch.rand(2,20)
print(net(X))

tensor([[-0.0597, -0.2138, -0.1193, -0.3012,  0.1099, -0.1234,  0.0381,  0.0569,
          0.3250, -0.1946],
        [-0.0346, -0.1288, -0.0626, -0.3181,  0.1118, -0.0912, -0.0237,  0.1157,
          0.1843, -0.0546]], grad_fn=<AddmmBackward0>)


In [3]:
# Custom block
class MLP(nn.Module):
    """Multilayer perceptron with one hidden layer, written as a custom block.

    Holds two linear layers: ``hidden`` (20 -> 256) and ``out`` (256 -> 10).
    """

    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Return ``out(relu(hidden(X)))``."""
        hidden_activation = F.relu(self.hidden(X))
        return self.out(hidden_activation)

In [4]:
# Instantiate the custom block and run it on the existing input X.
net = MLP()
print(net(X))

tensor([[-0.1104, -0.0174,  0.0312, -0.0292, -0.0608,  0.0656, -0.5230,  0.1949,
          0.1078,  0.2923],
        [-0.0059, -0.0769,  0.0398,  0.1006, -0.0602,  0.0930, -0.4082,  0.1442,
          0.0111,  0.1675]], grad_fn=<AddmmBackward0>)


In [5]:
# Sequential block
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    Args:
        *args: Sub-modules to chain, in the order they should be applied.
    """

    def __init__(self, *args):
        super().__init__()
        for idx, block in enumerate(args):
            # Register each block under its position as a *string* key.
            # Keying the registry by the module object itself breaks
            # state_dict() / named_parameters(), which build dotted string
            # names, and collapses a module that is passed twice into a single
            # entry so it would only be applied once.
            self.add_module(str(idx), block)

    def forward(self, x):
        """Feed ``x`` through every registered block in insertion order."""
        for block in self._modules.values():
            x = block(x)
        return x

# Same 20 -> 256 -> 10 layer stack as before, built with the hand-written container.
net = MySequential(nn.Linear(20,256),nn.ReLU(),nn.Linear(256,10))
print(net(X))

tensor([[ 0.0698,  0.0777,  0.2339,  0.0709,  0.1804,  0.0816,  0.2424,  0.2079,
         -0.0403, -0.2147],
        [ 0.0260,  0.1094,  0.1417,  0.1069,  0.1289,  0.0409,  0.2323,  0.2301,
         -0.0939, -0.0813]], grad_fn=<AddmmBackward0>)


In [6]:
# Executing arbitrary code in the forward pass
class FixedHiddenMLP(nn.Module):
    """MLP whose forward pass mixes a trainable layer, a fixed random weight
    matrix and plain Python control flow."""

    def __init__(self):
        super().__init__()
        # Constant (non-trainable) random weights. Registering them as a buffer
        # lets .to() / .cuda() / .double() move or cast them together with the
        # module; as a plain attribute they would be left behind and forward()
        # would fail on a device/dtype mismatch. persistent=False keeps the
        # buffer out of state_dict(), so checkpoint keys are unchanged.
        self.register_buffer('rand_weight', torch.rand((20, 20)), persistent=False)
        self.Linear = nn.Linear(20, 20)

    def forward(self, x):
        """Return a scalar: the sum of the rescaled output of the last layer."""
        x = self.Linear(x)
        # Multiply by the fixed weights, add the constant 1, then apply ReLU.
        x = F.relu(torch.mm(x, self.rand_weight) + 1)
        # The same layer is applied a second time, so both uses share parameters.
        x = self.Linear(x)
        # Halve the activations until the sum of their absolute values is <= 1.
        while x.abs().sum() > 1:
            x /= 2
        return x.sum()

# Run the block on X; the printed result is a scalar because forward() returns x.sum().
net = FixedHiddenMLP()
print(net(X))

tensor(-0.1456, grad_fn=<SumBackward0>)


In [7]:
# Mixing and matching different ways of composing blocks
class NestMLP(nn.Module):
    """Block that nests an ``nn.Sequential`` (20 -> 64 -> 32, ReLU after each
    layer) and follows it with a separate linear layer (32 -> 16)."""

    def __init__(self):
        super().__init__()
        inner_layers = [
            nn.Linear(20, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
        ]
        self.net = nn.Sequential(*inner_layers)
        self.linear = nn.Linear(32, 16)

    def forward(self, x):
        """Run the nested stack first, then the final linear layer."""
        features = self.net(x)
        return self.linear(features)

# Chain three different kinds of block: NestMLP (-> 16), Linear(16, 20), FixedHiddenMLP.
chimera = nn.Sequential(NestMLP(),nn.Linear(16,20),FixedHiddenMLP())
print(chimera(X))

tensor(0.3390, grad_fn=<SumBackward0>)


In [8]:
'''参数管理'''

# Small MLP used for the parameter-management examples: Linear(4, 8) -> ReLU -> Linear(8, 1).
net = nn.Sequential(nn.Linear(4,8),nn.ReLU(),nn.Linear(8,1))
X = torch.rand(size = (2,4))
print(net(X))
# Parameter access
print(net[2].state_dict()) # parameters of net[2], i.e. the Linear(8,1) layer

tensor([[0.5736],
        [0.4072]], grad_fn=<AddmmBackward0>)
OrderedDict([('weight', tensor([[ 0.2515,  0.3506,  0.2789,  0.3019, -0.0180,  0.3410, -0.1809,  0.2035]])), ('bias', tensor([0.1968]))])


In [9]:
# Inspecting one specific parameter
print(type(net[2].bias)) # type of the bias of net[2]
print(net[2].bias)       # the bias Parameter of net[2]
print(net[2].bias.data)  # raw values of that bias (.data holds the values, .grad the gradient)

<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([0.1968], requires_grad=True)
tensor([0.1968])


In [10]:
# No backward pass has been run on this net, so .grad is still unset.
# `is None` is the identity test for that; `== None` goes through Tensor.__eq__
# once a gradient tensor exists.
print(net[2].weight.grad is None)

True


In [11]:
# Accessing all parameters at once
# (name, shape) pairs for the first layer only ...
print(*[(name,param.shape)for name,param in net[0].named_parameters()])
# ... and for the whole network.
print(*[(name,param.shape)for name,param in net.named_parameters()])

('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [12]:
# Look up the bias of net[2] through the network-level state_dict key '2.bias'.
print(net.state_dict()['2.bias'].data)

tensor([0.1968])


In [13]:
# Collecting parameters from nested blocks
def block1():
    """Return a fresh Linear(4, 8) -> ReLU -> Linear(8, 4) -> ReLU stack."""
    layers = [nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 4), nn.ReLU()]
    return nn.Sequential(*layers)

def block2():
    """Return a Sequential holding four block1() instances named block0..block3."""
    stacked = nn.Sequential()
    child_names = [f'block{i}' for i in range(4)]
    for child_name in child_names:
        # A new block1() per name, so every child has its own parameters.
        stacked.add_module(child_name, block1())
    return stacked

# Put the nested block2() in front of a final Linear(4, 1) and print the module tree.
rgnet = nn.Sequential(block2(),nn.Linear(4,1))
print(rgnet)

Sequential(
  (0): Sequential(
    (block0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


In [14]:
# Built-in initializers
def init_normal(m):
    """Initialize an ``nn.Linear`` in place: Gaussian weights, zero bias.

    Meant to be passed to ``Module.apply``. Modules whose type is not exactly
    ``nn.Linear`` are left untouched.

    Args:
        m: The module to initialize.
    """
    # Only plain linear layers are handled.
    if type(m) is not nn.Linear:
        return
    # Weights drawn from a normal distribution with mean 0 and std 0.01.
    nn.init.normal_(m.weight, mean=0, std=0.01)
    # nn.Linear(..., bias=False) stores bias as None; zeros_ would fail on it.
    if m.bias is not None:
        nn.init.zeros_(m.bias)

# Run init_normal over net via Module.apply, then show the first weight row
# and the first bias element of net[0].
net.apply(init_normal)
print(net[0].weight.data[0])
print(net[0].bias.data[0])

tensor([-0.0091,  0.0051, -0.0080, -0.0024])
tensor(0.)


In [15]:
def init_constant(m):
    """Initialize an ``nn.Linear`` in place: every weight 1, bias 0.

    Meant to be passed to ``Module.apply``. Modules whose type is not exactly
    ``nn.Linear`` are left untouched.

    Args:
        m: The module to initialize.
    """
    if type(m) is not nn.Linear:
        return
    # Set every weight to the constant 1.
    nn.init.constant_(m.weight, 1)
    # nn.Linear(..., bias=False) stores bias as None; zeros_ would fail on it.
    if m.bias is not None:
        nn.init.zeros_(m.bias)

# Re-initialize net with init_constant and show the same two entries as before.
net.apply(init_constant)
print(net[0].weight.data[0])
print(net[0].bias.data[0])

tensor([1., 1., 1., 1.])
tensor(0.)


In [16]:
# Applying different initializers to different blocks
def xavier(m):
    """Xavier-uniform initialize the weight of a module that is exactly ``nn.Linear``."""
    if type(m) != nn.Linear:
        return
    nn.init.xavier_uniform_(m.weight)

def init_42(m):
    """Fill the weight of a module that is exactly ``nn.Linear`` with the constant 42."""
    if type(m) != nn.Linear:
        return
    nn.init.constant_(m.weight, 42)

# Per-layer initialization: xavier for net[0], init_42 for net[2].
net[0].apply(xavier)
net[2].apply(init_42)
print(net[0].weight.data[0])
print(net[2].weight.data)

tensor([-0.0330,  0.5448,  0.6322,  0.2464])
tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])


In [17]:
# Custom initialization
def my_init(m):
    """Custom initializer intended for ``Module.apply``.

    For a module that is exactly ``nn.Linear``: print the name and shape of its
    first parameter, draw the weights uniformly from [-10, 10], then zero every
    weight whose absolute value is below 5. Other modules are ignored.
    """
    if type(m) != nn.Linear:
        return
    first_name, first_param = list(m.named_parameters())[0]
    print('Init', first_name, first_param.shape)
    nn.init.uniform_(m.weight, -10, 10)
    # Boolean mask: True where |w| >= 5. Multiplying by it zeroes the rest.
    keep_mask = m.weight.data.abs() >= 5
    m.weight.data *= keep_mask

# Apply the custom initializer and show the first two weight rows of net[0].
net.apply(my_init)
print(net[0].weight[:2])

Init weight torch.Size([8, 4])
Init weight torch.Size([1, 8])
tensor([[-9.1514,  5.4084,  9.5100, -0.0000],
        [-7.1216,  0.0000, -0.0000,  7.7655]], grad_fn=<SliceBackward0>)


In [18]:
# Parameters can also be edited directly through .data:
# add 1 to every weight of net[0], then overwrite the entry at [0, 0] with 42.
net[0].weight.data[:] += 1
net[0].weight.data[0,0] = 42
print(net[0].weight.data[0])

tensor([42.0000,  6.4084, 10.5100,  1.0000])


In [19]:
# Tied (shared) parameters

# The same Linear instance is placed at positions 2 and 4 of the network.
shared = nn.Linear(8,8)
net = nn.Sequential(nn.Linear(4,8),
                    nn.ReLU(),
                    shared,
                    nn.ReLU(),
                    shared,
                    nn.ReLU(),
                    nn.Linear(8,1)
                    )
net(X)

# Compare the first weight row of the two positions.
print(net[2].weight.data[0] == net[4].weight.data[0])

# Change one entry through net[2] ...
net[2].weight.data[0,0] = 100

# ... and compare again: net[2] and net[4] are the same object, not copies.
print(net[2].weight.data[0] == net[4].weight.data[0])

tensor([True, True, True, True, True, True, True, True])
tensor([True, True, True, True, True, True, True, True])


In [20]:
# Custom layers

class CenteredLayer(nn.Module):
    """Parameter-free layer that subtracts the mean of its input from the input."""

    def forward(self, x):
        """Return ``x`` shifted by the mean over all of its elements."""
        centered = x - x.mean()
        return centered

# Quick check on the values 1..5.
layer = CenteredLayer()
print(layer(torch.FloatTensor([1,2,3,4,5])))

tensor([-2., -1.,  0.,  1.,  2.])


In [21]:
# Using the custom layer as a component of a larger model

net = nn.Sequential(nn.Linear(8,128),
                    CenteredLayer())

# CenteredLayer is the last stage, so the mean of the output should be ~0
# up to floating-point error.
Y = net(torch.rand(4,8))
print(Y.mean())

tensor(6.9849e-09, grad_fn=<MeanBackward0>)


In [22]:
# Layers with parameters
class MyLinear(nn.Module):
    """Fully connected layer with a built-in ReLU, using hand-declared parameters.

    Args:
        in_units: Number of input features.
        units: Number of output features.
    """

    def __init__(self, in_units, units):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(in_units, units))
        self.bias = nn.Parameter(torch.randn(units))

    def forward(self, x):
        """Return ``relu(x @ weight + bias)``."""
        # Use the argument `x`; the previous code read a global `X`, which
        # ignored the actual input. The parameters are used directly rather
        # than through `.data` so that autograd tracks them and they can be
        # trained.
        linear = torch.matmul(x, self.weight) + self.bias
        return F.relu(linear)

# Instantiate with 5 input and 3 output units and inspect the (5, 3) weight Parameter.
dense = MyLinear(5,3)
print(dense.weight)

Parameter containing:
tensor([[ 0.9813,  1.7083, -1.0305],
        [-0.6162, -0.9795,  0.2824],
        [-1.7707, -0.2265, -0.6627],
        [ 0.9590, -0.4737,  1.9916],
        [ 0.8226, -1.1645, -0.0337]], requires_grad=True)


In [23]:
'''读写文件'''

# Saving and loading a tensor
x = torch.arange(4)
print(x)
# Write the tensor to the file 'x-file' (relative path).
torch.save(x,'x-file')

# Read it back from the same file.
x2 = torch.load('x-file')
print(x2)

tensor([0, 1, 2, 3])
tensor([0, 1, 2, 3])


In [24]:
# Save a list of tensors, then read them back into memory
y = torch.zeros(4)
torch.save([x,y],'x-files')

# The loaded list is unpacked into its two tensors.
x2,y2 = torch.load('x-files')

print(x2,y2)

tensor([0, 1, 2, 3]) tensor([0., 0., 0., 0.])


In [25]:
# Write and read a dictionary that maps strings to tensors
mydict = {'x':x,'y':y}
torch.save(mydict,'mydict')
mydict2 = torch.load('mydict')
print(mydict2)

{'x': tensor([0, 1, 2, 3]), 'y': tensor([0., 0., 0., 0.])}


In [26]:
# Loading and saving model parameters
class MLP(nn.Module):
    """One-hidden-layer perceptron used for the save/load example.

    Holds two linear layers: ``hidden`` (20 -> 256) and ``output`` (256 -> 10).
    """

    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.output = nn.Linear(256, 10)

    def forward(self, x):
        """Return ``output(relu(hidden(x)))``."""
        activated = F.relu(self.hidden(x))
        return self.output(activated)

# Reference model and its output Y on a random input of shape (2, 20);
# Y is compared against the reloaded model's output in a later cell.
net = MLP()
X = torch.randn(size=(2,20))
Y = net(X)
print(Y)

tensor([[ 0.1713, -0.0733, -0.2155,  0.0764, -0.3268, -0.0417, -0.0322, -0.1065,
          0.1317,  0.2631],
        [-0.3002, -0.2125, -0.0408,  0.1001, -0.1531, -0.2249, -0.0830,  0.3696,
         -0.0687,  0.1874]], grad_fn=<AddmmBackward0>)


In [27]:
# Store the model's parameters (its state_dict) in a file named 'mlp.params'
torch.save(net.state_dict(),'mlp.params')

In [28]:
# Instantiate a second copy of the MLP and load the parameters stored in the file into it
clone = MLP()
# NOTE(review): torch.load unpickles the file, so only load parameter files you trust.
clone.load_state_dict(torch.load('mlp.params'))
# Switch the module to evaluation mode.
clone.eval()

MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
)

In [29]:
# Both models now hold the same parameters, so the same input X should give
# element-wise identical outputs.
Y_clone = clone(X)
print(Y_clone == Y)

tensor([[True, True, True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True]])
