In [5]:
import torch
from torch.utils import data
import math
import random
import numpy as np
import matplotlib.pyplot as plt
from torch import nn

In [12]:
# Two-layer perceptron built from stock layers: 20 inputs -> 256 hidden units -> 10 outputs.
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
# Random batch of 2 samples with 20 features each.
x = torch.rand(2, 20)
net(x)

tensor([[ 0.0238,  0.0926,  0.1272,  0.2369,  0.1306, -0.0186,  0.0208, -0.1681,
         -0.0055,  0.0674],
        [ 0.0641,  0.0415,  0.1095,  0.1629, -0.0276, -0.0547,  0.1061, -0.0833,
          0.0139, -0.0629]], grad_fn=<AddmmBackward>)

In [9]:
class myMlp(nn.Module):
    """Multilayer perceptron with one hidden layer (20 -> 256 -> 10).

    Same layer stack as ``nn.Sequential(nn.Linear(20, 256), nn.ReLU(),
    nn.Linear(256, 10))``, written as an explicit ``nn.Module`` subclass.
    """

    def __init__(self):
        super().__init__()
        # Hidden layer, its activation, then the output layer.
        self.fc1 = nn.Linear(20, 256)
        self.ac1 = nn.ReLU()
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        """Map ``x`` through fc1 -> ReLU -> fc2 and return the result."""
        hidden = self.ac1(self.fc1(x))
        return self.fc2(hidden)

In [10]:
# Keep the custom MLP under its own name: rebinding `net` here would replace the
# nn.Sequential model that later cells index with net[0] / net[2], and myMlp
# defines no indexing, so a top-to-bottom run would fail there.
mlp_net = myMlp()

In [21]:
def block1():
    """Build one 4 -> 8 -> 4 block: two Linear layers, each followed by a ReLU."""
    layers = [
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8, 4),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)

def block2():
    """Stack four separately constructed ``block1()`` blocks in one Sequential.

    The sub-blocks are registered under the names 'block 0' .. 'block 3'.
    """
    stacked = nn.Sequential()
    for i in range(4):
        # Add a network block to the Sequential container under an explicit name.
        stacked.add_module(f'block {i}', block1())
    return stacked
# The stacked blocks from block2() followed by a final Linear(4, 1) layer.
regnet = nn.Sequential(
    block2(),
    nn.Linear(4, 1),
)


In [20]:
# Show the nested module structure of regnet.
print(regnet)

Sequential(
  (0): Sequential(
    (block 0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
    (block 3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
      (3): ReLU()
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


In [28]:
def init_normal(m):
    """Initialise a Linear layer with normal(mean=0, std=0.01) weights and a zero bias.

    Written to be passed to ``nn.Module.apply``. Any module whose type is not
    exactly ``nn.Linear`` is left untouched.

    Args:
        m: module to (possibly) re-initialise in place.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.normal_(m.weight, mean=0, std=0.01)
    # Linear(..., bias=False) stores bias as None; zeros_ would fail on it.
    if m.bias is not None:
        nn.init.zeros_(m.bias)
# Run init_normal over regnet and its submodules so the Linear layers are re-initialised.
regnet.apply(init_normal)
# Inspect the weights of the first layer of the first block.
regnet[0][0][0].weight.data

tensor([[ 4.3461e-03, -2.3029e-02,  7.7349e-05,  1.0963e-02],
        [-2.2114e-03, -1.3230e-02,  8.9787e-03, -1.7506e-02],
        [ 8.9315e-03, -6.6447e-04, -5.7658e-03,  1.7067e-02],
        [ 1.1609e-02,  1.0928e-02,  8.7091e-03, -1.1998e-02],
        [ 2.2895e-05, -1.4469e-02,  5.7485e-03, -2.7559e-03],
        [ 8.6673e-03, -6.3756e-03, -1.8417e-02, -1.9887e-04],
        [ 1.4850e-03,  4.6963e-03, -3.4085e-03,  8.7827e-03],
        [-8.9264e-03, -3.3266e-03, -3.1490e-03,  4.1312e-03]])

In [29]:
def init_constant(m):
    """Set every weight of a Linear layer to 1 and its bias to 0.

    Written to be passed to ``nn.Module.apply``. Any module whose type is not
    exactly ``nn.Linear`` is left untouched.

    Args:
        m: module to (possibly) re-initialise in place.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.constant_(m.weight, 1)
    # Linear(..., bias=False) stores bias as None; zeros_ would fail on it.
    if m.bias is not None:
        nn.init.zeros_(m.bias)
# Apply the constant initialiser across net.
net.apply(init_constant)
# First row of the first layer's weight matrix; the net[0] indexing assumes net is an nn.Sequential.
net[0].weight.data[0]

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1.])

In [31]:
def xavier(m):
    """Re-initialise the weights of a Linear layer with ``nn.init.xavier_normal_``.

    Modules of any other type are skipped; the bias is not modified.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.xavier_normal_(m.weight)
    
def init_42(m):
    """Fill the weight matrix of a Linear layer with the constant 42.

    Modules of any other type are skipped; the bias is not modified.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.constant_(m.weight, 42)

# Individual layers can be given different initialisers: Xavier for net[0],
# the constant 42 for net[2]. Indexing assumes net is an nn.Sequential.
net[0].apply(xavier)
net[2].apply(init_42)
# Show the first weight row of net[0] and the full weight matrix of net[2].
print(net[0].weight.data[0])
print(net[2].weight.data)

tensor([-0.0012,  0.1158,  0.0320,  0.0170, -0.2020,  0.1006,  0.0561, -0.0948,
         0.0233,  0.2263, -0.0261, -0.0031, -0.0284, -0.0388,  0.1436, -0.0116,
         0.0562,  0.1971,  0.0664,  0.1213])
tensor([[42., 42., 42.,  ..., 42., 42., 42.],
        [42., 42., 42.,  ..., 42., 42., 42.],
        [42., 42., 42.,  ..., 42., 42., 42.],
        ...,
        [42., 42., 42.,  ..., 42., 42., 42.],
        [42., 42., 42.,  ..., 42., 42., 42.],
        [42., 42., 42.,  ..., 42., 42., 42.]])
