<a href="https://colab.research.google.com/github/veager/AudioCaption/blob/main/Codes/PyTorch-Tutorial/PyTorch-%E6%AD%A3%E5%88%99%E5%8C%96.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

PyTorch-正则化.ipynb

参考资料：

- 博客：PyTorch-正则化.ipynb，[地址](https://www.cnblogs.com/veager/articles/16305187.html)

- GitHub：Codes/PyTorch-Tutorial/PyTorch-正则化.ipynb

# 0 加载数据

In [3]:
import time
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F

## 0.1 加载 Iris 数据集

In [4]:
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import MinMaxScaler

data = load_diabetes()
X = data.data
Y = data.target

# 将输入输出数据归一化到 [0, 1] 之间
scaler_X = MinMaxScaler().fit(X)
scaler_Y = MinMaxScaler().fit(np.expand_dims(Y, axis=1))

Xs = scaler_X.transform(X)
Ys = scaler_Y.transform(np.expand_dims(Y, axis=1))

print(Xs.shape, Ys.shape)
print(Xs[:5], Ys[:5])
print(pd.DataFrame(Xs).describe(), pd.DataFrame(Ys).describe())

# 将数据转换为 tensor 类型 
Xs_tensor = torch.tensor(Xs, dtype=torch.float)
Ys_tensor = torch.tensor(Ys, dtype=torch.float)

(442, 10) (442, 1)
[[0.66666667 1.         0.58264463 0.54929577 0.29411765 0.25697211
  0.20779221 0.28208745 0.56221737 0.43939394]
 [0.48333333 0.         0.14876033 0.35211268 0.42156863 0.30677291
  0.62337662 0.14104372 0.22244301 0.16666667]
 [0.88333333 1.         0.51652893 0.43661972 0.28921569 0.25896414
  0.24675325 0.28208745 0.49658437 0.40909091]
 [0.08333333 0.         0.30165289 0.30985915 0.49509804 0.44721116
  0.23376623 0.42313117 0.57293604 0.46969697]
 [0.51666667 0.         0.20661157 0.54929577 0.46568627 0.41733068
  0.38961039 0.28208745 0.36236911 0.33333333]] [[0.39252336]
 [0.15576324]
 [0.36137072]
 [0.56386293]
 [0.34267913]]
                0           1           2           3           4           5  \
count  442.000000  442.000000  442.000000  442.000000  442.000000  442.000000   
mean     0.491968    0.468326    0.346107    0.459818    0.451668    0.367725   
std      0.218484    0.499561    0.182567    0.194806    0.169647    0.151460   
min      0

## 0.2 全局参数设置

In [5]:
# 样本信息，划分的数据集
N_SAMPLE = Xs_tensor.size()[0]              # 样本总数
N_TRAIN = int(N_SAMPLE * 0.7)               # 训练样本数
N_VALID = int(N_SAMPLE * 0.2)               # 验证样本数
N_TEST = N_SAMPLE - N_TRAIN - N_VALID       # 测试样本数


# 训练过程超参数设置
BATCH_SIZE = 64
N_EPOCH = 1000
LEARNING_RATE = 0.05


# 神经网络模型参数
HIDDEN_DIM = 4
INPUT_DIM = Xs_tensor.size()[1]     # sizes of input data and output data
OUTPUT_DIM = Ys_tensor.size()[1]
print("NN Structure:", INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM)


# 设置 device，如果 GPU 可用，则使用
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print("device:", DEVICE)

NN Structure: 10 4 1
device: cuda:0


## 0.3 创建结构化数据

In [6]:
from torch.utils.data import TensorDataset, Dataset, random_split, DataLoader
from torch.utils.data.dataloader import default_collate

# DataSet 类
ds = TensorDataset(Xs_tensor, Ys_tensor)

# split training, validation, testing data
ds_train, ds_vaild, ds_test = random_split(ds, lengths=[N_TRAIN, N_VALID, N_TEST])
print(len(ds_train), len(ds_vaild), len(ds_test))


# DataLoader of train data, valid data, test data
dl_train = DataLoader(ds_train, batch_size = BATCH_SIZE, shuffle = True,
    collate_fn = lambda x: tuple(x_.to(DEVICE) for x_ in default_collate(x))
    )

dl_valid = DataLoader(ds_vaild, batch_size = BATCH_SIZE, # default shuffle = False,
    collate_fn = lambda x: tuple(x_.to(DEVICE) for x_ in default_collate(x))
    )

dl_test = DataLoader(ds_test, batch_size = BATCH_SIZE, # default shuffle = False,
    collate_fn = lambda x: tuple(x_.to(DEVICE) for x_ in default_collate(x))
    )

309 88 45


## 0.4 定义模型

In [7]:
# 方式 2: 全部使用 层类（nn.Module 类）
class BPNNModeler2(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):

        super(BPNNModeler2, self).__init__()

        # Layer 1
        self.layer1_linear = nn.Linear(input_dim, hidden_dim)
        self.layer1_sigmoid = nn.Sigmoid()
        # Layer 2
        self.layer2_linear = nn.Linear(hidden_dim, output_dim)
        self.layer2_sigmoid = nn.Sigmoid()
        # Output
        self.layer2_flattern = nn.Flatten(0, -1)
    
    # 正向传播
    def forward(self, x):
        # Layer 1
        out_layer1 = self.layer1_linear(x)
        out_layer1 = self.layer1_sigmoid(out_layer1)
        # Layer 2
        out_layer2 = self.layer2_linear(out_layer1)
        out_layer2 = self.layer2_sigmoid(out_layer2)
        # Output
        out = self.layer2_flattern(out_layer2)
        return out
# ==============================================================================

### 0.4.1 初始化模型

In [8]:
# 定义模型
model = BPNNModeler2(input_dim = INPUT_DIM, hidden_dim = HIDDEN_DIM, output_dim = OUTPUT_DIM)
model = model.to(DEVICE)

## 0.5 定义损失函数

In [9]:
# 定义 损失函数 MSE 损失
def loss_func(model_out, target, reduction='mean'):
    loss = F.mse_loss(model_out, target, reduction=reduction) 
    return loss

# 1 正则化

## 1.1 权重衰减 Weight Decay

## 1.2 L1 和 L2 正则化

定义正则化函数

In [22]:
def regularization_loss(model, factor, p=2):
    '''
    regularization_loss，只惩罚含有 weight 的参数
    model: 传入模型
    factor: 正则化惩罚系数
    p: p-范数
    '''
    reg_loss = torch.tensor(0.)

    for name, w in model.named_parameters():
        print(w.requires_grad)
        if 'weight' in name:    # 只对 参数名 含有 weight 的参数 正则化
            reg_loss = reg_loss + torch.norm(w, p)
    
    reg_loss = factor * reg_loss
    return reg_loss

In [24]:
N_EPOCH = 100
LEARNING_RATE = 0.01

# Step 1: 定义优化器
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)

for epoch in range(N_EPOCH):

    train_total_loss = 0.
    fun_total_loss = 0.
    reg_total_loss = 0.

    for i, (X_batch, Y_batch) in enumerate(dl_train): 

        optimizer.zero_grad()    # Step 2: 参数梯度归零

        out = model(X_batch)  

        fun_loss = loss_func(out, Y_batch.flatten(), reduction='sum')  # 函数损失
        reg_loss = regularization_loss(model, factor=0.1)  # 正则化损失
        print(reg_loss.requires_grad)
        train_loss = fun_loss + reg_loss       # 总损失

        train_loss.backward()    # Step 3: 反向传播，计算梯度
        optimizer.step()         # Step 4: 执行一步优化，更新参数

        fun_total_loss += fun_loss.item()
        reg_total_loss += reg_loss.item()
        train_total_loss += train_loss.item()

    if (epoch + 1) % 1 == 0:
        # Print Traing information
        print('Epoch: {0:>4}, Loss: {1:>10.5f}, Reg Loss: {2:>10.8f}, Total Loss: {3:>10.8f}'.format(
            epoch+1, 
            fun_total_loss, 
            reg_total_loss, 
            train_total_loss))

True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
Epoch:    1, Loss:    8.60657, Reg Loss: 3.31027704, Total Loss: 11.91684401
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
Epoch:    2, Loss:    8.60537, Reg Loss: 3.30920142, Total Loss: 11.91457534
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
Epoch:    3, Loss:    8.61193, Reg Loss: 3.31001467, Total Loss: 11.92194891
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
Epoch:    4, Loss:    8.59119, Reg Loss: 3.30975384, Total Loss: 11.90093935
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
Tr