##### 结合归一化和正则化来优化网络模型结构，观察对比loss结果。
使用Dropout(p=0.1)

In [1]:
from sklearn.datasets import fetch_olivetti_faces
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import torch.nn as nn

In [2]:
class MyTorchNN(nn.Module):
    """Fully connected classifier with batch normalisation and dropout.

    The network maps 4096 input features to 40 output logits through two
    hidden layers (2048 and 512 units). Each hidden layer is followed by
    ``BatchNorm1d``, ``Tanh`` and ``Dropout``; the output layer is a plain
    linear projection.

    Args:
        dp: Dropout probability handed to ``nn.Dropout``.
    """

    def __init__(self, dp):
        super().__init__()
        n_in, n_hidden1, n_hidden2, n_out = 4096, 2048, 512, 40
        # Submodules are registered in this exact order so the printed
        # module listing and the parameter initialisation sequence are stable.
        self.linear1 = nn.Linear(n_in, n_hidden1)
        self.batchNorm1 = nn.BatchNorm1d(n_hidden1)
        self.linear2 = nn.Linear(n_hidden1, n_hidden2)
        self.batchNorm2 = nn.BatchNorm1d(n_hidden2)
        self.linear3 = nn.Linear(n_hidden2, n_out)
        # A single activation and a single dropout module are shared by both
        # hidden stages.
        self.act = nn.Tanh()
        self.dropout = nn.Dropout(dp)

    def forward(self, input_tensor):
        """Return the output of the final linear layer for ``input_tensor``."""
        hidden_stages = (
            (self.linear1, self.batchNorm1),
            (self.linear2, self.batchNorm2),
        )
        hidden = input_tensor
        for fc, norm in hidden_stages:
            # linear -> normalisation -> activation -> regularisation
            hidden = self.dropout(self.act(norm(fc(hidden))))
        return self.linear3(hidden)

In [3]:
# Load the Olivetti faces dataset, using ./face_data as scikit-learn's data
# directory.
olivetti_faces = fetch_olivetti_faces(data_home='./face_data')
# Pair each feature tensor with its label tensor so the resulting list can be
# split and wrapped in a DataLoader as (data, label) samples.
datasets = list(zip(torch.tensor(olivetti_faces.data), torch.tensor(olivetti_faces.target)))

In [4]:
# Hold out 20% of the samples for evaluation. No random_state is passed, so
# the split is not pinned to a fixed seed here.
train_data, test_data = train_test_split(datasets, test_size=0.2)
# Training batches of 32 samples, reshuffled every epoch.
train_dl = DataLoader(train_data, shuffle=True, batch_size=32)

In [9]:
# Hyper-parameters for the dropout comparison experiment.
LR = 0.01  # learning rate given to the SGD optimizer
EPOCHS = 150  # number of passes over the training loader
DP = [0.1, 0.3, 0.5, 0.7]  # dropout probabilities to try, one model each

In [6]:
def model_train_test(
    model,
    *,
    train_loader=None,
    test_dataset=None,
    lr=None,
    epochs=None,
):
    """Train ``model`` with SGD and cross-entropy, then report test accuracy.

    The model is printed, trained for ``epochs`` passes over ``train_loader``
    and evaluated on ``test_dataset`` in batches of 32. Every 10th epoch the
    loss of the last mini-batch of that epoch is printed (not an epoch
    average). The accuracy is printed and also returned so that runs with
    different settings can be compared programmatically.

    Args:
        model: The ``nn.Module`` to train in place; it is left in eval mode.
        train_loader: Iterable of ``(x, y)`` training batches. Defaults to the
            notebook-level ``train_dl``.
        test_dataset: Dataset of ``(x, y)`` samples used for evaluation.
            Defaults to the notebook-level ``test_data``.
        lr: SGD learning rate. Defaults to the notebook-level ``LR``.
        epochs: Number of training epochs. Defaults to the notebook-level
            ``EPOCHS``.

    Returns:
        Fraction of correctly classified test samples as a float, or ``nan``
        when the test dataset is empty.
    """
    # The notebook globals are looked up lazily, only when no override is
    # given, so the function can also be used without them being defined.
    train_loader = train_dl if train_loader is None else train_loader
    test_dataset = test_data if test_dataset is None else test_dataset
    lr = LR if lr is None else lr
    epochs = EPOCHS if epochs is None else epochs

    print(model)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    model.train()  # batch normalisation and dropout are active
    for epoch in range(1, epochs + 1):
        loss = None
        for x, y in train_loader:
            loss = loss_fn(model(x), y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # An empty loader produces no loss; skip the report instead of failing.
        if epoch % 10 == 0 and loss is not None:
            print(f"Epoch {epoch} loss: {loss.item()}")

    test_dl = DataLoader(test_dataset, batch_size=32)
    correct = 0
    total = 0
    model.eval()  # batch normalisation and dropout are inactive
    with torch.no_grad():
        for x, y in test_dl:
            pred = model(x).argmax(dim=1)
            total += y.size(0)
            correct += (pred == y).sum().item()

    # Guard against an empty test set rather than dividing by zero.
    accuracy = correct / total if total else float("nan")
    print(accuracy)
    return accuracy

In [10]:
# Train and evaluate one freshly constructed network per dropout probability
# so the printed losses and accuracies can be compared across settings.
for dp in DP:
    model = MyTorchNN(dp)
    model_train_test(model)

MyTorchNN(
  (linear1): Linear(in_features=4096, out_features=2048, bias=True)
  (batchNorm1): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (linear2): Linear(in_features=2048, out_features=512, bias=True)
  (batchNorm2): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (linear3): Linear(in_features=512, out_features=40, bias=True)
  (act): Tanh()
  (dropout): Dropout(p=0.1, inplace=False)
)
Epoch 10 loss: 0.2250312864780426
Epoch 20 loss: 0.09079863876104355
Epoch 30 loss: 0.05426532030105591
Epoch 40 loss: 0.04481801390647888
Epoch 50 loss: 0.034069597721099854
Epoch 60 loss: 0.02425672672688961
Epoch 70 loss: 0.02534124255180359
Epoch 80 loss: 0.02078363485634327
Epoch 90 loss: 0.019974078983068466
Epoch 100 loss: 0.014581501483917236
Epoch 110 loss: 0.014908287674188614
Epoch 120 loss: 0.011505710892379284
Epoch 130 loss: 0.011984811164438725
Epoch 140 loss: 0.013230388052761555
Epoch 150 loss: 0.01129514724016