##### 尝试不同optimizer对模型进行训练，观察对比loss结果。
使用RMSprop优化器，训练到第50个epoch左右时，打印出的训练loss（该epoch最后一个batch的loss）可以降到0.0001左右。

In [1]:
from sklearn.datasets import fetch_olivetti_faces
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import torch.nn as nn

In [2]:
class MyTorchNN(nn.Module):
    """Fully connected classifier with batch norm, tanh and dropout.

    Each of the two hidden stages applies Linear -> BatchNorm1d -> Tanh ->
    Dropout; a final Linear layer emits one raw score (logit) per class.

    Args:
        in_features: Length of each flattened input sample.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        num_classes: Number of scores produced per sample.
        dropout: Drop probability used after both hidden activations.

    The defaults reproduce the original fixed 4096 -> 2048 -> 512 -> 40
    layout with a dropout probability of 0.1.
    """

    def __init__(
        self,
        in_features: int = 4096,
        hidden1: int = 2048,
        hidden2: int = 512,
        num_classes: int = 40,
        dropout: float = 0.1,
    ) -> None:
        super().__init__()
        self.linear1 = nn.Linear(in_features, hidden1)
        self.batchNorm1 = nn.BatchNorm1d(hidden1)
        self.linear2 = nn.Linear(hidden1, hidden2)
        self.batchNorm2 = nn.BatchNorm1d(hidden2)
        self.linear3 = nn.Linear(hidden2, num_classes)
        # The activation and dropout modules hold no learned state, so one
        # instance of each is reused for both hidden stages.
        self.act = nn.Tanh()
        self.dropout = nn.Dropout(dropout)

    def forward(self, input_tensor):
        """Return class logits for a batch of flattened samples.

        Args:
            input_tensor: Batch whose last dimension equals ``in_features``.

        Returns:
            Tensor with ``num_classes`` scores per sample (no softmax applied).
        """
        out = self.linear1(input_tensor)
        out = self.batchNorm1(out)  # normalisation
        out = self.act(out)
        out = self.dropout(out)  # regularisation
        out = self.linear2(out)
        out = self.batchNorm2(out)  # normalisation
        out = self.act(out)
        out = self.dropout(out)  # regularisation
        final = self.linear3(out)
        return final

In [3]:
# Load the Olivetti faces (using ./face_data as the data directory) and pair
# each row of `.data` with the matching entry of `.target` as (sample, label).
olivetti_faces = fetch_olivetti_faces(data_home='./face_data')
datasets = list(
    zip(
        torch.tensor(olivetti_faces.data),
        torch.tensor(olivetti_faces.target),
    )
)

In [4]:
# Hold out 20% of the samples for evaluation. A fixed random_state makes the
# split reproducible across notebook runs, and stratifying on the labels keeps
# the classes evenly represented in both subsets, so the optimizer comparison
# is not skewed by a lucky or unlucky split.
train_data, test_data = train_test_split(
    datasets,
    test_size=0.2,
    random_state=42,
    stratify=olivetti_faces.target,
)
# Training batches of 32, reshuffled every epoch.
train_dl = DataLoader(train_data, batch_size=32, shuffle=True)

In [5]:
# Hyperparameters shared by every optimizer run below.
LR = 1e-2  # learning rate passed to each optimizer
EPOCHS = 150  # number of passes over the training loader

In [10]:
def model_train_test(model, optimizer):
    """Train ``model`` with ``optimizer``, then print its test accuracy.

    Runs ``EPOCHS`` passes over the module-level ``train_dl`` with
    cross-entropy loss, printing the loss of the last mini-batch every tenth
    epoch. The model is then switched to eval mode and the fraction of
    correctly classified samples from the module-level ``test_data`` is
    printed.

    Args:
        model: Network to train; it is called directly on each input batch.
        optimizer: Optimizer that is zeroed and stepped once per batch.

    Returns:
        None. All results are printed.
    """
    print("======>", type(optimizer))
    criterion = nn.CrossEntropyLoss()

    # Training mode so that batch norm and dropout are active.
    model.train()
    for epoch in range(1, EPOCHS + 1):
        for features, labels in train_dl:
            batch_loss = criterion(model(features), labels)
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()
        if epoch % 10 == 0:
            # Reports only the final mini-batch of the epoch, not an average.
            print(f"Epoch {epoch} loss: {batch_loss.item()}")

    # Eval mode so that batch norm and dropout stop behaving as in training.
    model.eval()
    hits, seen = 0, 0
    with torch.no_grad():
        for features, labels in DataLoader(test_data, batch_size=32):
            # Index of the largest logit along dim 1 is the predicted class.
            _, predicted = model(features).max(dim=1)
            seen += labels.size(0)
            hits += (predicted == labels).sum().item()
    print(hits / seen)

In [12]:
# 优化器使用SGD
# Optimizer under test: SGD
model1 = MyTorchNN()
optimizer1 = torch.optim.SGD(params=model1.parameters(), lr=LR)
model_train_test(model=model1, optimizer=optimizer1)

Epoch 10 loss: 0.20887590944766998
Epoch 20 loss: 0.11782942712306976
Epoch 30 loss: 0.05995038151741028
Epoch 40 loss: 0.038809072226285934
Epoch 50 loss: 0.04247087240219116
Epoch 60 loss: 0.02686908468604088
Epoch 70 loss: 0.02056385949254036
Epoch 80 loss: 0.02187480591237545
Epoch 90 loss: 0.020156994462013245
Epoch 100 loss: 0.014928937889635563
Epoch 110 loss: 0.014847781509160995
Epoch 120 loss: 0.013830840587615967
Epoch 130 loss: 0.011224251240491867
Epoch 140 loss: 0.010266399942338467
Epoch 150 loss: 0.009852776303887367
1.0


In [22]:
# 优化器使用RMSprop
# Optimizer under test: RMSprop
model2 = MyTorchNN()
optimizer2 = torch.optim.RMSprop(params=model2.parameters(), lr=LR)
model_train_test(model=model2, optimizer=optimizer2)

Epoch 10 loss: 0.035026177763938904
Epoch 20 loss: 0.2037833333015442
Epoch 30 loss: 0.0005376567714847624
Epoch 40 loss: 0.0001555976050440222
Epoch 50 loss: 0.00010971107985824347
Epoch 60 loss: 0.00017484942509327084
Epoch 70 loss: 3.581593409762718e-05
Epoch 80 loss: 0.03440999239683151
Epoch 90 loss: 0.0010026624659076333
Epoch 100 loss: 0.00044648515176959336
Epoch 110 loss: 0.0013121808879077435
Epoch 120 loss: 0.00027923195739276707
Epoch 130 loss: 3.217717676307075e-05
Epoch 140 loss: 0.009920645505189896
Epoch 150 loss: 0.0008343622321262956
0.8125


In [20]:
# 优化器使用Adam
# Optimizer under test: Adam
model3 = MyTorchNN()
optimizer3 = torch.optim.Adam(params=model3.parameters(), lr=LR)
model_train_test(model=model3, optimizer=optimizer3)

Epoch 10 loss: 0.00620283093303442
Epoch 20 loss: 0.0033291650470346212
Epoch 30 loss: 0.0017179101705551147
Epoch 40 loss: 0.0010761336889117956
Epoch 50 loss: 0.0017936397343873978
Epoch 60 loss: 0.0006151125999167562
Epoch 70 loss: 0.0002807878772728145
Epoch 80 loss: 0.00015354283095803112
Epoch 90 loss: 0.00013325491454452276
Epoch 100 loss: 0.00025626429123803973
Epoch 110 loss: 0.00010698313417378813
Epoch 120 loss: 0.016146689653396606
Epoch 130 loss: 0.17505022883415222
Epoch 140 loss: 4.6006671254872344e-06
Epoch 150 loss: 3.516989454510622e-05
0.95


In [21]:
# 优化器使用Adamw
# Optimizer under test: AdamW
model4 = MyTorchNN()
optimizer4 = torch.optim.AdamW(params=model4.parameters(), lr=LR)
model_train_test(model=model4, optimizer=optimizer4)

Epoch 10 loss: 0.051093075424432755
Epoch 20 loss: 0.01029511820524931
Epoch 30 loss: 0.0007972782477736473
Epoch 40 loss: 0.000995403272099793
Epoch 50 loss: 0.0019867916125804186
Epoch 60 loss: 0.00048064024304039776
Epoch 70 loss: 0.000287108268821612
Epoch 80 loss: 0.00026440314832143486
Epoch 90 loss: 0.0022210844326764345
Epoch 100 loss: 0.00025398394791409373
Epoch 110 loss: 0.0005125068710185587
Epoch 120 loss: 9.761198452906683e-05
Epoch 130 loss: 0.00014167251356411725
Epoch 140 loss: 0.00040922677726484835
Epoch 150 loss: 0.00010953166201943532
0.95
