In [None]:
import os

import torch
import torchvision
import torchvision.datasets as datasets
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import transforms

In [None]:
class AlexNet(nn.Module):
    """AlexNet-style CNN: five convolutional layers followed by three linear layers.

    ``forward`` flattens the feature maps to 256 * 6 * 6 values per sample,
    which is the size the convolutional stack produces for 3 x 227 x 227 inputs.

    Args:
        class_num: Number of output classes (width of the last linear layer).
    """

    def __init__(self, class_num):
        super().__init__()

        # Output size of a conv/pool layer: (in - kernel + 2 * padding) / stride + 1.
        conv_stack = [
            # 3 x 227 x 227 -> 96 x 55 x 55      ((227 - 11) / 4 + 1 = 55)
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=(11, 11), stride=4),
            # inplace=True overwrites the input tensor instead of allocating a new one.
            nn.ReLU(inplace=True),
            # 96 x 55 x 55 -> 96 x 27 x 27       ((55 - 3) / 2 + 1 = 27)
            nn.MaxPool2d(kernel_size=(3, 3), stride=2),
            # 96 x 27 x 27 -> 256 x 27 x 27      (5x5 kernel, padding 2 keeps the size)
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=(5, 5), padding=2),
            nn.ReLU(inplace=True),
            # 256 x 27 x 27 -> 256 x 13 x 13     ((27 - 3) / 2 + 1 = 13)
            nn.MaxPool2d(kernel_size=(3, 3), stride=2),
            # Three 3x3 convolutions with padding 1; the spatial size stays 13 x 13.
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=(3, 3), padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=(3, 3), padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), padding=1),
            nn.ReLU(inplace=True),
            # 256 x 13 x 13 -> 256 x 6 x 6 = 9216 features per sample
            nn.MaxPool2d(kernel_size=(3, 3), stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        flat_features = 6 * 6 * 256
        hidden_units = 4096
        dense_stack = [
            nn.Dropout(),
            nn.Linear(flat_features, hidden_units),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(hidden_units, class_num),
        ]
        self.classifier = nn.Sequential(*dense_stack)

    def forward(self, x):
        """Return the classifier output, one row of ``class_num`` scores per sample."""
        feature_maps = self.features(x)
        # Author's debugging note: printing the size here showed [8, 256, 6, 6]
        # rather than [32, 256, 6, 6] on a 4-device machine (presumably because
        # nn.DataParallel hands each replica a slice of the batch).
        flattened = feature_maps.view(feature_maps.size(0), 256 * 6 * 6)
        return self.classifier(flattened)

In [None]:
def accu(out, y):
    """Return the fraction of samples whose highest-scoring class equals the label.

    Args:
        out: Tensor of scores; the predicted class is the arg-max along dim 1.
        y: Tensor of target class indices, one per row of ``out``.

    Returns:
        Accuracy for this batch as a Python float in [0, 1].
    """
    # max over dim 1 returns (values, indices); the indices are the predicted classes.
    predicted = out.max(dim=1)[1]
    # .item() pulls the count out of the 0-dim tensor as a plain Python number.
    n_correct = (predicted == y).sum().item()
    return n_correct / y.shape[0]

# Train for a single epoch.
def train(model, device, train_loader, criterion, optimizer):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Network to train; it is switched to training mode.
        device: Device every batch is moved to before the forward pass.
        train_loader: Iterable of ``(inputs, labels)`` batches that supports ``len()``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer whose ``zero_grad()`` / ``step()`` apply the update.

    Returns:
        ``(mean_loss, mean_accuracy)`` as Python floats, each averaged over the
        number of batches.
    """
    model.train()
    train_loss = 0.0
    train_acc = 0.0
    for x, y in train_loader:
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()
        out = model(x)  # model outputs, one score per class
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()
        # Accumulate a plain float: adding the loss tensor itself would keep
        # autograd history alive across iterations and return a tensor.
        train_loss += loss.item()
        train_acc += accu(out, y)
    return train_loss / len(train_loader), train_acc / len(train_loader)

# 测试
def test(model, device, test_loader, criterion, optimizer):
    model.eval()
    test_loss = 0.0
    test_acc = 0.0
    for x, y in test_loader:
        x = x.to(device)
        y = y.to(device)
        out = model(x)               # 各个分类的概率
        loss = criterion(out, y)
        test_loss += loss
        acc = accu(out, y)
        test_acc += acc
    return test_loss/len(test_loader), test_acc/len(test_loader)

In [None]:
batch_size = 32  # samples per mini-batch
epochs = 1  # number of passes over the training data

In [None]:
# Per-channel mean / std used for normalisation. Per the original note these
# come from the training set; the same values are applied at training and
# evaluation time.
CIFAR10_MEAN = (0.49139968, 0.48215827, 0.44653124)
CIFAR10_STD = (0.24703233, 0.24348505, 0.26158768)

# Training pipeline: random flip / rotation / padded crop on the 32-pixel image,
# then upscale to 227 before converting to a normalised tensor.
data_trans = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomCrop(32, padding=3),
    transforms.Resize(227),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])

# Evaluation pipeline: no random augmentation. Resize runs before ToTensor,
# in the same order as the training pipeline, so both resize the same kind of
# input (the image as loaded) rather than one resizing a tensor.
data_test = transforms.Compose([
    transforms.Resize(227),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])

In [None]:
# The training split gets the augmenting pipeline. The held-out test split gets
# the deterministic one, so test metrics are not perturbed by random flips,
# rotations and crops.
train_data = datasets.CIFAR10('data', train=True, download=True, transform=data_trans)
test_data = datasets.CIFAR10('data', train=False, download=True, transform=data_test)

In [None]:
# Hold out 10% of the training set for validation.
n_train = int(len(train_data) * 0.9)
n_valid = len(train_data) - n_train
# Seed the split so the same samples land in train / validation on every run;
# otherwise validation results are not comparable between runs.
split_generator = torch.Generator().manual_seed(42)
train_datasets, valid_datasets = torch.utils.data.random_split(
    train_data, [n_train, n_valid], generator=split_generator
)
# NOTE(review): both subsets wrap train_data, so validation batches go through
# whatever transform train_data was constructed with.

# Only the training loader shuffles; evaluation order does not matter.
train_loader = DataLoader(train_datasets, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_datasets, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [None]:
# Prefer the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AlexNet(10)
# With more than one GPU, wrap the model so each batch is split across devices.
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)
# Wrapped or not, the model is moved to the selected device.
model = model.to(device)

In [None]:
# Cross-entropy loss on the model outputs, optimised with plain SGD (lr = 0.01).
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [None]:
model_path = './models/alexnet-sifar10.pth'
# torch.save does not create missing parent directories, so make sure the
# checkpoint folder exists before the first save.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
best_valid_loss = float("inf")
info = 'Epoch:{0}|Train Loss:{1}|Train Acc:{2}|Val Loss:{3}|Val Acc:{4}'
for epoch in range(epochs):
    train_loss, train_acc = train(model, device, train_loader, criterion, optimizer)
    valid_loss, valid_acc = test(model, device, valid_loader, criterion, optimizer)
    # Work with plain floats so the comparison and the log line do not depend
    # on whether the helpers hand back tensors or numbers.
    train_loss = float(train_loss)
    valid_loss = float(valid_loss)
    # Keep only the weights with the lowest validation loss seen so far.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), model_path)
    print(info.format(epoch+1, train_loss, train_acc, valid_loss, valid_acc))

## 图像增强
### torchvision.transforms 包括所有图像增强的方法 。
- Resize（旧版本中名为 Scale），对图片的尺寸进行缩小和放大;
- RandomRotation 对图片随机旋转；
- CenterCrop，对图像正中心进行给定大小的裁剪;
- RandomCrop，对图片进行给定大小的随机裁剪;
- RandomHorizontalFlip，对图片进行概率为0.5的随机水平翻转;
- RandomResizedCrop（旧版本中名为 RandomSizedCrop），首先对图片进行随机尺寸的裁剪，然后对裁剪的图片进行一个随机比例的缩放，最后将图片变成给定的大小，这在Inception Net 中比较流行;
- 最后一个是Pad，对图片进行边界零填充。

上面介绍了 PyTorch 内置的一些图像增强的方法，还有更多的增强方法见[transforms的二十二个方法](https://zhuanlan.zhihu.com/p/53367135)，也可以使用OpenCV或者PIL等第三方图形库实现。在网络的训练中图像增强是一种常见、默认的做法，对许多任务进行图像增强之后都能够在一定程度上提升任务的准确率。

- 本篇幅参考[经典CNN网络 - AlexNet总结](https://juejin.im/post/5ad173b651882555731c8f9b)

## AlexNet优点/AlexNet高性能的解释
- 非线性激活函数：ReLU
- 防止过拟合的方法：Dropout，Data augmentation
- 大数据训练：百万级ImageNet图像数据
- 其他：GPU实现，LRN归一化层的使用

## SGD与Adam
- 有时候Adam不收敛，效果很差