In [63]:
import os 
import time
import random
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets

import os 
import time
from datetime import timedelta

## 1 模型定义

In [2]:
"""
Pytorch中神经网络模块化接口nn的了解:

torch.nn是专门为神经网络设计的模块化接口。nn构建于autograd之上，可以用来定义和运行神经网络。
nn.Module是nn中十分重要的类,包含网络各层的定义及forward方法。
定义自已的网络：
    需要继承nn.Module类，并实现forward方法。
    一般把网络中具有可学习参数的层放在构造函数__init__()中，
    不具有可学习参数的层(如ReLU)可放在构造函数中，也可不放在构造函数中(而在forward中使用nn.functional来代替)
    
    只要在nn.Module的子类中定义了forward函数，backward函数就会被自动实现(利用Autograd)。
    在forward函数中可以使用任何Variable支持的函数，毕竟在整个pytorch构建的图中，是Variable在流动。还可以使用
    if,for,print,log等python语法.
    
    注：Pytorch基于nn.Module构建的模型中，只支持mini-batch的Variable输入方式，
    比如，只有一张输入图片，也需要变成 N x C x H x W 的形式：
    
    input_image = torch.FloatTensor(1, 28, 28)
    input_image = Variable(input_image)
    input_image = input_image.unsqueeze(0)   # 1 x 1 x 28 x 28
    
    二维卷积层, 输入的尺度是(N, C_in,H,W)，输出尺度（N,C_out,H_out,W_out）的计算方式
    torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)
    in_channels(int) – 输入信号的通道
    out_channels(int) – 卷积产生的通道
    kerner_size(int or tuple) - 卷积核的尺寸
    stride(int or tuple, optional) - 卷积步长
    padding(int or tuple, optional) - 输入的每一条边补充0的层数
    dilation(int or tuple, optional) – 卷积核元素之间的间距
    groups(int, optional) – 从输入通道到输出通道的阻塞连接数
    bias(bool, optional) - 如果bias=True，添加偏置
"""
class ShuffleBlock(nn.Module):
    def __init__(self,groups):
        super(ShuffleBlock,self).__init__()
        self.groups=groups
    def forward(self,x):#转置重组操作
        '''
            [N,C,H,W]->分组操作->[N,C/g,H,W]*g->转置重组->[N,g,H,W]*C/g
        '''
        N,C,H,W=x.size()
        g=self.groups
        return x.view(N,g,int(C/g),H,W).permute(0,2,1,3,4).contiguous().view(N,C,H,W)

class Bottleneck(nn.Module):
    def __init__(self, in_planes, out_planes, stride, groups):
        super(Bottleneck, self).__init__()
        self.stride = stride

        mid_planes = int(out_planes/4)
        g = 1 if in_planes==24 else groups
        self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_planes)
        self.shuffle1 = ShuffleBlock(groups=g)
        self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=int(mid_planes), bias=False)
        self.bn2 = nn.BatchNorm2d(mid_planes)
        self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        self.shortcut = nn.Sequential()
        if stride == 2:
            self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.shuffle1(out)
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        res = self.shortcut(x)
        out = F.relu(torch.cat([out,res], 1)) if self.stride==2 else F.relu(out+res)
        return out
    
class ShuffleNet(nn.Module):
    def __init__(self, cfg):
        super(ShuffleNet, self).__init__()
        out_planes = cfg['out_planes']
        num_blocks = cfg['num_blocks']
        groups = cfg['groups']

        self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(24)
        self.in_planes = 24    # in_planes
        self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups)
        self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups)
        self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups)
        self.linear = nn.Linear(out_planes[2], 2)

    def _make_layer(self, out_planes, num_blocks, groups):
        layers = []
        for i in range(num_blocks):
            stride = 2 if i == 0 else 1
            cat_planes = self.in_planes if i == 0 else 0
            layers.append(Bottleneck(self.in_planes, out_planes-cat_planes, stride=stride, groups=groups))
            self.in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

## 2 模型训练与评估类

In [20]:
def timer(func):
    def wrapper(*args, **kwargs):
        start = time.time()
        func(*args,**kwargs)
        end = time.time()
        cost = end - start
        print("Cost time: {} mins.".format(cost/60)) 
    return wrapper

class CNNModel(object):
    def __init__(self, model, train_data, test_data, model_dir, model_name,
                 best_valid_loss=float('inf'), n_split=0.9, batch_size=64, epochs=10):
        self.batch_size = batch_size
        self.epochs = epochs
        self.best_valid_loss = best_valid_loss
        self.model_dir = model_dir
        self.model_name = model_name
        self.n_split = n_split
        
        self.train_data =  train_data
        self.test_data = test_data
        
        self.device = self.get_device()
        self.init_data()
        self.init_iterator()
        self.init_model_path()
        
        self.model = self.init_model(model)
        self.optimizer = self.set_optimizer()
        self.criterion = self.set_criterion()
        
    def get_device(self):
        d = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        return d
    
    def init_data(self):
        n_train = int(len(self.train_data)*self.n_split)
        n_validation = len(self.train_data) - n_train
        self.train_data, self.valid_data = torch.utils.data.random_split(self.train_data, [n_train, n_validation])
    
    def init_iterator(self):
        self.train_iterator = torch.utils.data.DataLoader(self.train_data, shuffle=True, batch_size=self.batch_size)
        self.valid_iterator = torch.utils.data.DataLoader(self.valid_data, batch_size=self.batch_size)
        self.test_iterator = torch.utils.data.DataLoader(self.test_data, batch_size=self.batch_size)
        
    def set_optimizer(self):
        optimizer = optim.Adam(self.model.parameters()) 
        return optimizer
    
    def set_criterion(self):
        criterion = nn.CrossEntropyLoss()
        return criterion
    
    def init_model(self, model):
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
        model = model.to(self.device)
        return model
        
    def init_model_path(self):
        if not os.path.isdir(self.model_dir):
            os.makedirs(self.model_dir)
        self.model_path = os.path.join(self.model_dir, self.model_name)
        
    # 定义评估函数
    def accu(self, fx, y):
        pred = fx.max(1, keepdim=True)[1]
        correct = pred.eq(y.view_as(pred)).sum()  # 得到该batch的准确度
        acc = correct.float()/pred.shape[0]
        return acc

    def train(self):
        epoch_loss = 0   # 积累变量
        epoch_acc = 0    # 积累变量
        self.model.train()    # 该函数表示PHASE=Train

        for (x,y) in self.train_iterator:  # 拿去每一个minibatch
            x = x.to(self.device)
            y = y.to(self.device)
            self.optimizer.zero_grad()
            fx = self.model(x)           # 进行forward
            loss = self.criterion(fx,y)  # 计算Loss,train_loss
            type(loss)
            acc = self.accu(fx,y)    # 计算精确度，train_accu
            loss.backward()          # 进行BP
            self.optimizer.step()    # 统一更新模型
            epoch_loss += loss.item()
            epoch_acc += acc.item()

        return epoch_loss/len(self.train_iterator),epoch_acc/len(self.train_iterator)

    def evaluate(self, iterator):
        epoch_loss = 0
        epoch_acc = 0
        self.model.eval()
        with torch.no_grad():
            for (x,y) in iterator:
                x = x.to(self.device)
                y = y.to(self.device)
                fx = self.model(x)
                loss = self.criterion(fx,y)
                acc = self.accu(fx,y)
                epoch_loss += loss.item()
                epoch_acc += acc.item()
        return epoch_loss/len(iterator),epoch_acc/len(iterator)
    
    @timer
    def train_fit(self):
        info = 'Epoch:{0} | Train Loss:{1} | Train Acc:{2} | Val Loss:{3} | Val Acc:{4}'
        for epoch in range(self.epochs):
            train_loss, train_acc = self.train()
            valid_loss, valid_acc = self.evaluate(self.valid_iterator)
            if valid_loss < self.best_valid_loss:  # 如果是最好的模型就保存到文件夹
                self.best_valid_loss = valid_loss
                torch.save(self.model.state_dict(), self.model_path)
            print(info.format(epoch+1, train_loss, train_acc, valid_loss, valid_acc))
    
    def get_acc(self):
        self.model.load_state_dict(torch.load(self.model_path))
        test_loss, test_acc = self.evaluate(self.test_iterator)
        print('| Test Loss: {0} | Test Acc: {1} |'.format(test_loss,test_acc))


## 3 数据集的准备

In [4]:
tran_filenames = os.listdir('data/train')
datasetdir = os.path.join('./data')
traindir = os.path.join(datasetdir,'train2')
testdir = os.path.join(datasetdir,'test2')

batch_size = 24
epochs = 10
csv_name = "submission.csv"
trick = False

In [5]:
train_cat = filter(lambda x:x[:3]=='cat', tran_filenames)
train_dog = filter(lambda x:x[:3]=='dog', tran_filenames)

In [15]:
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
train_dataset = datasets.ImageFolder(traindir,transforms.Compose([
    transforms.RandomHorizontalFlip(), # 随机挑一些图镜像翻转
    transforms.RandomResizedCrop(32),  #224
    transforms.RandomHorizontalFlip(),  
    transforms.ToTensor(),
    normalize,
]))

test_dataset = datasets.ImageFolder(testdir, transforms.Compose([
#         transforms.Resize(256),
        transforms.CenterCrop(32),   #224
        transforms.ToTensor(),
        normalize,
    ]))

In [16]:
train_dataset

Dataset ImageFolder
    Number of datapoints: 25000
    Root Location: ./data/train2
    Transforms (if any): Compose(
                             RandomHorizontalFlip(p=0.5)
                             RandomResizedCrop(size=(32, 32), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BILINEAR)
                             RandomHorizontalFlip(p=0.5)
                             ToTensor()
                             Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                         )
    Target Transforms (if any): None

## 4 模型训练

In [26]:
epochs = 100
n_split = 0.9
batch_size = 32
model_dir = 'models'
best_valid_loss = float('inf')

model_name = "shufflenet_c2.pt"

cfg={
    'out_planes':[200, 400, 800],
    'num_blocks':[4, 8, 4],
    'groups': 2
}

model = ShuffleNet(cfg)

obj = CNNModel(model=model, 
               train_data=train_dataset, 
               test_data=test_dataset, 
               model_dir=model_dir, 
               model_name=model_name,
               best_valid_loss=best_valid_loss, 
               n_split=n_split, 
               batch_size=batch_size, 
               epochs=epochs)

In [22]:
# for name, parameters in model.named_parameters():   # 各层参数及具体数字
#     print('name: {}, param: {}'.format(name, parameters))
# for n, c in model.named_children():    # 各层名称与具体定义
#     print("name:{}, children:{}".format(n,c))

In [27]:
print(obj.model)

DataParallel(
  (module): ShuffleNet(
    (conv1): Conv2d(3, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(24, 44, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(44, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (shuffle1): ShuffleBlock()
        (conv2): Conv2d(44, 44, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=44, bias=False)
        (bn2): BatchNorm2d(44, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(44, 176, kernel_size=(1, 1), stride=(1, 1), groups=2, bias=False)
        (bn3): BatchNorm2d(176, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (shortcut): Sequential(
          (0): AvgPool2d(kernel_size=3, stride=2, padding=1)
        )
      )
      (1): Bottleneck(
        (conv1): C

In [28]:
obj.train_fit()

Epoch:1 | Train Loss:0.6817506957291202 | Train Acc:0.5847833806818182 | Val Loss:0.7120145913920825 | Val Acc:0.6127373417721519
Epoch:2 | Train Loss:0.650565601842986 | Train Acc:0.6271306818181818 | Val Loss:0.6404438173469109 | Val Acc:0.6348892405063291
Epoch:3 | Train Loss:0.6227682635865428 | Train Acc:0.6579811789772727 | Val Loss:0.5908296904231929 | Val Acc:0.6878955696202531
Epoch:4 | Train Loss:0.6038446096618745 | Train Acc:0.6721857244318182 | Val Loss:0.5807541291924971 | Val Acc:0.6910601265822784
Epoch:5 | Train Loss:0.5822103642011908 | Train Acc:0.6909623579545454 | Val Loss:0.5492817755741409 | Val Acc:0.7143987341772152
Epoch:6 | Train Loss:0.5616090063598346 | Train Acc:0.7079190340909091 | Val Loss:0.5735848044292836 | Val Acc:0.7108386075949367
Epoch:7 | Train Loss:0.5472854843617163 | Train Acc:0.7197265625 | Val Loss:0.5536691878415361 | Val Acc:0.7203322784810127
Epoch:8 | Train Loss:0.5318808886222541 | Train Acc:0.7310014204545454 | Val Loss:0.5485694065124

Epoch:64 | Train Loss:0.2470878683115271 | Train Acc:0.8873401988636364 | Val Loss:0.26485199712311164 | Val Acc:0.8801424050632911
Epoch:65 | Train Loss:0.2494010366446508 | Train Acc:0.8873401988636364 | Val Loss:0.26215022802352905 | Val Acc:0.8900316455696202
Epoch:66 | Train Loss:0.2423110963692042 | Train Acc:0.8915127840909091 | Val Loss:0.2587999675092818 | Val Acc:0.8840981012658228
Epoch:67 | Train Loss:0.24082251709610733 | Train Acc:0.8918678977272727 | Val Loss:0.27906003056824963 | Val Acc:0.8742088607594937
Epoch:68 | Train Loss:0.24317655877464198 | Train Acc:0.8916459517045454 | Val Loss:0.2606967370721358 | Val Acc:0.884493670886076
Epoch:69 | Train Loss:0.24142923661698165 | Train Acc:0.8896484375 | Val Loss:0.25895260717672636 | Val Acc:0.8777689873417721
Epoch:70 | Train Loss:0.2367617966109802 | Train Acc:0.8937766335227273 | Val Loss:0.2669787236218211 | Val Acc:0.877373417721519
Epoch:71 | Train Loss:0.23857786287871105 | Train Acc:0.8902698863636364 | Val Loss:

In [None]:
obj.get_acc()

In [27]:
# loss的内部是怎么计算的？
# acc的计算 8/64 8/64  16/128=(8/64+8/64)/2

In [61]:
def run_test(model, loader):#用来对test数据集进行predict的函数
    preds = []
    model.eval()
    start_time = time.time()
    for x, _ in loader:
        with torch.no_grad():
            x_var = x.cuda()
            scores = model(x_var)
            preds.append(scores)
    print('duration = %s\n' % timedelta(seconds=time.time() - start_time))
    return preds

def save_csv(preds, images, csv_name, clib=False):
    pred_soft = F.softmax(torch.cat(preds), dim=1).cpu().numpy()
    if clib:
        pred_soft = pred_soft.clip(min=0.005,max=0.995)
    pred_result = pred_soft[:, 1]
    results = zip(images, pred_result)
    idx = [(x[0].split('/')[-1]).split('.')[0] for x in results]
    #labels = [0 if x[1]<0.5 else 1 for x in results]
    res = pd.DataFrame.from_dict({
        'id': idx,
        'label': pred_result.tolist()
    })
    res = res.set_index('id')
    res.to_csv(csv_name)
    print("Save {} done.".format(csv_name))
    

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size = batch_size,
    shuffle=False,
    num_workers = 10)

In [58]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
shuffle_model = ShuffleNet(cfg)
shuffle_model = shuffle_model.to(device)

In [65]:
test_images = [os.path.join(testdir, 'test', f) for f in sorted(os.listdir(os.path.join(testdir,'test')))]

In [68]:
import numpy as np
import pandas as pd
preds = run_test(obj.model, test_loader)
save_csv(preds, test_images, csv_name, clib=trick)

duration = 0:00:26.489227

Save submission.csv done.


In [69]:
save_csv(preds, test_images, csv_name, clib=True)

Save submission.csv done.
