## 试题说明

### 任务描述
近年来，随着人工智能的发展，其在语音识别、自然语言处理、图像与视频分析等诸多领域取得了巨大成功。随着政府对环境保护的呼吁，垃圾分类成为一个亟待解决的问题，本次竞赛将聚焦在垃圾图片的分类，利用人工智能技术，对居民生活垃圾图片进行检测，找出图片中有哪些类别的垃圾。
要求参赛者给出一个算法或模型，对于给定的图片，检测出图片中的垃圾类别。给定图片数据，选手据此训练模型，为每张测试数据预测出最正确的类别。

### 数据说明
本竞赛所用训练和测试图片均来自生活场景。总共四十个类别，类别和标签对应关系在训练集中的dict文件里。图片中垃圾的类别，格式是“一级类别/二级类别”，二级类别是具体的垃圾物体类别，也就是训练数据中标注的类别，比如一次性快餐盒、果皮果肉、旧衣服等。一级类别有四种类别：可回收物、厨余垃圾、有害垃圾和其他垃圾。

数据文件包括训练集(有标注)和测试集(无标注)。训练集的所有图片分别保存在train文件夹下名为0～39的40个子文件夹中，文件夹名即类别标签；测试集共有400张待分类的垃圾图片，保存在test文件夹下。testpath.txt保存了所有测试集文件的名称，每行一个文件名（格式为：name+\n）。

![](https://ai-studio-static-online.cdn.bcebos.com/cec625e7b61d459fa13ed2b822817bfd57030ad23c524f1ab93b3e7108d75d78)
![](https://ai-studio-static-online.cdn.bcebos.com/df06e2956f2044fab21fde2a9602abfdb9d4720861d54c66b2e079f3e64000a7)

### 提交答案
考试提交，需要提交**模型代码项目版本**和**结果文件**。结果文件为TXT文件格式，命名为model_result.txt，文件内的字段需要按照指定格式写入。

提交结果的格式如下：
1. 每个类别的行数和测试集原始数据行数应一一对应，不可乱序。
2. 输出结果应检查是否为400行数据，否则成绩无效。
3. 输出结果文件命名为model_result.txt，一行一个类别标签（数字）


样例如下：

```
35
3
2
37
10
3
26
4
34
21
```


In [1]:
### 开始答题，解压数据
### 数据集所在目录有可能发生改变，将"data/data38817/"替换为实际的数据集文件所在目录
!unzip  -qo data/data38817/train.zip -d 'dataset/'
!unzip  -qo data/data38817/test.zip -d  'dataset/'

In [2]:
'''
导入库
'''
import os
import sys
import math
import zipfile
import random
import json
import cv2
import numpy as np
from PIL import Image, ImageEnhance
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph import Pool2D,Conv2D
import matplotlib.pyplot as plt
from multiprocessing import cpu_count

In [3]:
'''
参数配置
'''
import codecs
import os


# Global configuration shared by every cell below.
# class_dim, image_count and label_dict are placeholders that
# init_train_parameters() fills in from the generated list files.
train_parameters = {
    "input_size": [3, 224, 224],
    "class_dim": -1,  # number of classes; set by init_train_parameters()
    "image_count": -1,  # number of training images; set by init_train_parameters()
    "label_dict": {},
    "data_dir": "dataset",  # directory holding the training data
    "train_file_list": "train_list.txt",
    "evals_file_list": "evals_list.txt",
    "tests_file_list": "testpath.txt",
    "label_file": "label_list.txt",
    "save_persistable_dir": "./persistable-params/model",
    # "continue_train": True,  # resume from previously saved parameters (takes priority over a pretrained model)
    "continue_train": False,
    "num_epochs": 2,  # demo value: train for only 2 epochs
    "train_batch_size": 32,
    "dropout_prob": 0.2,
    "dropout_seed": None,
    "network_vgg": {  # VGG
        "layer": 16  # number of VGG layers
    },
    "network_resnet": {  # ResNet
        "layer": 50  # number of ResNet layers
    },
    "result_dir": "output",  # directory for result files
    "infer_img": 'dataset/test/test111.jpg',
    # Per-channel mean; normally computed from the training data, here simply the mid value
    "mean_rgb": [127.5, 127.5, 127.5],
    "image_enhance_strategy": {  # online image augmentation settings
        "need_distort": True,  # enable colour distortion
        "need_rotate": True,  # enable random rotation
        "need_crop": True,  # enable random cropping
        "need_flip": True,  # enable random horizontal flip
        # The three expand keys are read by preprocess() / random_expand();
        # they were missing, which made those code paths raise KeyError.
        "need_expand": False,  # enable random canvas expansion (off by default)
        "expand_prob": 0.5,  # random_expand() returns the image unchanged when its draw is below this
        "expand_max_ratio": 4,  # upper bound of the expansion ratio
        "hue_prob": 0.5,
        "hue_delta": 18,
        "contrast_prob": 0.5,
        "contrast_delta": 0.5,
        "saturation_prob": 0.5,
        "saturation_delta": 0.5,
        "brightness_prob": 0.5,
        "brightness_delta": 0.125
    },
    "early_stop": {
        "sample_frequency": 1,
        "successive_limit": 3,
        "good_acc1": 0.9625
    },
    "rms_strategy": {
        "learning_rate": 0.001,  # 0.00125,
        "lr_epochs": [20, 40, 60, 80, 100],
        "lr_decay": [1, 0.5, 0.25, 0.1, 0.01, 0.002]
    },
    "momentum_strategy": {
        "learning_rate": 0.000625,  # 0.001,
        "lr_epochs": [20, 40, 60, 80, 100],
        "lr_decay": [1, 0.5, 0.25, 0.1, 0.01, 0.002]
    },
    "sgd_strategy": {
        "learning_rate": 0.00125,  # 0.001,
        "lr_epochs": [20, 40, 60, 80, 100],
        "lr_decay": [1, 0.5, 0.25, 0.1, 0.01, 0.002]
    },
    "adam_strategy": {
        "learning_rate": 0.002
    },
    "adamax_strategy": {
        "learning_rate": 0.00125
    }
}

def init_train_parameters():
    """
    Fill in the run-time fields of ``train_parameters`` from the list files.

    Reads ``<data_dir>/<label_file>`` (one ``<id> <name>`` pair per line) and
    stores ``name -> int(id)`` in ``train_parameters['label_dict']`` and the
    number of label lines in ``train_parameters['class_dim']``.  Then counts
    the entries of ``<data_dir>/<train_file_list>`` and stores the result in
    ``train_parameters['image_count']``.

    Blank lines in either file are ignored.  The label name is everything
    after the first whitespace run, so names that contain spaces are kept
    intact.

    :return: None (``train_parameters`` is updated in place)
    """
    data_dir = train_parameters['data_dir']
    train_file_list = os.path.join(data_dir, train_parameters['train_file_list'])
    label_list = os.path.join(data_dir, train_parameters['label_file'])

    class_count = 0
    with codecs.open(label_list, encoding='utf-8') as flist:
        for raw_line in flist:
            line = raw_line.strip()
            if not line:
                # A trailing empty line must not count as a class
                continue
            # Split only once so the label text is not cut at an inner space
            parts = line.split(None, 1)
            train_parameters['label_dict'][parts[1]] = int(parts[0])
            class_count += 1
    train_parameters['class_dim'] = class_count

    with codecs.open(train_file_list, encoding='utf-8') as flist:
        train_parameters['image_count'] = sum(1 for raw_line in flist if raw_line.strip())


# **一、数据准备**

（1）解压原始数据集

（2）按照比例划分训练集与验证集

（3）乱序，生成数据列表

（4）构造训练数据集提供器和验证数据集提供器

In [4]:
def unzip_data(src_path,target_path):
    '''
    Extract the zip archive at src_path into the directory target_path.

    Extraction is skipped when target_path already contains a "scenes"
    directory.  The marker path is built with os.path.join, so target_path
    works with or without a trailing path separator.

    :param src_path:    path of the zip archive
    :param target_path: directory to extract into
    '''
    if os.path.isdir(os.path.join(target_path, "scenes")):
        return
    # The context manager closes the archive even if extraction fails
    with zipfile.ZipFile(src_path, 'r') as archive:
        archive.extractall(path=target_path)

# 一些简单的图像处理函数，用于深度学习数据增强（Data Augmentation）

In [5]:
def distort_image(img):
    """
    Apply random colour augmentation to a PIL image.

    Brightness, contrast, saturation and hue adjustments are applied once
    each, in a random order.  Every adjustment fires only when a uniform
    draw falls below its ``*_prob`` setting in
    ``train_parameters['image_enhance_strategy']``; its strength is drawn
    uniformly from ``[-*_delta, *_delta]``.

    :param img: PIL image
    :return: the augmented PIL image
    """
    strategy = train_parameters['image_enhance_strategy']

    def random_brightness(img):
        """
        Randomly scale the brightness by a factor of 1 +/- brightness_delta.
        :param img:
        :return:
        """
        prob = np.random.uniform(0, 1)
        if prob < strategy['brightness_prob']:
            brightness_delta = strategy['brightness_delta']
            delta = np.random.uniform(-brightness_delta, brightness_delta) + 1
            img = ImageEnhance.Brightness(img).enhance(delta)
        return img

    def random_contrast(img):
        """
        Randomly scale the contrast by a factor of 1 +/- contrast_delta.
        :param img:
        :return:
        """
        prob = np.random.uniform(0, 1)
        if prob < strategy['contrast_prob']:
            contrast_delta = strategy['contrast_delta']
            delta = np.random.uniform(-contrast_delta, contrast_delta) + 1
            img = ImageEnhance.Contrast(img).enhance(delta)
        return img

    def random_saturation(img):
        """
        Randomly scale the colour saturation by a factor of 1 +/- saturation_delta.
        :param img:
        :return:
        """
        prob = np.random.uniform(0, 1)
        if prob < strategy['saturation_prob']:
            saturation_delta = strategy['saturation_delta']
            delta = np.random.uniform(-saturation_delta, saturation_delta) + 1
            img = ImageEnhance.Color(img).enhance(delta)
        return img

    def random_hue(img):
        """
        Randomly shift the hue channel by up to +/- hue_delta.
        :param img:
        :return:
        """
        prob = np.random.uniform(0, 1)
        if prob < strategy['hue_prob']:
            hue_delta = strategy['hue_delta']
            delta = np.random.uniform(-hue_delta, hue_delta)
            img_hsv = np.array(img.convert('HSV'))
            # The hue channel is uint8.  Do the shift in a wider integer type
            # and wrap explicitly: adding a float and letting the assignment
            # cast back to uint8 relies on an undefined out-of-range cast.
            shifted = img_hsv[:, :, 0].astype(np.int16) + int(round(delta))
            img_hsv[:, :, 0] = (shifted % 256).astype(np.uint8)
            img = Image.fromarray(img_hsv, mode='HSV').convert('RGB')
        return img

    ops = [random_brightness, random_contrast, random_saturation, random_hue]
    np.random.shuffle(ops)
    for op in ops:
        img = op(img)
    return img

def random_crop(img, scales=(0.3, 1.0), max_ratio=2.0, max_trial=50):
    """
    Randomly crop a region of the image and resize it back to the original size.

    The previous body always "cropped" the full frame (0, 0, w, h), so the
    function never changed the framing and ignored scales, max_ratio and
    max_trial.  It now samples a real crop window.

    When ``random.random() > 0.6`` the image is returned untouched.
    Otherwise up to ``max_trial`` windows are sampled and the first one that
    fits inside the image is used.  If none fits, the image is returned
    unchanged.

    :param img: PIL image
    :param scales: (min, max) linear scale of the crop relative to the image
    :param max_ratio: largest allowed aspect-ratio change (and its inverse)
    :param max_trial: maximum number of sampling attempts
    :return: PIL image with the same size as the input
    """
    if random.random() > 0.6:
        return img

    w, h = img.size
    for _ in range(max_trial):
        scale = random.uniform(scales[0], scales[1])
        # Bound the aspect ratio so that, for scale <= 1, the window cannot
        # exceed the image on either axis
        min_ar = max(1.0 / max_ratio, scale * scale)
        max_ar = min(max_ratio, 1.0 / (scale * scale))
        aspect_ratio = random.uniform(min_ar, max_ar)
        crop_w = int(w * scale * np.sqrt(aspect_ratio))
        crop_h = int(h * scale / np.sqrt(aspect_ratio))
        if crop_w < 1 or crop_h < 1 or crop_w > w or crop_h > h:
            continue
        crop_x = random.randint(0, w - crop_w)
        crop_y = random.randint(0, h - crop_h)
        return img.crop((crop_x, crop_y, crop_x + crop_w,
                         crop_y + crop_h)).resize(img.size, Image.LANCZOS)
    return img

def random_expand(img, keep_ratio=True):
    """
    Randomly place the image on a larger canvas filled with the mean colour.

    Reads ``expand_prob`` and ``expand_max_ratio`` from
    ``train_parameters['image_enhance_strategy']``.  When either key is absent
    the fallbacks 0.5 and 4 are used instead of raising KeyError.

    The image is returned unchanged when a uniform draw is below
    ``expand_prob``.

    :param img: PIL image
    :param keep_ratio: use the same expansion ratio for width and height
    :return: PIL image (larger than the input when expansion happened)
    """
    strategy = train_parameters['image_enhance_strategy']
    if np.random.uniform(0, 1) < strategy.get('expand_prob', 0.5):
        return img

    max_ratio = strategy.get('expand_max_ratio', 4)
    w, h = img.size
    c = 3
    ratio_x = random.uniform(1, max_ratio)
    if keep_ratio:
        ratio_y = ratio_x
    else:
        ratio_y = random.uniform(1, max_ratio)
    oh = int(h * ratio_y)
    ow = int(w * ratio_x)
    off_x = random.randint(0, ow - w)
    off_y = random.randint(0, oh - h)

    # Canvas pre-filled with the configured per-channel mean
    out_img = np.zeros((oh, ow, c), np.uint8)
    for i in range(c):
        out_img[:, :, i] = train_parameters['mean_rgb'][i]

    # The canvas has three channels, so the pasted image must be RGB as well
    if img.mode != 'RGB':
        img = img.convert('RGB')
    out_img[off_y: off_y + h, off_x: off_x + w, :] = img

    return Image.fromarray(out_img)

def random_flip(img, thresh=0.5):
    """
    Mirror the image left-to-right when a random draw exceeds ``thresh``.

    :param img: PIL image
    :param thresh: the flip happens when random.random() > thresh
    :return: the flipped image, or the input itself when no flip happened
    """
    should_flip = random.random() > thresh
    return img.transpose(Image.FLIP_LEFT_RIGHT) if should_flip else img

def rotate_image(img, thresh=0.5):
    """
    Rotate the image by a random whole angle in [-14, 14] degrees.

    The rotation is applied only when random.random() > thresh; otherwise
    the input is returned as is.
    """
    if random.random() <= thresh:
        return img

    return img.rotate(np.random.randint(-14, 15))


def preprocess(img, mode='train'):
    """
    Online image augmentation.

    Augmentation runs only when ``mode == 'train'``; any other mode returns
    the image untouched.  Each step is switched by its ``need_*`` flag in
    ``train_parameters['image_enhance_strategy']``.  ``need_expand`` is
    treated as False when the key is absent, so a config without it no
    longer raises KeyError.

    :param img: PIL image
    :param mode: working mode; only 'train' triggers augmentation
    :return: PIL image
    """
    # Online and offline augmentation can be combined, but with many sample
    # files that makes training very slow, so it is not advised to start with both.
    if mode == 'train':
        strategy = train_parameters["image_enhance_strategy"]
        if strategy["need_distort"]:
            img = distort_image(img)
        if strategy['need_rotate']:
            img = rotate_image(img, thresh=0.5)
        if strategy["need_crop"]:
            img = random_crop(img)
        if strategy["need_flip"]:
            img = random_flip(img)
        if strategy.get("need_expand", False):
            img = random_expand(img)
    return img


def distort_color(img):
    """
    Probabilistic colour augmentation.

    With probability 0.7 the image goes through distort_image(), which applies
    the brightness, contrast, saturation and hue adjustments in a random
    order.  Otherwise it is returned unchanged.

    The earlier body called random_brightness / random_contrast /
    random_saturation / random_hue directly.  Those helpers are local to
    distort_image(), so every call that reached them raised NameError.

    :param img: PIL image
    :return: PIL image
    """
    prob = np.random.uniform(0, 1)
    if prob < 0.7:
        img = distort_image(img)
    return img
    

# 预处理数据，将其转化为标准格式。同时将数据拆分成训练集和验证集两份，以便训练和计算预估准确率
# 数据集保存到'dataset/'目录中

In [6]:
# 预处理数据，将其转化为标准格式。同时将数据拆分成两份，以便训练和计算预估准确率 
import codecs   
import os   
import random   
import shutil   
import json
from PIL import Image   


# Split dataset/train/<class>/ into a training copy and an evaluation copy
# and write the three list files consumed later:
#   train_list.txt / evals_list.txt : "<image path>\t<label id>" per line
#   label_list.txt                  : "<label id>\t<class name>" per line
train_ratio = 9.0 / 10  # probability that an image goes to the training split

dataset_dstdir = 'dataset/'
train_file_dir = 'dataset/train/'
class_list = [
    c for c in os.listdir(train_file_dir)
    if os.path.isdir(os.path.join(train_file_dir, c))
    and not c.endswith('Set')
    and not c.startswith('.')
]
# Directory names are numbers: order them numerically, ties broken by name
class_list.sort(key=lambda name: (int(name), name))
#print('class_list=',class_list)

# Mapping from class directory name to the human-readable class name
with open(os.path.join(dataset_dstdir, 'garbage_dict.json'), 'r', encoding='utf-8') as f:
    class_dict = json.load(f)
print('class_dict=',class_dict)

# Create <split>/<class>/ output directories for both splits
train_image_dir = os.path.join(dataset_dstdir, "trainImageSet")
evals_image_dir = os.path.join(dataset_dstdir, "evalsImageSet")
for image_dir in (train_image_dir, evals_image_dir):
    os.makedirs(image_dir, exist_ok=True)
    for class_dir in class_list:
        os.makedirs(os.path.join(image_dir, class_dir), exist_ok=True)

train_list_filename = os.path.join(dataset_dstdir, "train_list.txt")
evals_list_filename = os.path.join(dataset_dstdir, "evals_list.txt")
label_list_filename = os.path.join(dataset_dstdir, "label_list.txt")

# The list files are written as UTF-8 because init_train_parameters() reads
# them back with encoding='utf-8'.  The with-statement closes all three files
# even if the loop fails part-way.
with codecs.open(train_list_filename, 'w', encoding='utf-8') as train_file, \
        codecs.open(evals_list_filename, 'w', encoding='utf-8') as evals_file, \
        codecs.open(label_list_filename, 'w', encoding='utf-8') as label_file:
    label_id = 0
    for class_dir in class_list:
        label_file.write("{0}\t{1}\n".format(label_id, class_dict[class_dir]))

        image_path_pre = os.path.join(train_file_dir, class_dir)
        train_path_dst = os.path.join(train_image_dir, class_dir)
        evals_path_dst = os.path.join(evals_image_dir, class_dir)

        file_list = os.listdir(image_path_pre)
        random.shuffle(file_list)
        for file_name in file_list:
            src_file = os.path.join(image_path_pre, file_name)
            try:
                if random.uniform(0, 1) > train_ratio:
                    dst_file = os.path.join(evals_path_dst, file_name)
                    shutil.copyfile(src_file, dst_file)
                    evals_file.write("{0}\t{1}\n".format(dst_file, label_id))
                else:
                    dst_file = os.path.join(train_path_dst, file_name)
                    shutil.copyfile(src_file, dst_file)
                    train_file.write("{0}\t{1}\n".format(dst_file, label_id))
                    # Offline augmentation results, if any, would also be
                    # appended to the training list here.
            except OSError as e:
                # Some files may be unreadable: report them and move on
                print(e)
        print('label_id=%d have porcessed.' % label_id)
        label_id += 1

print('列表已生成:', train_list_filename,evals_list_filename,label_list_filename)

# 构造数据提供器

In [7]:
import os
import math
import sys
import numpy as np
import argparse
import ast
import paddle
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid import framework
from paddle.fluid.param_attr import ParamAttr

# Fill in class_dim, label_dict and image_count from label_list.txt and
# train_list.txt before any reader or model is built.
init_train_parameters()

# reader
def custom_image_reader(file_list,mode):
    '''
    Build a sample generator over the images listed in file_list.

    :param file_list: text file with one sample per line.
                      'train' / 'eval': "<image path>\t<label>".
                      'test': "<image path>".
    :param mode: 'train', 'eval' or 'test'; any other value yields nothing
    :return: a zero-argument function returning a generator.
             'train' / 'eval' yield (CHW float32 array in [0, 1], int label);
             'test' yields the array only.

    Only 'train' samples are shuffled.  'eval' and 'test' keep the file order
    so evaluation is repeatable and test predictions stay aligned with the
    input list.  Blank lines are skipped.
    '''
    def load_as_chw(img_path):
        # Decode, force 3 channels, resize to the 224x224 VGG input,
        # convert HWC -> CHW and scale the pixels to [0, 1].
        img = Image.open(img_path)
        if img.mode != 'RGB':
            img = img.convert('RGB')
        # LANCZOS is the filter the removed Image.ANTIALIAS alias pointed to
        img = img.resize((224, 224), Image.LANCZOS)
        img = np.array(img).astype('float32')
        img = img.transpose((2, 0, 1))
        return img / 255.0

    def reader():
        with open(file_list, 'r') as f:
            lines = [line.strip() for line in f]
        lines = [line for line in lines if line]
        if mode == 'train':
            random.shuffle(lines)
        for line in lines:
            if mode == 'train' or mode == 'eval':
                img_path, lab = line.split('\t')
                # Online augmentation (preprocess) is deliberately not applied here
                yield load_as_chw(img_path), int(lab)
            elif mode == 'test':
                yield load_as_chw(line)
    return reader

'''
构造数据提供器，
训练集和验证集调用同样的函数，但是工作模式这个参数不一样。
'''
# Batched training reader: samples come from train_list.txt in 'train' mode.
# drop_last=True is passed, so an incomplete final batch is not used.
train_file_list = os.path.join(train_parameters['data_dir'], train_parameters["train_file_list"])
train_reader = paddle.batch(custom_image_reader(train_file_list, 'train'),
                                    batch_size=train_parameters['train_batch_size'],
                                    drop_last=True)
# Batched evaluation reader: same function and batch size, but 'eval' mode
# and evals_list.txt as the source.
evals_file_list = os.path.join(train_parameters['data_dir'], train_parameters["evals_file_list"])
evals_reader = paddle.batch(custom_image_reader(evals_file_list, 'eval'),
                                   batch_size=train_parameters['train_batch_size'],
                                   drop_last=True)


def eval_net(reader, model):
    """
    Return the mean of the per-batch accuracies of ``model`` over ``reader``.

    :param reader: zero-argument callable returning an iterable of batches;
                   each batch is a sequence of (image array, label) pairs
    :param model: callable ``model(img, label) -> (prediction, acc)``
    :return: mean batch accuracy, or NaN when the reader yields no batch
    """
    acc_set = []

    for data in reader():
        dy_x_data = np.array([x[0] for x in data]).astype('float32')
        # int64 matches the label dtype used in the training loop;
        # plain 'int' has a platform-dependent width
        y_data = np.array([x[1] for x in data]).astype('int64')[:, np.newaxis]
        img = fluid.dygraph.to_variable(dy_x_data)
        label = fluid.dygraph.to_variable(y_data)
        label.stop_gradient = True
        _, acc = model(img, label)

        acc_set.append(float(acc.numpy()))

    if not acc_set:
        # Same value the mean of an empty array would give, without the warning
        return float('nan')
    return np.array(acc_set).mean()

# 训练过程中loss/acc的显示

In [8]:
import matplotlib.pyplot as plt

# Accumulators filled by the training loop and plotted by the draw_* helpers
all_train_iter=0      # running sample counter, advanced by the batch size per step
all_train_iters=[]    # x axis: value of all_train_iter at each step
all_train_costs=[]    # loss recorded at each step
all_train_accs=[]     # accuracy recorded at each step

def draw_train_process(title,iters,costs,accs,label_cost,lable_acc):
    """
    Plot the loss curve (red) and the accuracy curve (green) on one figure.

    :param title: figure title
    :param iters: x values shared by both curves
    :param costs: loss values
    :param accs: accuracy values
    :param label_cost: legend text of the loss curve
    :param lable_acc: legend text of the accuracy curve
    """
    plt.title(title, fontsize=24)
    plt.xlabel("iter", fontsize=20)
    plt.ylabel("loss/acc", fontsize=20)
    curves = ((costs, 'red', label_cost), (accs, 'green', lable_acc))
    for values, colour, legend_text in curves:
        plt.plot(iters, values, color=colour, label=legend_text)
    plt.legend()
    plt.grid()
    plt.show()


def draw_process(title,color,iters,data,label):
    """
    Plot a single curve; ``label`` is used for both the y axis and the legend.

    :param title: figure title
    :param color: line colour
    :param iters: x values
    :param data: y values
    :param label: y-axis label and legend text
    """
    text_sizes = {"title": 24, "axis": 20}
    plt.title(title, fontsize=text_sizes["title"])
    plt.xlabel("iter", fontsize=text_sizes["axis"])
    plt.ylabel(label, fontsize=text_sizes["axis"])
    plt.plot(iters, data, color=color, label=label)
    plt.legend()
    plt.grid()
    plt.show()

# **二、模型配置**

![](https://ai-studio-static-online.cdn.bcebos.com/9ca0744272b0449186f766afcabadd598e24679088a4438dafede05a71b7c141)

VGG的核心是五组卷积操作，每两组之间做Max-Pooling空间降维。同一组内采用多次连续的3X3卷积，卷积核的数目由较浅组的64增多到最深组的512，同一组内的卷积核数目是一样的。卷积之后接两层全连接层，之后是分类层。由于每组内卷积层的不同，有11、13、16、19层这几种模型，上图展示一个16层的网络结构。
# 在以下cell中完成VGG网络的定义

In [None]:
"""
VGG网络的定义，AIStudio 官方项目：用PaddlePaddle实现图像分类-VGG（动态图版）
    Fork源：https://aistudio.baidu.com/aistudio/projectdetail/204999
"""
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear


"""
VGG网络
"""

class ConvBlock(fluid.dygraph.Layer):
    """
    A run of 3x3 ReLU convolutions followed by one 2x2 max-pooling layer.
    """
    def __init__(self, name_scope, num_channels, num_filters, groups):
        """
        :param name_scope:   name scope passed to the base Layer
        :param num_channels: input channels of the first convolution
        :param num_filters:  output channels of every convolution
        :param groups:       number of convolutions in the block
        """
        super(ConvBlock, self).__init__(name_scope)

        self._conv2d_list = []
        for idx in range(groups):
            # Only the first convolution sees the block input; the rest
            # consume the previous convolution's num_filters outputs
            in_channels = num_channels if idx == 0 else num_filters
            layer = fluid.dygraph.Conv2D(
                in_channels, num_filters=num_filters, filter_size=3,
                stride=1, padding=1, act='relu'
            )
            self._conv2d_list.append(self.add_sublayer('bb_%d' % idx, layer))

        self._pool = fluid.dygraph.Pool2D(
            pool_size=2, pool_type='max', pool_stride=2
        )

    def forward(self, inputs):
        """Run the convolutions in order, then the pooling layer."""
        features = inputs
        for conv_layer in self._conv2d_list:
            features = conv_layer(features)
        return self._pool(features)

class VGGNet(fluid.dygraph.Layer):
    """
    VGG classifier: five ConvBlocks, two 4096-unit ReLU layers with dropout,
    and a softmax output layer.
    """
    def __init__(self, name_scope, layers=16, class_dim=1000):
        """
        :param name_scope:   name scope passed to the base Layer
        :param layers:       VGG depth; one of 11, 13, 16, 19
        :param class_dim:    number of output classes
        """
        super(VGGNet, self).__init__(name_scope)
        # Number of convolutions in each of the five blocks, per depth
        self.vgg_spec = {
            11: ([1, 1, 2, 2, 2]),
            13: ([2, 2, 2, 2, 2]),
            16: ([2, 2, 3, 3, 3]),
            19: ([2, 2, 4, 4, 4])
        }
        assert layers in self.vgg_spec, \
            "supported layers are {} but input layer is {}".format(self.vgg_spec.keys(), layers)

        depths = self.vgg_spec[layers]
        scope = self.full_name
        self.conv1 = ConvBlock(scope(), num_channels=3, num_filters=64, groups=depths[0])
        self.conv2 = ConvBlock(scope(), num_channels=64, num_filters=128, groups=depths[1])
        self.conv3 = ConvBlock(scope(), num_channels=128, num_filters=256, groups=depths[2])
        self.conv4 = ConvBlock(scope(), num_channels=256, num_filters=512, groups=depths[3])
        self.conv5 = ConvBlock(scope(), num_channels=512, num_filters=512, groups=depths[4])

        fc_dim = 4096
        self.fc1 = fluid.dygraph.Linear(input_dim=25088, output_dim=fc_dim, act='relu')
        self.fc2 = fluid.dygraph.Linear(input_dim=fc_dim, output_dim=fc_dim, act='relu')
        self.out = fluid.dygraph.Linear(input_dim=fc_dim, output_dim=class_dim, act='softmax')

    def forward(self, inputs, label=None):
        """
        Forward pass.

        :param inputs: image batch
        :param label:  optional labels; when given, accuracy is computed too
        :return: ``out`` when label is None, otherwise ``(out, acc)``
        """
        out = inputs
        for block in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            out = block(out)

        # Flatten to the 25088 features expected by fc1
        out = fluid.layers.reshape(out, [-1, 25088])

        for dense in (self.fc1, self.fc2):
            out = dense(out)
            out = fluid.layers.dropout(out, dropout_prob=0.5)

        out = self.out(out)

        if label is None:
            return out
        acc = fluid.layers.accuracy(input=out, label=label)
        return out, acc

# **三、模型训练 && 四、模型评估**

In [None]:
# 参考项目代码，自己写了一个优化器配置函数（比较粗糙，赶时间为了完成作业）
# 提高最后精度的措施：
# 1、加了L2正则化：[L2Decay实现L2权重衰减正则化，用于模型训练，有助于防止模型对训练数据过拟合]
#    我的代码里取l2_decay = 1.2e-4。缺省是1e-4
# 2、学习率随训练次数衰减：[在训练模型时，建议一边进行训练一边降低学习率] 
#    我在调不动时后再用，最后效果还能再提高一点，cosine_decay，https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/layers_cn/cosine_decay_cn.html#cosine-decay
#    前期训练时候用的是常值（初始学习率不变），或者对初始学习率进行分段衰减
#    piecewise_decay，https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/layers_cn/piecewise_decay_cn.html#piecewise-decay  

# Show the effective configuration before training
print(train_parameters)
momentum_rate = 0.9  # NOTE(review): not referenced by optimizer_setting below
l2_decay = 1.2e-4    # L2 weight-decay coefficient passed to L2Decay

def optimizer_setting(params, paramsList):
    """
    Build an Adamax optimizer with cosine learning-rate decay and L2 regularization.

    :param params: configuration dict; reads "image_count",
                   "train_batch_size" and "num_epochs"
    :param paramsList: parameters to optimize (passed as parameter_list)
    :return: the configured fluid.optimizer.AdamaxOptimizer
    """
    total_images = params["image_count"]
    # Take the batch size from params.  It used to be assigned to an unused
    # name while the step count read a global `batch_size` defined only in
    # a later cell.
    batch_size = params["train_batch_size"]
    step = int(math.ceil(float(total_images) / batch_size))

    # Initial learning rate is hard-coded for easier experimentation;
    # pick the real value according to tuning needs
    lr = 0.00125

    num_epochs = params["num_epochs"]
    regularization = fluid.regularizer.L2Decay(l2_decay)
    learning_rate = fluid.layers.cosine_decay(learning_rate=lr, step_each_epoch=step, epochs=num_epochs)

    optimizer = fluid.optimizer.AdamaxOptimizer(learning_rate=learning_rate, regularization=regularization, parameter_list=paramsList)

    return optimizer

In [None]:
'''
模型训练
'''
# Training loop: optionally warm-start from a saved checkpoint, train for
# num_epochs, evaluate after every epoch and keep the best-scoring weights.
epochs_num = train_parameters["num_epochs"]
batch_size = train_parameters["train_batch_size"]
total_images = train_parameters["image_count"]
stepsnumb = int(math.ceil(float(total_images) / batch_size))

with fluid.dygraph.guard(place = fluid.CUDAPlace(0)):   # train on GPU
#with fluid.dygraph.guard():                            # train on CPU
    print(train_parameters['class_dim'])
    print(train_parameters['label_dict'])

    best_acc = 0
    best_epc = -1
    eval_epchnumber = 0
    all_eval_avgacc = []   # evaluation accuracy per evaluated epoch
    all_eval_iters = []    # epoch index of each evaluation
    avg_loss = None        # stays None when the reader yields no batch

    model = VGGNet("vgg_net", train_parameters["network_vgg"]["layer"], train_parameters["class_dim"])

    # Best-effort warm start: a missing or incompatible checkpoint is
    # reported and training simply starts from fresh weights.
    try:
        if os.path.exists('MyVGG_best.pdparams'):
            print('try model file MyVGG_best. Loading...')
            model_dict, _ = fluid.load_dygraph('MyVGG_best')
        else:
            print('try model file MyVGG. Loading...')
            model_dict, _ = fluid.load_dygraph('MyVGG')
        model.load_dict(model_dict)
        print('model initialization finished.')
    except Exception as e:
        print('model initialization error found:')
        print(e)

    model.train()

    optimizer = optimizer_setting(train_parameters, model.parameters())

    for epoch_num in range(epochs_num):
        # Evaluation below switches to eval mode, so switch back every epoch
        model.train()
        for batch_id, data in enumerate(train_reader()):
            dy_x_data = np.array([x[0] for x in data]).astype('float32').reshape(-1, 3,224,224)
            y_data = np.array([x[1] for x in data]).astype('int64').reshape(-1,1)

            # Convert the numpy batches into dygraph variables
            img = fluid.dygraph.to_variable(dy_x_data)
            label = fluid.dygraph.to_variable(y_data)

            out,acc = model(img,label)
            loss = fluid.layers.cross_entropy(out, label)
            avg_loss = fluid.layers.mean(loss)

            avg_loss.backward()
            optimizer.minimize(avg_loss)
            # Clear gradients so the next step starts from zero
            model.clear_gradients()

            all_train_iter=all_train_iter+train_parameters['train_batch_size']
            all_train_iters.append(all_train_iter)
            # Record the batch-mean loss; `loss` holds one value per sample,
            # so loss.numpy()[0] was only the first sample's loss
            all_train_costs.append(avg_loss.numpy()[0])
            all_train_accs.append(acc.numpy()[0])

            # Periodic progress output
            if batch_id % 100 == 0 or batch_id == stepsnumb-1:
                print("epoch %3d step %4d: loss: %f, acc: %f" % (epoch_num, batch_id, avg_loss.numpy(), acc.numpy()))

        if epoch_num % 1 == 0 or epoch_num == epochs_num-1:
            model.eval()
            accs = []
            for batch_id, data in enumerate(evals_reader()):
                dy_x_data = np.array([x[0] for x in data]).astype('float32').reshape(-1,3,224,224)
                # int64 to match the label dtype used for training batches
                y_data = np.array([x[1] for x in data]).astype('int64').reshape(-1,1)

                img = fluid.dygraph.to_variable(dy_x_data)
                label = fluid.dygraph.to_variable(y_data)

                out, acc = model(img, label)
                accs.append(acc.numpy()[0])
            epoch_acc = np.mean(accs)
            print('  train_pass:%d,avg_acc=%f' % (epoch_num,epoch_acc))
            eval_epchnumber = epoch_num
            # These two appends were swapped: epoch indices went into the
            # accuracy list and accuracies into the epoch list
            all_eval_iters.append(eval_epchnumber)
            all_eval_avgacc.append(epoch_acc)

            if best_acc < epoch_acc:
                best_epc=epoch_num
                best_acc=epoch_acc
                fluid.save_dygraph(model.state_dict(),'MyVGG_best')
                print('    current best_acc=%f in No.%d epoch' % (best_acc,best_epc))
                print('    MyVGG_best模型已保存')
                with open('beast_acc_Vgg.txt', "w") as f:
                    f.write(str(best_acc))
                fluid.dygraph.save_dygraph(model.state_dict(), "save_dir/model_best")
                fluid.dygraph.save_dygraph(optimizer.state_dict(), "save_dir/model_best")

            # Show the curves collected so far
            draw_train_process("training",all_train_iters,all_train_costs,all_train_accs,"trainning loss","trainning acc")

    draw_train_process("training",all_train_iters,all_train_costs,all_train_accs,"trainning loss","trainning acc")
    draw_process("trainning loss","red",all_train_iters,all_train_costs,"trainning loss")
    draw_process("trainning acc","green",all_train_iters,all_train_accs,"trainning acc")

    # Save the final parameters
    fluid.save_dygraph(model.state_dict(), "MyVGG")
    print('MyVGG模型已保存')
    if avg_loss is not None:
        print("Final loss: {}".format(avg_loss.numpy()))
    #fluid.dygraph.save_dygraph(model.state_dict(), "save_dir/model")
    #fluid.dygraph.save_dygraph(optimizer.state_dict(), "save_dir/model")
    with open('acc_VGG.txt', "w") as f:
        f.write(str(epoch_acc))

In [None]:
'''
模型校验
'''
# Load the best checkpoint and report its mean batch accuracy on the
# evaluation reader.
with fluid.dygraph.guard(place = fluid.CUDAPlace(0)):
    model = VGGNet("vgg_net", train_parameters["network_vgg"]["layer"], train_parameters["class_dim"])
    model_dict, _ = fluid.load_dygraph("MyVGG_best")
    #model_dict, _ = fluid.dygraph.load_dygraph("save_dir/model_best")
    model.load_dict(model_dict)
    model.eval()

    accs = []
    for batch_id, data in enumerate(evals_reader()):
        dy_x_data = np.array([x[0] for x in data]).astype('float32').reshape(-1,3,224,224)
        # int64 to match the label dtype used for training batches
        y_data = np.array([x[1] for x in data]).astype('int64').reshape(-1,1)

        img = fluid.dygraph.to_variable(dy_x_data)
        label = fluid.dygraph.to_variable(y_data)

        out, acc = model(img, label)
        accs.append(acc.numpy()[0])

    avg_acc = np.mean(accs)
#print(np.mean(accs))
print("模型校验avg_acc=",avg_acc)

# **五、模型预测**

In [None]:
import os
import codecs
from PIL import Image, ImageEnhance

# Print the label dictionary for a quick visual check
print('label_dic =',train_parameters['label_dict'])

def load_image(img_path):
    '''
    Load one image and convert it to the network input format.

    The image is converted to RGB, resized to 224x224, transposed from HWC
    to CHW and scaled to [0, 1].  The resize uses Image.LANCZOS, the filter
    that the Image.ANTIALIAS alias used by the training reader pointed to,
    so inference inputs are resampled the same way as training inputs
    (BILINEAR was used here before).

    :param img_path: path of the image file
    :return: float32 numpy array of shape (3, 224, 224)
    '''
    img = Image.open(img_path)
    if img.mode != 'RGB':
        img = img.convert('RGB')
    img = img.resize((224, 224), Image.LANCZOS)
    img = np.array(img).astype('float32')
    img = img.transpose((2, 0, 1))  # HWC to CHW
    img = img/255                # scale pixel values to [0, 1]
    return img

'''
模型预测
'''
# Predict a label for every image listed in testpath.txt, in file order,
# and write one label per line to the result file.
with fluid.dygraph.guard():
    model = VGGNet("vgg_net", train_parameters["network_vgg"]["layer"], train_parameters["class_dim"])
    model_dict, _ = fluid.load_dygraph("MyVGG_best")
    #model_dict, _ = fluid.dygraph.load_dygraph("save_dir/model_best")
    model.load_dict(model_dict)
    model.eval()

    # Directory holding the test images
    data_dir = train_parameters["data_dir"]
    infer_path = os.path.join(data_dir,"test/")

    # Show one sample image; skipped when the sample file is not present
    infer_imag=os.path.join(infer_path,'test1.jpg')
    if os.path.exists(infer_imag):
        img = Image.open(infer_imag)
        plt.imshow(img)
        plt.show()
        print("demo演示：", infer_imag)

    # Load and preprocess every test image; blank lines in the list are ignored
    #tests_file_list = train_parameters["tests_file_list"]
    tests_file_list = "testpath.txt"
    file_list = os.path.join(data_dir,tests_file_list)
    with codecs.open(file_list) as flist:
        img_list = [line.strip() for line in flist if line.strip()]
    infer_imgs = np.array([load_image(os.path.join(infer_path, imgfn)) for imgfn in img_list])

    print("test开始")
    final_result_list = []
    for data in infer_imgs:
        # Add the batch dimension: (3, 224, 224) -> (1, 3, 224, 224)
        dy_x_data = np.array(data).astype('float32')[np.newaxis, :, :, :]
        img = fluid.dygraph.to_variable(dy_x_data)
        out = model(img)
        lab = np.argmax(out.numpy())  # index of the highest-scoring class
        final_result_list.append(lab)

    print("test存储结果")
    # NOTE(review): the task statement asks for a file named model_result.txt
    # with 400 lines; this writes final_result_vgg.txt - confirm before submitting.
    saveFileName = 'final_result_vgg.txt'
    with codecs.open(saveFileName,'w') as flist:
        # One label per line, with no newline after the last one
        flist.write("\n".join(str(lab) for lab in final_result_list))
    if len(final_result_list)>0:
        print(saveFileName+' saved with %d records.' % len(final_result_list))
    else:
        print('invalid record num.')

    print("test结束")