<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#验证模型实现" data-toc-modified-id="验证模型实现-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>验证模型实现</a></span></li><li><span><a href="#验证结果分析" data-toc-modified-id="验证结果分析-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>验证结果分析</a></span><ul class="toc-item"><li><span><a href="#结论" data-toc-modified-id="结论-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>结论</a></span></li><li><span><a href="#过拟合分析" data-toc-modified-id="过拟合分析-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>过拟合分析</a></span><ul class="toc-item"><li><span><a href="#首先尝试增加dropout层，在resnet18提取完图片的特征之后，使用一个dropout层，分别尝试了0.3,0.5,0.7三个不同丢弃率的dropout层，下面是结果:" data-toc-modified-id="首先尝试增加dropout层，在resnet18提取完图片的特征之后，使用一个dropout层，分别尝试了0.3,0.5,0.7三个不同丢弃率的dropout层，下面是结果:-2.2.1"><span class="toc-item-num">2.2.1&nbsp;&nbsp;</span>首先尝试增加dropout层，在resnet18提取完图片的特征之后，使用一个dropout层，分别尝试了0.3,0.5,0.7三个不同丢弃率的dropout层，下面是结果:</a></span></li></ul></li></ul></li></ul></div>

# 验证模型，解决过拟合问题

## 验证模型实现

In [1]:
import torch
import torch.nn as nn
import torchvision.models as models
from tqdm import tqdm 

import cv2
import os
import torch
import numpy as np
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from collections import OrderedDict
import glob
import json

    
class MyDataset(Dataset):
    
    def __init__(self,img_path,label_content=None):
        self.img_path = img_path
        self.img_path.sort()
        if label_content is not None:
            self.label_content = label_content
        else:
            self.label_content = None
    
    def __getitem__(self,index):
        img_path = self.img_path[index]
        img_cv2 = cv2.imread(img_path)  #return （H，W，C），C list by B，G，R
        img_cv2 = cv2.resize(img_cv2,(200,100))  #cv2.resize(obj,(width,height))
        img_array = img_cv2.transpose(2,0,1)
        return img_array
    
    def __len__(self):
        return len(self.img_path)

    
class MyModel(nn.Module):
    def __init__(self,T,img_width):
        super().__init__()
        self.img_width = img_width
        self.cnn_out_width = self.cal_finnal_width()
        #self.cnn: input(N,3,100,200),output(N,512,4,7)
        trained_resnet18 = models.resnet18(weights='DEFAULT')
        self.cnn = nn.Sequential(trained_resnet18.conv1,
                                 trained_resnet18.bn1,
                                 trained_resnet18.relu,
                                 trained_resnet18.maxpool,
                                 trained_resnet18.layer1,
                                 trained_resnet18.layer2,
                                 trained_resnet18.layer3,
                                 trained_resnet18.layer4)
        #self.feat_to_seq: input(N,7,512,1),output(N,30,512,1)
        self.feat_to_seq = nn.Conv2d(self.cnn_out_width,T,kernel_size=(1,1),stride=1)
        #self.rnn: input(30,N,512),output(30,N,64*2)
        self.rnn = nn.GRU(input_size=512,hidden_size=64,bidirectional=True)
        #self.linear: input(30,N,64*2),output(30,N,11)
        self.linear = nn.Linear(128,11)
    
    def forward(self,X):
        X = self.cnn(X)
        X = X.mean(dim=2,keepdim=True)
        X = X.permute((0,3,1,2))
        X = self.feat_to_seq(X)
        X = X.permute((1,0,2,3))
        X = X.squeeze(dim=3)
        X,_ = self.rnn(X)
        y_hat = self.linear(X)
        return y_hat

    def cal_finnal_width(self):
        width = self.img_width
        for i in range(5):
            if width%2 == 0:
                width = int(width/2)
            else:
                width = int(width/2)+1
        return width
    
    
class ValidModel():
    def __init__(self,saved_model_path=None,T=30,img_width=200):
        self.model = MyModel(T,img_width)
        if saved_model_path is not None:
            best_state_dict = torch.load(os.path.join('./',saved_model_path))
            self.model.load_state_dict(best_state_dict)
            if torch.cuda.is_available():
                self.model.cuda()
                print(saved_model_path,' cuda model load success!')
            else:
                print(saved_model_path,' model load success!')
        else:
            return('model loading failed')

    def predict(self,img_fold_path):
        img_path = glob.glob(os.path.join(img_fold_path,'*.png'))
        my_dataset = MyDataset(img_path)
        self.my_dataloader = DataLoader(my_dataset,batch_size=1)
#         print(self.my_dataloader)

        self.prediction_dict = OrderedDict()
        my_dataloader = self.my_dataloader
        my_dataloader = tqdm(my_dataloader,ncols=60)
        
        with torch.no_grad():
            self.model.eval()
            for batch_index,batch_data in enumerate(my_dataloader):
                X = batch_data.float()
                if torch.cuda.is_available:
                    X = X.cuda()
                y_hat = self.model(X)
                y_hat = y_hat.permute(1,0,2).cpu()
                pred = torch.argmax(y_hat,dim=2)
                batch_size = pred.shape[0]
                pred_list = []
                for i in range(batch_size):
                    raw_pred_list = list(pred[i].numpy())
                    pred_data = []
                    for j in range(len(raw_pred_list)):
                        if j == 0 and raw_pred_list[0] != 0:
                            pred_data.append(raw_pred_list[0]-1)
                        if j != 0 and raw_pred_list[j] != raw_pred_list[j-1] and raw_pred_list[j] != 0:
                            pred_data.append(raw_pred_list[j]-1)
                    if len(pred_data) != 0:
                        pred_num = ''.join([str(int(num)) for num in pred_data])
                        pred_num = int(pred_num)
                        img_name = os.path.split(img_path[batch_index])[1]
                        self.prediction_dict[img_name] = pred_num
                    else:
                        img_name = os.path.split(img_path[batch_index])[1]
                        self.prediction_dict[img_name] = -1
        
        return self.prediction_dict
            
    def get_accurancy(self,label_path):
        predict_right_num = 0 
        prediction_dict = self.prediction_dict
        label_content = json.load(open(label_path))
        if len(prediction_dict.keys()) == len(label_content.keys()):
            for img_name in prediction_dict.keys():
                if prediction_dict[img_name] == int(''.join([str(x) for x in label_content[img_name]['label']])):
                    predict_right_num+=1
            acc = (predict_right_num/len(prediction_dict))
            return acc
        else:
            print('label path error or may be need to call predict first')
            return None


In [39]:
index = 0
class_list =['train','val']
my_valid = ValidModel(saved_model_path='crnn_resnet18_ctc_best')
valid_dataset_prediction = my_valid.predict(f'../input/{class_list[index]}')
print(f'{class_list[index]} len:',len(valid_dataset_prediction.keys()))
print(f'{class_list[index]} acc:',my_valid.get_accurancy(f'../input/{class_list[index]}.json'))
index = 1
valid_dataset_prediction = my_valid.predict(f'../input/{class_list[index]}')
print(f'{class_list[index]} len:',len(valid_dataset_prediction.keys()))
print(f'{class_list[index]} acc:',my_valid.get_accurancy(f'../input/{class_list[index]}.json'))

crnn_resnet18_ctc_best  cuda model load success!


100%|████████████████| 30000/30000 [02:49<00:00, 176.65it/s]


train len: 30000
train acc: 0.9961666666666666


100%|████████████████| 10000/10000 [00:58<00:00, 171.09it/s]

val len: 10000
val acc: 0.5837





## 验证结果分析

### 结论
　　从上面第一次使用验证集的结果来看，验证集的准确度只有0.58，而此时训练集的精度已经到了0.99，模型明显过拟合了。

### 过拟合分析

　　常见的过拟合原因有几种，大概是在两个方面，一是数据端，一是模型端:   
  
　　对于数据来说， 1.数据数量不够导致模型过拟合，可以通过增加数据数量来解决。2.数据特征较多，有些特征相互关联影响，可以通过添加dropout层来解决。  
  
　　对于模型来说，3.模型过于复杂，参数多。模型过于复杂可以通过减少模型某些维度来解决，参数多可以通过通过添加正则项来解决。下面依次尝试这些办法，看是否能减少过拟合，在验证集上获得更好的表现。

#### 首先尝试增加dropout层，在resnet18提取完图片的特征之后，使用一个dropout层，分别尝试了0.3,0.5,0.7三个不同丢弃率的dropout层，下面是结果:

In [41]:
index = 0
class_list =['train','val']
my_valid = ValidModel(saved_model_path='crnn_resnet18_ctc_dropout0.3_best')
valid_dataset_prediction = my_valid.predict(f'../input/{class_list[index]}')
print(f'{class_list[index]} len:',len(valid_dataset_prediction.keys()))
print(f'{class_list[index]} acc:',my_valid.get_accurancy(f'../input/{class_list[index]}.json'))
index = 1
valid_dataset_prediction = my_valid.predict(f'../input/{class_list[index]}')
print(f'{class_list[index]} len:',len(valid_dataset_prediction.keys()))
print(f'{class_list[index]} acc:',my_valid.get_accurancy(f'../input/{class_list[index]}.json'))

crnn_resnet18_ctc_dropout0.3_best  cuda model load success!


100%|████████████████| 30000/30000 [02:50<00:00, 175.72it/s]


train len: 30000
train acc: 0.9917666666666667


100%|█████████████████| 10000/10000 [01:47<00:00, 92.61it/s]


val len: 10000
val acc: 0.5791


In [42]:
index = 0
class_list =['train','val']
my_valid = ValidModel(saved_model_path='crnn_resnet18_ctc_dropout0.5_best')
valid_dataset_prediction = my_valid.predict(f'../input/{class_list[index]}')
print(f'{class_list[index]} len:',len(valid_dataset_prediction.keys()))
print(f'{class_list[index]} acc:',my_valid.get_accurancy(f'../input/{class_list[index]}.json'))
index = 1
valid_dataset_prediction = my_valid.predict(f'../input/{class_list[index]}')
print(f'{class_list[index]} len:',len(valid_dataset_prediction.keys()))
print(f'{class_list[index]} acc:',my_valid.get_accurancy(f'../input/{class_list[index]}.json'))

crnn_resnet18_ctc_dropout0.5_best  cuda model load success!


100%|████████████████| 30000/30000 [02:48<00:00, 178.15it/s]


train len: 30000
train acc: 0.9739333333333333


100%|████████████████| 10000/10000 [01:31<00:00, 109.83it/s]

val len: 10000
val acc: 0.5982





In [43]:
index = 0
class_list =['train','val']
my_valid = ValidModel(saved_model_path='crnn_resnet18_ctc_dropout0.7_best')
valid_dataset_prediction = my_valid.predict(f'../input/{class_list[index]}')
print(f'{class_list[index]} len:',len(valid_dataset_prediction.keys()))
print(f'{class_list[index]} acc:',my_valid.get_accurancy(f'../input/{class_list[index]}.json'))
index = 1
valid_dataset_prediction = my_valid.predict(f'../input/{class_list[index]}')
print(f'{class_list[index]} len:',len(valid_dataset_prediction.keys()))
print(f'{class_list[index]} acc:',my_valid.get_accurancy(f'../input/{class_list[index]}.json'))

crnn_resnet18_ctc_dropout0.7_best  cuda model load success!


100%|█████████████████| 30000/30000 [05:11<00:00, 96.36it/s]


train len: 30000
train acc: 0.9439


100%|████████████████| 10000/10000 [01:38<00:00, 101.26it/s]


val len: 10000
val acc: 0.5803


In [3]:
class MyDataset(Dataset):
    
    def __init__(self,img_path,label_content,img_width,img_height):
        self.img_path = img_path
        self.label_content = label_content
        self.img_width = img_width
        self.img_height = img_height
    
    def __getitem__(self,index):
        img_path = self.img_path[index]
        img_cv2 = cv2.imread(img_path)  #return （H，W，C），C list by B，G，R
        img_array = cv2.resize(img_cv2,(self.img_width,self.img_height))  #cv2.resize的输出为（w,h），宽在前，高在后
        
        img_name = os.path.split(img_path)[1]
        img_label = self.label_content[img_name]['label']
        #transform the label 0~9 to index 1~10，index 0 is for blank
        img_label_index_list = [x+1 for x in img_label]
        
        return img_array,img_label_index_list
    
    def __len__(self):
        return len(self.img_path)


class MyModel(nn.Module):
    def __init__(self,T,img_width,dropout):
        super().__init__()
        self.img_width = img_width
        self.img_height = int(0.5*self.img_width)
        self.cnn_out_width = self.cal_finnal_width()
        self.cnn_out_height = self.cal_finnal_height()
        #self.cnn: input(N,3,240,480),output(N,512,8,15)
        trained_resnet18 = models.resnet18(weights='DEFAULT')
        self.cnn = nn.Sequential(trained_resnet18.conv1,
                                 trained_resnet18.bn1,
                                 trained_resnet18.relu,
                                 trained_resnet18.maxpool,
                                 trained_resnet18.layer1,
                                 trained_resnet18.layer2,
                                 trained_resnet18.layer3,
                                 trained_resnet18.layer4)
        self.feat_to_seq = nn.Conv2d(self.cnn_out_height,1,kernel_size=(1,1),stride=1)
        self.rnn = nn.GRU(input_size=512,hidden_size=64,batch_first=True,bidirectional=True)
        self.linear = nn.Linear(128,11)
    
    def forward(self,X):
#         print('X:',X.shape)
        X = self.cnn(X)
#         print('cnn:',X.shape)
        X = X.permute((0,2,1,3))
        X = self.feat_to_seq(X)
#         print('feat_to_seq:',X.shape)
        X = X.permute((0,3,1,2))
#         print('permute:',X.shape)
        X = X.squeeze(dim=2)
#         print('squeeze:',X.shape)
        X,_ = self.rnn(X)
#         print('rnn:',X.shape)
        X = self.linear(X)
        X = X.permute((1,0,2))
#         print('linear:',X.shape)
        return X

    def cal_finnal_width(self):
        width = self.img_width
        for i in range(5):
            if width%2 == 0:
                width = int(width/2)
            else:
                width = int(width/2)+1
        return width

    def cal_finnal_height(self):
        height = self.img_height
        for i in range(5):
            if height%2 == 0:
                height = int(height/2)
            else:
                height = int(height/2)+1
        return height

class ValidModel():
    def __init__(self,saved_model_path=None,T=30,img_width=200):
        self.model = MyModel(T,img_width)
        if saved_model_path is not None:
            best_state_dict = torch.load(os.path.join('./',saved_model_path))
            self.model.load_state_dict(best_state_dict)
            if torch.cuda.is_available():
                self.model.cuda()
                print(saved_model_path,' cuda model load success!')
            else:
                print(saved_model_path,' model load success!')
        else:
            return('model loading failed')

    def predict(self,img_fold_path):
        img_path = glob.glob(os.path.join(img_fold_path,'*.png'))
        my_dataset = MyDataset(img_path)
        self.my_dataloader = DataLoader(my_dataset,batch_size=1)
#         print(self.my_dataloader)

        self.prediction_dict = OrderedDict()
        my_dataloader = self.my_dataloader
        my_dataloader = tqdm(my_dataloader,ncols=60)
        
        with torch.no_grad():
            self.model.eval()
            for batch_index,batch_data in enumerate(my_dataloader):
                X = batch_data.float()
                if torch.cuda.is_available:
                    X = X.cuda()
                y_hat = self.model(X)
                y_hat = y_hat.permute(1,0,2).cpu()
                pred = torch.argmax(y_hat,dim=2)
                batch_size = pred.shape[0]
                pred_list = []
                for i in range(batch_size):
                    raw_pred_list = list(pred[i].numpy())
                    pred_data = []
                    for j in range(len(raw_pred_list)):
                        if j == 0 and raw_pred_list[0] != 0:
                            pred_data.append(raw_pred_list[0]-1)
                        if j != 0 and raw_pred_list[j] != raw_pred_list[j-1] and raw_pred_list[j] != 0:
                            pred_data.append(raw_pred_list[j]-1)
                    if len(pred_data) != 0:
                        pred_num = ''.join([str(int(num)) for num in pred_data])
                        pred_num = int(pred_num)
                        img_name = os.path.split(img_path[batch_index])[1]
                        self.prediction_dict[img_name] = pred_num
                    else:
                        img_name = os.path.split(img_path[batch_index])[1]
                        self.prediction_dict[img_name] = -1
        
        return self.prediction_dict
            
    def get_accurancy(self,label_path):
        predict_right_num = 0 
        prediction_dict = self.prediction_dict
        label_content = json.load(open(label_path))
        if len(prediction_dict.keys()) == len(label_content.keys()):
            for img_name in prediction_dict.keys():
                if prediction_dict[img_name] == int(''.join([str(x) for x in label_content[img_name]['label']])):
                    predict_right_num+=1
            acc = (predict_right_num/len(prediction_dict))
            return acc
        else:
            print('label path error or may be need to call predict first')
            return None

In [4]:
index = 0
class_list =['train','val']
my_valid = ValidModel(saved_model_path='crnn_resnet18_ctc_valid_0.655',T=15,img_width=480)
valid_dataset_prediction = my_valid.predict(f'../input/{class_list[index]}')
print(f'{class_list[index]} len:',len(valid_dataset_prediction.keys()))
print(f'{class_list[index]} acc:',my_valid.get_accurancy(f'../input/{class_list[index]}.json'))
index = 1
valid_dataset_prediction = my_valid.predict(f'../input/{class_list[index]}')
print(f'{class_list[index]} len:',len(valid_dataset_prediction.keys()))
print(f'{class_list[index]} acc:',my_valid.get_accurancy(f'../input/{class_list[index]}.json'))

TypeError: __init__() missing 1 required positional argument: 'dropout'