# 网络构建

In [1]:
import torch
from torch import nn

class small_basic_block(nn.Module):
    def __init__(self, ch_in, ch_out):
        super(small_basic_block, self).__init__()
        self.block = nn.Sequential(
            nn.Conv2d(ch_in, ch_out // 4, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(ch_out // 4, ch_out // 4, kernel_size=(3, 1), padding=(1, 0)),
            nn.ReLU(),
            nn.Conv2d(ch_out // 4, ch_out // 4, kernel_size=(1, 3), padding=(0, 1)),
            nn.ReLU(),
            nn.Conv2d(ch_out // 4, ch_out, kernel_size=1),
            )
    def forward(self, x):
        return self.block(x)

class LPRNet(nn.Module):
    def __init__(self, class_num, dropout_rate=0.5):
        super(LPRNet, self).__init__()
        self.backbone = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1), # 0
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(),  # 2
            nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 1, 1)),
            small_basic_block(ch_in=64, ch_out=128),    # *** 4 ***
            nn.BatchNorm2d(num_features=128),
            nn.ReLU(),  # 6
            nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(2, 1, 2)),
            small_basic_block(ch_in=64, ch_out=256),   # 8
            nn.BatchNorm2d(num_features=256),
            nn.ReLU(),  # 10
            small_basic_block(ch_in=256, ch_out=256),   # *** 11 ***
            nn.BatchNorm2d(num_features=256),   # 12
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(4, 1, 2)),  # 14
            nn.Dropout(dropout_rate),
            nn.Conv2d(in_channels=64, out_channels=256, kernel_size=(1, 4), stride=1),  # 16
            nn.BatchNorm2d(num_features=256),
            nn.ReLU(),  # 18
            nn.Dropout(dropout_rate),
            nn.Conv2d(in_channels=256, out_channels=class_num, kernel_size=(13, 1), stride=1), # 20
            nn.BatchNorm2d(num_features=class_num),
            nn.ReLU(),  # *** 22 ***
            )
        self.container = nn.Sequential(
            nn.Conv2d(in_channels=448+class_num, out_channels=class_num, kernel_size=(1, 1), stride=(1, 1)),
            )

    def forward(self, x):
        keep_features = []
        for i, layer in enumerate(self.backbone.children()):
            x = layer(x)   # 向前传递每一层
            if i in [2, 6, 13, 22]: # [2, 4, 8, 11, 22]   # 对指定层的数据进行收集
                keep_features.append(x)

            global_context = []
            for i, f in enumerate(keep_features):
              # 为提取的层添加下采样
                if i in [0, 1]:
                    f = nn.AvgPool2d(kernel_size=5, stride=5)(f)
                if i in [2]:
                    f = nn.AvgPool2d(kernel_size=(4, 10), stride=(4, 2))(f)

              # 这里的作用类似BN，可以更好进行训练，收敛速度更快
                f_pow = torch.pow(f, 2)
                f_mean = torch.mean(f_pow)
                f = torch.div(f, f_mean)
                global_context.append(f)

        # 多尺度特征融合
        x = torch.cat(global_context, 1)
        x = self.container(x)
        logits = torch.mean(x, dim=2)
        return logits

# 数据集定义

In [2]:
import os
import numpy as np
from PIL import Image
import torch
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt


# 自定义数据加载
class PlateDataSet(Dataset):
    def __init__(self, root_dir, transform=None, target_transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.target_transform = target_transform
        self.images = os.listdir(self.root_dir)
        # 标签定义
        self.labels = ["京", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑", "苏", "浙", "皖", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤", "桂",
                 "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁", "新", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A",
                 "B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "U", "V", "W", "X",
                 "Y", "Z", "-"]

        # CCPD中字符的位置
        self.provinces = ["皖", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑", "苏", "浙", "京", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤", "桂", "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁", "新", "警", "学", "-"]
        self.alphabets = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
                 'X', 'Y', 'Z', '-']
        self.ads = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
           'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-']

    def __len__(self):
        return len(self.images)
  

  # 如果使用CCPD数据集，则使用如下代码
    def __getitem__(self, index):
        image_index = self.images[index]
        img_path = os.path.join(self.root_dir, image_index)

        # 只识别车牌，所以首先要把车牌给找到
        img = Image.open(img_path)
        # plt.imshow(img)
        # plt.show()
        anno_str = img_path.split("-")
        # print(anno_str)
        # print(anno_str[2])
        # 找到对应车牌位置
        lt, rb = anno_str[2].split("_")
        lt = list(map(int, lt.split('&')))
        rb = list(map(int, rb.split('&')))
        img = img.crop((lt[0], lt[1], rb[0], rb[1]))
#         plt.imshow(img)
#         plt.show()
#         label为车牌号
#         print(anno_str[4])
        label = list(map(int, anno_str[4].split('_')))
        label_char = []
        label_char.append(self.provinces[label[0]])
        label_char.append(self.alphabets[label[1]])
        for i in label[2:]:
            label_char.append(self.ads[i])
    # print(label_char)
    # 与labels进行映射
        labels = []
        for i in range(len(label_char)):
            labels.append(torch.tensor(self.labels.index(label_char[i])))
        if self.transform:
      # plt.imshow(img)
      # plt.show()
            img = self.transform(img)
    # print("length: ", len(labels))
        labels = torch.tensor(labels) # 将label转化为tensor
        length = torch.tensor(len(labels))
        return img, labels, length

In [3]:
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset

def collate_fn(batch):
    imgs = []
    labels = []
    lengths = []
    for _, sample in enumerate(batch):
        img, label, length = sample
        imgs.append(img)
        labels.extend(label)
        lengths.append(length)
    labels = np.asarray(labels).flatten().astype(np.float32)
    lengths = torch.tensor(lengths)
    return (torch.stack(imgs, 0), torch.from_numpy(labels), lengths)

train_dataset = PlateDataSet('E:/test/licensePlateRecognition/content/license_plate/train/', 
                               transform=transforms.Compose([
                                 transforms.Resize((24, 94)),
                                 transforms.ToTensor(),
                                 transforms.Normalize(mean = (0.5, 0.5, 0.5), std = (0.5, 0.5, 0.5))
                               ]))

train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=4, collate_fn=collate_fn)
x, y, z = iter(train_dataloader).next()


# 训练

In [None]:
# train
import time
from PIL import Image
import torch
import numpy as np
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset

device = torch.device('cuda')
weight_save_path = 'E:/test/licensePlateRecognition/content/LPRNet_Weights/'
train_batch_size = 128
test_batch_size = 120
learning_rate = 1e-3
max_epoch = 31
save_interval = 3
T_length = 18  # 最大8位车牌，每个真实标签前后都要有一个空白位占位
load_pretrained_weights = False
resume_epoch = 1
pretrained_weights_path = '/content/drive/MyDrive/LPRNet_Weights/LPRNet__epoch_20.pth'

train_dataset = PlateDataSet('E:/test/licensePlateRecognition/content/license_plate/train',
                             transform=transforms.Compose([
                                 transforms.Resize((24, 94)),
                                 transforms.ToTensor(),
                                 transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
                             ]))
val_dataset = PlateDataSet('E:/test/licensePlateRecognition/content/license_plate/test',
                           transform=transforms.Compose([
                               transforms.Resize((24, 94)),
                               transforms.ToTensor()
                           ]))


def sparse_tuple_for_ctc(T_length, lengths):
    input_lengths = []
    target_lengths = []
    for ch in lengths:
        input_lengths.append(T_length)
        target_lengths.append(ch)
    return tuple(input_lengths), tuple(target_lengths)


def collate_fn(batch):
    imgs = []
    labels = []
    lengths = []
    for _, sample in enumerate(batch):
        img, label, length = sample
        imgs.append(img)
        labels.extend(label)
        lengths.append(length)
    labels = np.asarray(labels).flatten().astype(np.float32)
    lengths = torch.tensor(lengths)
    return (torch.stack(imgs, 0), torch.from_numpy(labels), lengths)


net = LPRNet(class_num=len(train_dataset.labels), dropout_rate=0.5).to(device)

# 加载数据
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=train_batch_size, collate_fn=collate_fn)
val_dataloader = DataLoader(val_dataset, shuffle=True, batch_size=test_batch_size, collate_fn=collate_fn)

# 创建损失函数和优化器
ctc_loss = nn.CTCLoss(blank=len(train_dataset.labels) - 1, reduction='mean')
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
# optimizer = torch.optim.RMSprop(net.parameters(), lr=learning_rate, alpha = 0.9, eps=1e-08,
#                          momentum=momentum, weight_decay=weight_decay)

# 加载预训练权重
if load_pretrained_weights:
    net.load_state_dict(torch.load(pretrained_weights_path, map_location=device))
    print("load pretrained model successful!")
else:
    def xavier(param):
        nn.init.xavier_uniform(param)


    def weights_init(m):
        for key in m.state_dict():
            if key.split('.')[-1] == 'weight':
                if 'conv' in key:
                    nn.init.kaiming_normal_(m.state_dict()[key], mode='fan_out')
                if 'bn' in key:
                    m.state_dict()[key][...] = xavier(1)
            elif key.split('.')[-1] == 'bias':
                m.state_dict()[key][...] = 0.01


    net.backbone.apply(weights_init)
    net.container.apply(weights_init)
    print("initial net weights successful!")
for epoch in range(resume_epoch, max_epoch):
    print('starting epoch:',epoch)
    if (epoch) != 0 and (epoch) % save_interval == 0:
        torch.save(net.state_dict(), weight_save_path + 'LPRNet_' + '_epoch_' + repr(epoch) + '.pth')

    net.train()
    loss_val = 0
    start_time = time.time()
    for images, labels, lengths in train_dataloader:
        images, labels, lengths = images.to(device), labels.to(device), lengths.to(device)
        input_lengths, target_lengths = sparse_tuple_for_ctc(T_length, lengths)
        # print(images.shape)
        # print(labels.shape)
        # print(lengths.shape)
        logits = net(images)
        log_probs = logits.permute(2, 0, 1)
        log_probs = log_probs.log_softmax(2).requires_grad_()
        optimizer.zero_grad()
        loss = ctc_loss(log_probs, labels, input_lengths=input_lengths, target_lengths=target_lengths)
        # print(loss.item())
        if loss.item() == np.inf:
            continue
        loss.backward()
        optimizer.step()
        loss_val += loss.item()
    end_time = time.time()
    print('Epoch:' + repr(epoch) + ' || Total Loss: %f||' % (loss_val) +
          'Batch time: %.4f sec. ||' % (end_time - start_time) + 'LR: %.8f' % (learning_rate))

    # validation
    if epoch % 3 == 0:
        net.eval()
        with torch.no_grad():
            count = 0
            Tp = 0
            Tn_1 = 0
            Tn_2 = 0
            t1 = time.time()
            for images, labels, lengths in val_dataloader:

                images, labels, lengths = images.to(device), labels.to(device), lengths.to(device)
                start = 0
                targets = []
                for length in lengths:
                    label = labels[start:start + length]
                    targets.append(label)
                    start += length
                prebs = net(images)
                preb_labels = []
                for i in range(prebs.shape[0]):
                    preb = prebs[i, :, :]
                    preb_label = []
                    preb_label = preb.argmax(dim=0)
                    no_repeat_blank_label = []
                    pre_c = preb_label[0]
                    if pre_c != len(val_dataset.labels) - 1:
                        no_repeat_blank_label.append(pre_c)
                    for c in preb_label:  # dropout repeate label and blank label
                        if (pre_c == c) or (c == len(val_dataset.labels) - 1):
                            if c == len(val_dataset.labels) - 1:
                                pre_c = c
                            continue
                        no_repeat_blank_label.append(c)
                        pre_c = c
                    preb_labels.append(no_repeat_blank_label)

                for i, label in enumerate(preb_labels):
                    label = torch.tensor(label).to(device)
                    targets[i] = targets[i].to(device)
                    # print('================')
                    # print(label)
                    # print(targets[i])
                    # print('================')
                    if len(label) != len(targets[i]):
                        Tn_1 += 1
                        continue
                    if targets[i].eq(label).all():
                        Tp += 1
                    else:
                        Tn_2 += 1
                count += 1

            print("[Info] Validation Accuracy: {} [{}:{}:{}:{}]".format(Tp / (Tp + Tn_1 + Tn_2), Tp, Tn_1, Tn_2,
                                                                        (Tp + Tn_1 + Tn_2)))
            t2 = time.time()
            print("[Info] Validation Speed: {}s]".format(t2 - t1))


# 测试

## 单张图片测试

In [10]:
import time
from PIL import Image
import torch
from torchvision import transforms

device = torch.device('cuda')
test_batch_size = 120
T_length = 18  # 最大8位车牌，每个真实标签前后都要有一个空白位占位
pretrained_weights_path = 'E:\test\licensePlateRecognition\content\LPRNet_Weights\'

labels = ["京", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑", "苏", "浙", "皖", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤", "桂",
          "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁", "新", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A",
          "B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "U", "V", "W", "X",
          "Y", "Z", "-"]

img_path = 'E:/test/licensePlateRecognition/content/test/3.jpg'
preprocess_transform = transforms.Compose([
    transforms.Resize((24, 94)),
    transforms.ToTensor(),
])


def sparse_tuple_for_ctc(T_length, lengths):
    input_lengths = []
    target_lengths = []
    for ch in lengths:
        input_lengths.append(T_length)
        target_lengths.append(ch)
    return tuple(input_lengths), tuple(target_lengths)


net = LPRNet(class_num=len(labels)).to(device)
net.load_state_dict(torch.load(pretrained_weights_path, map_location=device))
print("load pretrained model successful!")
img = Image.open(img_path).convert('RGB')
img = preprocess_transform(img).to(device)
net.eval()
with torch.no_grad():
    t1 = time.time()
    start = 0
    targets = []
    img = img.unsqueeze(0)
    prebs = net(img)
    preb_labels = []
    for i in range(prebs.shape[0]):
        preb = prebs[i, :, :]
        preb_label = []
        preb_label = preb.argmax(dim=0)
        no_repeat_blank_label = []
        pre_c = preb_label[0]
        if pre_c != len(labels) - 1:
            no_repeat_blank_label.append(pre_c)
        for c in preb_label:  # dropout repeate label and blank label
            if (pre_c == c) or (c == len(labels) - 1):
                if c == len(labels) - 1:
                    pre_c = c
                continue
            no_repeat_blank_label.append(c)
            pre_c = c
        preb_labels.append(no_repeat_blank_label)
    for i, label in enumerate(preb_labels):
        # print('================')
        print(label)
#         print(targets[i])
        # print('================')
        for j in label:
            print(labels[j], end='')

load pretrained model successful!
[tensor(12, device='cuda:0'), tensor(44, device='cuda:0'), tensor(44, device='cuda:0'), tensor(38, device='cuda:0'), tensor(33, device='cuda:0'), tensor(39, device='cuda:0'), tensor(36, device='cuda:0')]
皖DD7285

## 多张图片测试

In [14]:
import torch
import time


def collate_fn(batch):
    imgs = []
    labels = []
    lengths = []
    for _, sample in enumerate(batch):
        img, label, length = sample
        imgs.append(img)
        labels.extend(label)
        lengths.append(length)
    labels = np.asarray(labels).flatten().astype(np.float32)
    lengths = torch.tensor(lengths)
    return (torch.stack(imgs, 0), torch.from_numpy(labels), lengths)


# 超参数定义
test_batch_size = 120
device = torch.device('cuda')
pretrained_weights_path = 'E:/test/licensePlateRecognition/content/LPRNet_Weights/LPRNet__epoch_21_last.pth'

test_dataset = PlateDataSet('E:/test/licensePlateRecognition/content/plates/test/',
                            transform=transforms.Compose([
                                transforms.Resize((24, 94)),
                                transforms.ToTensor()
                            ]))
test_dataloader = DataLoader(test_dataset, shuffle=True, batch_size=test_batch_size, collate_fn=collate_fn)
net = LPRNet(class_num=len(test_dataset.labels), dropout_rate=0.0).to(device)
net.load_state_dict(torch.load(pretrained_weights_path, map_location=device))
print("load pretrained model successful!")
net.eval()
with torch.no_grad():
    count = 0
    Tp = 0
    Tn_1 = 0
    Tn_2 = 0
    t1 = time.time()
    for images, labels, lengths in val_dataloader:

        images, labels, lengths = images.to(device), labels.to(device), lengths.to(device)
        start = 0
        targets = []
        for length in lengths:
            label = labels[start:start + length]
            targets.append(label)
            start += length
        prebs = net(images)
        preb_labels = []
        for i in range(prebs.shape[0]):
            preb = prebs[i, :, :]
            preb_label = []
            preb_label = preb.argmax(dim=0)
            no_repeat_blank_label = []
            pre_c = preb_label[0]
            if pre_c != len(val_dataset.labels) - 1:
                no_repeat_blank_label.append(pre_c)
            for c in preb_label:  # dropout repeate label and blank label
                if (pre_c == c) or (c == len(val_dataset.labels) - 1):
                    if c == len(val_dataset.labels) - 1:
                        pre_c = c
                    continue
                no_repeat_blank_label.append(c)
                pre_c = c
            preb_labels.append(no_repeat_blank_label)

        for i, label in enumerate(preb_labels):
            label = torch.tensor(label).to(device)
            targets[i] = targets[i].to(device)
            # print('================')
            # print(label)
            # print(targets[i])
            # print('================')
            if len(label) != len(targets[i]):
                Tn_1 += 1
                continue
            if targets[i].eq(label).all():
                Tp += 1
            else:
                Tn_2 += 1
        count += 1

    print("[Info] Validation Accuracy: {} [{}:{}:{}:{}]".format(Tp / (Tp + Tn_1 + Tn_2), Tp, Tn_1, Tn_2,
                                                                (Tp + Tn_1 + Tn_2)))
    t2 = time.time()
    print("[Info] Validation Speed: {}s]".format(t2 - t1))

load pretrained model successful!
[Info] Validation Accuracy: 0.8641534901658311 [26889:1240:2987:31116]
[Info] Validation Speed: 421.1495382785797s]
