In [9]:
# 获取验证码背景图片，利用yolov5模型进行目标汉字检测，得到汉字坐标位置并剪切对应汉字图片保存
# 利用模型对汉字图片进行识别，得到相应的汉字列表
# 基于jieba分词和n-gram模型判断汉字列表的排序组合，得到最有可能的汉字排序组合，最后输出对应的坐标位置

In [10]:
from selenium import webdriver
from selenium.webdriver import Chrome, ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
import requests
import time
import matplotlib.pyplot as plt
from io import BytesIO
import cv2
from PIL import ImageChops
import string
import shutil
import math
import jieba
from itertools import permutations
import kenlm

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import models
from torchvision.transforms import transforms
from PIL import Image
import numpy as np
import random
import os
import pandas as pd

%matplotlib inline

In [4]:
# 获得汉字的所有排列方式
def _permutation(s, r = None): 
    word_list = list(permutations(s, r))
    for i in range(len(word_list)):
        word_list[i] = ''.join(word_list[i])
    return word_list

# Locate the longest word in a list.
def find_longest(list):
    """Return the index of the longest item in ``list``.

    Ties are resolved in favour of the earliest item. An empty input, or
    one whose items all have length zero, yields index ``0``.

    Note: the parameter shadows the built-in ``list``; the name is kept
    so existing keyword callers continue to work.
    """
    # max() returns the first maximal element, matching a strict ">" scan.
    winner = max(enumerate(list), key=lambda pair: len(pair[1]), default=(0, ''))
    return winner[0]

# Recover the character order with jieba word segmentation.
def recog_order_jieba(s):
    """Try to order the characters in ``s`` into a single known word.

    Every permutation of ``s`` is segmented with
    ``jieba.lcut(..., cut_all=True)``. A permutation is a candidate when
    its longest segment is as long as ``len(s)``.

    Args:
        s: Sequence of recognised characters.

    Returns:
        The candidate word (``str``) when exactly one distinct candidate
        exists, otherwise the integer ``0``. ``0`` covers both "no
        candidate" and "several candidates", and is the sentinel that
        ``recog_order`` checks for.
    """
    target_len = len(s)
    if target_len == 0:
        # Nothing to order.
        return 0

    # A set, because repeated characters in ``s`` make permutations()
    # yield the same string several times; counting those duplicates
    # would wrongly reject a unique match.
    candidates = set()
    for word in _permutation(s):
        seg_list = jieba.lcut(word, cut_all=True)
        if not seg_list:
            # Guard: indexing an empty segment list would raise IndexError.
            continue
        longest = seg_list[find_longest(seg_list)]
        if len(longest) == target_len:
            candidates.add(longest)

    if len(candidates) == 1:
        return next(iter(candidates))
    return 0

def recog_order(s, lm):
    """Return the characters of ``s`` in their most plausible order.

    jieba is tried first through ``recog_order_jieba``. When it does not
    give a unique answer (sentinel ``0``), every permutation is scored
    with the language model and the best-scoring one is returned.

    Args:
        s: Sequence of recognised characters.
        lm: Object exposing ``score(text) -> float``; a ``kenlm.Model``
            in this notebook. Characters are passed space-separated.

    Returns:
        A list of characters in the chosen order.
    """
    # jieba-based recognition
    res = recog_order_jieba(s)
    if res != 0:
        return list(res)

    # Language-model fallback. dict.fromkeys() drops duplicate
    # permutations (caused by repeated characters) while keeping order,
    # so each distinct candidate is scored once.
    candidates = list(dict.fromkeys(_permutation(s)))
    if not candidates:
        return []

    # max() replaces the former fixed -1e5 starting score, which returned
    # an empty result whenever no candidate scored above that threshold.
    # Like the old strict ">" scan, the first best candidate wins ties.
    best_word = max(candidates, key=lambda word: lm.score(' '.join(word)))
    return list(best_word)

class ImgClassifyModel(nn.Module):
    """EfficientNet-B5 image classifier with a replaceable output head.

    Args:
        class_num: Number of output classes for the new final layer.
        pretrained: Optional path to a state dict for the stock
            torchvision backbone. It is loaded before the head is
            replaced, so it must match the unmodified model.
    """

    def __init__(self, class_num, pretrained=None):
        super().__init__()
        self.model = models.efficientnet_b5(pretrained=False)
        if pretrained:
            # map_location='cpu' lets a checkpoint saved on a CUDA device
            # load on a CPU-only host; the caller moves the model later.
            self.model.load_state_dict(torch.load(pretrained, map_location='cpu'))
        # Read the head's input width from the existing layer instead of
        # hard-coding it, so a different backbone needs no further edit.
        in_features = self.model.classifier[1].in_features
        self.model.classifier[1] = nn.Linear(in_features, class_num)

    def forward(self, x):
        """Return the backbone's output for input batch ``x``."""
        return self.model(x)

def recog_chars(img_path, model, trf, device):
    """Classify every image in ``img_path`` and return the predicted labels.

    Files are processed in a deterministic order: names whose stem is a
    decimal number come first in numeric order (``2.png`` before
    ``10.png``), then all other names alphabetically. Callers that map
    predictions to crops by index rely on this order.

    Args:
        img_path: Directory containing the image files.
        model: Module called as ``model(batch)``; the prediction is the
            argmax over its last output dimension.
        trf: Callable turning an RGB ``PIL.Image`` into a tensor.
        device: Device the batch is moved to before inference.

    Returns:
        A NumPy array with one predicted label index per image.

    Raises:
        ValueError: If ``img_path`` contains no files.
    """
    def _numeric_first(name):
        stem = os.path.splitext(name)[0]
        if stem.isdecimal():
            return (0, int(stem), name)
        return (1, 0, name)

    # os.listdir() gives no ordering guarantee, so sort explicitly.
    file_names = sorted(os.listdir(img_path), key=_numeric_first)
    if not file_names:
        raise ValueError(f'no images found in {img_path!r}')

    tensors = []
    for file_name in file_names:
        # os.path.join works with or without a trailing separator; the
        # context manager closes the file handle once it is converted.
        with Image.open(os.path.join(img_path, file_name)) as raw_img:
            tensors.append(trf(raw_img.convert('RGB')))
    test_imgs = torch.stack(tensors, dim=0).to(device)

    model.eval()
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        pred_y = model(test_imgs)
    return pred_y.argmax(dim=-1).cpu().numpy()

In [6]:
# Label <-> character lookup tables, built from the entries of ./classes/.
# NOTE(review): os.listdir() order is not guaranteed, so this mapping
# presumably has to match the order used at training time -- confirm.
chars = os.listdir('./classes/')
label_to_char = dict(enumerate(chars))
char_to_label = {char: label for label, char in label_to_char.items()}

# Preprocessing for the character classifier. The mean/std values are the
# commonly used ImageNet normalisation statistics.
img_size = (112, 112)
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]
trf = transforms.Compose([
    transforms.Resize(img_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std)
])
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
# NOTE(review): class_num is fixed and not derived from len(chars); it
# presumably has to equal the head size stored in the checkpoint -- confirm.
class_num = 899
model_path = './model/model_0802.pt'
# NOTE(review): absolute, machine-specific path to the KenLM model.
lm = kenlm.Model('D:/Boost/zh_giga.no_cna_cmn.prune01244.klm')
model = ImgClassifyModel(class_num=class_num, pretrained=None)
# map_location=device lets a checkpoint saved on a GPU load on a CPU-only
# machine, which is the case the `device` fallback above is meant for.
model.load_state_dict(torch.load(model_path, map_location=device))
model = model.to(device)

In [7]:
# Chrome launch configuration for the scraping session.
options = ChromeOptions()

# Command-line switches, applied in this exact order.
# NOTE(review): 'user-agent' is set twice (the full browser string and
# 'ywy'); which value Chrome ends up using should be confirmed.
for _chrome_switch in (
    'user-agent="Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"',
    '--ignore-certificate-errors',
    '--disable-gpu',
    '--ssl-protocol=any',
    '--no-sandbox',
    '--disable-dev-shm-usage',
    'user-agent=ywy',
    '--ignore-urlfetcher-cert-requests',
    '--ignore-ssl-errors',
):
    options.add_argument(_chrome_switch)

# Exclude the 'enable-automation' switch from Chrome's launch switches.
options.add_experimental_option('excludeSwitches', ['enable-automation'])

In [8]:
driver = webdriver.Chrome(options=options)
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
    "source": """
    Object.defineProperty(navigator, 'webdriver', {
      get: () => undefined
    })
  """
})
driver.get('https://www.gsxt.gov.cn/index.html')
time.sleep(5)

input_box = driver.find_element(By.ID, 'keyword')
input_box.send_keys('德信行')
time.sleep(1)
driver.find_element(By.ID, 'btn_query').click()
time.sleep(3)

box = driver.find_element(By.XPATH, '/html/body/div[7]/div[1]/div[1]/div[1]/div[1]/div[1]')
text = box.get_attribute('innerText')
if text == '请按语序依次点击':

    # s = ''.join(random.sample(string.ascii_letters + string.digits, 4))
    url = driver.find_element(By.XPATH, '/html/body/div[7]/div[1]/div[1]/div[2]/div/div/div[1]/div[1]').get_attribute('style')[23:-3]
    content = requests.get(url).content
    with open('./test/background.jpg', 'wb') as f:
        f.write(content)
    shutil.copy('./test/background.jpg', 'D:/yolo_dataset')
    %run ./model/yolov5/detect.py --weight ./model/yolov5/runs/train/exp4/weights/best.pt --source D:/yolo_dataset/background.jpg --save-txt --conf-thres 0.5 --line-thickness 1 --img 320 --project ./results/detect/ --device cpu
    background = Image.open(BytesIO(content))
    width, height = background.size
    num = len(os.listdir('./results/detect/'))
    location = {}
    with open(f'./results/detect/exp{str(num)}/labels/background.txt', 'r', encoding='utf8') as f:
        # if len(f.readlines()) != 3:
        #     print(i)
        for i, line in enumerate(f):
            box = line.split(' ')
            x_center = width * float(box[1])  # 左上点的x坐标  
            y_center = height * float(box[2])  # 左上点的y坐标
            w = round(width * float(box[3]))  # 图片width
            h = round(height * float(box[4]))  # 图片height
            lefttopx = math.ceil(x_center - w / 2.0)
            lefttopy = math.ceil(y_center - h / 2.0)
            crop_img = background.crop((lefttopx, lefttopy, lefttopx + w, lefttopy + h))
            # s = ''.join(random.sample(string.ascii_letters + string.digits, 5))
            crop_img.save(f'./test/images/{str(i)}.png', compress_level=0)
            location[i] = [x_center - width // 2, y_center - height // 2]
    
    pred_y = recog_chars(img_path='./test/images/', model=model, trf=trf, device=device)
    result = [label_to_char[y] for y in pred_y]
    res_order = recog_order(result, lm)
    idx = [result.index(i) for i in res_order]

    actions = ActionChains(driver)
    
    code_img = driver.find_element(By.XPATH, '/html/body/div[7]/div[1]/div[1]/div[2]/div/div/div[1]/div[1]')
    for key in idx:
        value = location[key]
        actions.move_to_element_with_offset(code_img, value[0], value[1]).click().perform()
        time.sleep(1)
    
    check = driver.find_element(By.XPATH, '/html/body/div[7]/div[1]/div[1]/div[2]/div/div/div[2]/div')
    actions.click(check).perform()
    # refresh = driver.find_element(By.XPATH, '/html/body/div[7]/div[1]/div[1]/div[3]/div[1]/button[2]')
    # actions.click(refresh)
    # actions.move_to_element_with_offset(refresh, 50, 50)
    # actions.perform()
    time.sleep(2)

    # driver.find_element(By.XPATH, '/html/body/div[7]/div[1]/div[1]/div[3]/div[1]/button[2]').click()
    # time.sleep(2)

else:
    # driver.refresh()
    # driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
    #     "source": """
    #     Object.defineProperty(navigator, 'webdriver', {
    #       get: () => undefined
    #     })
    #   """
    # })
    # time.sleep(3)
    # break
    driver.close()

driver.close()

    import torch
    ckpt = torch.load("model.pt")  # applies to both official and custom models
    torch.save(ckpt, "updated-model.pt")

[34m[1mdetect: [0mweights=['./model/yolov5/runs/train/exp4/weights/best.pt'], source=D:/yolo_dataset/background.jpg, data=model\yolov5\data\coco128.yaml, imgsz=[320, 320], conf_thres=0.5, iou_thres=0.45, max_det=1000, device=cpu, view_img=False, save_txt=True, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=./results/detect/, name=exp, exist_ok=False, line_thickness=1, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
YOLOv5  v7.0-196-gacdf73b Python-3.8.13 torch-1.10.1+cu102 CPU

Fusing layers... 
Model summary: 267 layers, 46108278 parameters, 0 gradients, 107.6 GFLOPs
image 1/1 D:\yolo_dataset\background.jpg: 224x320 3 chars, 224.0ms
Speed: 0.0ms pre-process, 224.0ms inference, 1.0ms NMS per image at shape (1, 3, 320, 320)
Results save