In [1]:
# 获取目标汉字截图，利用百度接口识别汉字
# 根据汉字到缓存中随机获取对应的汉字图片，若缓存中没有，利用函数生成汉字图片，该图片作为孪生神经网络的输入之一
# 获取验证码背景图，利用yolov5检测图片中的汉字，保存坐标位置并剪切汉字图片保存，作为孪生神经网络的输入之一
# 经过以上的步骤，利用孪生神经网络按照顺序依次判断目标汉字图片与背景图中检测的图片的文字相似度，找到相似度最高的图片的坐标位置
# 最终得到排序后的坐标位置

In [3]:
from selenium import webdriver
from selenium.webdriver import Chrome, ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
import time
import requests
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont, ImageOps, ImageFilter
from io import BytesIO
import os
from IPython.display import clear_output as clear
import sys
import random
import string
import numpy as np
import re
import math
import shutil
import cv2
import urllib
import base64
import scipy.misc
import scipy.signal
import pandas as pd


import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import models
from torchvision.transforms import transforms

%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

In [4]:
# Credentials for the Baidu OCR API that recognises the target characters.
# They can be supplied through the BAIDU_API_KEY / BAIDU_SECRET_KEY environment
# variables; the literals are only a fallback so existing runs keep working.
# NOTE(review): the fallback values are committed in plain text - rotate them and
# remove the fallback once the environment variables are in place.
API_KEY = os.environ.get("BAIDU_API_KEY") or "7gA8YqNKUIBdX5HIenGEGBzm"
SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY") or "tFrsRHSIRdvsGLIhaVkfThU8BR74S3Go"

def get_access_token():
    """
    Request an access token from the Baidu token endpoint.

    Posts the module-level ``API_KEY`` / ``SECRET_KEY`` with the
    ``client_credentials`` grant type.

    :return: the ``access_token`` field of the JSON response, as a string
    :raises RuntimeError: if the response contains no ``access_token``
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {"grant_type": "client_credentials", "client_id": API_KEY, "client_secret": SECRET_KEY}
    # A timeout keeps the notebook from hanging forever on a stalled connection.
    payload = requests.post(url, params=params, timeout=10).json()
    token = payload.get("access_token")
    if not token:
        # Without this check a missing token was returned as the literal string
        # "None" and only failed later, inside the OCR request.
        raise RuntimeError(f"Baidu token request failed: {payload}")
    return str(token)

def get_file_content_as_base64(path, urlencoded=False):
    """
    Read a file and return its content encoded as base64 text.

    :param path: path of the file to read
    :param urlencoded: whether to additionally URL-encode the base64 text
    :return: the base64 string (URL-encoded when ``urlencoded`` is true)
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.parse`` has been loaded.
    import urllib.parse

    with open(path, "rb") as f:
        content = base64.b64encode(f.read()).decode("utf8")
    if urlencoded:
        content = urllib.parse.quote_plus(content)
    return content

def recog_chars_baidu(img_path):
    """
    Recognise the characters in an image with the Baidu ``accurate_basic`` OCR endpoint.

    The image file is base64- and URL-encoded and posted as a form field; the
    ``words`` strings of every entry in ``words_result`` are split into single
    characters.

    :param img_path: path of the image file to recognise
    :return: list of single-character strings, in the order they were returned
    :raises RuntimeError: if the response contains no ``words_result``
    :raises ValueError: if fewer than three characters were recognised
    """
    url = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic?access_token=" + get_access_token()
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept': 'application/json'
    }
    payload = 'image=' + get_file_content_as_base64(img_path, True)

    # A timeout keeps a stalled request from blocking the notebook indefinitely.
    response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
    result = response.json()
    if 'words_result' not in result:
        # Report the whole response instead of failing with a bare KeyError.
        raise RuntimeError(f"Baidu OCR request failed: {result}")

    target_words = []
    for item in result['words_result']:
        # Extending a list with a string appends its characters one by one.
        target_words.extend(item['words'])

    if len(target_words) < 3:
        raise ValueError("目前暂不支持此类型验证码识别")

    return target_words

def convert_2d_1(r):
    """
    Low-pass filter a 2-D array in the frequency domain.

    The input is zero-padded to twice its size, transformed with a centred
    2-D FFT, multiplied by a 0/1 mask and transformed back. The magnitude of
    the top-left region of the original size is clamped to [0, 255].

    :param r: 2-D array (one image channel)
    :return: filtered array of the same shape, dtype ``uint8``
    """
    rows, cols = r.shape[0], r.shape[1]

    # Zero-padded copy, twice as large along both axes.
    padded = np.zeros((rows * 2, cols * 2))
    padded[:rows, :cols] = r

    spectrum = np.fft.fftshift(np.fft.fft2(padded))

    # Cut-off threshold of the mask.
    cutoff = 100
    # Centre of the shifted spectrum.
    centre_u, centre_v = spectrum.shape[0] // 2, spectrum.shape[1] // 2
    u = np.arange(spectrum.shape[0]).reshape(-1, 1)
    v = np.arange(spectrum.shape[1]).reshape(1, -1)
    # Squared distance from the centre, raised to the power 0.65.
    measure = ((u - centre_u) ** 2 + (v - centre_v) ** 2) ** 0.65
    # Filter H(u, v): 1.0 where the measure is below the cut-off, else 0.0.
    mask = (measure < cutoff).astype(np.float64)

    restored = np.abs(np.fft.ifft2(np.fft.ifftshift(spectrum * mask)))
    cropped = restored[:rows, :cols]

    return np.clip(cropped, 0, 255).astype(np.uint8)


def convert_3d_1(r):
    """
    Apply ``convert_2d_1`` to every slice along the third axis of ``r``.

    :param r: 3-D array; ``r[:, :, d]`` is processed for each index ``d``
    :return: the processed slices stacked back along the third axis
    """
    return np.dstack([convert_2d_1(r[:, :, channel]) for channel in range(r.shape[2])])

def convert_2d_2(r):
    """
    Convolve a 2-D array with a 3x3 mask and clamp the result to [0, 255].

    :param r: 2-D array (one image channel)
    :return: filtered array of the same shape, dtype ``uint8``
    """
    # Filter mask: 5 at the centre, -1 on the four direct neighbours.
    kernel = np.array([
        [0, -1, 0],
        [-1, 5, -1],
        [0, -1, 0]
    ])
    filtered = scipy.signal.convolve2d(r, kernel, mode='same', boundary='symm')
    # Values above 255 become 255, values below 0 become 0. np.fmax is used
    # so that a NaN maps to 0, exactly like the scalar max(0, x).
    bounded = np.minimum(np.fmax(filtered, 0), 255)
    return bounded.astype(np.uint8)


def convert_3d_2(r):
    """
    Apply ``convert_2d_2`` to every slice along the third axis of ``r``.

    :param r: 3-D array; ``r[:, :, d]`` is processed for each index ``d``
    :return: the processed slices stacked back along the third axis
    """
    return np.dstack([convert_2d_2(r[:, :, channel]) for channel in range(r.shape[2])])

def convert_2d_3(r, n=1):
    """
    Median-filter a 2-D array with an ``n`` x ``n`` window.

    With the default ``n=1`` the window covers a single element, so the
    values pass through unchanged apart from the cast to ``uint8``.

    :param r: 2-D array (one image channel)
    :param n: side length of the square filter window
    :return: filtered array of the same shape, dtype ``uint8``
    """
    # Import the submodule explicitly: the notebook only imports scipy.misc
    # and scipy.signal, so scipy.ndimage is not guaranteed to be loaded.
    import scipy.ndimage

    s = scipy.ndimage.median_filter(r, (n, n))
    return s.astype(np.uint8)


def convert_3d_3(r):
    """
    Apply ``convert_2d_3`` to every slice along the third axis of ``r``.

    :param r: 3-D array; ``r[:, :, d]`` is processed for each index ``d``
    :return: the processed slices stacked back along the third axis
    """
    return np.dstack([convert_2d_3(r[:, :, channel]) for channel in range(r.shape[2])])

def convert(img):
    """
    Run an image through the three per-channel filter stages in order.

    After each of ``convert_3d_1``, ``convert_3d_2`` and ``convert_3d_3`` the
    array is turned back into a PIL image, which is the input of the next stage.

    :param img: image accepted by ``np.asarray`` as a 3-D array
    :return: the PIL image produced by the last stage
    """
    current = img
    for stage in (convert_3d_1, convert_3d_2, convert_3d_3):
        current = Image.fromarray(stage(np.asarray(current)))
    return current

def get_locate(i, width=300, height=200, label_dir='./detect_labels'):
    """
    Read the label file of background image ``i`` and return its boxes in pixels.

    Every non-empty line of ``<label_dir>/<i>.txt`` is split on whitespace.
    Fields 1-4 are read as centre x, centre y, box width and box height and
    are scaled by ``width`` / ``height``; field 0 is ignored.
    (Presumably these are YOLO-format labels - confirm against the files.)

    :param i: base name of the label file, without the ``.txt`` extension
    :param width: image width in pixels used to scale the horizontal fields
    :param height: image height in pixels used to scale the vertical fields
    :param label_dir: directory that contains the label files
    :return: list of ``(left, top, right, bottom)`` tuples, one per label line
    """
    loc_box = []
    with open(os.path.join(label_dir, f'{i}.txt'), 'r', encoding='utf8') as f:
        for line in f:
            box = line.split()
            if not box:
                # A blank line (e.g. a trailing newline) carries no label.
                continue
            x_center = width * float(box[1])   # x coordinate of the box centre
            y_center = height * float(box[2])  # y coordinate of the box centre
            w = round(width * float(box[3]))   # box width in pixels
            h = round(height * float(box[4]))  # box height in pixels
            # Convert centre/size to the top-left corner.
            lefttopx = math.ceil(x_center - w / 2.0)
            lefttopy = math.ceil(y_center - h / 2.0)
            loc_box.append((lefttopx, lefttopy, lefttopx + w, lefttopy + h))
    return loc_box

def generate_char_img(lab, colors, bg_idx, crop_box, font_path='./中文像素字体.ttf'):
    """
    Render a character and paste it onto a random background image.

    The text is drawn on a 52x52 RGBA canvas, four times in the first colour
    at small offsets and once in the second colour on top, then filtered with
    ``convert`` and rotated by a random angle between -45 and 45 degrees. It
    is pasted at a randomly chosen crop box that does not overlap any box
    returned by ``get_locate`` for the chosen background, and that region of
    the background is returned.

    :param lab: text to draw
    :param colors: sequence of ``(c1, c2)`` colour pairs; one is chosen at random
    :param bg_idx: sequence of background ids; ``./images/<id>.jpg`` is opened
    :param crop_box: sequence of candidate ``(x1, y1, x2, y2)`` boxes
    :param font_path: path of the TrueType font used to draw the text
    :return: the cropped PIL image that contains the pasted character
    :raises RuntimeError: if every candidate box overlaps a labelled box
    """
    img = Image.new('RGBA', (52, 52))
    draw = ImageDraw.Draw(img)
    font = ImageFont.truetype(font_path, 46)
    c1, c2 = random.choice(colors)
    # Four shifted copies in c1, then one copy in c2 drawn over them.
    for offset in ((2, 2), (4, 4), (2, 4), (4, 2)):
        draw.text(offset, lab, font=font, fill=c1)
    draw.text((3, 3), lab, font=font, fill=c2)

    img_convert = convert(img)
    angle = random.randint(-45, 45)
    img_convert = img_convert.rotate(angle=angle)

    i = random.choice(bg_idx)
    loc_box = get_locate(i)
    back_img = Image.open(f'./images/{i}.jpg')
    # The fourth band of the image is used as the paste mask.
    mask = img_convert.split()[3]

    # Try the candidates in random order, each at most once. The previous
    # "pick again until one fits" loop never ended when no candidate was free.
    candidates = list(crop_box)
    for x1, y1, x2, y2 in random.sample(candidates, len(candidates)):
        is_free = all(
            (x1 > rightx) or (y1 > righty) or (x2 < leftx) or (y2 < lefty)
            for leftx, lefty, rightx, righty in loc_box
        )
        if is_free:
            back_img.paste(img_convert, box=(x1, y1), mask=mask)
            return back_img.crop((x1, y1, x2, y2))

    raise RuntimeError(f"no crop box is free of labelled boxes on background {i!r}")

class SiameseNetwork(nn.Module):
    """
    Two-input network that scores a pair of images.

    Both inputs go through the same ``features`` stack of a VGG11-BN model.
    The two feature maps are concatenated along dimension 1 and ``match_net``
    reduces them to one value per pair (``check_diff`` applies a sigmoid to it).
    """

    def __init__(self, image_size, pretrained=False):
        """
        :param image_size: ``(height, width)`` of the input images
        :param pretrained: if true, load VGG weights from
            ``./model/vgg11_bn-6002323d.pth`` before dropping the unused heads
        """
        super().__init__()

        backbone = models.vgg11_bn(pretrained=False)
        if pretrained:
            backbone.load_state_dict(torch.load('./model/vgg11_bn-6002323d.pth'))
        self.features_net = backbone
        # Only ``features`` is used in forward(); drop the other sub-modules.
        del self.features_net.avgpool
        del self.features_net.classifier

        # Each side is floor-divided by 2 five times, i.e. by 32 in total.
        # NOTE(review): presumably this mirrors the down-sampling of the VGG
        # feature stack - confirm.
        shrink = 2 ** 5
        flat_shape = 512 * (image_size[0] // shrink) * (image_size[1] // shrink)

        self.match_net = nn.Sequential(
            nn.Conv2d(1024, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Flatten(),
            nn.Linear(flat_shape, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(512, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 1),
        )

    def forward(self, x1, x2):
        """
        Score a batch of image pairs.

        :param x1: first batch of images
        :param x2: second batch of images
        :return: output of ``match_net`` for the concatenated feature maps
        """
        extract = self.features_net.features
        merged = torch.cat((extract(x1), extract(x2)), 1)
        return self.match_net(merged)

def check_diff(img_0, img_1, trf, model, device):
    """
    Score one pair of images with the model.

    Each image is passed through ``trf``, given a leading batch dimension and
    moved to ``device``. The model is switched to eval mode, and a sigmoid is
    applied to its output after the last dimension is squeezed.

    :param img_0: first image, in a form accepted by ``trf``
    :param img_1: second image, in a form accepted by ``trf``
    :param trf: callable that turns an image into a tensor
    :param model: module called as ``model(batch_0, batch_1)``
    :param device: device the inputs are moved to
    :return: NumPy array with the sigmoid of the model output
    """
    batch_0 = trf(img_0).unsqueeze(0).to(device)
    batch_1 = trf(img_1).unsqueeze(0).to(device)

    model.eval()
    # Inference only: skip autograd bookkeeping. torch.sigmoid replaces the
    # deprecated nn.functional.sigmoid.
    with torch.no_grad():
        pred = torch.sigmoid(model(batch_0, batch_1).squeeze(-1))

    return pred.detach().cpu().numpy()

In [5]:
# Colour pairs for synthetic characters: generate_char_img draws the first
# colour at four shifted positions and the second colour on top.
colors = [('#C13808', '#FFF111'), ('#0023A7', '#0BF29E'), ('#6DCDF4', '#E111F9'), ('#22258C', '#7E0300'), ('#D55929', '#8513D4')]
# Candidate 52x52 boxes (x1, y1, x2, y2), a 3x3 grid inside a 300x200 area.
crop_box = [(0, 0, 52, 52), (124, 0, 176, 52), (248, 0, 300, 52), (0, 74, 52, 126), (124, 74, 176, 126), (248, 74, 300, 126), (0, 148, 52, 200), (124, 148, 176, 200), (248, 148, 300, 200)]
# Background ids are the base names of the .jpg files in ./images/.
# generate_char_img opens ./images/<id>.jpg, so other entries are skipped
# rather than blindly cut by four characters.
bg_idx = [os.path.splitext(name)[0] for name in os.listdir('./images/') if name.endswith('.jpg')]
# One entry per known character: generate-or-cache lookups test `word in classes`.
classes = os.listdir('./classes/')

img_size = (112, 112)
# Normalisation statistics, currently unused:
# norm_mean = [0.485, 0.456, 0.406]
# norm_std = [0.229, 0.224, 0.225]
trf = transforms.Compose([
    transforms.Resize(img_size),
    transforms.ToTensor()
])
model_path = './model/siamese_gpu_08_02.pt'
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
model = SiameseNetwork(image_size=img_size)
# map_location lets the saved weights load on the selected device.
model.load_state_dict(torch.load(model_path, map_location=device))
model = model.to(device)

In [6]:
# Command-line switches passed to Chrome, in the order they are added.
# NOTE(review): 'user-agent' appears twice (a full browser string and 'ywy');
# presumably only one of them takes effect - confirm which one is intended.
chrome_flags = (
    'user-agent="Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"',
    '--ignore-certificate-errors',
    '--disable-gpu',
    '--ssl-protocol=any',
    '--no-sandbox',
    '--disable-dev-shm-usage',
    'user-agent=ywy',
    '--ignore-urlfetcher-cert-requests',
    '--ignore-ssl-errors',
)

options = ChromeOptions()
for flag in chrome_flags:
    options.add_argument(flag)
# Exclude the 'enable-automation' switch from the ones Chrome is started with.
options.add_experimental_option('excludeSwitches', ['enable-automation'])

In [10]:
driver = webdriver.Chrome(options=options)
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
    "source": """
    Object.defineProperty(navigator, 'webdriver', {
      get: () => undefined
    })
  """
})
driver.get('https://www.gsxt.gov.cn/index.html')
time.sleep(5)

input_box = driver.find_element(By.ID, 'keyword')
input_box.send_keys('德信行')
time.sleep(1)
driver.find_element(By.ID, 'btn_query').click()
time.sleep(3)

box = driver.find_element(By.XPATH, '/html/body/div[7]/div[1]/div[1]/div[1]/div[1]/div[1]')
text = box.get_attribute('innerText')
if text == '请在下图依次点击':
    # s = ''.join(random.sample(string.ascii_letters + string.digits, 4))
    driver.find_element(By.XPATH, '/html/body/div[7]/div[1]/div[1]/div[1]/div[1]/div[2]').screenshot('./test/chars.png')
    target_words = recog_chars_baidu('./test/chars.png')
    # print(target_words)
    
    # 根据汉字到缓存中随机获取对应的汉字图片，若缓存中没有，利用函数生成汉字图片，该图片作为孪生神经网络的输入之一
    target_imgs = []
    for word in target_words:
        if word in classes:
            tmp = random.choice(os.listdir(f'./classes/{word}/'))
            img = Image.open(f'./classes/{word}/{tmp}')
            target_imgs.append(img)
        else:
            img = generate_char_img(word, colors, bg_idx, crop_box)
            target_imgs.append(img)

    # s = ''.join(random.sample(string.ascii_letters + string.digits, 4))
    style = driver.find_element(By.XPATH, '/html/body/div[7]/div[1]/div[1]/div[2]/div/div/div[1]/div[1]').get_attribute('style')
    url = style[23:-3]
    content = requests.get(url).content
    with open('./test/background.jpg', 'wb') as f:
        f.write(content)
    shutil.copy('./test/background.jpg', 'D:/yolo_dataset')
    %run ./model/yolov5/detect.py --weight ./model/yolov5/runs/train/exp4/weights/best.pt --source D:/yolo_dataset/background.jpg --save-txt --conf-thres 0.5 --line-thickness 1 --img 320 --project ./results/detect/ --device cpu
    background = Image.open(BytesIO(content))
    width, height = background.size
    num = len(os.listdir('./results/detect/'))
    location = {}
    with open(f'./results/detect/exp{str(num)}/labels/background.txt', 'r', encoding='utf8') as f:
        # if len(f.readlines()) != 3:
        #     print(i)
        for i, line in enumerate(f):
            box = line.split(' ')
            x_center = width * float(box[1])  # 左上点的x坐标  
            y_center = height * float(box[2])  # 左上点的y坐标
            w = round(width * float(box[3]))  # 图片width
            h = round(height * float(box[4]))  # 图片height
            lefttopx = math.ceil(x_center - w / 2.0)
            lefttopy = math.ceil(y_center - h / 2.0)
            crop_img = background.crop((lefttopx, lefttopy, lefttopx + w, lefttopy + h))
            # s = ''.join(random.sample(string.ascii_letters + string.digits, 5))
            crop_img.save(f'./test/images/{str(i)}.png', compress_level=0)
            location[i] = [x_center - width // 2, y_center - height // 2]
    
    # 经过以上的步骤，利用孪生神经网络按照顺序依次判断目标汉字图片与背景图中检测的图片的文字相似度，找到相似度最高的图片的坐标位置
    idx = []
    for img_0 in target_imgs:
        rate = []
        for j in os.listdir('./test/images/'):
            img_1 = Image.open(f'./test/images/{j}').convert('RGB')
            img_0 = img_0.convert('RGB')
            pred = check_diff(img_0=img_0, img_1=img_1, trf=trf, model=model, device=device)
            rate.append(pred[0])
        idx.append(np.argmin(rate))
    
    actions = ActionChains(driver)
    
    code_img = driver.find_element(By.XPATH, '/html/body/div[7]/div[1]/div[1]/div[2]/div/div/div[1]/div[1]')
    for key in idx:
        value = location[key]
        actions.move_to_element_with_offset(code_img, value[0], value[1]).click().perform()
        time.sleep(1)
    
    check = driver.find_element(By.XPATH, '/html/body/div[7]/div[1]/div[1]/div[2]/div/div/div[2]/div')
    actions.click(check).perform()
    
    # driver.find_element(By.XPATH, '/html/body/div[7]/div[1]/div[1]/div[3]/div[1]/button[2]').click()
    time.sleep(5)

else:
    # driver.refresh()
    # driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
    #     "source": """
    #     Object.defineProperty(navigator, 'webdriver', {
    #       get: () => undefined
    #     })
    #   """
    # })
    # time.sleep(3)
    # break
    driver.close()

driver.close()

InvalidSessionIdException: Message: invalid session id
Stacktrace:
Backtrace:
	GetHandleVerifier [0x00007FF6CD9B4A62+57106]
	(No symbol) [0x00007FF6CD92CF52]
	(No symbol) [0x00007FF6CD7FE17D]
	(No symbol) [0x00007FF6CD82CA69]
	(No symbol) [0x00007FF6CD82E064]
	GetHandleVerifier [0x00007FF6CDC64222+2873042]
	GetHandleVerifier [0x00007FF6CDCB6590+3209792]
	GetHandleVerifier [0x00007FF6CDCAF3AF+3180639]
	GetHandleVerifier [0x00007FF6CDA45F25+652245]
	(No symbol) [0x00007FF6CD938618]
	(No symbol) [0x00007FF6CD9347C4]
	(No symbol) [0x00007FF6CD9348BC]
	(No symbol) [0x00007FF6CD924C33]
	BaseThreadInitThunk [0x00007FFF57917614+20]
	RtlUserThreadStart [0x00007FFF591C26B1+33]


In [15]:
# %run ./model/yolov5/detect.py --weight ./model/yolov5/runs/train/exp4/weights/best.pt --source D:/yolo_dataset/4DnS.jpg --device 0 --save-txt --conf-thres 0.5 --line-thickness 1 --img 320
# %run ./model/yolov5/detect.py --weight ./model/yolov5/runs/train/exp4/weights/best.pt --source D:/yolo_dataset/background.jpg --save-txt --conf-thres 0.5 --line-thickness 1 --img 320 --project ./results/detect/

In [8]:
# Write the entries of `classes` to ./classes.txt, one per line.
with open('./classes.txt', 'w', encoding='utf-8') as f:
    f.writelines(name + '\n' for name in classes)