In [9]:
import cv2
import os
import sys
import time
import matplotlib.pyplot as plt

import ocr

In [None]:
# 載入Fast RCNN模型
from fastercnn import fastrcnn

In [11]:
# 準備影片路徑
video_dir_path = '影片資料集'
video_names_list = os.listdir(video_dir_path)
video_names_list.remove('範例影片.mkv')
video_names_list.sort()

In [12]:
# 如果輸出資料夾不存在則產生
if not os.path.isdir(os.path.join(video_dir_path, 'fastrcnn_out')):
    os.mkdir(os.path.join(video_dir_path, 'fastrcnn_out'))

In [13]:
def id_rule_check(str_id):
    # 檢查第11位編號是否正確
    values = {
        'A': 10, 'B': 12, 'C': 13, 'D': 14, 'E': 15, 'F': 16, 'G': 17, 'H': 18, 'I': 19, 'J': 20,
        'K': 21, 'L': 23, 'M': 24, 'N': 25, 'O': 26, 'P': 27, 'Q': 28, 'R': 29, 'S': 30, 'T': 31,
        'U': 32, 'V': 34, 'W': 35, 'X': 36, 'Y': 37, 'Z': 38, '1': 1,  '2':  2,'3':3,'4':4,'5':5,'6':6,'7':7,'8':8,'9':9
    }
    total = 0
    for i, letter in enumerate(str_id[:9]):
        if letter in values:
            total += values[letter] * (2 ** i)
    return str(total % 11) == str_id[10]

In [14]:
# 使用迴圈依序讀取影片 進行物件偵測(Fast RCNN)和文字辨識(Tesseract-OCR)，並儲存輸出至影片
for video_name in video_names_list:
    video_path = os.path.join(video_dir_path, video_name)
    if not os.path.isfile(video_path):
        continue
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print('Error: Cannot open video', video_path)
        continue

    # 取得輸入影像資訊
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # 影片輸出設定
    out = cv2.VideoWriter(os.path.join(video_dir_path, 'fastrcnn_out', f'{video_name}'), cv2.VideoWriter_fourcc(*'XVID'), fps, (width, height))

    # 儲存辨識結果
    id_text_list = []

    while cap.isOpened():
        print(f'Processing {video_name}: {(int(cap.get(cv2.CAP_PROP_POS_FRAMES))/int(cap.get(cv2.CAP_PROP_FRAME_COUNT)))*100:.2f}%', end='\r')
        start_time = time.perf_counter()
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        outputs = fastrcnn.inference_img(frame)
        for obj in outputs:
            try:
                confidence = obj['scores'].data.numpy()[0]
            except:
                continue
            if confidence > 0.5:
                x_min, y_min, x_max, y_max = obj['boxes'].data.numpy()[0]
                x_min, y_min, x_max, y_max = int(x_min), int(y_min), int(x_max), int(y_max)
                # 裁切號碼區域
                crop_img = frame[y_min:y_max, x_min:x_max]
                # 使用OCR辨識號碼
                text = ocr.get_text_from_image(crop_img)
                try:
                    text = ocr.get_text_from_image(crop_img)
                except:
                    text = ''
                # 畫號碼區域框
                cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 20, 255), 3)
                # 文字
                if text != '':
                    id_text_list.append(text)
                    (textWidth, textHeight), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 1 ,2)
                    cv2.rectangle(frame, (x_min, y_max), (x_min + textWidth + 2 , y_max + textHeight + 10), (0, 20, 255), -1)
                    cv2.putText(frame, text, (x_min+1, y_max+textHeight+5), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        # 畫fps
        end_time = time.perf_counter()
        fps = 1 / (end_time - start_time)
        (textWidth, textHeight), _ = cv2.getTextSize(f'FPS: {fps:.2f}', cv2.FONT_HERSHEY_SIMPLEX, 2 ,5)
        cv2.rectangle(frame, (30, 80-textHeight-5), (30+textWidth, 80+5), (50, 50, 50), -1)
        cv2.putText(frame, f'FPS: {fps:.2f}', (30, 80), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 5)

        out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))

    print('')
    print(f'Processing {video_name} Done!')
    id_text_list_filtered_len = list(filter(lambda x: len(x)==15, id_text_list))
    if id_text_list_filtered_len != []:
        id_text_list_filtered_len_rule = list(filter(lambda x: id_rule_check(x), id_text_list_filtered_len))
        if id_text_list_filtered_len_rule != []:
            # 超過一筆符合長度與規則要求
            most_common = max(set(id_text_list_filtered_len_rule), key = id_text_list_filtered_len_rule.count)
            confidence = 'high'
        else:
            # 超過一筆符合長度要求
            most_common = max(set(id_text_list_filtered_len), key = id_text_list_filtered_len.count)
            confidence = 'medium'
    else:
        # 都不符合長度要求
        most_common = max(set(id_text_list), key = id_text_list.count)
        confidence = 'low'
    print(f'Most common number: {most_common}, confidence: {confidence};{(id_text_list.count(most_common)/len(id_text_list))*100:.2f}%.')
    video_name_without_ext = os.path.splitext(video_name)[0]
    os.rename(os.path.join(video_dir_path, 'fastrcnn_out', f'{video_name}'),
              os.path.join(video_dir_path, 'fastrcnn_out', f'{video_name_without_ext}_{most_common}.avi'))

    cap.release()
    out.release()

Processing video_0001.avi: 100.00%
Processing video_0001.avi Done!
Most common number: SEKU58753494561, confidence: medium;1.90%.
Processing video_0002.avi: 100.00%
Processing video_0002.avi Done!
Most common number: TSSU50994004561, confidence: medium;2.01%.
Processing video_0003.avi: 100.00%
Processing video_0003.avi Done!
Most common number: WHSY58278514561, confidence: high;0.44%.
Processing video_0004.avi: 100.00%
Processing video_0004.avi Done!
Most common number: WHSU67206904531, confidence: high;0.33%.
Processing video_0005.avi: 100.00%
Processing video_0005.avi Done!
Most common number: WHSU53681994561, confidence: medium;1.74%.
Processing video_0006.avi: 100.00%
Processing video_0006.avi Done!
Most common number: WHSU248317PRING, confidence: medium;0.60%.
Processing video_0007.avi: 100.00%
Processing video_0007.avi Done!
Most common number: WHSU16167124561, confidence: medium;1.48%.
Processing video_0008.avi: 100.00%
Processing video_0008.avi Done!
Most common number: WHSU689