In [87]:
from PIL import Image
import pytesseract
import matplotlib.pyplot as plt
import cv2
import numpy as np 
import os

In [88]:
def calculate_brightness(image):
    # 將圖片轉換為灰階
    gray_image = image.convert('L')
    # 計算灰階圖片的平均亮度
    brightness = np.mean(gray_image)
    return brightness

def preprocess_image(image) :

    # 轉換為灰度圖像
    gray_image = image.convert('L')
    
    # 轉換為 OpenCV 圖像
    opencv_image = np.array(gray_image)
    
    # 光照均衡化
    equalized_image = cv2.equalizeHist(opencv_image)
    
    # Gamma 校正
    gamma = 1.5
    gamma_corrected_image = np.uint8(cv2.pow(equalized_image / 255.0, gamma) * 255)
    
    # 高斯模糊
    blurred_image = cv2.GaussianBlur(gamma_corrected_image, (5, 5), 0)
    
    # 適應性閾值處理
    adaptive_thresh = cv2.adaptiveThreshold(blurred_image, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 15, -15)
    
    # 色彩反轉
    inverted_image = cv2.bitwise_not(adaptive_thresh)
    
    # 去噪
    denoised_image = cv2.fastNlMeansDenoising(inverted_image, None, 30, 7, 21)
    
    # 轉換回 PIL 圖像
    preprocessed_image = Image.fromarray(denoised_image)

    return preprocessed_image

def preprocess_image2(image) :
    # 轉換為灰度圖像
    gray_image = image.convert('L')
    
    # 轉換為 OpenCV 圖像
    opencv_image = np.array(gray_image)
    
    # 色彩反轉
    inverted_image = cv2.bitwise_not(opencv_image)
    
    # 適應性閾值處理
    adaptive_thresh = cv2.adaptiveThreshold(inverted_image, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 15, 15)

    # 去噪
    denoised_image = cv2.fastNlMeansDenoising(adaptive_thresh, None, 30, 7, 21)
    
    # 轉換回 PIL 圖像
    preprocessed_image = Image.fromarray(denoised_image)

    return preprocessed_image

def preprocess_image3(image) :
    # 轉換為灰度圖像
    gray_image = image.convert('L')
    
    # 轉換為 OpenCV 圖像
    opencv_image = np.array(gray_image)

    # Gamma 校正
    gamma = 1.5
    gamma_corrected_image = np.uint8(cv2.pow(opencv_image / 255.0, gamma) * 255)
    
    # 轉換回 PIL 圖像
    preprocessed_image = Image.fromarray(gamma_corrected_image)

    return preprocessed_image


In [89]:
# 讀取圖片
image_path = "D:\\研究所資料\\碩一下\\機器學習\\第三次作業\\data\\test\\image_0076.jpg"
image = Image.open(image_path)

# 文字座標 (左, 上, 右, 下)
text_coor = (875, 11, 1054, 94)

# 裁剪圖片
crop_image = image.crop(text_coor)

# 顯示裁剪後的圖片
# plt.imshow(crop_image)
# plt.axis('off')
# plt.show


In [90]:
def detect(image): 
    # 設定 Tesseract 的 psm 和 oem 參數
    custom_config = r'--psm 6 --oem 2'
    
    # 計算圖片的光線值
    calc_brig = calculate_brightness(crop_image)
    # print("光線平均值為 : ", round(calc_brig, 3))
    
    # 設定 Tesseract 可執行文件的路徑
    pytesseract.pytesseract.tesseract_cmd = r'D:\Program Files\Tesseract-OCR\tesseract.exe'
    
    # 根據光線值進行條件判斷並採取相應的操作
    if calc_brig > 150:
        # print("圖片光線值大於150，進行預處理。")
        preprocessed_image = preprocess_image(image)
        # 顯示預處理後的圖片
        # plt.imshow(preprocessed_image, cmap='gray')
        # plt.axis('off')
        # plt.show()
        text = pytesseract.image_to_string(preprocessed_image, lang='eng', config=custom_config) #Eng為英文
        # print(text)
    
    elif calc_brig < 50 : 
        # print("圖片光線值小於50，進行預處理3。")
        preprocessed_image = preprocess_image3(image)
        # plt.imshow(preprocessed_image, cmap='gray')
        # plt.axis('off')
        # plt.show()
        text = pytesseract.image_to_string(preprocessed_image, lang='eng', config=custom_config) #Eng為英文
        # print(text)
        
    else:
        # print("圖片光線值小於150和大於50，進行預處理2。")
        preprocessed_image = preprocess_image2(image)
        # 顯示預處理後的圖片
        # plt.imshow(preprocessed_image, cmap='gray')
        # plt.axis('off')
        # plt.show
        text = pytesseract.image_to_string(preprocessed_image, lang='eng', config=custom_config) #Eng為英文
        # print(text)
    
    allowed_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 \n') #允許的字元
    text_filtered = ''.join(filter(allowed_chars.__contains__, text)).upper() #把所有英文字改成大寫，並刪除不在allowed_chars裡面的字
    # print(text_filtered)
    return text_filtered #返回辨識出的文字

In [92]:
fold_path = f"D:\\研究所資料\\碩一下\\機器學習\\第三次作業\\圖片準確率測試集"   
def accuracy(fold_path):  #丟入裁切後的圖片資料夾路徑
    files = os.listdir(fold_path)
    correct = 0
    acc = correct / len(files) #正確 / 總數
    for i in range(len(files)):
        image = Image.open(fold_path + "\\" + files[i]) #讀取圖片
        text = detect(image)[:15] #文字辨識，並只取前15個字 (由於前11個字會包含空格造成讀取不完全。所以設為15)
        text2 = text.replace(" ", "") #把空白刪除
        #print(text2)
        #print(files[i][:11])
        if files[i][:11] == text2 :         #計算準確率 ; files[i][:11]為檔案名稱
            correct += 1
    return acc

print("正確率為 : ", accuracy(fold_path))

正確率為 :  0.0
