In [13]:
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from azure.cognitiveservices.vision.computervision.models import VisualFeatureTypes
from msrest.authentication import CognitiveServicesCredentials
from array import array
import jellyfish
import numpy as np
import os
from PIL import Image, ImageFilter, ImageOps
import sys
import time
import cv2

from skimage import color, filters, exposure, io
from skimage.morphology import closing,footprint_rectangle 
from skimage.restoration import denoise_tv_chambolle
from skimage.exposure import equalize_adapthist
import json

In [14]:
'''
Authenticate
Authenticates your credentials and creates a client.
'''
var = json.load(open("credidentials.json"))
subscription_key= var["API_KEY"]
endpoint = var["END_POINT"]
computervision_client = ComputerVisionClient(endpoint, CognitiveServicesCredentials(subscription_key))
'''
END - Authenticate
'''

'\nEND - Authenticate\n'

In [15]:
# img = open("test1.png", "rb")
img = open("test2.jpeg", "rb")
read_response = computervision_client.read_in_stream(
    image=img,
    mode="Printed",
    raw=True
)
# print(read_response.as_dict())

operation_id = read_response.headers['Operation-Location'].split('/')[-1]
while True:
    read_result = computervision_client.get_read_result(operation_id)
    if read_result.status not in ['notStarted', 'running']:
        break
    time.sleep(1)

# Print the detected text, line by line
result = []
if read_result.status == OperationStatusCodes.succeeded:
    for text_result in read_result.analyze_result.read_results:
        for line in text_result.lines:
            print(line.text)
            result.append(line.text)

print()
# get/define the ground truth
# groundTruth = ["Google Cloud", "Platform"]
groundTruth = ["Succes in rezolvarea", "tEMELOR la", "LABORAtoaree de", "Inteligenta Artificiala!"]

# compute the performance
noOfCorrectLines = sum(i == j for i, j in zip(result, groundTruth))
print(noOfCorrectLines)


Lucces in resolvarea
TEMELOR la
LABORA toarele de
Inteligenta Artificialà!

0


In [16]:
def preprocess_image(image_path): 
#Preprocesarea imaginii:
#daca are 4 canale rgba, eliminam canalul alpha
#reducerea zgomotul
#conversia in grayscale
#ajustarea contrastul
#binarizarea: calcularea pragului Otsu 
#aplicarea operatiilor morfologice
#conversia in format uint8
    img = io.imread(image_path)

    if img.ndim == 3 and img.shape[-1] == 4:
        img = img[:, :, :3]
    img_zgomot = cv2.medianBlur(img, 5)
    img_gray = color.rgb2gray(img_zgomot)
    img_contrast = exposure.rescale_intensity(img_gray, in_range=(0.05, 0.95))
    prag_otsu = filters.threshold_otsu(img_contrast)
    img_binar = img_contrast > prag_otsu
    img_closing = closing(img_binar, footprint_rectangle((3, 3)))
    img_result = (img_closing * 255).astype(np.uint8)

    cv2.imwrite("imagine_procesata_test2.jpeg",img_result)
    return img_result

In [17]:
def distanta_hamming(secv1, secv2):
#distanta Hamming 
    n=len(secv1)
    m=len(secv2)
    if n != m:
        raise ValueError("Distanta Hamming merge doar pe siruri de aceeasi lungime")
    return sum(c1 != c2 for c1, c2 in zip(secv1, secv2))
    
def distanta_jaro_winkler(secv1, secv2):
#distanta Jaro-Winkler 
    return 1 - jellyfish.jaro_winkler_similarity(secv1, secv2)

def distanta_levenshtein(secv1, secv2):
#distanta Levenshtein 
    n=len(secv1)
    m=len(secv2)
    if n < m:
        return distanta_levenshtein(secv2, secv1)
    if m == 0:
        return n
    anterior = list(range(m + 1))
    for i, c1 in enumerate(secv1):
        curent = [i + 1]
        for j, c2 in enumerate(secv2):
            insertii = anterior[j + 1] + 1
            stergeri = curent[j] + 1
            substitutii = anterior[j] + (c1 != c2)
            curent.append(min(insertii, stergeri, substitutii))
        anterior = curent
    return anterior[-1]
    
def distanta_lcs(secv1, secv2):
#Longest Common Subsequence (LCS)
    n = len(secv1)
    m = len(secv2)
    matr = [[0] * (m + 1) for _ in range(n + 1)]
    
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            if secv1[i - 1] == secv2[j - 1]:
                matr[i][j] = matr[i - 1][j - 1] + 1
            else:
                matr[i][j] = max(matr[i - 1][j], matr[i][j - 1])
    return matr[n][m]

In [18]:
def cer_calcul(text_recunoscut, text_real):
#Character Error Rate (CER)
    if not text_real:
        return 0
    dist = distanta_levenshtein(text_recunoscut, text_real)
    return dist / len(text_real)

def wer_calcul(text_recunoscut, text_real):
#Word Error Rate (WER) 
    cuvinte_recunoscute = text_recunoscut.split()
    cuvinte_reale = text_real.split()
    if not cuvinte_reale:
        return 0
    dist = distanta_levenshtein(cuvinte_recunoscute, cuvinte_reale)
    return dist / len(cuvinte_reale)

def iou_calcul(boxA, boxB):
#Intersection over Union (IoU) 
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
    iou = interArea / float(boxAArea + boxBArea - interArea)
    return iou

In [19]:
def ocr(img, computervision):
#realizeaza OCR pe imaginea data 
    raspuns = computervision.read_in_stream(
        image=img,
        mode="Printed",
        raw=True
    )

    locatia = raspuns.headers["Operation-Location"]
    id_operatie = locatia.split("/")[-1]

    while True:
        rez = computervision.get_read_result(id_operatie)
        if rez.status.lower() not in ['notstarted', 'running']:
            break
        time.sleep(1)

    text_recunoscut = ""
    boxes = []
    if rez.status == "succeeded":
        for pag in rez.analyze_result.read_results:
            for linie in pag.lines:
                text_recunoscut += linie.text + "\n"
                pct = linie.bounding_box
                x_coord = pct[0::2]
                y_coord = pct[1::2]
                x_min, x_max = min(x_coord), max(x_coord)
                y_min, y_max = min(y_coord), max(y_coord)

                box = (x_min, y_min, x_max, y_max)
                boxes.append(box)

    return text_recunoscut.strip(), boxes

In [20]:
def evaluare(text_recunoscut, text_real):
#evaluare cu o singura metrica   --- 1A
    cer = cer_calcul(text_recunoscut, text_real)
    wer = wer_calcul(text_recunoscut, text_real)
    
    print("\n")
    print("Evaluare folosind Levenshtein")
    print("Character Error Rate (CER):", cer)
    print("Word Error Rate (WER):", wer)

def evaluare2(text_recunoscut, text_real):
#evaluare cu mai multe metrici   --- 1B
    metrici = {}
    try:
        cer = cer_calcul(text_recunoscut, text_real)
        wer = wer_calcul(text_recunoscut, text_real)
        metrici["levenshtein_cer"] = cer
        metrici["levenshtein_wer"] = wer
    except Exception as e:
        print(f"Eroare la calcularea CER/WER: {e}")
        metrici["levenshtein_cer"] = "Eroare"
        metrici["levenshtein_wer"] = "Eroare"

    try:
        if len(text_recunoscut) == len(text_real):
            hamming = distanta_hamming(text_recunoscut, text_real) / len(text_real)
            metrici["hamming_cer"] = hamming
    except ValueError as ve:
        metrici["hamming_cer"] = str(ve)
    except Exception as e:
        print(f"Eroare la calcularea Hamming distance: {e}")
        metrici["hamming_cer"] = "Eroare"

    try:
        if jellyfish is not None:
            jaro_winkler = distanta_jaro_winkler(text_recunoscut, text_real)
            metrici["jaro_winkler_cer"] = jaro_winkler
    except Exception as e:
        print(f"Eroare la calcularea Jaro-Winkler: {e}")
        metrici["jaro_winkler_cer"] = "Eroare"

    try:
        lcs_length = distanta_lcs(text_recunoscut, text_real)
        metrici["lcs"] = lcs_length
    except Exception as e:
        print(f"Eroare la calcularea LCS: {e}")
        metrici["lcs"] = "Eroare"

    print("\n")
    print("Evaluare folosind mai multe metrici")
    for key, value in metrici.items():
        print(f"{key}: {value}")

def calitate(boxes,box_real):
#calitatea localizarii textului   --- 2
    nr_boxes = min(len(boxes), len(box_real))
    for i in range(nr_boxes):
        iou = iou_calcul(boxes[i], box_real[i])
        print("\n")
        print("Calitatea localizării textului (IoU):", iou)
    if len(boxes) > len(box_real):
        print(f"Sunt {len(boxes) - len(box_real)} box-uri suplimentare.")
    elif len(box_real) > len(boxes):
        print(f"Lipsesc {len(box_real) - len(boxes)} box-uri reale.")

In [25]:
#img = open("test1.png", "rb")
#img=open("test2.jpeg","rb")
#img=open("test3.jpg","rb")
img_r = preprocess_image("test2.jpeg")
img = open("imagine_procesata_test2.jpeg", "rb")
text_recunoscut, boxes = ocr(img, computervision_client)

print("Textul recunoscut de OCR:")
print(text_recunoscut)
print("\n")
print("Bounding boxes")
for box in boxes:
    print(box)

#pt test1
#text_real = "Google Cloud Platform"
#box_real = [(170, 40, 415, 90),(235, 110, 350, 145)]

#pt test2
text_real = "Succes in rezolvarea tEMELOR la LABORAtoaree de Inteligenta Artificiala!"
box_real = [(70, 305, 1335, 430),(130, 590, 1050, 710),(80, 925, 1010, 1025),(100, 1140, 1450, 1285)]


#pt test3
#text_real = "Optical Character Recognition"
#box_real = [(332, 284, 942, 393),(448, 397, 830, 488)]

print("\n")
print("Textul real de OCR:")
print(text_real)
print("\n")
print("Bounding boxes reale")
for box in box_real:
    print(box)

evaluare(text_recunoscut, text_real)
evaluare2(text_recunoscut, text_real)
calitate(boxes,box_real)

Textul recunoscut de OCR:
Lucces in resolvarea
AEMELOR la
LABORA toarele de
Inteligentà Artificialà!
.


Bounding boxes
(72.0, 293.0, 1336.0, 476.0)
(128.0, 578.0, 1059.0, 739.0)
(80.0, 915.0, 1010.0, 1043.0)
(101.0, 1132.0, 1452.0, 1295.0)
(152.0, 1342.0, 167.0, 1365.0)


Textul real de OCR:
Succes in rezolvarea tEMELOR la LABORAtoaree de Inteligenta Artificiala!


Bounding boxes reale
(70, 305, 1335, 430)
(130, 590, 1050, 710)
(80, 925, 1010, 1025)
(100, 1140, 1450, 1285)


Evaluare folosind Levenshtein
Character Error Rate (CER): 0.16666666666666666
Word Error Rate (WER): 0.8888888888888888


Evaluare folosind mai multe metrici
levenshtein_cer: 0.16666666666666666
levenshtein_wer: 0.8888888888888888
jaro_winkler_cer: 0.16783250862198218
lcs: 64


Calitatea localizării textului (IoU): 0.6835012789040907


Calitatea localizării textului (IoU): 0.7380980766173899


Calitatea localizării textului (IoU): 0.7829457364341085


Calitatea localizării textului (IoU): 0.8883420319640877
Sunt 1

In [22]:
#posibilitati de imbunatatire   --- 3
# Preprocesarea imaginii:
# (Reducerea zgomotului; Ajustarea contrastului; Binarizare: Otsu thresholding; Corectarea geometriei imaginii; Operatii morfologice pentru unirea textului)

# Model OCR mai performant:
# (Folosirea unui model avansat; Fine-tuning pe dataset personalizat)

# Postprocesarea textului:
# (Corectarea ortografica; Dictionar personalizat; Corectare gramaticală automată)

# Imbunatatirea localizarii textului:
# (Îmbunătățirea box-urilor; Unificarea box-urilor pentru textul divizat)

# Antrenare personalizată:
# (Fine-tuning pe imagini specifice; Detectarea fonturilor si stilurilor speciale)

# Combinarea mai multor surse OCR:
# (Combinați metode OCR diferite pentru rezultate mai precise; Validare prin vot majoritar)

# Model de limbaj context-aware:
# (Folosirea unui model de limbaj)

# Ajustarea parametrilor OCR:
# (Fine-tuning-ul setărilor OCR pentru optimizarea rezultatelor)