In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import pytesseract
import plotly.express as px
import matplotlib.pyplot as plt
import xml.etree.ElementTree as xet

from glob import glob
from skimage import io
from shutil import copy
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import TensorBoard
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import InceptionResNetV2
from tensorflow.keras.layers import Dense, Dropout, Flatten, Input
from tensorflow.keras.preprocessing.image import load_img, img_to_array

In [2]:
# Collect every XML annotation file. Sorting makes the row order (and therefore
# the later seeded train/test split) independent of filesystem listing order.
path = sorted(glob('C:/Users/ahmad.wicaksana/Downloads/assignment/dataset/annotations/*.xml'))
labels_dict = dict(filepath=[],xmin=[],xmax=[],ymin=[],ymax=[])
for filename in path:
    root = xet.parse(filename).getroot()

    # Only the first <object> element of each annotation is used.
    member_object = root.find('object')
    labels_info = member_object.find('bndbox') if member_object is not None else None
    if labels_info is None:
        # Fail with the offending file name instead of an AttributeError on None.
        raise ValueError(f'No <object>/<bndbox> element found in annotation file: {filename}')

    labels_dict['filepath'].append(filename)
    # Box corners are stored as integer pixel coordinates.
    for key in ('xmin', 'xmax', 'ymin', 'ymax'):
        labels_dict[key].append(int(labels_info.find(key).text))

In [3]:
# Tabulate the parsed annotations, persist them to labels.csv, and preview the first rows.
df = pd.DataFrame.from_dict(labels_dict)
df.to_csv('labels.csv', index=False)
df.head()

Unnamed: 0,filepath,xmin,xmax,ymin,ymax
0,C:/Users/ahmad.wicaksana/Downloads/assignment/...,226,419,125,173
1,C:/Users/ahmad.wicaksana/Downloads/assignment/...,134,262,128,160
2,C:/Users/ahmad.wicaksana/Downloads/assignment/...,140,303,5,148
3,C:/Users/ahmad.wicaksana/Downloads/assignment/...,175,214,114,131
4,C:/Users/ahmad.wicaksana/Downloads/assignment/...,167,240,202,220


In [4]:
# Use the first annotation file as a quick example input for getFilename.
filename = df.loc[0, 'filepath']
def getFilename(filename, images_dir='C:/Users/ahmad.wicaksana/Downloads/assignment/dataset/images/'):
    """Return the path of the image referenced by an XML annotation file.

    Parameters
    ----------
    filename : str
        Path to the annotation XML file; its root is expected to contain a
        ``<filename>`` element naming the image.
    images_dir : str, optional
        Directory that holds the image files. Defaults to the dataset
        location this notebook was written against.

    Returns
    -------
    str
        ``images_dir`` joined with the text of the ``<filename>`` element.

    Raises
    ------
    ValueError
        If the annotation has no ``<filename>`` element.
    """
    name_node = xet.parse(filename).getroot().find('filename')
    if name_node is None:
        # Name the offending file instead of failing with AttributeError on None.
        raise ValueError(f'No <filename> element found in annotation file: {filename}')
    filename_image = name_node.text
    filepath_image = os.path.join(images_dir, filename_image) # Construct the full file path for the image
    return filepath_image
# Sanity check: display the image path resolved for the example annotation.
getFilename(filename=filename)

'C:/Users/ahmad.wicaksana/Downloads/assignment/dataset/images/Cars0.png'

In [5]:
# Resolve every annotation file to its image path, then preview the first ten.
image_path = [getFilename(xml_file) for xml_file in df['filepath']]
image_path[:10]

['C:/Users/ahmad.wicaksana/Downloads/assignment/dataset/images/Cars0.png',
 'C:/Users/ahmad.wicaksana/Downloads/assignment/dataset/images/Cars1.png',
 'C:/Users/ahmad.wicaksana/Downloads/assignment/dataset/images/Cars10.png',
 'C:/Users/ahmad.wicaksana/Downloads/assignment/dataset/images/Cars100.png',
 'C:/Users/ahmad.wicaksana/Downloads/assignment/dataset/images/Cars101.png',
 'C:/Users/ahmad.wicaksana/Downloads/assignment/dataset/images/Cars102.png',
 'C:/Users/ahmad.wicaksana/Downloads/assignment/dataset/images/Cars103.png',
 'C:/Users/ahmad.wicaksana/Downloads/assignment/dataset/images/Cars104.png',
 'C:/Users/ahmad.wicaksana/Downloads/assignment/dataset/images/Cars105.png',
 'C:/Users/ahmad.wicaksana/Downloads/assignment/dataset/images/Cars106.png']

In [6]:
# Select the box columns by name so their order always matches the unpacking
# below, even if the frame gains extra columns (e.g. after a CSV round-trip).
labels = df[['xmin', 'xmax', 'ymin', 'ymax']].values
data = []
output = []
for ind, image in enumerate(image_path):
    # The original resolution is needed to rescale the box into the [0, 1] range.
    img_arr = cv2.imread(image)
    if img_arr is None:
        # cv2.imread returns None instead of raising when a file is missing or unreadable.
        raise FileNotFoundError(f'Could not read image: {image}')
    h, w = img_arr.shape[:2]
    # Preprocessing: resize to the 224x224 network input
    load_image = load_img(image,target_size=(224,224))
    load_image_arr = img_to_array(load_image)
    norm_load_image_arr = load_image_arr/255.0 # Normalization
    # Normalize the box corners by the original width and height
    xmin,xmax,ymin,ymax = labels[ind]
    nxmin,nxmax = xmin/w,xmax/w
    nymin,nymax = ymin/h,ymax/h
    label_norm = (nxmin,nxmax,nymin,nymax) # Normalized output
    # Append
    data.append(norm_load_image_arr)
    output.append(label_norm)

In [7]:
# Stack the per-image lists into float32 arrays: X from the image tensors, y from the box targets.
X, y = (np.array(samples, dtype=np.float32) for samples in (data, output))

In [8]:
# Hold out 20% of the samples; the fixed seed keeps the split repeatable.
split = train_test_split(X, y, train_size=0.8, random_state=0)
x_train, x_test, y_train, y_test = split
tuple(part.shape for part in split)

((346, 224, 224, 3), (87, 224, 224, 3), (346, 4), (87, 4))

In [9]:
# Backbone: ImageNet-pretrained InceptionResNetV2 without its classification top.
base_input = Input(shape=(224,224,3))
inception_resnet = InceptionResNetV2(include_top=False, weights="imagenet", input_tensor=base_input)

# Regression head: flatten the backbone features, pass them through two ReLU
# layers, and emit four sigmoid outputs (one per normalized box coordinate).
headmodel = Flatten()(inception_resnet.output)
for units in (500, 250):
    headmodel = Dense(units, activation="relu")(headmodel)
headmodel = Dense(4, activation='sigmoid')(headmodel)

# Full network: backbone input -> box coordinates.
model = Model(inputs=inception_resnet.input, outputs=headmodel)




In [10]:
# Train the box regressor with mean-squared error and Adam at a small learning rate.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer=optimizer, loss='mse')
model.summary()

In [11]:
# Write training curves to the 'object_detection' log directory for TensorBoard.
tfb = TensorBoard(log_dir='object_detection')
fit_options = dict(
    batch_size=10,
    epochs=50,
    validation_data=(x_test, y_test),
    callbacks=[tfb],
)
history = model.fit(x_train, y_train, **fit_options)

Epoch 1/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m169s[0m 2s/step - loss: 0.0512 - val_loss: 0.0261
Epoch 2/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 2s/step - loss: 0.0144 - val_loss: 0.0204
Epoch 3/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 2s/step - loss: 0.0069 - val_loss: 0.0169
Epoch 4/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 2s/step - loss: 0.0036 - val_loss: 0.0154
Epoch 5/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 2s/step - loss: 0.0040 - val_loss: 0.0157
Epoch 6/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 2s/step - loss: 0.0033 - val_loss: 0.0155
Epoch 7/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 2s/step - loss: 0.0031 - val_loss: 0.0152
Epoch 8/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 2s/step - loss: 0.0023 - val_loss: 0.0136
Epoch 9/50
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [12]:
# Persist the trained network in the native Keras format.
model.save(filepath='object_detection.keras')

In [13]:
# Reload from the same relative path the model is saved to, so this cell does not
# depend on one user's absolute directory layout or on a stale copy stored there.
model = tf.keras.models.load_model('object_detection.keras')




In [14]:
# Tell pytesseract where the Tesseract executable is installed
TESSERACT_EXE = r"C:/Users/ahmad.wicaksana/AppData/Local/Programs/Tesseract-OCR/tesseract.exe"
pytesseract.pytesseract.tesseract_cmd = TESSERACT_EXE

# Test image used for the detection + OCR demonstration
path = 'C:/Users/ahmad.wicaksana/Downloads/P05274.jpg'

def object_detection(path):
    """Predict a bounding box for the image at ``path`` using the global ``model``.

    Parameters
    ----------
    path : str
        Path of the image file to analyse.

    Returns
    -------
    image : numpy.ndarray
        Full-size uint8 copy of the input, thresholded at 150, with the
        predicted box drawn on it.
    coords : numpy.ndarray
        int32 array of shape (1, 4) holding ``(xmin, xmax, ymin, ymax)`` in
        pixels of the full-size image.
    roi : numpy.ndarray
        Crop of the un-thresholded image inside the predicted box
        (RGB channel order, as produced by ``load_img``).
    """
    # Decode the full-resolution image once and reuse it for both the
    # thresholded display copy and the crop.
    original = np.array(load_img(path), dtype=np.uint8)
    h, w = original.shape[:2]

    # cv2.threshold returns a new array, so `original` stays untouched for cropping.
    _, image = cv2.threshold(original, 150, 255, cv2.THRESH_BINARY)

    # Data preprocessing: 224x224 network input scaled to [0, 1] with a batch axis
    image_arr_224 = img_to_array(load_img(path, target_size=(224, 224))) / 255.0
    test_arr = image_arr_224.reshape(1, 224, 224, 3)

    # Make predictions
    coords = model.predict(test_arr)

    # Denormalize: x values scale with the width, y values with the height
    denorm = np.array([w, w, h, h])
    coords = (coords * denorm).astype(np.int32)

    # Plain Python ints keep the OpenCV drawing call independent of NumPy scalar handling.
    xmin, xmax, ymin, ymax = (int(value) for value in coords[0])

    # Draw bounding box on the image
    cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 3)

    # Crop the detected object
    roi = original[ymin:ymax, xmin:xmax]

    return image, coords, roi

image, coords, roi = object_detection(path)

if roi.size == 0:
    # An inverted or zero-area prediction would otherwise surface as an opaque OpenCV error.
    raise ValueError(f'Predicted bounding box is empty: {coords[0]}')

# Perform text extraction
# roi originates from load_img, i.e. RGB channel order, so convert with RGB2GRAY
# (BGR2GRAY would swap the red and blue luminance weights).
gray = cv2.cvtColor(roi, cv2.COLOR_RGB2GRAY)
blur = cv2.GaussianBlur(gray, (3, 3), 0)
# Otsu picks the threshold automatically; the mask is inverted for the opening step.
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
# Morphological opening removes small specks from the mask.
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
# Undo the earlier inversion before handing the image to Tesseract.
invert = 255 - opening
# --psm 7: treat the crop as a single line of text.
text_data = pytesseract.image_to_string(invert, lang='eng', config='--psm 7')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step


In [16]:
# Display the image returned by object_detection, including the drawn box.
fig = px.imshow(image)
layout_opts = dict(
    width=700,
    height=500,
    margin=dict(l=10, r=10, b=10, t=10),
    xaxis_title='Figure 1 Bounding Box of Vehicle Plate Number',
)
fig.update_layout(**layout_opts)


In [17]:
# Display the cropped region that is passed on to OCR.
fig2 = px.imshow(roi)
crop_layout = {
    'width': 350,
    'height': 250,
    'margin': {'l': 10, 'r': 10, 'b': 10, 't': 10},
    'xaxis_title': 'Figure 2 Cropped image',
}
fig2.update_layout(**crop_layout)

In [18]:
# Report the string recognised by Tesseract.
print(f"Extracted Text: {text_data}")

Extracted Text: | B- 3456

