<a href="https://colab.research.google.com/github/vijaydaniel45/genai-mermaid/blob/main/Verizon_Resnet50.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
import pytesseract
import os
import cv2
import numpy as np
from PIL import Image
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Flatten, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image

# Install required dependencies
# !apt-get install tesseract-ocr
# !pip install pytesseract
# Set the path for Tesseract OCR (if required)
# pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'  # Set the correct path if needed

# 1. Function to preprocess image for better OCR results
def preprocess_image(image_path):
    print(f"Preprocessing image: {image_path}")
    # Load the image
    img = cv2.imread(image_path)

    if img is None:
        print(f"Error: Image at {image_path} could not be loaded!")
        return None
    # Convert the image to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    print(f"Converted image to grayscale.")
    # Apply thresholding to improve contrast (you can experiment with the threshold value)
    _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
    print(f"Applied thresholding to image.")
    # Denoise the image
    denoised = cv2.fastNlMeansDenoising(thresh, None, 30, 7, 21)
    print(f"Denoised the image.")

    # Add the channel dimension to convert to 3 channels (RGB) - necessary for CNN input
    denoised_rgb = np.stack([denoised] * 3, axis=-1)
    print(f"Converted image to 3 channels (RGB).")

    return denoised_rgb

# 2. Function to extract text and generate labels for images
def generate_labels(image_dir):
    print(f"Generating labels from images in directory: {image_dir}")
    images = []
    labels = []

    for image_name in os.listdir(image_dir):
        if image_name.endswith('.jpg') or image_name.endswith('.png'):
            image_path = os.path.join(image_dir, image_name)

            # Preprocess image for better OCR
            preprocessed_img = preprocess_image(image_path)

            if preprocessed_img is None:
                continue

            # Use Tesseract to extract text from the preprocessed image
            extracted_text = pytesseract.image_to_string(preprocessed_img)
            print(f"Extracted text from image {image_name}: {extracted_text}")

            # Clean extracted text and use it as the label
            label = extracted_text.strip().replace('\n', ' ')

            # Save the preprocessed image and label
            images.append(preprocessed_img)
            labels.append(label)

    print(f"Generated {len(images)} images and labels.")
    return images, labels

# 3. Function to encode labels as categorical (for training)
def encode_labels(labels):
    print(f"Encoding labels...")
    # Encoding labels as integers (You can extend this step for multi-class tasks)
    label_encoder = {label: idx for idx, label in enumerate(set(labels))}
    print(f"Label encoding dictionary: {label_encoder}")
    encoded_labels = [label_encoder[label] for label in labels]
    return encoded_labels, label_encoder

# 4. Preparing the dataset
image_dir = '/content/verizon_images'  # Adjust this to your image directory
print(f"Preparing dataset from images in directory: {image_dir}")
images, labels = generate_labels(image_dir)

# Encode the labels
encoded_labels, label_encoder = encode_labels(labels)

# Split the dataset into training and validation sets (since the dataset is small)
print(f"Splitting dataset into training and validation sets...")
X_train, X_val, y_train, y_val = train_test_split(images, encoded_labels, test_size=0.2, random_state=42)
print(f"Training set size: {len(X_train)} images")
print(f"Validation set size: {len(X_val)} images")

# Rescale images to a consistent size (e.g., 128x128)
print(f"Resizing images to 128x128...")
X_train_resized = [cv2.resize(img, (128, 128)) for img in X_train]
X_val_resized = [cv2.resize(img, (128, 128)) for img in X_val]
print(f"Resized images.")

# Convert images to numpy arrays and normalize pixel values
print(f"Normalizing image pixel values...")
X_train_resized = np.array(X_train_resized) / 255.0
X_val_resized = np.array(X_val_resized) / 255.0
print(f"Normalized images.")

# Convert labels to numpy arrays
y_train = np.array(y_train)
y_val = np.array(y_val)

# 5. Build and train the ResNet model
print(f"Building the ResNet model...")
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(128, 128, 3))
base_model.trainable = False  # Freeze the layers of ResNet50

# Add custom layers on top
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),  # Global pooling to reduce feature map to a vector
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(len(label_encoder), activation='softmax')  # Number of categories in the output layer
])

print(f"Compiling the model...")
# Compile the model
model.compile(optimizer=Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
print(f"Training the model...")
# Train the model
model.fit(X_train_resized, y_train, epochs=10, validation_data=(X_val_resized, y_val))

# 6. Save the trained model
print(f"Saving the trained model as 'ocr_model_resnet.h5'...")
model.save('ocr_model_resnet.h5')
print(f"Model saved successfully!")

# 7. Function to test the model with a new image
def test_model_on_new_image(model, image_path, label_encoder):
    print(f"Testing the model with new image: {image_path}")

    # Preprocess the new image
    preprocessed_img = preprocess_image(image_path)

    if preprocessed_img is None:
        return None

    resized_img = cv2.resize(preprocessed_img, (128, 128)) / 255.0
    resized_img = np.expand_dims(resized_img, axis=0)  # Add batch dimension
    print(f"Resized and normalized the image for prediction.")

    # Predict the label
    prediction = model.predict(resized_img)
    predicted_label_idx = np.argmax(prediction)

    # Decode the label back to text
    label_decoder = {v: k for k, v in label_encoder.items()}
    predicted_label = label_decoder.get(predicted_label_idx, "Unknown")

    print(f"Predicted label index: {predicted_label_idx}")
    print(f"Decoded predicted label: {predicted_label}")

    return predicted_label

# 8. Test the model with a new image (example)
new_image_path = '/content/verizon_images/Recorded_ROW_and_Easement_for_Attic_Storage_page-5.png'  # Adjust this to the path of your test image
print(f"Testing model with image at: {new_image_path}")
predicted_text = test_model_on_new_image(model, new_image_path, label_encoder)
if predicted_text:
    print(f"Predicted Text: {predicted_text}")
else:
    print(f"Failed to process the image.")


Preparing dataset from images in directory: /content/verizon_images
Generating labels from images in directory: /content/verizon_images
Preprocessing image: /content/verizon_images/Recorded_ROW_and_Easement_for_Attic_Storage_page-5.png
Converted image to grayscale.
Applied thresholding to image.
Denoised the image.
Converted image to 3 channels (RGB).
Extracted text from image Recorded_ROW_and_Easement_for_Attic_Storage_page-5.png: IN WITNESS WHEREOF, Grantor and Grantee have hereunto set their hands as of the day and year
first above written.

GRANTOR:

By:

Printed Name: Cae
Title: a
GRANTEE:

MClImetro Access Transmission Services, LLC
a Delaware limited liability company

By:

Title:

 

{Acknowledgments Attached}

Preprocessing image: /content/verizon_images/Recorded_ROW_and_Easement_for_Attic_Storage_page-8.png
Converted image to grayscale.
Applied thresholding to image.
Denoised the image.
Converted image to 3 channels (RGB).
Extracted text from image Recorded_ROW_and_Easement_



Saving the trained model as 'ocr_model_resnet.h5'...
Model saved successfully!
Testing model with image at: /content/verizon_images/Recorded_ROW_and_Easement_for_Attic_Storage_page-5.png
Testing the model with new image: /content/verizon_images/Recorded_ROW_and_Easement_for_Attic_Storage_page-5.png
Preprocessing image: /content/verizon_images/Recorded_ROW_and_Easement_for_Attic_Storage_page-5.png
Converted image to grayscale.
Applied thresholding to image.
Denoised the image.
Converted image to 3 channels (RGB).
Resized and normalized the image for prediction.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
Predicted label index: 1
Decoded predicted label: IN WITNESS WHEREOF, Grantor and Grantee have hereunto set their hands as of the day and year first above written.  GRANTOR:  By:  Printed Name: Cae Title: a GRANTEE:  MClImetro Access Transmission Services, LLC a Delaware limited liability company  By:  Title:     {Acknowledgments Attached}
Predicted Text: IN WI

In [33]:
# 7. Function to test the model with a new image
def test_model_on_new_image(model, image_path, label_encoder):
    print(f"Testing the model with new image: {image_path}")

    # Preprocess the new image
    preprocessed_img = preprocess_image(image_path)

    if preprocessed_img is None:
        return None

    resized_img = cv2.resize(preprocessed_img, (128, 128)) / 255.0
    resized_img = np.expand_dims(resized_img, axis=0)  # Add batch dimension
    print(f"Resized and normalized the image for prediction.")

    # Predict the label
    prediction = model.predict(resized_img)
    predicted_label_idx = np.argmax(prediction)

    # Decode the label back to text
    label_decoder = {v: k for k, v in label_encoder.items()}
    predicted_label = label_decoder.get(predicted_label_idx, "Unknown")

    print(f"Predicted label index: {predicted_label_idx}")
    print(f"Decoded predicted label: {predicted_label}")

    return predicted_label

# 8. Test the model with a new image (example)
new_image_path = '/content/verizon_images/Recorded_ROW_and_Easement_for_Attic_Storage_page-6.png'  # Adjust this to the path of your test image
print(f"Testing model with image at: {new_image_path}")
predicted_text = test_model_on_new_image(model, new_image_path, label_encoder)
if predicted_text:
    print(f"Predicted Text: {predicted_text}")
else:
    print(f"Failed to process the image.")


Testing model with image at: /content/verizon_images/Recorded_ROW_and_Easement_for_Attic_Storage_page-6.png
Testing the model with new image: /content/verizon_images/Recorded_ROW_and_Easement_for_Attic_Storage_page-6.png
Preprocessing image: /content/verizon_images/Recorded_ROW_and_Easement_for_Attic_Storage_page-6.png
Converted image to grayscale.
Applied thresholding to image.
Denoised the image.
Converted image to 3 channels (RGB).
Resized and normalized the image for prediction.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
Predicted label index: 1
Decoded predicted label: IN WITNESS WHEREOF, Grantor and Grantee have hereunto set their hands as of the day and year first above written.  GRANTOR:  By:  Printed Name: Cae Title: a GRANTEE:  MClImetro Access Transmission Services, LLC a Delaware limited liability company  By:  Title:     {Acknowledgments Attached}
Predicted Text: IN WITNESS WHEREOF, Grantor and Grantee have hereunto set their hands as of the d