# Package Installation

In [None]:
# Mount Google Drive at /content/drive so the project folders used by the
# later cells (all under /content/drive/MyDrive/nic_project) are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Work from the project's packages folder on Drive; the install cell clones
# mmocr into whatever the current directory is.
%cd /content/drive/MyDrive/nic_project/packages

/content/drive/MyDrive/nic_project/packages


In [None]:
!pip install ultralytics
!pip install opencv-python
!pip install -U torch==2.0+cpu torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
!pip install mmcv==2.0.1 -f https://download.openmmlab.com/mmcv/dist/cpu/torch2.0/index.html
!pip install mmdet
!git clone https://github.com/open-mmlab/mmocr.git
%cd mmocr
!pip install -r requirements.txt
!pip install -v -e .
!pip install 'mmdet>=3.0.0rc5 , < 3.2.0'

Collecting ultralytics
  Downloading ultralytics-8.1.15-py3-none-any.whl (715 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m715.1/715.1 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1 (from ultralytics)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Installing collected packages: thop, ultralytics
Successfully installed thop-0.1.1.post2209072238 ultralytics-8.1.15
Looking in indexes: https://download.pytorch.org/whl/cpu
Collecting torch==2.0+cpu
  Downloading https://download.pytorch.org/whl/cpu/torch-2.0.0%2Bcpu-cp310-cp310-linux_x86_64.whl (195.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m195.4/195.4 MB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
Collecting torchvision
  Downloading https://download.pytorch.org/whl/cpu/torchvision-0.17.0%2Bcpu-cp310-cp310-linux_x86_64.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m38.8 MB/s[0m eta [36m0:00:00[0m
C

In [None]:
# Check mmocr installation
import torch, torchvision
print(torch.__version__, torch.cuda.is_available())

import mmdet
print(mmdet.__version__)

import mmcv
from mmcv.ops import get_compiling_cuda_version, get_compiler_version
print(mmcv.__version__)
print(get_compiling_cuda_version())
print(get_compiler_version())

import mmocr
print(mmocr.__version__)

%cd /mmocr/
!ls

2.0.0+cpu False
3.1.0
2.0.1
not available
GCC 7.3
1.0.1
[Errno 2] No such file or directory: '/mmocr/'
/content/drive/MyDrive/nic_project/packages/mmocr
CITATION.cff  dicts    MANIFEST.in	projects	 requirements.txt  tests
configs       docker   mmocr		README.md	 resources	   tools
dataset_zoo   docs     mmocr.egg-info	README_zh-CN.md  setup.cfg
demo	      LICENSE  model-index.yml	requirements	 setup.py


# Import Necessary Packages

In [None]:
from ultralytics import YOLO
import numpy as np
from PIL import Image
import cv2
import os
from mmocr.apis import MMOCRInferencer
import csv
import pandas as pd
from google.colab import files
import matplotlib.pyplot as plt

# Functions

### Functions for Corner Detection, Cropping & Straightening using Perspective Transformation

In [None]:
# Corner labels; get_transform indexes this list with a detection's class id
# (int(box.cls[0])). The same order is used for the perspective source points.
names_index = ['top left', 'top right', 'bottom right', 'bottom left']
# Width and height, in pixels, of the straightened image returned by get_transform.
imgWidth = 640
imgHeight = 480


# Weights file for the corner detector. `model` is the module-level YOLO
# instance that get_transform calls via model.predict.
sources_model = '/content/drive/MyDrive/nic_project/models/cornerdetect.pt'
model = YOLO(sources_model)


def get_index_one_point(coordinates):
    """Return the integer centre ``[x, y]`` of the first box in ``coordinates``.

    ``coordinates[0]`` is read as ``(x_min, y_min, x_max, y_max)``; each centre
    component is the floor of the midpoint of the corresponding pair.
    """
    first_box = coordinates[0]
    centre_x = int((first_box[0] + first_box[2]) // 2)
    centre_y = int((first_box[1] + first_box[3]) // 2)
    return [centre_x, centre_y]

def changesize(img, new_width=640):
    """Resize an image to a fixed width while keeping its aspect ratio.

    Args:
        img: Image array whose ``shape`` starts with ``(height, width)``.
        new_width: Width of the resized image in pixels. Defaults to 640, the
            value that was previously hard-coded.

    Returns:
        The result of ``cv2.resize`` at ``(new_width, scaled_height)``.
    """
    h, w = img.shape[0], img.shape[1]
    ratio = w / h
    # Clamp to one pixel: for extremely wide inputs the scaled height truncates
    # to 0, which is not a valid size for cv2.resize.
    new_height = max(1, int(new_width / ratio))
    return cv2.resize(img, (new_width, new_height))

def get_transform(path_image, padding=10):
    """Locate the four card corners in an image and return the straightened crop.

    The image is read, resized with ``changesize`` and passed to the
    module-level corner detector ``model``. The centre of the first detection
    reported for each corner class is used as that corner. Each corner is then
    pushed outwards by ``padding`` pixels and the resulting quadrilateral is
    warped onto an ``imgWidth`` x ``imgHeight`` rectangle.

    Args:
        path_image: Path of the image file to process.
        padding: Number of pixels each detected corner is moved away from the
            card, so the crop keeps a small margin. Defaults to 10.

    Returns:
        The warped image, or ``None`` when the file cannot be read or when
        fewer than four distinct corners are detected.
    """
    img = cv2.imread(path_image)
    if img is None:
        # cv2.imread reports a missing or unreadable file by returning None.
        print(f"Failed to read image {path_image}. Skipping...")
        return None

    img = changesize(img)
    results = model.predict(source=img)

    # Order expected by the destination rectangle below.
    corner_order = ('top left', 'top right', 'bottom right', 'bottom left')

    corners = {}
    for box in results[0].boxes:
        name = names_index[int(box.cls[0])]
        # Keep only the first detection reported for each corner class.
        if name not in corners:
            corners[name] = get_index_one_point(box.xyxy)

    if any(name not in corners for name in corner_order):
        print(f"Failed to detect corners properly in {path_image}. Skipping...")
        return None  # Return None to indicate failure

    # Direction (x, y) that points away from the card for every corner. All four
    # corners are padded outwards; previously the two bottom corners were moved
    # inwards along x, which skewed the crop.
    outward = {
        'top left': (-1, -1),
        'top right': (1, -1),
        'bottom right': (1, 1),
        'bottom left': (-1, 1),
    }

    sources_point = np.float32([
        [corners[name][0] + outward[name][0] * padding,
         corners[name][1] + outward[name][1] * padding]
        for name in corner_order
    ])
    dest_points = np.float32([[0, 0], [imgWidth, 0], [imgWidth, imgHeight], [0, imgHeight]])
    matrix = cv2.getPerspectiveTransform(sources_point, dest_points)
    crop_img = cv2.warpPerspective(img, matrix, (imgWidth, imgHeight))

    return crop_img

### Functions for Text Detection and Recognition

In [None]:
# Class id -> field label for the two text detectors; get_text selects one of
# these mappings according to its model_name argument.
names_index_id = {0: 'id_number', 1: 'gender', 2: 'bod'}
names_index_name = {0: 'name1', 1: 'name2', 2: 'name3'}
# Same values as assigned in the corner-detection cell; the functions visible
# in this cell do not read them.
imgWidth = 640
imgHeight = 480

# YOLO
def load_model(model_path):
    """Build and return a YOLO model from the weights file at ``model_path``."""
    detector = YOLO(model_path)
    return detector

# Load models
# Both detectors are created once at cell execution time and reused by get_text:
# model_idtextdetect is paired with names_index_id, model_detectname with names_index_name.
model_idtextdetect = load_model('/content/drive/MyDrive/nic_project/models/id_gen_bod_detect.pt')
model_detectname = load_model('/content/drive/MyDrive/nic_project/models/namedetect.pt')

_ocr_inferencer = None  # shared recognizer, created on the first ocr() call


def _get_ocr_inferencer():
    """Return the shared SATRN recognizer, building it the first time it is needed."""
    global _ocr_inferencer
    if _ocr_inferencer is None:
        _ocr_inferencer = MMOCRInferencer(det=None, rec='SATRN')
    return _ocr_inferencer


def ocr(crop_img):
    """Recognise the text in an already-cropped image region.

    The recognizer is created once and reused, instead of being rebuilt for
    every crop.

    Args:
        crop_img: Cropped region; it is converted with ``np.array`` before
            being passed to the recognizer.

    Returns:
        The first recognised string, or ``''`` when the recognizer reports no
        predictions or no text.
    """
    img_np = np.array(crop_img)
    result = _get_ocr_inferencer()(img_np, show=False, print_result=False)

    predictions = result['predictions'] if 'predictions' in result else None
    if not predictions:
        return ''

    # Guard against a prediction entry without any recognised text.
    rec_texts = predictions[0].get('rec_texts') or []
    return rec_texts[0] if rec_texts else ''


def get_text(img, model_name='idtextdetect'):
    """Detect the text fields on a card image and read each of them with ``ocr``.

    Args:
        img: Image to analyse. It is passed to the selected YOLO detector and
            must also provide ``crop(box)`` (presumably a PIL image -- confirm
            against the caller).
        model_name: ``'idtextdetect'`` to read the fields in ``names_index_id``
            or ``'detectname'`` to read the fields in ``names_index_name``.

    Returns:
        A dict with one entry per field label of the selected detector. The
        name fields (``name1``/``name2``/``name3``) hold the texts of all their
        boxes, ordered left to right and joined with spaces. Every other field
        holds the text of its right-most box. Fields without a detection map
        to ``''``.

    Raises:
        ValueError: If ``model_name`` is not one of the two supported names.
    """
    # Local names are deliberately different from the corner-detection globals
    # ``model`` and ``names_index`` to avoid shadowing them.
    if model_name == 'idtextdetect':
        detector, class_names = model_idtextdetect, names_index_id
    elif model_name == 'detectname':
        detector, class_names = model_detectname, names_index_name
    else:
        raise ValueError(f"Invalid model name: {model_name}")

    results = detector(source=img)

    boxes_by_field = {field: [] for field in class_names.values()}
    for box in results[0].boxes:
        field = class_names[int(box.cls[0])]
        boxes_by_field[field].append(box.xyxy[0].cpu().numpy().astype(int))

    def read_box(box):
        # Hand the crop box over as a plain tuple of Python ints.
        return ocr(img.crop(tuple(int(v) for v in box)))

    multi_word_fields = ('name1', 'name2', 'name3')
    res = {}
    for field, boxes in boxes_by_field.items():
        # Sort based on horizontal position (x-coordinate of the left edge).
        ordered = sorted(boxes, key=lambda b: b[0])
        if field in multi_word_fields:
            # One recognised word per box, concatenated into a single string.
            res[field] = ' '.join(read_box(b) for b in ordered)
        elif ordered:
            # Only the right-most box determines the value, so it is the only
            # one that needs to be read.
            res[field] = read_box(ordered[-1])
        else:
            res[field] = ''

    return res