In [1]:
import os
import torch
from torch.utils.data import Dataset
from PIL import Image
import json
import xml.etree.ElementTree as ET

def convert_xml_to_dict(annotations_dir):
    """Convert a directory of XML bounding-box annotations into a list of records.

    Every ``*.xml`` file directly inside ``annotations_dir`` is parsed and turned
    into one dict with the keys ``file_name`` (the ``<filename>`` text joined
    under ``"images"``), ``width``, ``height`` (from ``<size>``) and
    ``annotations`` (one entry per ``<object>`` element, each holding
    ``"bbox": [xmin, ymin, xmax, ymax]`` and ``"category_id": 1``).

    Files are processed in sorted name order so the result is reproducible
    (``os.listdir`` returns entries in arbitrary order). Numeric fields written
    as floats, e.g. ``"12.0"``, are accepted and truncated to ``int``.

    Args:
        annotations_dir: Path of the directory containing the XML files.

    Returns:
        list[dict]: One record per XML file, in sorted file-name order.

    Raises:
        AttributeError: If a required element (``filename``, ``size``,
            ``bndbox`` or a coordinate tag) is missing from a file.
        ValueError: If a numeric field does not contain a number.
    """
    def _int_text(parent, tag):
        # Go through float() first: int("12.0") raises, int(float("12.0")) does not.
        return int(float(parent.find(tag).text))

    dataset_dicts = []
    annotation_files = sorted(
        name for name in os.listdir(annotations_dir) if name.endswith('.xml')
    )

    for annotation_file in annotation_files:
        root = ET.parse(os.path.join(annotations_dir, annotation_file)).getroot()
        size = root.find('size')

        record = {
            "file_name": os.path.join("images", root.find('filename').text),
            "width": _int_text(size, 'width'),
            "height": _int_text(size, 'height'),
        }

        annotations = []
        for obj in root.findall('object'):
            bndbox = obj.find('bndbox')
            annotations.append({
                "bbox": [
                    _int_text(bndbox, tag)
                    for tag in ('xmin', 'ymin', 'xmax', 'ymax')
                ],
                # Every annotated object is given the same single class id.
                "category_id": 1,
            })
        record["annotations"] = annotations
        dataset_dicts.append(record)
    return dataset_dicts

# Parse every XML annotation under ./annotations into record dicts, then
# serialise the whole list to dataset.json in the working directory.
annotations_dir = "annotations"
dataset_dicts = convert_xml_to_dict(annotations_dir)

with open('dataset.json', 'w') as f:
    f.write(json.dumps(dataset_dicts))

In [2]:
class LicensePlateDataset(Dataset):
    """Detection dataset backed by a JSON list of per-image annotation records.

    Each record must provide ``file_name`` (joined onto ``img_dir``) and
    ``annotations``, a list of dicts with a ``bbox`` entry of four numbers.
    Every box receives the label ``1``; any ``category_id`` stored in the
    record is not read.

    Args:
        annotations_file: Path to the JSON file holding the list of records.
        img_dir: Directory that ``file_name`` entries are relative to.
        transform: Optional callable applied to the RGB image only; the boxes
            are returned as stored.
        debug: When true, print the path, boxes and labels of every sample
            fetched. Off by default so iterating the dataset stays quiet.
    """

    def __init__(self, annotations_file, img_dir, transform=None, debug=False):
        with open(annotations_file) as f:
            self.img_labels = json.load(f)
        self.img_dir = img_dir
        self.transform = transform
        self.debug = debug

    def __len__(self):
        """Return the number of annotation records."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for record ``idx``.

        ``target`` is a dict with ``boxes`` (float32, shape ``(N, 4)``) and
        ``labels`` (int64 ones, shape ``(N,)``), where ``N`` is the number of
        annotations and may be zero.
        """
        record = self.img_labels[idx]
        img_path = os.path.join(self.img_dir, record["file_name"])

        # convert() returns a new, fully loaded image, so the file handle can
        # be released right away instead of waiting for garbage collection.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")

        # reshape(-1, 4) keeps the (N, 4) layout even when there are no
        # annotations; a bare as_tensor([]) would have shape (0,).
        boxes = torch.as_tensor(
            [obj["bbox"] for obj in record["annotations"]],
            dtype=torch.float32,
        ).reshape(-1, 4)

        # All objects are license plates, hence a constant label of 1.
        labels = torch.ones((boxes.shape[0],), dtype=torch.int64)

        target = {"boxes": boxes, "labels": labels}

        if self.transform:
            image = self.transform(image)

        if self.debug:
            print(f"Image path: {img_path}")
            print(f"Boxes: {boxes}")
            print(f"Labels: {labels}")

        return image, target

In [7]:
import torchvision.transforms as T

class Compose(object):
    """Chain several image transforms into one callable.

    The transforms run in list order; the output of each one is fed to the
    next, and the output of the last one is returned.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image):
        result = image
        for step in self.transforms:
            result = step(result)
        return result

class ToTensor(object):
    """Callable that forwards an image to ``T.functional.to_tensor``."""

    def __call__(self, image):
        tensor = T.functional.to_tensor(image)
        return tensor

In [8]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import DataLoader
# from dataset import LicensePlateDataset
# import transforms as T

def get_transform(train):
    """Build the image transform pipeline used by the dataset.

    ``train`` is accepted but not read: the same single ``T.ToTensor()`` step,
    wrapped in ``T.Compose``, is returned whatever its value.
    """
    pipeline = [T.ToTensor()]
    return T.Compose(pipeline)

def collate_fn(batch):
    """Transpose a batch of samples into one tuple per field.

    ``[(img0, tgt0), (img1, tgt1)]`` becomes ``((img0, img1), (tgt0, tgt1))``,
    so images and targets stay as plain tuples instead of being stacked.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Build the dataset from the exported annotations and wrap it in a shuffling loader.
# img_dir is "", so each record's file_name is resolved relative to the working directory.
# num_workers is not passed, so the DataLoader default is used.
dataset = LicensePlateDataset(
    "dataset.json",
    "",
    transform=get_transform(train=True),
)
data_loader = DataLoader(
    dataset,
    batch_size=2,
    shuffle=True,
    collate_fn=collate_fn,
    drop_last=False,
)

def get_model_instance_segmentation(num_classes):
    """Create a Faster R-CNN (ResNet-50 FPN) detector with a new box head.

    The model comes from ``fasterrcnn_resnet50_fpn(pretrained=True)``; its
    ``roi_heads.box_predictor`` is then replaced by a ``FastRCNNPredictor``
    that takes the same input feature size and outputs ``num_classes`` classes.
    Despite the function name, only the box predictor is touched here.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    roi_heads = detector.roi_heads
    head_in_features = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)
    return detector

# Set device to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

num_classes = 2  # Background + license plate
model = get_model_instance_segmentation(num_classes)

# NOTE(review): this is a raw string, not an f-string, so "{epoch}" is a literal
# part of the file name. Confirm a checkpoint with exactly this name exists, or
# substitute the epoch number you want to resume from.
checkpoint_path = r"D:\data_cpv\plate_trained_models\fasterrcnn_resnet50_fpn_epoch_{epoch}_2nd.pth"

# map_location moves the stored tensors onto `device`; without it a checkpoint
# saved from a CUDA run cannot be loaded when the code falls back to CPU.
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.to(device)

# Optimise only the parameters that require gradients.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.001, momentum=0.9, weight_decay=0.00001)
# NOTE(review): StepLR multiplies the learning rate by gamma=0.1 every 2
# scheduler steps. If the scheduler is stepped once per epoch over many epochs
# the rate shrinks towards zero very quickly; confirm this schedule is intended.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)


Using device: cuda


In [7]:

num_epochs = 50

# Create the checkpoint directory up front: otherwise a missing folder only
# surfaces at the first torch.save, after a whole epoch of training.
checkpoint_dir = "train2"
os.makedirs(checkpoint_dir, exist_ok=True)

for epoch in range(num_epochs):
    model.train()
    # enumerate(start=1) gives the 1-based step counter used in the log line.
    for i, (images, targets) in enumerate(data_loader, start=1):
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # The model is called with targets; its return value is treated as a
        # dict of loss tensors that are summed into one scalar to optimise.
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if i % 10 == 0:
            print(f"Epoch [{epoch}/{num_epochs}], Step [{i}/{len(data_loader)}], Loss: {losses.item():.4f}")

    # One scheduler step and one checkpoint per epoch.
    lr_scheduler.step()
    checkpoint_name = "fasterrcnn_resnet50_fpn_epoch_{}_2nd.pth".format(epoch)
    torch.save(model.state_dict(), os.path.join(checkpoint_dir, checkpoint_name))

print("Training complete.")

RuntimeError: Numpy is not available

In [10]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
from PIL import Image, ImageOps
import matplotlib.pyplot as plt
import numpy as np
import os
import cv2
import pytesseract

# Define the function to get the model
def get_model_instance_segmentation(num_classes):
    """Create a Faster R-CNN (ResNet-50 FPN) detector with a new box head.

    The model comes from ``fasterrcnn_resnet50_fpn(pretrained=True)``; its
    ``roi_heads.box_predictor`` is then replaced by a ``FastRCNNPredictor``
    that takes the same input feature size and outputs ``num_classes`` classes.
    Despite the function name, only the box predictor is touched here.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    roi_heads = detector.roi_heads
    head_in_features = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)
    return detector

# Load the trained model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_classes = 2  # Background + license plate
model = get_model_instance_segmentation(num_classes)

# NOTE(review): this is a raw string, not an f-string, so "{epoch}" is a literal
# part of the file name. Confirm a checkpoint with exactly this name exists, or
# substitute the epoch number of the checkpoint you want to evaluate.
checkpoint_path = r"D:\data_cpv\plate_trained_models\fasterrcnn_resnet50_fpn_epoch_{epoch}_2nd.pth"

# map_location moves the stored tensors onto `device`; without it a checkpoint
# saved from a CUDA run cannot be loaded when the code falls back to CPU.
model.load_state_dict(torch.load(checkpoint_path, map_location=device))
model.to(device)
model.eval()

# Read the input image as RGB and turn it into a one-image batch on `device`.
image_path = r"D:\data_cpv\archive\images\train\00cac7ea145fc734.jpg"  # Replace with your image path
image = Image.open(image_path).convert("RGB")
image_tensor = F.to_tensor(image).unsqueeze(0).to(device)

# Run the detector without tracking gradients.
with torch.no_grad():
    prediction = model(image_tensor)

# Bring the boxes and scores of the first (only) image back as NumPy arrays.
detections = prediction[0]
boxes = detections['boxes'].cpu().numpy()
scores = detections['scores'].cpu().numpy()

# Keep only the detections whose score reaches `confidence_threshold`.
confidence_threshold = 0.7
confident = scores >= confidence_threshold
filtered_boxes = boxes[confident]
filtered_scores = scores[confident]

# Draw bounding boxes with scores on the image
font = cv2.FONT_ITALIC
# One scale for both measuring and drawing the label, so the filled
# background rectangle matches the rendered text.
font_scale = 0.4
image_np = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
for box, score in zip(filtered_boxes, filtered_scores):
    # Plain Python ints: truncates like astype(int) and is accepted by every
    # OpenCV drawing call regardless of the NumPy integer width.
    x1, y1, x2, y2 = (int(v) for v in box)
    cv2.rectangle(image_np, (x1, y1), (x2, y2), (0, 255, 0), 2)

    label = f"{score:.2f}"
    (text_width, text_height), baseline = cv2.getTextSize(label, font, font_scale, 1)
    # Filled green strip just above the box, then the score in black on top of it.
    cv2.rectangle(image_np, (x1, y1 - text_height - baseline), (x1 + text_width, y1), (0, 255, 0), -1)
    cv2.putText(image_np, label, (x1, y1 - baseline), font, font_scale, (0, 0, 0), 1)

# Save the image with bounding boxes
output_dir = 'result'
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, 'result_image.jpg')
cv2.imwrite(output_path, image_np)

def _join_confident_words(ocr_data, min_conf=0):
    """Join the OCR words whose confidence is at least ``min_conf``.

    ``ocr_data`` is the dict returned by ``pytesseract.image_to_data`` with
    ``Output.DICT``; only its parallel ``text`` and ``conf`` lists are read.
    Confidences are compared as floats because, depending on the pytesseract
    version, they may arrive as ints, floats or numeric strings such as
    ``"96.5"`` (which ``int()`` would reject). Blank words are skipped so the
    result has single spaces between words.
    """
    words = []
    for word, conf in zip(ocr_data['text'], ocr_data['conf']):
        if float(conf) >= min_conf and word.strip():
            words.append(word.strip())
    return ' '.join(words)


cropped_dir = 'cropped'
filtered_text = []
os.makedirs(cropped_dir, exist_ok=True)

# --psm 6 makes Tesseract assume a single uniform block of text; the whitelist
# limits recognition to the characters expected on a plate.
custom_config = r'--psm 6 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-.'
min_conf = 0

for i, box in enumerate(filtered_boxes):
    x1, y1, x2, y2 = box.astype(int)

    # Integer truncation can collapse a very thin box to zero area. Record an
    # empty string so filtered_text stays aligned with filtered_boxes.
    if x2 <= x1 or y2 <= y1:
        filtered_text.append('')
        continue

    cropped_image = image.crop((x1, y1, x2, y2))
    cropped_image_path = os.path.join(cropped_dir, f'cropped_{i+1}.jpg')
    cropped_image.save(cropped_image_path)

    # Preprocessing: grayscale, then Otsu binarisation. With THRESH_OTSU the
    # threshold is chosen automatically and the 0 passed below is ignored.
    cropped_image_gray = ImageOps.grayscale(cropped_image)
    cropped_image_np = np.array(cropped_image_gray)
    _, cropped_image_np = cv2.threshold(cropped_image_np, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Perform OCR on the preprocessed image
    d = pytesseract.image_to_data(cropped_image_np, config=custom_config, output_type=pytesseract.Output.DICT)
    print(d['conf'])
    text = _join_confident_words(d, min_conf)

    print(f"Detected text in cropped image {i+1}: {text}")
    filtered_text.append(text)

    # Optionally, save OCR result to a text file
    with open(os.path.join(cropped_dir, f'cropped_{i+1}_text.txt'), 'w') as f:
        f.write(text)



# Redraw the detections on a fresh BGR copy of the input image, this time
# labelling each box with "<score>:<OCR text>".
image_np = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
box_colour = (0, 255, 0)
for box, score, text in zip(filtered_boxes, filtered_scores, filtered_text):
    x1, y1, x2, y2 = box.astype(int)  # integer pixel coordinates
    cv2.rectangle(image_np, (x1, y1), (x2, y2), box_colour, 2)

    label = f"{score:.2f}:{text}"
    label_size, baseline = cv2.getTextSize(label, font, 0.4, 1)
    text_width, text_height = label_size

    # Filled strip sitting on the top edge of the box, with the label in black.
    strip_top_left = (x1, y1 - text_height - baseline)
    strip_bottom_right = (x1 + text_width, y1)
    cv2.rectangle(image_np, strip_top_left, strip_bottom_right, box_colour, -1)
    cv2.putText(image_np, label, (x1, y1 - baseline), font, 0.4, (0, 0, 0), 1)

# Write the annotated image next to the earlier result.
output_dir = 'result'
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, 'result_image_char.jpg')
cv2.imwrite(output_path, image_np)

# Show the annotated image; OpenCV stores BGR, matplotlib expects RGB.
fig, ax = plt.subplots(figsize=(20, 8))
ax.imshow(cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB))
ax.axis('off')
plt.show()


AttributeError: module 'matplotlib' has no attribute 'rcParams'