In [2]:
import os
import json
import glob
from PIL import Image

# Paths
IMAGE_DIR = "trafic_data/valid/images"  # Folder containing images
YOLO_LABELS_DIR = "trafic_data/valid/labels"  # Folder with YOLO .txt files
OUTPUT_JSON = "trafic_data/valid/coco_annotations_valid.json"

# Define COCO structure
coco = {
    "images": [],
    "annotations": [],
    "categories": []
}

# Class Mapping
class_mapping = {
    1: "ambulance",
    2: "army vehicle",
    3: "auto rickshaw",
    4: "bicycle",
    5: "bus",
    6: "car",
    7: "garbagevan",
    8: "human hauler",
    9: "minibus",
    10: "minivan",
    11: "motorbike",
    12: "pickup",
    13: "policecar",
    14: "rickshaw",
    15: "scooter",
    16: "suv",
    17: "taxi",
    18: "three wheelers -CNG-",
    19: "truck",
    20: "van",
    21: "wheelbarrow"
}

# Ensure COCO category IDs start from 1
category_id_map = {k: k+1 for k in class_mapping.keys()}

# Add categories to COCO JSON
for class_id, class_name in class_mapping.items():
    coco["categories"].append({
        "id": category_id_map[class_id],
        "name": class_name,
        "supercategory": "vehicle"
    })

# Process each image
image_id = 0
annotation_id = 0

for label_file in glob.glob(os.path.join(YOLO_LABELS_DIR, "*.txt")):
    image_filename = os.path.splitext(os.path.basename(label_file))[0] + ".jpg"
    image_path = os.path.join(IMAGE_DIR, image_filename)

    # Check if image exists
    if not os.path.exists(image_path):
        print(f"Skipping {image_filename}, image not found!")
        continue

    # Get image size
    with Image.open(image_path) as img:
        width, height = img.size

    # Add image entry
    coco["images"].append({
        "id": image_id,
        "file_name": image_filename,
        "width": width,
        "height": height
    })

    # Read YOLO annotations
    with open(label_file, "r") as f:
        for line in f.readlines():
            parts = line.strip().split()
            class_id = int(parts[0])  # YOLO class ID starts from 0

            # Ensure class_id exists in category_id_map
            if class_id not in category_id_map:
                print(f"Skipping unknown class ID {class_id} in {label_file}")
                continue

            x_center, y_center, w, h = map(float, parts[1:])

            # Convert to absolute coordinates
            x_min = (x_center - w / 2) * width
            y_min = (y_center - h / 2) * height
            box_width = w * width
            box_height = h * height

            # Add annotation entry
            coco["annotations"].append({
                "id": annotation_id,
                "image_id": image_id,
                "category_id": category_id_map[class_id],
                "bbox": [x_min, y_min, box_width, box_height],
                "area": box_width * box_height,
                "iscrowd": 0
            })
            annotation_id += 1

    image_id += 1

# Save COCO JSON
with open(OUTPUT_JSON, "w") as f:
    json.dump(coco, f, indent=4)

print(f"COCO annotations saved to {OUTPUT_JSON}")


COCO annotations saved to trafic_data/valid/coco_annotations_valid.json


In [3]:
import os
import json
import glob
from PIL import Image

# Paths
IMAGE_DIR = "trafic_data/train/images"  # Folder containing images
YOLO_LABELS_DIR = "trafic_data/train/labels"  # Folder with YOLO .txt files
OUTPUT_JSON = "trafic_data/train/coco_annotations_train.json"

# Define COCO structure
coco = {
    "images": [],
    "annotations": [],
    "categories": []
}

# Class Mapping
class_mapping = {
    1: "ambulance",
    2: "army vehicle",
    3: "auto rickshaw",
    4: "bicycle",
    5: "bus",
    6: "car",
    7: "garbagevan",
    8: "human hauler",
    9: "minibus",
    10: "minivan",
    11: "motorbike",
    12: "pickup",
    13: "policecar",
    14: "rickshaw",
    15: "scooter",
    16: "suv",
    17: "taxi",
    18: "three wheelers -CNG-",
    19: "truck",
    20: "van",
    21: "wheelbarrow"
}

# Ensure COCO category IDs start from 1
category_id_map = {k: k+1 for k in class_mapping.keys()}

# Add categories to COCO JSON
for class_id, class_name in class_mapping.items():
    coco["categories"].append({
        "id": category_id_map[class_id],
        "name": class_name,
        "supercategory": "vehicle"
    })

# Process each image
image_id = 0
annotation_id = 0

for label_file in glob.glob(os.path.join(YOLO_LABELS_DIR, "*.txt")):
    image_filename = os.path.splitext(os.path.basename(label_file))[0] + ".jpg"
    image_path = os.path.join(IMAGE_DIR, image_filename)

    # Check if image exists
    if not os.path.exists(image_path):
        print(f"Skipping {image_filename}, image not found!")
        continue

    # Get image size
    with Image.open(image_path) as img:
        width, height = img.size

    # Add image entry
    coco["images"].append({
        "id": image_id,
        "file_name": image_filename,
        "width": width,
        "height": height
    })

    # Read YOLO annotations
    with open(label_file, "r") as f:
        for line in f.readlines():
            parts = line.strip().split()
            class_id = int(parts[0])  # YOLO class ID starts from 0

            # Ensure class_id exists in category_id_map
            if class_id not in category_id_map:
                print(f"Skipping unknown class ID {class_id} in {label_file}")
                continue

            x_center, y_center, w, h = map(float, parts[1:])

            # Convert to absolute coordinates
            x_min = (x_center - w / 2) * width
            y_min = (y_center - h / 2) * height
            box_width = w * width
            box_height = h * height

            # Add annotation entry
            coco["annotations"].append({
                "id": annotation_id,
                "image_id": image_id,
                "category_id": category_id_map[class_id],
                "bbox": [x_min, y_min, box_width, box_height],
                "area": box_width * box_height,
                "iscrowd": 0
            })
            annotation_id += 1

    image_id += 1

# Save COCO JSON
with open(OUTPUT_JSON, "w") as f:
    json.dump(coco, f, indent=4)

print(f"COCO annotations saved to {OUTPUT_JSON}")


Skipping unknown class ID 0 in trafic_data/train/labels\07_jpg.rf.8447b11632c1b63ab0e127f16625e0d2.txt
Skipping unknown class ID 0 in trafic_data/train/labels\09_jpg.rf.42406b1c067f04bf73349bd75b2e3fa8.txt
Skipping unknown class ID 0 in trafic_data/train/labels\12_jpg.rf.ac83fcb8cc8c8bbc5587c2e6881e3d4a.txt
Skipping unknown class ID 0 in trafic_data/train/labels\18_jpg.rf.dd32e1abd7df904495008978ddc32583.txt
Skipping unknown class ID 0 in trafic_data/train/labels\217_jpg.rf.679ec4f07c8bb1ee88470cf506d51788.txt
Skipping unknown class ID 0 in trafic_data/train/labels\68_jpg.rf.545f6cbb65441eda77d15c001778971a.txt
Skipping unknown class ID 0 in trafic_data/train/labels\78_jpg.rf.93edbe881bc016825faa8e839f78774c.txt
Skipping unknown class ID 0 in trafic_data/train/labels\80_jpg.rf.0f3e8464629e6cb01c7a1cdd5b70598b.txt
Skipping unknown class ID 0 in trafic_data/train/labels\85_jpg.rf.23d754dcb81727f702299ae5f1eb8f76.txt
Skipping unknown class ID 0 in trafic_data/train/labels\85_jpg.rf.23d754

In [4]:
import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.datasets import CocoDetection
from torchvision.transforms import functional as F
import matplotlib.pyplot as plt
from PIL import Image

In [5]:
class CocoTransform:
    def __call__(self,image,target):
        image=F.to_tensor(image)
        return image,target

In [6]:
def get_coco_dataset(img_dir,ann_file):
    return CocoDetection(
        root=img_dir,
        annFile=ann_file,
        transforms=CocoTransform()
    )
#load dataset
train_dataset=get_coco_dataset(
    img_dir=r"trafic_data\train\images",
    ann_file=r"trafic_data\train\coco_annotations_train.json"
) 
valid_dataset=get_coco_dataset(
    img_dir=r"trafic_data\valid\images",
    ann_file=r"trafic_data\valid\coco_annotations_valid.json"
) 
#Dataloader
train_loader=DataLoader(train_dataset,batch_size=4,shuffle=True,collate_fn=lambda x:tuple(zip(*x)))
valid_loader=DataLoader(valid_dataset,batch_size=4,shuffle=False,collate_fn=lambda x:tuple(zip(*x)))
   

loading annotations into memory...
Done (t=0.22s)
creating index...
index created!
loading annotations into memory...
Done (t=0.61s)
creating index...
index created!


In [7]:
#Load faster rcnn from reset-50 backbone
def get_model(num_classes):
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_features=model.roi_heads.box_predictor.cls_score.in_features  #get number of input features for classifier
    #replace pretrained head woth the new one
    model.roi_heads.box_predictor= FastRCNNPredictor(in_features,num_classes)
    return model
    

In [8]:
num_classes=22
model=get_model(num_classes)



In [9]:
device=torch.device('cpu')
#define optimizer and learning rate scheduler
params=[p for p in model.parameters() if p.requires_grad]
optimizer=torch.optim.SGD(params,lr=0.005,momentum=0.9,weight_decay=0.0005)
lr_scheduler=torch.optim.lr_scheduler.StepLR(optimizer,step_size=3,gamma=0.1)

In [None]:
def train_one_epoch(model, optimizer, data_loader, device, epoch):
    model.train()
    for images, targets in data_loader:
        # Move images to the device
        images = [img.to(device) for img in images]

        # Validate and process targets
        processed_targets = []
        valid_images = []
        for i, target in enumerate(targets):
            boxes = []
            labels = []
            for obj in target:
                # Extract bbox
                bbox = obj["bbox"]  # Format: [x, y, width, height]
                x, y, w, h = bbox

                # Ensure the width and height are positive
                if w > 0 and h > 0:
                    boxes.append([x, y, x + w, y + h])  # Convert to [x_min, y_min, x_max, y_max]
                    labels.append(obj["category_id"])

            # Only process if there are valid boxes
            if boxes:
                processed_target = {
                    "boxes": torch.tensor(boxes, dtype=torch.float32).to(device),
                    "labels": torch.tensor(labels, dtype=torch.int64).to(device),
                }
                processed_targets.append(processed_target)
                valid_images.append(images[i])  # Add only valid images

        # Skip iteration if no valid targets
        if not processed_targets:
            continue

        # Ensure images and targets are aligned
        images = valid_images

        # Forward pass
        loss_dict = model(images, processed_targets)
        losses = sum(loss for loss in loss_dict.values())

        # Backpropagation
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    print(f"Epoch [{epoch}] Loss: {losses.item():.4f}")

In [None]:
# Training loop
num_epochs = 2
for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, train_loader, device, epoch)
    lr_scheduler.step()
    
    # Save the model's state dictionary after every epoch
    model_path = f"fasterrcnn_resnet50_epoch_{epoch + 1}.pth"
    torch.save(model.state_dict(), model_path)
    print(f"Model saved: {model_path}")

In [None]:
import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.datasets import CocoDetection
from torchvision.transforms import functional as F
import matplotlib.pyplot as plt
from PIL import Image

# Load Faster R-CNN with ResNet-50 backbone
def get_model(num_classes):
    # Load pre-trained Faster R-CNN
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # Get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # Replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model


# Initialize the model
num_classes = 4  # Background + chair + person + table

# Move model to GPU if available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


# Load the trained model
model = get_model(num_classes)
model.load_state_dict(torch.load("fasterrcnn_resnet50_epoch_5.pth"))
model.to(device)
model.eval()  # Set the model to evaluation mode


def prepare_image(image_path):
    image = Image.open(image_path).convert("RGB")  # Open image
    image_tensor = F.to_tensor(image).unsqueeze(0)  # Convert image to tensor and add batch dimension
    return image_tensor.to(device)



# Load the unseen image
image_path = "test.jpg"
image_tensor = prepare_image(image_path)

with torch.no_grad():  # Disable gradient computation for inference
    prediction = model(image_tensor)

# `prediction` contains:
# - boxes: predicted bounding boxes
# - labels: predicted class labels
# - scores: predicted scores for each box (confidence level)
COCO_CLASSES = {0: "Background", 1: "Chair", 2: "Person", 3: "Table"}

def get_class_name(class_id):
    return COCO_CLASSES.get(class_id, "Unknown")
    
# Draw bounding boxes with the correct class names and increase image size
def draw_boxes(image, prediction, fig_size=(10, 10)):
    boxes = prediction[0]['boxes'].cpu().numpy()  # Get predicted bounding boxes
    labels = prediction[0]['labels'].cpu().numpy()  # Get predicted labels
    scores = prediction[0]['scores'].cpu().numpy()  # Get predicted scores
    
    # Set a threshold for showing boxes (e.g., score > 0.5)
    threshold = 0.5
    
    # Set up the figure size to control the image size
    plt.figure(figsize=fig_size)  # Adjust the figure size here

    for box, label, score in zip(boxes, labels, scores):
        if score > threshold:
            x_min, y_min, x_max, y_max = box
            class_name = get_class_name(label)  # Get the class name
            plt.imshow(image)  # Display the image
            plt.gca().add_patch(plt.Rectangle((x_min, y_min), x_max - x_min, y_max - y_min, 
                                              linewidth=2, edgecolor='r', facecolor='none'))
            plt.text(x_min, y_min, f"{class_name} ({score:.2f})", color='r')
    
    plt.axis('off')  # Turn off axis
    plt.show()

# Display the image with bounding boxes and correct labels
draw_boxes(Image.open(image_path), prediction, fig_size=(12, 10))