# Import Libraries

# In [None]:
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import Compose, Resize, ToTensor
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt
import os
from PIL import Image
import json
from tqdm import tqdm

# Create Custom Dataset Class

# In [None]:
class TACO(Dataset):
    """COCO-style object-detection dataset yielding ``(image, target)`` pairs.

    The annotation file is expected to be a JSON document with ``"images"``,
    ``"annotations"`` and ``"categories"`` lists. Each annotation carries an
    ``"image_id"``, a ``"category_id"`` and a ``"bbox"`` given as
    ``[x, y, width, height]``.

    Args:
        root: Directory that the ``file_name`` of every image is relative to.
        annotation_file: Path to the JSON annotation file.
        transforms: Optional callable applied to the PIL image. If it changes
            the image size, the boxes are rescaled by the same factors. Only
            pure resizing is compensated; crops or flips are not.

    Attributes:
        images: List of image records from the annotation file.
        categories: Mapping ``category_id -> name``.
        category_to_label: Mapping ``category_id -> label`` with labels in
            ``1..len(categories)``; 0 is left free for the background class.
        label_to_category: Inverse of ``category_to_label``.
    """

    def __init__(self, root, annotation_file, transforms=None):
        self.root = root
        self.transforms = transforms
        with open(annotation_file, encoding="utf-8") as f:
            self.annotations = json.load(f)
        self.images = list(self.annotations["images"])
        self.categories = {c["id"]: c["name"] for c in self.annotations["categories"]}

        # Detection heads sized as ``len(categories) + 1`` need labels in
        # 1..N. Raw category ids may start at 0 (clashing with background)
        # or be non-contiguous, so remap them once here.
        self.category_to_label = {
            category_id: label
            for label, category_id in enumerate(sorted(self.categories), start=1)
        }
        self.label_to_category = {
            label: category_id
            for category_id, label in self.category_to_label.items()
        }

        # Group annotations per image once, instead of rescanning the whole
        # annotation list on every __getitem__ call.
        self._annotations_by_image = {}
        for ann in self.annotations["annotations"]:
            self._annotations_by_image.setdefault(ann["image_id"], []).append(ann)

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.images)

    @staticmethod
    def _image_size(image):
        """Return ``(width, height)`` of a tensor or PIL image, else ``None``."""
        if torch.is_tensor(image):
            height, width = image.shape[-2:]
            return width, height
        size = getattr(image, "size", None)
        if isinstance(size, tuple) and len(size) == 2:
            return size
        return None

    def _build_target(self, image_info, scale_x=1.0, scale_y=1.0):
        """Build the target dict for one image record.

        Boxes are converted from ``[x, y, w, h]`` to ``[x1, y1, x2, y2]`` and
        multiplied by the given scale factors.

        Raises:
            KeyError: If an annotation references an unknown category id.
        """
        boxes = []
        labels = []
        for ann in self._annotations_by_image.get(image_info["id"], ()):
            x, y, width, height = ann["bbox"][:4]
            if width <= 0 or height <= 0:
                # Zero-area boxes are rejected by torchvision detection models.
                continue
            boxes.append([
                x * scale_x,
                y * scale_y,
                (x + width) * scale_x,
                (y + height) * scale_y,
            ])
            labels.append(self.category_to_label[ann["category_id"]])

        # reshape keeps the (N, 4) layout even when the image has no boxes.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        return {"boxes": boxes, "labels": labels}

    def __getitem__(self, idx):
        """Load image ``idx`` and return ``(image, target)``.

        ``target`` is a dict with ``"boxes"`` (float32, shape ``(N, 4)``) and
        ``"labels"`` (int64, shape ``(N,)``).
        """
        image_info = self.images[idx]
        image_path = os.path.join(self.root, image_info["file_name"])
        # convert() returns a fully loaded copy, so the file can be closed.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        original_width, original_height = image.size

        if self.transforms:
            image = self.transforms(image)

        # Keep the boxes aligned with the image if the transform resized it.
        scale_x = scale_y = 1.0
        new_size = self._image_size(image)
        if new_size is not None:
            scale_x = new_size[0] / original_width
            scale_y = new_size[1] / original_height

        target = self._build_target(image_info, scale_x, scale_y)
        return image, target

# Transform Images to 224 x 224 Pixels

# Preprocessing applied to every image: resize to a fixed 224 x 224, then
# convert the PIL image to a tensor.
transform = Compose([Resize((224, 224)), ToTensor()])

# Define Path and DataLoader

# In [None]:
# Dataset and DataLoader
def collate_detection_batch(batch):
    """Regroup ``[(image, target), ...]`` into ``(images, targets)`` tuples.

    Images in a detection batch carry different numbers of boxes, so the
    samples are kept as tuples rather than stacked into one tensor. A named
    module-level function (unlike a lambda) can also be pickled, which
    DataLoader needs for worker processes under the spawn start method.
    """
    return tuple(zip(*batch))


train_dataset = TACO(
    root="TACO-master/data/train",
    annotation_file="TACO-master/data/train_annotations.json",
    transforms=transform,
)

test_dataset = TACO(
    root="TACO-master/data/test",
    annotation_file="TACO-master/data/test_annotations.json",
    transforms=transform,
)

# Training batches are reshuffled every epoch.
train_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=collate_detection_batch,
)

# Test batches keep dataset order so predictions can be matched to images.
test_loader = DataLoader(
    test_dataset,
    batch_size=4,
    shuffle=False,
    collate_fn=collate_detection_batch,
)

# Model Initialization

# In [None]:
# Model Initialization
# Start from the pretrained Faster R-CNN (ResNet-50 FPN backbone).
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
# of `weights=` - confirm against the installed version.
model = fasterrcnn_resnet50_fpn(pretrained=True)

# Replace the box-prediction head so it outputs one score per dataset
# category plus one for the background class.
box_predictor_cls = torchvision.models.detection.faster_rcnn.FastRCNNPredictor
in_features = model.roi_heads.box_predictor.cls_score.in_features
num_classes = 1 + len(train_dataset.categories)
model.roi_heads.box_predictor = box_predictor_cls(in_features, num_classes)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


# Training

# In [None]:
# Training Loop
# Adam over all model parameters; `loss_values` collects the summed loss of
# each epoch for plotting afterwards.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10  # Example value
loss_values = []

for epoch in range(num_epochs):
    model.train()  # in train mode the model returns a dict of losses
    epoch_loss = 0
    progress_bar = tqdm(
        enumerate(train_loader),
        total=len(train_loader),
        desc=f"Epoch {epoch+1}/{num_epochs}",
    )

    for batch_idx, (images, targets) in progress_bar:
        # Move the whole batch (images and every target tensor) to the device.
        images = [img.to(device) for img in images]
        moved_targets = []
        for t in targets:
            moved_targets.append({k: v.to(device) for k, v in t.items()})
        targets = moved_targets

        # The training objective is the sum of all loss components.
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        batch_loss = losses.item()
        epoch_loss += batch_loss
        progress_bar.set_postfix(loss=batch_loss)

    loss_values.append(epoch_loss)
    print(f"Epoch {epoch+1}/{num_epochs}, Total Loss: {epoch_loss:.4f}")

# Visualize Training Loss
# One point per epoch: the summed batch loss recorded in `loss_values`.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(range(1, num_epochs + 1), loss_values, marker='o', label="Training Loss")
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss")
ax.set_title("Training Loss Over Epochs")
ax.legend()
plt.show()


# Testing

# In [None]:
# Evaluation Function
def evaluate_model(model, data_loader, device):
    model.eval()
    results = []
    with torch.no_grad():
        for images, targets in tqdm(data_loader, desc="Evaluating"):
            images = [img.to(device) for img in images]
            outputs = model(images)
            results.append(outputs)
    return results

# Evaluate on Test Data
# `test_results` holds one entry per test batch, in loader order.
test_results = evaluate_model(
    model=model,
    data_loader=test_loader,
    device=device,
)


# Visualizing Predictions

# Visualize predictions on a few test images
# `test_results` holds one list of per-image predictions per batch. Flatten
# it so prediction i belongs to test_dataset[i]; this relies on the test
# loader iterating the dataset in order (shuffle=False).
score_threshold = 0.5  # hide low-confidence detections
flat_predictions = [pred for batch in test_results for pred in batch]

# Predictions come first in zip() so no extra image is loaded once the five
# predictions are exhausted.
for i, (prediction, (image, _)) in enumerate(zip(flat_predictions[:5], test_dataset)):
    image = image.permute(1, 2, 0).cpu().numpy()  # Convert tensor to HWC
    plt.figure(figsize=(8, 8))
    plt.imshow(image)
    ax = plt.gca()

    detections = zip(
        prediction["boxes"].tolist(),
        prediction["labels"].tolist(),
        prediction["scores"].tolist(),
    )
    for (x1, y1, x2, y2), label, score in detections:
        if score < score_threshold:
            continue
        ax.add_patch(
            plt.Rectangle(
                (x1, y1), x2 - x1, y2 - y1,
                fill=False, edgecolor="red", linewidth=2,
            )
        )
        ax.text(
            x1, y1, f"{label}: {score:.2f}",
            color="white", backgroundcolor="red", fontsize=8,
        )

    plt.title(f"Prediction {i+1}")
    plt.show()


# Saving Model

# Save the model
# Only the parameter state dict is written, not the model object itself.
checkpoint_path = "faster_rcnn_model.pth"
torch.save(model.state_dict(), checkpoint_path)

# Load the model

# Load the model
# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a GPU can also be restored on a CPU-only machine.
state_dict = torch.load("faster_rcnn_model.pth", map_location=device)
model.load_state_dict(state_dict)
model.eval()  # switch to inference mode