In [1]:
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
import os
from PIL import Image
import json
from tqdm import tqdm


In [2]:
class TACO(Dataset):
    """Object-detection dataset backed by a single JSON annotation file.

    The JSON file must contain the keys ``"images"``, ``"annotations"`` and
    ``"categories"``. Each annotation's ``"bbox"`` is treated as
    ``[x, y, width, height]`` and converted to ``[x1, y1, x2, y2]``.

    Each item is ``(image, target)`` where ``target`` is a dict with:

    * ``"boxes"``  - float32 tensor of shape ``(N, 4)`` (``(0, 4)`` when the
      image has no usable annotation),
    * ``"labels"`` - int64 tensor of shape ``(N,)`` holding the raw
      ``category_id`` values.

    NOTE(review): labels are passed through unchanged. torchvision detection
    models appear to reserve label 0 for background, so if the annotation
    file numbers its categories from 0 the first category would collide with
    background - confirm against the annotation file and remap if needed.

    Args:
        root: Directory that every image ``file_name`` is relative to.
        annotation_file: Path to the JSON annotation file.
        transforms: Optional callable applied to the PIL image only; the
            target is not transformed.
    """

    def __init__(self, root, annotation_file, transforms=None):
        self.root = root
        self.transforms = transforms
        with open(annotation_file) as f:
            self.annotations = json.load(f)
        self.images = list(self.annotations["images"])
        self.categories = {c["id"]: c["name"] for c in self.annotations["categories"]}

        # Group annotations by image once, so __getitem__ is a dict lookup
        # instead of a scan over every annotation for every sample.
        # File order within each image is preserved.
        self._anns_by_image = {}
        for ann in self.annotations["annotations"]:
            self._anns_by_image.setdefault(ann["image_id"], []).append(ann)

    def __len__(self):
        """Return the number of images listed in the annotation file."""
        return len(self.images)

    def _build_target(self, image_id):
        """Build the ``{"boxes", "labels"}`` target dict for one image id."""
        boxes = []
        labels = []
        for ann in self._anns_by_image.get(image_id, ()):
            x, y, w, h = ann["bbox"][:4]
            # Skip degenerate boxes: a box with non-positive width or height
            # has no area and is rejected by torchvision's detection models.
            if w <= 0 or h <= 0:
                continue
            boxes.append([x, y, x + w, y + h])
            labels.append(ann["category_id"])

        # reshape(-1, 4) keeps the (N, 4) layout even when N == 0; a bare
        # as_tensor([]) would produce shape (0,).
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        return {"boxes": boxes, "labels": labels}

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return ``(image, target)``."""
        image_info = self.images[idx]
        image_path = os.path.join(self.root, image_info["file_name"])
        image = Image.open(image_path).convert("RGB")

        target = self._build_target(image_info["id"])

        if self.transforms:
            image = self.transforms(image)

        return image, target


In [3]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem at this mount point.
DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
from torchvision.transforms import ToTensor

# Point DATA_ROOT at the folder that holds the images and annotations.json.
DATA_ROOT = "/content/drive/MyDrive/TACO-rcnn/data"


def collate_detection_batch(batch):
    """Regroup a list of (image, target) pairs into (images, targets) tuples.

    Keeps each image and target as a separate element instead of stacking
    them into one tensor.
    """
    return tuple(zip(*batch))


dataset = TACO(
    root=DATA_ROOT,
    annotation_file=DATA_ROOT + "/annotations.json",
    transforms=ToTensor(),
)
data_loader = DataLoader(
    dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=collate_detection_batch,
)


In [5]:
# Start from the pretrained detector. The `weights=` argument replaces the
# deprecated `pretrained=True` flag, which emits a deprecation warning on
# current torchvision releases.
model = fasterrcnn_resnet50_fpn(weights="DEFAULT")

# Replace the box-prediction head so its output size matches this dataset.
# NOTE(review): this assumes label 0 is background and category ids run 1..N;
# if the annotation file numbers categories from 0, remap labels - confirm.
num_classes = len(dataset.categories) + 1  # +1 for the background class
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)




In [6]:
# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# NOTE(review): the recorded run plateaus at a total loss of about 200 from
# epoch 2 onward; the optimizer choice / learning rate may be worth revisiting.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    batches_per_epoch = len(data_loader)

    print(f"Epoch {epoch+1}/{num_epochs}")
    pbar = tqdm(data_loader, total=batches_per_epoch, desc="Training")

    for images, targets in pbar:
        # Move every image and every tensor inside each target dict to the device.
        images = [image.to(device) for image in images]
        targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in targets
        ]

        # In train mode the model returns a dict of losses; optimize their sum.
        loss_dict = model(images, targets)
        batch_loss = sum(loss_dict.values())

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        batch_loss_value = batch_loss.item()
        epoch_loss += batch_loss_value

        # Show the current batch loss next to the progress bar.
        pbar.set_postfix(loss=batch_loss_value)

    # This is the sum of the batch losses over the epoch, not their mean.
    print(f"Epoch {epoch+1}/{num_epochs}, Total Loss: {epoch_loss:.4f}")

Epoch 1/5


Training: 100%|██████████| 375/375 [25:02<00:00,  4.01s/it, loss=0.667]


Epoch 1/5, Total Loss: 269.6984
Epoch 2/5


Training: 100%|██████████| 375/375 [15:58<00:00,  2.56s/it, loss=0.283]


Epoch 2/5, Total Loss: 199.7731
Epoch 3/5


Training: 100%|██████████| 375/375 [16:03<00:00,  2.57s/it, loss=0.312]


Epoch 3/5, Total Loss: 198.4185
Epoch 4/5


Training: 100%|██████████| 375/375 [16:06<00:00,  2.58s/it, loss=0.377]


Epoch 4/5, Total Loss: 200.3803
Epoch 5/5


Training: 100%|██████████| 375/375 [16:06<00:00,  2.58s/it, loss=0.332]

Epoch 5/5, Total Loss: 200.4424





In [8]:
# Save the model: only the state_dict (the weights) is written to disk.
CHECKPOINT_PATH = "fasterrcnn_taco.pth"
torch.save(model.state_dict(), CHECKPOINT_PATH)



In [9]:
# Load model
# weights_only=True restricts unpickling to tensors and plain containers, so
# a tampered checkpoint cannot execute code; it also avoids the warning that
# torch.load emits when the argument is omitted.
# map_location="cpu" lets a checkpoint saved on a GPU be read on a CPU-only
# runtime; load_state_dict then copies the values into the model's existing
# parameters on whatever device they already live on.
model.load_state_dict(
    torch.load("fasterrcnn_taco.pth", map_location="cpu", weights_only=True)
)
model.eval()

  model.load_state_dict(torch.load("fasterrcnn_taco.pth"))


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(