In [1]:
import torch
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import Compose, Resize, ToTensor
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
import os
from PIL import Image
import json
from tqdm import tqdm

In [2]:
# Check if CUDA is available
if torch.cuda.is_available():
    # Get the name of the GPU being used
    gpu_name = torch.cuda.get_device_name(torch.cuda.current_device())
    
    # Get the CUDA version being used by PyTorch
    cuda_version = torch.version.cuda
    
    # Get the cuDNN version
    cudnn_version = torch.backends.cudnn.version()
    
    print(f"🚀 GPU in use: {gpu_name}")
    print(f"⚡ CUDA version: {cuda_version}")
    print(f"📚 cuDNN version: {cudnn_version}")
else:
    print("❌ CUDA is not available. Running on CPU.")


🚀 GPU in use: NVIDIA GeForce GTX 1050
⚡ CUDA version: 12.4
📚 cuDNN version: 90100


In [3]:
class TACO(Dataset):
    def __init__(self, root, annotation_file, transforms=None):
        self.root = root
        self.transforms = transforms
        with open(annotation_file) as f:
            self.annotations = json.load(f)
        self.images = list(self.annotations["images"])
        self.categories = {c["id"]: c["name"] for c in self.annotations["categories"]}

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image_info = self.images[idx]
        image_path = os.path.join(self.root, image_info["file_name"])
        image = Image.open(image_path).convert("RGB")
        
        # Proses anotasi
        annotations = [
            a for a in self.annotations["annotations"] if a["image_id"] == image_info["id"]
        ]
        boxes = []
        labels = []
        for ann in annotations:
            bbox = ann["bbox"]
            boxes.append([bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]])
            labels.append(ann["category_id"])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        target = {"boxes": boxes, "labels": labels}

        # Gunakan transformasi jika tersedia
        if self.transforms:
            image = self.transforms(image)

        return image, target

In [4]:
transform = Compose([
    Resize((224, 224)),  # Mengubah ukuran gambar menjadi 224x224
    ToTensor(),          # Mengubah gambar menjadi tensor
])

# Dataset dan DataLoader
dataset = TACO(
    root="TACO-master/data", 
    annotation_file="TACO-master/data/annotations.json", 
    transforms=transform
)
data_loader = DataLoader(
    dataset, 
    batch_size=4, 
    shuffle=True, 
    collate_fn=lambda x: tuple(zip(*x))
)

In [5]:
# Load model Faster R-CNN
model = fasterrcnn_resnet50_fpn(pretrained=True)

# Menyesuaikan jumlah kelas
num_classes = len(dataset.categories) + 1  # +1 untuk background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)



# Training


In [6]:
# Training Loop
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epochs = 50

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    total_batches = len(data_loader)

    print(f"Epoch {epoch+1}/{num_epochs}")
    progress_bar = tqdm(enumerate(data_loader), total=total_batches, desc="Training")

    for batch_idx, (images, targets) in progress_bar:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        epoch_loss += losses.item()

        # Update progress bar
        progress_bar.set_postfix(loss=losses.item())

    print(f"Epoch {epoch+1}/{num_epochs}, Total Loss: {epoch_loss:.4f}")

Epoch 1/50


Training:  26%|██▋       | 99/375 [07:36<21:11,  4.61s/it, loss=109]   


KeyboardInterrupt: 

In [None]:
# Save model
torch.save(model.state_dict(), "rcnn_224pixel_50epochs.pth")

In [None]:
# Load model for inference
model.load_state_dict(torch.load("rcnn_224pixel_50epochs.pth"))
model.eval()

  model.load_state_dict(torch.load("rcnn_224pixel.pth"))


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(