# Prepare the data


In [1]:
%cd /content/drive/MyDrive/CS331/

/content/drive/MyDrive/CS331


In [None]:
!rm -rf datalan1/
!unzip datalan1.zip

In [None]:
# import os
# import json

# json_folder = '/content/drive/MyDrive/CS331/datalan1/train/labels'

# for jsonfile in os.listdir(json_folder):
#     data = None
#     json_path = os.path.join(json_folder, jsonfile)
#     with open(json_path, 'r') as f:
#         data = json.load(f)

#     if 'drone' in data['imagePath']:
#         data['imagePath'] = data['imagePath'][10:]

#     # Ghi lại dữ liệu JSON đã chỉnh sửa vào tệp
#     with open(json_path, 'w') as f:
#         json.dump(data, f, indent=4)

# Prelims

In [2]:
import os, sys
import json
from PIL import Image
import torch
import cv2

# data denpendencies
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# define model dependencies
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# 1 cycle of machine learning mode
1. Prepare the data
2. Define the model
3. Train model
4. Evaluate the model

## hyperparameters

In [2]:
batch_size = 32
learning_rate = 0.0001
num_epochs = 10

# DataLoader

## # crawl data

In [3]:
%cd /content/drive/MyDrive/CS331/datalan1

/content/drive/MyDrive/CS331/datalan1


## get label & data from jsonfile

In [4]:
def get_label(label):
    return 0 if label == 'bird' else 1

In [5]:
def preprocess(data):
    y = [
        float(get_label(data['shapes'][0]['label'])),
        data['shapes'][0]['points'][0][0],
        data['shapes'][0]['points'][0][1],
        data['shapes'][0]['points'][1][0],
        data['shapes'][0]['points'][1][1],
        ]
    y1 = torch.tensor(y)
    return  y1

In [6]:
class BirdDroneDataset(Dataset):
    def __init__(self, img_path, label_path):
        super().__init__()

        self.img_to_tensor = transforms.ToTensor()
        self.samples = []

        for jsonfile in os.listdir(label_path):
            data = None

            with open(os.path.join(label_path, jsonfile), 'r') as f:
                data = json.load(f)
                x = Image.open(os.path.join(img_path, data['imagePath']))



            x =  self.img_to_tensor(x)

            y = preprocess(data)


            self.samples.append([x, y])

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        return self.samples[idx]

## get dataset

In [7]:
train_dataset = BirdDroneDataset('/content/drive/MyDrive/CS331/datalan1/train/images', '/content/drive/MyDrive/CS331/datalan1/train/labels')

In [8]:
val_dataset = BirdDroneDataset('/content/drive/MyDrive/CS331/datalan1/val/images', '/content/drive/MyDrive/CS331/datalan1/val/labels')

## get dataloader

In [9]:
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

## Check dataload


In [None]:
# %cd /content/drive/MyDrive/CS331/datalan1/val/images

/content/drive/MyDrive/CS331/datalan1/val/images


In [10]:
len(val_dataset)
print(val_dataset[1][1])

tensor([  0.,   1.,  28., 223., 204.])


In [None]:
# y = [1.266454545, 2.544557, 3.4, 4.8454545, 5.1]

# # Chuyển danh sách thành tensor
# y_tensor = torch.tensor(y)
# print(y_tensor)

tensor([1.2665, 2.5446, 3.4000, 4.8455, 5.1000])


# Define the Model

In [54]:
class MyModel(nn.Module):
    def __init__(self, ):
        super().__init__()

        self.cnn = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, stride=2, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=32, out_channels=64, stride=2, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=64, out_channels=128, stride=2, kernel_size=3),
            nn.ReLU(),
        )

        # output_shape = 32*128*6*6
        # => shape = 128*6*6 (num_features)
        shape = 4608
        # b,5 -> xuống 5

        self.classifier = nn.Sequential(
            nn.Linear(shape, 512),
            nn.ReLU(),
            nn.Linear(512, 2)

        )

        self.boundingbox_regression = nn.Sequential(
            nn.Linear(shape, 512),
            nn.ReLU(),
            nn.Linear(512, 4),
        )


    def forward(self, x):
        x = self.cnn(x)
        x = x.reshape(x.shape[0], -1)

        c = self.classifier(x)
        c = F.sigmoid(c)

        b = self.boundingbox_regression(x)


        return c, b


In [55]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# class TripletMarginLoss(nn.Module):
#     def __init__(self, margin=1.0):
#         super(TripletMarginLoss, self).__init__()
#         self.margin = margin

#     def forward(self, anchor, positive, negative):
#         distance_positive = torch.norm(anchor - positive, dim=1)
#         distance_negative = torch.norm(anchor - negative, dim=1)
#         loss = torch.relu(distance_positive - distance_negative + self.margin)
#         return torch.mean(loss)

# margin_loss = TripletMarginLoss(margin=1.0)

In [57]:
model = MyModel().to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()
criterion2 = nn.MSELoss()


# Train the Model

In [72]:
correct_predictions = 0
total_predictions = 0

for epoch in range(num_epochs):
    losses = []

    for batch_idx, (X, y) in enumerate(train_loader):
        X = X.to(torch.float32).to(device)
        y_true = y.to(torch.float32).to(device)

        scores = model(X)

        # y_true = y_true.type(torch.LongTensor)
        loss1 = criterion(scores[0], y_true[:,0].type(torch.LongTensor))

        loss2 = criterion2(scores[1], y_true[:,1:] )

        loss = (loss1 + loss2)

        losses.append(loss.item())

          # Tính accuracy
        predicted_classes = torch.argmax(scores[0], dim=1)
        correct_predictions += torch.sum(predicted_classes == y_true[:,0]).item()
        total_predictions += len(y_true)

        accuracy = correct_predictions / total_predictions


        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    epoch_loss = sum(losses) / len(losses)

    print(f"Epoch {epoch}, Avg Loss: {epoch_loss:.2f}, Accuracy: {accuracy:.2f}")


Epoch 0, Avg Loss: 1240.26, Accuracy: 0.58
Epoch 1, Avg Loss: 1198.79, Accuracy: 0.57
Epoch 2, Avg Loss: 1170.46, Accuracy: 0.59
Epoch 3, Avg Loss: 1090.87, Accuracy: 0.60
Epoch 4, Avg Loss: 1061.86, Accuracy: 0.59
Epoch 5, Avg Loss: 1014.06, Accuracy: 0.59
Epoch 6, Avg Loss: 1020.83, Accuracy: 0.59
Epoch 7, Avg Loss: 994.09, Accuracy: 0.60
Epoch 8, Avg Loss: 981.07, Accuracy: 0.59
Epoch 9, Avg Loss: 968.14, Accuracy: 0.60


## Save model ()

In [73]:
%cd /content/drive/MyDrive/CS331
torch.save(model, 'best_model_ver4.pth')

/content/drive/MyDrive/CS331


# Evaluate the Model

## Load model

In [74]:
model = torch.load('best_model_ver4.pth')
model.eval()

MyModel(
  (cnn): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2))
    (7): ReLU()
  )
  (classifier): Sequential(
    (0): Linear(in_features=4608, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=2, bias=True)
  )
  (boundingbox_regression): Sequential(
    (0): Linear(in_features=4608, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=4, bias=True)
  )
)

## get evalution dataset


In [227]:
evalution_dataset = BirdDroneDataset('/content/drive/MyDrive/CS331/datalan1/test/images', '/content/drive/MyDrive/CS331/datalan1/test/labels')

In [228]:
evaluation_dataloader = DataLoader(evalution_dataset)

## function calculate IoU


In [229]:
def calculate_iou(box1, box2):
    # box1 và box2 là hai bounding box, mỗi bounding box là một tuple hoặc danh sách (x1, y1, x2, y2).
    # (x1, y1) là tọa độ góc trái trên và (x2, y2) là tọa độ góc dưới phải.

    # Tính diện tích của phần giao
    x1_inter = max(box1[0], box2[0])
    y1_inter = max(box1[1], box2[1])
    x2_inter = min(box1[2], box2[2])
    y2_inter = min(box1[3], box2[3])

    width_inter = max(0, x2_inter - x1_inter)
    height_inter = max(0, y2_inter - y1_inter)

    area_inter = width_inter * height_inter

    # Tính tổng diện tích của cả hai bounding box
    area_box1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area_box2 = (box2[2] - box2[0]) * (box2[3] - box2[1])

    # Tính IoU
    iou = area_inter / (area_box1 + area_box2 - area_inter)

    return iou

In [261]:
import numpy as np
import cv2

pre_dict = []

with torch.no_grad():
    for images, labels in evaluation_dataloader:
        c, b = model(images)

        for pred_bbox, true_bbox in zip(b, labels[:1,:]):
            iou = calculate_iou(pred_bbox, true_bbox)

        if float(c[0][1]) >= 0.5:
            z = 1
        else:
            z =0

        pre_dict.append([b, iou,z])

print(pre_dict)

[[tensor([[ 25.8173,  46.4946, 215.7916, 183.8678]]), tensor(0.0852), 1], [tensor([[ 22.2117,  39.7100, 184.8625, 156.7229]]), tensor(0.0833), 1], [tensor([[ 24.6486,  44.5700, 205.9707, 174.2914]]), tensor(0.0391), 1], [tensor([[ 25.1147,  45.0502, 208.8032, 177.3105]]), tensor(0.), 1], [tensor([[ 24.9485,  44.9349, 208.5853, 177.0536]]), tensor(0.0651), 1], [tensor([[ 21.9805,  39.7021, 183.6854, 156.0166]]), tensor(0.2049), 1], [tensor([[ 26.2586,  47.2228, 218.9911, 186.4964]]), tensor(0.1882), 1], [tensor([[ 23.4915,  42.2928, 194.3531, 164.6998]]), tensor(0.2842), 1], [tensor([[ 25.8750,  46.5816, 215.4660, 183.1142]]), tensor(0.), 1], [tensor([[ 24.8291,  44.4918, 206.7155, 174.8765]]), tensor(0.), 1], [tensor([[ 24.7185,  44.0646, 204.9480, 175.9872]]), tensor(0.1903), 1], [tensor([[ 24.8077,  44.6272, 207.5300, 177.1365]]), tensor(0.1036), 1], [tensor([[ 25.6390,  46.2548, 213.9659, 181.9021]]), tensor(0.0261), 1], [tensor([[ 28.6856,  51.7050, 238.7184, 203.7378]]), tensor(0.

## Test ảnh


In [247]:
import random
import matplotlib.pyplot as plt
import matplotlib.patches as patches

def plot_image_with_bboxes(image,predicted_bbox, predicted_class):
    fig, ax = plt.subplots(1, figsize=(8, 8))
    ax.imshow(image, cmap='gray')

    # Vẽ bounding box dự đoán (màu đỏ) và gắn tên object
    if predicted_bbox is not None:
        x1, x2, y1, y2  = predicted_bbox

        predicted_rect = patches.Rectangle(
            (x1, x2), y1, y2,
            linewidth=3, edgecolor='black', facecolor='none', label='Predicted'
        )
        ax.add_patch(predicted_rect)
        ax.text(x1, x2+y2, predicted_class, color='black', backgroundcolor='none', fontsize=16)
    print("Predicted_bbox:", predicted_bbox)

    # Hiển thị
    plt.legend(handles=[predicted_rect])
    plt.axis('on')
    plt.show()

In [None]:
import numpy as np
import cv2
from PIL import Image
from google.colab.patches import cv2_imshow
import matplotlib.pyplot as plt

threshold = 0.5
total_im = []


# Danh sách đường dẫn tệp tin ảnh
image_paths = ["/content/drive/MyDrive/CS331/datalan1/test/images"]

i = 0
predicted_class = None
for path in image_paths:
    x1 = pre_dict[i][0][0][0]
    x2 = pre_dict[i][0][0][1]
    y1 = pre_dict[i][0][0][2]
    y2 = pre_dict[i][0][0][3]

    if pre_dict[i][2] == 1:
        predicted_class = 'Drone'
    else:
        predicted_class = 'Bird'

    plot_image_with_bboxes(path, pre_dict[i][0][0], predicted_class)

    i += 1
