In [1]:
import torch
import torchvision.transforms as transforms
from PIL import Image
from torchvision.transforms.functional import InterpolationMode

import torch
import torch.nn as nn
import torchvision.models as models
import torch.optim as optim
import torch.nn.functional as F

import os

## **Model and Transform**

In [2]:
# Build an untrained ResNet-50 backbone plus a linear classification head.
# No pretrained weights are requested here; the trained parameters are
# restored from a checkpoint file later in this cell.

# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook does not crash on a CPU-only runtime.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# `weights=None` asks for random initialisation. Passing a boolean for
# `weights` is deprecated since torchvision 0.13 and only emits a warning
# before being treated as `None`.
resnet = models.resnet50(weights=None)

# Drop the last child module (the final fully connected layer) so the
# network outputs pooled features instead of ImageNet logits.
resnet = nn.Sequential(*list(resnet.children())[:-1])

# Head mapping the 2048-dimensional feature vector to 2 class logits.
mlp = nn.Sequential(
    nn.Linear(2048, 2),
)
# Combine the ResNet encoder and the MLP head into a single module.
class CombinedModel(nn.Module):
    """Run an encoder, flatten its output per sample, then apply a head.

    Attributes are registered as ``encoder`` and ``head`` (in that order), so
    state-dict keys are prefixed with ``encoder.`` and ``head.``.
    """

    def __init__(self, encoder, head):
        """Store the two sub-modules.

        Args:
            encoder: Module producing per-sample features of any shape.
            head: Module consuming the flattened ``(batch, features)`` tensor.
        """
        super().__init__()
        self.encoder = encoder
        self.head = head

    def forward(self, x):
        """Return ``head(flatten(encoder(x)))``.

        The encoder output is reshaped with ``view`` to ``(batch, -1)``, keeping
        the first dimension and collapsing all remaining ones.
        """
        features = self.encoder(x)
        batch_size = features.size(0)
        flattened = features.view(batch_size, -1)
        return self.head(flattened)

# Assemble the complete model from the encoder and the classification head.
model = CombinedModel(resnet, mlp)

# Restore the trained parameters from the serialized state dict.
model_path = '/kaggle/input/resnet-dataset/pytorch_model.bin'
# `map_location` remaps every stored tensor onto `device`, so loading does not
# depend on which device the checkpoint was saved from.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code. Only load checkpoints from a trusted source, and consider
# `weights_only=True` if the installed torch version supports it.
model.load_state_dict(torch.load(model_path, map_location=device))
# Switch to inference behaviour (e.g. BatchNorm uses its running statistics).
model.eval()
model.to(device)



CombinedModel(
  (encoder): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv2

In [3]:
# Preprocessing pipeline applied to every crop before classification:
#   1. bicubic resize to 150x150,
#   2. conversion to a tensor,
#   3. per-channel normalization with mean 0.5 and std 0.5.
# Alternative normalization statistics that are left disabled:
#   mean (0.48145466, 0.4578275, 0.40821073), std (0.26862954, 0.26130258, 0.27577711)
_resize_step = transforms.Resize((150, 150), interpolation=InterpolationMode.BICUBIC)
_normalize_step = transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3)
transform = transforms.Compose([
    _resize_step,
    transforms.ToTensor(),
    _normalize_step,
])

In [4]:
def pred_1_sample(root_dir, data, model, transform, device):
    """Classify the image crop described by one detection row.

    Args:
        root_dir: Directory containing one sub-folder per ``data[0]`` value,
            named with that value zero-padded to 3 characters.
        data: Detection row. Only the first six entries are read:
            ``data[0]`` selects the sub-folder, ``data[1]`` is the frame
            number (the file opened is ``frame{data[1] - 1}.jpg``) and
            ``data[2:6]`` is the box as left, top, width, height in pixels.
        model: Callable returning logits of shape ``(1, num_classes)`` for a
            batch holding a single image.
        transform: Callable turning the cropped PIL image into a tensor.
        device: Device the input batch is moved to before the forward pass.

    Returns:
        A ``(confidence, label)`` tuple: the softmax probability of the
        predicted class rounded to 2 decimals, and the predicted class index.
    """
    # Read everything from the `data` argument rather than from a global, so
    # the function works no matter what the caller names its loop variable.
    folder_name = str(data[0]).zfill(3)
    image_path = os.path.join(root_dir, folder_name, f"frame{data[1]-1}.jpg")
    left, top, width, height = data[2:6]

    # The context manager closes the underlying file once the crop has been
    # converted to a tensor.
    with Image.open(image_path) as image:
        # Force 3 channels: a grayscale or CMYK file would not match a
        # transform/model that expects RGB input.
        cropped_image = image.convert('RGB').crop(
            (left, top, left + width, top + height)
        )
        image_tensor = transform(cropped_image)

    image_batch = image_tensor.unsqueeze(0).to(device)

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        pred = model(image_batch)
        score = F.softmax(pred, dim=1)

    label = int(torch.argmax(score, dim=1).item())
    return round(score[0, label].item(), 2), label

In [5]:
# Refine the detector output with the classifier.
# Each input row is comma-separated: fields 0-6 are parsed as integers and
# field 7 as a float. Field 6 is presumably the detector class id and
# field 7 the detection confidence (TODO confirm against the file format).
gt_dir = '/kaggle/input/resnet-dataset/gt_pred_yolo5.txt'
root_dir = '/kaggle/input/aicity-track5-test'
data = []
i = 0
with open(gt_dir, "r") as file:
    for i, line in enumerate(file, start=1):
        fields = line.strip().split(',')
        # Keep the parsed row bound to the module-level name `line`: code
        # elsewhere in the notebook may read it as a global.
        line = [int(value) for value in fields[:7]] + [float(fields[7])] + fields[8:]

        if line[6] == 1:
            # Rows with value 1 in field 6 are kept unchanged.
            data.append(line)
        elif line[6] <= 5:
            score, label = pred_1_sample(root_dir, line, model, transform, device)
            # Blend the stored score (85%) with the classifier score (15%).
            line[7] = line[7] * 0.85 + score * 0.15
            # Split value c into 2c-1 (classifier label 0) or 2c-2 (any other label).
            offset = 1 if label == 0 else 2
            line[6] = line[6] * 2 - offset
            data.append(line)
        # Rows whose field 6 is greater than 5 are dropped.

        if i % 100 == 0:
            print(f"Done {i} sample")

Done 100 sample
Done 200 sample
Done 300 sample
Done 400 sample
Done 500 sample
Done 600 sample
Done 700 sample
Done 800 sample
Done 900 sample
Done 1000 sample
Done 1100 sample
Done 1200 sample
Done 1300 sample
Done 1400 sample
Done 1500 sample
Done 1600 sample
Done 1700 sample
Done 1800 sample
Done 1900 sample
Done 2000 sample
Done 2100 sample
Done 2200 sample
Done 2300 sample
Done 2400 sample
Done 2500 sample
Done 2600 sample
Done 2700 sample
Done 2800 sample
Done 2900 sample
Done 3000 sample
Done 3100 sample
Done 3200 sample
Done 3300 sample
Done 3400 sample
Done 3500 sample
Done 3600 sample
Done 3700 sample
Done 3800 sample
Done 3900 sample
Done 4000 sample
Done 4100 sample
Done 4200 sample
Done 4300 sample
Done 4400 sample
Done 4500 sample
Done 4600 sample
Done 4700 sample
Done 4800 sample
Done 4900 sample
Done 5000 sample
Done 5100 sample
Done 5200 sample
Done 5300 sample
Done 5400 sample
Done 5500 sample
Done 5600 sample
Done 5700 sample
Done 5800 sample
Done 5900 sample
Done 6

In [6]:
# Destination file for the refined detections.
file_name = "/kaggle/working/gt_pred_yolo5_resnet.txt"

# Write every row as one comma-separated line.
with open(file_name, 'w') as file:
    for sublist in data:
        # Round the score (field 7) to 2 decimals; this updates the row in `data` too.
        sublist[7] = round(sublist[7], 2)
        # Convert every field to its string form before joining with commas.
        sublist = list(map(str, sublist))
        file.write(f"{','.join(sublist)}\n")