In [None]:
# Extract the dataset archive into the current working directory.
# Assumes Google Drive is already mounted at /content/drive (Colab) — TODO confirm.
!unzip /content/drive/MyDrive/tennis_court_det_dataset.zip
# Presumably the download location of the archive above:
#https://drive.google.com/file/d/1lhAaeQCmk2y440PmagA0KmIVBIysVMwu/view

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import cv2
import json
from PIL import Image
import numpy as np

In [None]:
# Run on the GPU when CUDA is usable from PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


# Create torch dataset

In [None]:
class KeypointsDataset(Dataset):
    """Dataset yielding ``(image_tensor, keypoints)`` pairs for keypoint regression.

    ``data_file`` is a JSON file holding a sequence of records. Each record
    must provide:

    * ``"id"``  -- stem of the image file, read from ``{img_dir}/{id}.png``;
    * ``"kps"`` -- keypoint coordinates in the pixel space of that image.
      After flattening, even positions are treated as x and odd positions as
      y (presumably a list of ``[x, y]`` pairs -- verify against the data).

    Images are resized to ``img_size`` x ``img_size`` and normalised with the
    mean/std values listed in the transform pipeline; keypoints are rescaled
    by the same factors so they stay aligned with the resized image.
    """

    def __init__(self, img_dir, data_file, img_size=224):
        """Load the annotation list and build the image preprocessing pipeline.

        Args:
            img_dir: Directory containing the ``<id>.png`` images.
            data_file: Path to the JSON annotation file.
            img_size: Side length (pixels) of the square network input.
                Defaults to 224, the value that was previously hard-coded.
        """
        self.img_dir = img_dir
        self.img_size = img_size
        with open(data_file, "r") as f:
            self.data = json.load(f)

        self.transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((img_size, img_size)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the preprocessed image and its rescaled keypoints.

        Args:
            idx: Index into the annotation list.

        Returns:
            Tuple ``(img, kps)``: ``img`` is the output of the transform
            pipeline, ``kps`` is a flat ``float32`` NumPy array laid out as
            ``[x0, y0, x1, y1, ...]`` in resized-image pixel coordinates.

        Raises:
            FileNotFoundError: If the image cannot be read from disk.
        """
        item = self.data[idx]
        img_path = f"{self.img_dir}/{item['id']}.png"
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports a missing/unreadable file by returning None
            # rather than raising; fail here with the offending path instead
            # of an opaque AttributeError on ``img.shape`` below.
            raise FileNotFoundError(f"Could not read image: {img_path}")

        # Keep the original size: keypoints are annotated in that pixel space.
        h, w = img.shape[:2]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR
        img = self.transforms(img)

        kps = np.array(item["kps"]).flatten()
        kps = kps.astype(np.float32)
        kps[::2] *= self.img_size / w   # adjust x coord
        kps[1::2] *= self.img_size / h  # adjust y coord
        return img, kps

In [None]:
# Both splits read from the same image folder; only the annotation file differs.
train_dataset = KeypointsDataset(img_dir="data/images", data_file="data/data_train.json")
val_dataset = KeypointsDataset(img_dir="data/images", data_file="data/data_val.json")

# Shuffle the training batches only; validation keeps a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=8, shuffle=False)

# Create model

In [None]:
# We have 14 keypoints and each point has an x and a y, so the head regresses
# 14 * 2 values.
NUM_KEYPOINTS = 14

# ``pretrained=True`` is deprecated in torchvision; request the ImageNet
# checkpoint explicitly (IMAGENET1K_V1 is what ``pretrained=True`` loaded).
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
# Replace the last fully connected layer (fc) with a keypoint regression head.
model.fc = torch.nn.Linear(model.fc.in_features, NUM_KEYPOINTS * 2)
model = model.to(device)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 122MB/s]


# Train model

In [None]:
# Mean squared error between predicted and target keypoint coordinates.
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
epochs = 10
for epoch in range(epochs):
    # Set training mode explicitly: if this cell is re-run after the model was
    # put in eval mode, BatchNorm/Dropout layers would otherwise stay frozen.
    model.train()
    for i, (imgs, kps) in enumerate(train_loader):
        imgs = imgs.to(device)
        kps = kps.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, kps)
        loss.backward()
        optimizer.step()

        # Report the current batch loss every 100 steps.
        if i % 100 == 0:
            print(f"Epoch {epoch+1}/{epochs}, Step {i+1}/{len(train_loader)}, Loss: {loss.item()}")

Epoch 1/10, Step 1/829, Loss: 15272.8642578125
Epoch 1/10, Step 101/829, Loss: 11448.4072265625
Epoch 1/10, Step 201/829, Loss: 7846.11767578125
Epoch 1/10, Step 301/829, Loss: 5520.09228515625
Epoch 1/10, Step 401/829, Loss: 3392.70166015625
Epoch 1/10, Step 501/829, Loss: 1963.59326171875
Epoch 1/10, Step 601/829, Loss: 1059.955810546875
Epoch 1/10, Step 701/829, Loss: 555.0203857421875
Epoch 1/10, Step 801/829, Loss: 290.1620178222656
Epoch 2/10, Step 1/829, Loss: 198.50535583496094
Epoch 2/10, Step 101/829, Loss: 92.41991424560547
Epoch 2/10, Step 201/829, Loss: 68.82637786865234
Epoch 2/10, Step 301/829, Loss: 69.046630859375
Epoch 2/10, Step 401/829, Loss: 28.460304260253906
Epoch 2/10, Step 501/829, Loss: 39.4101676940918
Epoch 2/10, Step 601/829, Loss: 31.199886322021484
Epoch 2/10, Step 701/829, Loss: 32.50458908081055
Epoch 2/10, Step 801/829, Loss: 20.02859115600586
Epoch 3/10, Step 1/829, Loss: 57.51396942138672
Epoch 3/10, Step 101/829, Loss: 27.276966094970703
Epoch 3/10,

In [None]:
# Persist only the model's state dict (its parameters and buffers) to disk.
torch.save(obj=model.state_dict(), f="keypoints_model.pth")