In [34]:
from ultralytics import YOLO
import os
from PIL import Image
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset

import torchvision
from torchvision import transforms
import torchvision.transforms as T
import torchvision.transforms.functional as F


In [19]:
class ShoppingCartDataset(Dataset):
    """Image/label dataset read from a pair of directories.

    Every ``.jpg``/``.png`` file in ``images_dir`` is one sample. Its label is
    read from the file in ``labels_dir`` that has the same base name and a
    ``.txt`` extension.

    Args:
        images_dir: Directory containing the image files.
        labels_dir: Directory containing one ``.txt`` label file per image.
        transform: Optional callable applied to the RGB PIL image.
        save_dir: Optional directory. When set, every image returned by
            ``__getitem__`` is also written there under its original file
            name. The directory is created if it does not exist.
    """

    # File-name suffixes (case-sensitive) that identify an image file.
    IMAGE_EXTENSIONS = ('.jpg', '.png')

    def __init__(self, images_dir, labels_dir, transform=None, save_dir=None):
        self.images_dir = images_dir
        self.labels_dir = labels_dir
        self.save_dir = save_dir
        self.transform = transform
        # os.listdir() returns entries in arbitrary order; sort so that an
        # index maps to the same file on every run and every machine.
        self.image_files = sorted(
            f for f in os.listdir(images_dir) if f.endswith(self.IMAGE_EXTENSIONS)
        )
        if self.save_dir:
            # exist_ok avoids the check-then-create race of a separate
            # os.path.exists() test.
            os.makedirs(self.save_dir, exist_ok=True)

    def __len__(self):
        """Return the number of image files found in ``images_dir``."""
        return len(self.image_files)

    def _label_path(self, img_name):
        """Return the label file path for ``img_name`` (same stem, ``.txt``)."""
        # Swap only the final extension. Chained str.replace() calls would also
        # rewrite '.jpg'/'.png' occurring in the middle of a file name.
        stem, _ext = os.path.splitext(img_name)
        return os.path.join(self.labels_dir, stem + '.txt')

    def __getitem__(self, idx):
        """Load one sample and, if ``save_dir`` is set, write its image to disk.

        Args:
            idx: Position of the sample in ``self.image_files``.

        Returns:
            tuple: ``(image, label, original_size)`` where ``image`` is the
            output of ``transform`` (or the RGB PIL image when no transform is
            set), ``label`` is a float32 tensor built from the first line of
            the label file, and ``original_size`` is the PIL ``size`` of the
            image before any transform.

        Raises:
            IndexError: If ``idx`` is outside ``self.image_files``.
            FileNotFoundError: If the image or its label file is missing.
        """
        img_name = self.image_files[idx]
        img_path = os.path.join(self.images_dir, img_name)
        label_path = self._label_path(img_name)

        # convert() produces an independent, fully decoded image, so the
        # source file handle can be released as soon as the block exits.
        with Image.open(img_path) as raw:
            image = raw.convert('RGB')
        original_size = image.size
        if self.transform:
            image = self.transform(image)

        if self.save_dir:
            save_path = os.path.join(self.save_dir, img_name)
            # Without a transform the sample is still a PIL image, which
            # to_pil_image() rejects; save it directly in that case.
            if isinstance(image, Image.Image):
                image_pil = image
            else:
                image_pil = F.to_pil_image(image)
            # NOTE(review): if the transform ends with Normalize, the tensor is
            # no longer in [0, 1] and the saved file will presumably not look
            # like the source image - confirm this is intended.
            image_pil.save(save_path)

        # Only the first line of the label file is parsed.
        # NOTE(review): label files with several rows would lose the others -
        # confirm one row per image is the expected format.
        with open(label_path, 'r') as file:
            label = file.readline().strip().split()
        label = torch.tensor([float(x) for x in label], dtype=torch.float32)

        return image, label, original_size

In [29]:
def get_transforms(resize, mean_rgb, std_rgb, tf=None):
    """Build the torchvision preprocessing pipelines for the train and test splits.

    Args:
        resize: Target size handed to ``Resize`` and ``CenterCrop`` in the
            "test" pipeline. The "train" pipeline does not resize.
        mean_rgb: Per-channel means passed to ``Normalize``.
        std_rgb: Per-channel standard deviations passed to ``Normalize``.
        tf: Currently unused; accepted so existing calls keep working.

    Returns:
        dict: ``{"train": Compose, "test": Compose}``.
    """
    train_pipeline = transforms.Compose([
        # Training images keep their native size; insert
        # transforms.Resize(resize) here if fixed-size inputs are needed.
        transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean_rgb, std_rgb),
    ])

    test_pipeline = transforms.Compose([
        transforms.Resize(resize),
        transforms.CenterCrop(resize),
        # Deliberately no RandomHorizontalFlip: a random flip makes test
        # preprocessing non-reproducible, and nothing in this notebook mirrors
        # the label files to match a flipped image.
        transforms.ToTensor(),
        transforms.Normalize(mean_rgb, std_rgb),
    ])

    return {"train": train_pipeline, "test": test_pipeline}

## Augmentation

In [31]:
# Build both preprocessing pipelines, wrap each split in a dataset, then index
# every sample once so its transformed image is written to the split's save_dir.
resize = (640, 640)  # TODO: Change here if Cart1 or 5
mean_rgb = [0.485, 0.456, 0.406]  # TODO: Need update
std_rgb = [0.229, 0.224, 0.225]  # TODO: Need update
transform = get_transforms(resize, mean_rgb, std_rgb)

train_dataset = ShoppingCartDataset(
    images_dir="../datasets/Shopping-Cart-1/train/images",
    labels_dir="../datasets/Shopping-Cart-1/train/labels",
    transform=transform["train"],
    save_dir="../datasets/Shopping-Cart-1/train/transformed_images",
)

test_dataset = ShoppingCartDataset(
    images_dir="../datasets/Shopping-Cart-1/test/images",
    labels_dir="../datasets/Shopping-Cart-1/test/labels",
    transform=transform["test"],
    save_dir="../datasets/Shopping-Cart-1/test/transformed_images",
)

# Indexing a sample is what triggers the save; the returned tuple is discarded.
for i in range(len(train_dataset)):
    train_dataset[i]

for j in range(len(test_dataset)):
    test_dataset[j]

# Calculate R, G, B Statistics

In [None]:
def statsRGB(dir, extensions=('.jpg',), unit_scale=False):
    """Estimate per-channel RGB mean and standard deviation for a directory of images.

    Each matching file is converted to RGB and reduced to one mean and one
    standard deviation per channel. The values returned are the plain averages
    of those per-image numbers: every image has equal weight regardless of its
    size, and the reported std is the mean of per-image stds rather than the
    std of all pixels pooled together.

    Args:
        dir: Directory to scan (not recursive).
        extensions: File-name suffix, or tuple of suffixes, to include.
            Matching is case-sensitive. Defaults to ``('.jpg',)``.
        unit_scale: When True, divide the results by 255 so they are expressed
            on a 0-1 scale instead of the 0-255 scale of 8-bit pixel values.

    Returns:
        tuple: ``((mean_r, mean_g, mean_b), (std_r, std_g, std_b))`` as floats.

    Raises:
        ValueError: If no file in ``dir`` matches ``extensions``. Averaging an
            empty list would otherwise yield NaNs.
    """
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(extensions)

    per_image_means = []
    per_image_stds = []

    # Sorted so the floating-point accumulation order is reproducible.
    for filename in sorted(os.listdir(dir)):
        if not filename.endswith(suffixes):
            continue
        with Image.open(os.path.join(dir, filename)) as img:
            pixels = np.array(img.convert('RGB'))
        # Reduce over the two spatial axes, leaving one value per channel.
        per_image_means.append(pixels.mean(axis=(0, 1)))
        per_image_stds.append(pixels.std(axis=(0, 1)))

    if not per_image_means:
        raise ValueError(f"no files ending in {suffixes} found in {dir!r}")

    overall_mean = np.mean(per_image_means, axis=0)
    overall_std = np.mean(per_image_stds, axis=0)
    if unit_scale:
        overall_mean = overall_mean / 255.0
        overall_std = overall_std / 255.0

    return (
        tuple(float(v) for v in overall_mean),
        tuple(float(v) for v in overall_std),
    )

# Usage: mean_rgb, std_rgb = statsRGB("../datasets/Shopping-Cart-1/train/images")