### This notebook prepares the dataset in the format expected by YOLO.

In [7]:
import os
import json
import shutil
import random
from tqdm import tqdm
from pathlib import Path

import cv2
from PIL import Image
import matplotlib.pyplot as plt
from matplotlib import patches

import numpy as np
import pandas as pd

from ultralytics import YOLO

from utils import *

In [8]:
class config:
    """Namespace for notebook-wide constants (split folders and a seed)."""

    # Seed value; not referenced by the cells visible in this notebook.
    seed = 29

    # Folder names of the three dataset splits, each with a trailing slash.
    TRAIN_DIR, VALID_DIR, TEST_DIR = 'train/', 'val/', 'test/'


In [9]:
# Class name -> integer id; ids follow the order in which the names are listed.
class_labels = dict(zip(('coca-cola', 'fanta', 'sprite'), range(3)))

In [None]:
# Names of the dataset splits handled by this notebook.
modes =['train', 'val', 'test']
# `make_df` comes from the star import of `utils`.
# NOTE(review): presumably it builds the `<mode>_data.csv` files that the
# next cell reads — confirm against utils.make_df.
make_df(modes)

In [10]:
# Load one CSV table per split from the current working directory.
# Note the naming: the validation table is read from 'val_data.csv' but
# bound to the name `valid`.
train = pd.read_csv('train_data.csv')
valid = pd.read_csv('val_data.csv')
test = pd.read_csv('test_data.csv')

In [11]:
# `count_bottles` comes from the star import of `utils`; the results are not
# used by any later cell visible here.
# NOTE(review): presumably per-class bottle counts for each split — confirm
# against utils.count_bottles.
train_count = count_bottles(train)
val_count = count_bottles(valid)
test_count = count_bottles(test)

In [12]:
# `preprocess_bbox` comes from the star import of `utils`.
# The export loop further down reads the columns file_name, width, height,
# x, y, w, h and category_id from these frames.
# NOTE(review): presumably this helper is what adds the x/y/w/h columns —
# confirm against utils.preprocess_bbox.
train_df = preprocess_bbox(train)
val_df = preprocess_bbox(valid)
test_df = preprocess_bbox(test)

In [None]:
# Create the output directory with pathlib instead of the IPython `!mkdir`
# shell escape: it is plain Python, works on every OS, and does not fail
# when the folder already exists on a rerun of the cell.
dest_dir = "yolo_data"
Path(dest_dir).mkdir(parents=True, exist_ok=True)

# Write the dataset description file.
# The split entries are given relative to `path` and use forward slashes,
# matching the `<dest_dir>/<mode>/images` folders created by the export loop.
# The previous `*folder path*\...` placeholder was not valid YAML (a leading
# `*` starts an alias) and hard-coded Windows path separators.
# NOTE(review): `path` stays relative, so training should be launched from the
# directory that contains `yolo_data` — confirm this matches your setup.
# `_ =` keeps the notebook from echoing write_text's return value.
_ = Path(f"{dest_dir}/dataset.yaml").write_text(f"""path: {dest_dir}
train: train/images
val: val/images
test: test/images


nc: 3
names: ['coca-cola', 'fanta', 'sprite']
""")


In [None]:
def create_txt_file(path: Path, bboxes, category_ids, width, height):
    """Write one annotation line per bounding box to ``path``.

    Each box is indexed as ``(x, y, w, h)``; ``x``/``y`` are shifted by half
    the box size to obtain the box centre. Centre and size are then divided
    by the image ``width``/``height``, and the line is emitted as
    ``"<category_id> <xc> <yc> <w> <h>"``.

    Args:
        path: Destination file; any existing content is overwritten.
        bboxes: Iterable of indexable boxes, read at positions 0..3.
        category_ids: Iterable of class ids, paired with ``bboxes`` by position.
        width: Image width used to normalise the horizontal values.
        height: Image height used to normalise the vertical values.
    """

    def _format_line(box, label):
        # Same arithmetic order as before: centre first, then normalise.
        left, top, box_w, box_h = box[0], box[1], box[2], box[3]
        center_x = (left + box_w / 2) / width
        center_y = (top + box_h / 2) / height
        return f"{label} {center_x} {center_y} {box_w / width} {box_h / height}"

    lines = [_format_line(box, label) for box, label in zip(bboxes, category_ids)]
    # Lines are newline-separated; no trailing newline is written.
    path.write_text("\n".join(lines))

In [None]:
# Export every split into the YOLO folder layout:
#   <dest_dir>/<mode>/images/  - copies of the source images
#   <dest_dir>/<mode>/labels/  - one .txt annotation file per image
# Every written (image, label) path pair is collected in `path_list`.

# Explicit split -> frame mapping. This replaces the previous
# `locals().get(f"{mode}_df")` lookup, which silently returned None on a name
# mismatch and only worked while this code ran at module/cell level.
split_frames = {"train": train_df, "val": val_df, "test": test_df}

path_list = []
for mode, df in split_frames.items():
    image_folder = Path(dest_dir) / mode / "images"
    image_folder.mkdir(parents=True, exist_ok=True)

    label_folder = Path(dest_dir) / mode / "labels"
    label_folder.mkdir(parents=True, exist_ok=True)

    # One group per image: all of its boxes go into the same label file.
    grouped = df.groupby('file_name')
    for file_name, group_df in tqdm(grouped, total=len(grouped)):
        # Image size is read from the group's first row.
        first_row = group_df.iloc[0]
        width, height = first_row.width, first_row.height

        # Read the box columns and class ids column-wise in a single pass
        # instead of walking the group twice with iterrows().
        bboxes = list(zip(group_df["x"], group_df["y"], group_df["w"], group_df["h"]))
        category_ids = group_df["category_id"].tolist()

        # NOTE(review): `file_name` is also used verbatim as the source file
        # below, so if it already carries an extension the outputs end up as
        # e.g. "a.jpg.jpg" / "a.jpg.txt". The naming is kept unchanged so the
        # image/label stems keep matching — confirm whether it is intended.
        img_path = image_folder / f"{file_name}.jpg"
        label_path = label_folder / f"{file_name}.txt"

        shutil.copy(f"{mode}/{file_name}", img_path)
        create_txt_file(label_path, bboxes, category_ids, width, height)
        path_list.append((str(img_path), str(label_path)))