# Kitchen Dispatch Inspection Project: two steps
### 1. Train an object detection model to identify trays and dishes.
### 2. Train a classification model to categorize the detected objects into three classes: "not_empty", "empty", and "kakigori".


# 1. Install some required libraries

In [1]:
!pip install -q ultralytics --upgrade

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m17.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m47.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m43.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m34.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

# 2. Clone the project

In [2]:
!git clone -b main https://github.com/XuanHiepp/kitchen-dispatch-inspection.git

Cloning into 'kitchen-dispatch-inspection'...
remote: Enumerating objects: 34, done.[K
remote: Counting objects: 100% (34/34), done.[K
remote: Compressing objects: 100% (25/25), done.[K
remote: Total 34 (delta 6), reused 24 (delta 6), pack-reused 0 (from 0)[K
Receiving objects: 100% (34/34), 19.18 KiB | 19.18 MiB/s, done.
Resolving deltas: 100% (6/6), done.


# 2. Load dataset

In [3]:
!gdown --id "1XyYw9ApGeSFkTl298yH4lYM6d_RwtHgg"
!unzip -q Dataset.zip

Downloading...
From (original): https://drive.google.com/uc?id=1XyYw9ApGeSFkTl298yH4lYM6d_RwtHgg
From (redirected): https://drive.google.com/uc?id=1XyYw9ApGeSFkTl298yH4lYM6d_RwtHgg&confirm=t&uuid=c4e9e965-8d57-49da-ae62-d200ad107f01
To: /content/Dataset.zip
100% 91.7M/91.7M [00:01<00:00, 70.7MB/s]


In [4]:
# Move the extracted Dataset folder into the cloned repository.
!mv Dataset kitchen-dispatch-inspection
# Work from the repository root: later cells use paths relative to it
# (e.g. 'Dataset/...' and the `modules` package imports).
%cd kitchen-dispatch-inspection

/content/kitchen-dispatch-inspection


# 3. Set up model structure and training for the detection task

## 3.1 Preprocess detection dataset

In [5]:
    from modules.preprocess_image_det import YOLODatasetAugmentor

    # Input folders (original detection split) and output folders for the
    # augmented copy, which is written under "Dataset/Detection_augmented".
    augmentation_paths = {
        'input_train_images': 'Dataset/Detection/train/images',
        'input_train_labels': 'Dataset/Detection/train/labels',
        'input_val_images': 'Dataset/Detection/val/images',
        'input_val_labels': 'Dataset/Detection/val/labels',
        'output_train_images': 'Dataset/Detection_augmented/train/images',
        'output_train_labels': 'Dataset/Detection_augmented/train/labels',
        'output_val_images': 'Dataset/Detection_augmented/val/images',
        'output_val_labels': 'Dataset/Detection_augmented/val/labels',
    }

    # Request 5 augmentations per image and run the augmentation.
    augmentor = YOLODatasetAugmentor(augmentations_per_image=5, **augmentation_paths)
    augmentor.run()

Augmentation completed.


## 3.2 Train YOLO11 on the new detection dataset

In [6]:
# Download yolov11 model
!gdown --id "1nPpgjSd3nSh37-rORhkvJE1DfJZsgWu7"
!mv yolo11m.pt kitchen-dispatch-inspection

Downloading...
From (original): https://drive.google.com/uc?id=1nPpgjSd3nSh37-rORhkvJE1DfJZsgWu7
From (redirected): https://drive.google.com/uc?id=1nPpgjSd3nSh37-rORhkvJE1DfJZsgWu7&confirm=t&uuid=1262b89c-a97f-4a7f-bcf6-5a4478b4decc
To: /content/kitchen-dispatch-inspection/yolo11m.pt
100% 40.7M/40.7M [00:00<00:00, 141MB/s]


In [None]:
# Model will be saved in "kitchen-dispatch-inspection/runs/detect/train/weights"
from ultralytics import YOLO

# Load pretrained YOLOv11 model
model = YOLO('yolo11m.pt')

# Augmentation for realtime images
augmentation_settings = dict(
    degrees=5,
    translate=0.02,
    scale=0.3,
    shear=0.3,
    perspective=0.0001,
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,
    flipud=0.0,
    fliplr=0.5,
    mosaic=0.8,
    mixup=0.0,
)

# Optimizer, caching, warm-up and multi-scale settings
runtime_settings = dict(
    optimizer='auto',
    cache=True,
    warmup_epochs=3,
    multi_scale=True,
)

# Start training
# NOTE(review): `data` points at 'Dataset/Detection/dataset.yaml', while the augmented
# copy lives under 'Dataset/Detection_augmented' — confirm the YAML references the
# intended (augmented) images.
# NOTE(review): device=0 presumably selects the first GPU — confirm a GPU runtime is attached.
results = model.train(
    data='Dataset/Detection/dataset.yaml',
    epochs=120,
    imgsz=640,
    batch=8,
    device=0,
    patience=10,
    verbose=False,
    **augmentation_settings,
    **runtime_settings,
)

# 4. Set up model structure and training for the classification task

In [8]:
import os
# Folder that collects the trained classifier checkpoints; created if it is missing.
model_save_dir = "models"
os.makedirs(name=model_save_dir, exist_ok=True)

In [9]:
import torch
import torch.nn as nn
from torchvision import models
from modules.classifier import ClsModel
from modules.preprocess_image_cls import ClsDataset

def train_classifier(dataset, model_save_path, num_epochs=50, device='cuda', patience=5):
    """Train a ``ClsModel`` on ``dataset`` and checkpoint the best epoch.

    Every epoch runs one pass over ``dataset.train_loader`` and then measures
    accuracy on ``dataset.val_loader``. Whenever the validation accuracy
    improves, a dict holding the model ``state_dict`` (key ``'model_state'``)
    and the class list (key ``'classes'``) is written to ``model_save_path``.
    Training stops early after ``patience`` consecutive epochs without
    improvement.

    Args:
        dataset: Object exposing ``classes``, ``train_loader`` and
            ``val_loader``; both loaders must yield ``(images, labels)`` batches.
        model_save_path: Destination of the best checkpoint. When it is a
            filesystem path, missing parent directories are created.
        num_epochs: Maximum number of training epochs.
        device: Device the model and every batch are moved to.
        patience: Number of consecutive non-improving epochs tolerated before
            early stopping.

    Raises:
        ValueError: If the training loader has no batches, or the validation
            loader yields no samples (accuracy would be undefined).
    """
    import os  # function-scope import: keeps this cell independent of earlier cells

    # Fail fast instead of dividing by zero when the loss is averaged later.
    num_train_batches = len(dataset.train_loader)
    if num_train_batches == 0:
        raise ValueError("dataset.train_loader is empty; nothing to train on.")

    # Make sure the checkpoint can be written before spending time on training.
    if isinstance(model_save_path, (str, os.PathLike)):
        save_dir = os.path.dirname(model_save_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

    model = ClsModel(num_classes=len(dataset.classes)).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=1)

    best_val_acc = 0
    early_stop_counter = 0

    for epoch in range(num_epochs):
        # ---- Training pass ----
        model.train()
        running_loss = 0.0
        for images, labels in dataset.train_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # The scheduler is stepped once per epoch.
        scheduler.step()

        # ---- Validation pass ----
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in dataset.val_loader:
                images, labels = images.to(device), labels.to(device)
                predicted = model(images).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        if total == 0:
            raise ValueError(
                "dataset.val_loader yielded no samples; cannot compute validation accuracy."
            )

        val_acc = 100 * correct / total
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/num_train_batches:.4f}, Val Acc: {val_acc:.2f}%")

        if val_acc > best_val_acc:
            # New best epoch: persist the weights together with the class list.
            best_val_acc = val_acc
            save_dict = {
                'model_state': model.state_dict(),
                'classes': dataset.classes
            }
            torch.save(save_dict, model_save_path)
            early_stop_counter = 0
        else:
            early_stop_counter += 1
            if early_stop_counter >= patience:
                print("Early stopping triggered!")
                break

    print("Training complete. Best Val Acc:", best_val_acc)

In [None]:
# Tray classifier: build the tray classification dataset, then train on it.
# (Per the original note, ClsDataset also takes care of augmentation — not visible in this notebook.)
tray_checkpoint_path = f"{model_save_dir}/cls_tray_best.pth"
tray_dataset = ClsDataset(dataset_path='Dataset/Classification/tray', batch_size=64)

# The best checkpoint is written to "kitchen-dispatch-inspection/models/cls_tray_best.pth"
train_classifier(tray_dataset, model_save_path=tray_checkpoint_path)

In [None]:
# Dish classifier: build the dish classification dataset, then train on it.
# (Per the original note, ClsDataset also takes care of augmentation — not visible in this notebook.)
dish_checkpoint_path = f"{model_save_dir}/cls_dish_best.pth"
dish_dataset = ClsDataset(dataset_path='Dataset/Classification/dish', batch_size=64)

# The best checkpoint is written to "kitchen-dispatch-inspection/models/cls_dish_best.pth"
train_classifier(dish_dataset, model_save_path=dish_checkpoint_path)