# LSML2 HSE Final Project

 - **Model Management:** MLflow
 - **Web Interface Management:** Streamlit app
 - **Model API:** MLflow model serving

 ## Task

 Vehicle damage detection.
 Detect vehicle damage using modern computer vision (CV) models.

## Models

Models compared: YOLO11n, YOLO11s


 

## Starting the Services with Docker Compose



In [None]:
%%writefile docker-compose.yml
# Compose file for the two project services: the MLflow server and the web app.
version: "3.7"

services:
  # MLflow service; the image is built from the ./mlflow directory.
  mlflow:
    build: mlflow
    ports:
      # host:container; the notebook connects to MLflow on localhost:5001
      - 5001:5001
    volumes:
      # bind-mount the host directory ./mlflow/data at /mldata in the container
      - ./mlflow/data:/mldata
  # Web application; the image is built from the ./web_app directory
  # (presumably the Streamlit app, served on port 8501 - confirm in web_app/).
  webapp:
    build: web_app
    ports:
      - 8501:8501


In [1]:
import torch

# Train on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
device  # bare expression: displayed as the cell output in the notebook

'cuda'

## Dataset

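https://universe.roboflow.com/claimoo-52a5r/car-damage-detection-20na7/dataset/6

Note: the loading cell below downloads version 4 of project `damage-type-nogzj` from workspace `ae-43fv6`; confirm that this is the same dataset as the one linked above.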

In [6]:
## Loading Dataset

import os

from roboflow import Roboflow

# The Roboflow API key is a credential, so it is read from the environment
# instead of being committed with the notebook. Export ROBOFLOW_API_KEY before
# running this cell; a missing variable raises KeyError naming it.
# NOTE(review): the key that used to be hard-coded here has been published with
# the notebook and should be treated as compromised and rotated.
rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])
project = rf.workspace("ae-43fv6").project("damage-type-nogzj")
version = project.version(4)
# Download dataset version 4 in the "yolov11" format into datasets/damage_yolo
# (the directory referenced as `path` in damage_dataset.yaml).
dataset = version.download("yolov11", 'datasets/damage_yolo')


loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in datasets/damage_yolo to yolov11:: 100%|██████████| 116384/116384 [00:18<00:00, 6457.77it/s]





Extracting Dataset Version Zip to datasets/damage_yolo in yolov11:: 100%|██████████| 4864/4864 [00:02<00:00, 2198.12it/s]


# MLflow

In [2]:
import mlflow
# Address of the MLflow tracking server (port 5001 is the one published by the
# `mlflow` service in docker-compose.yml) and the experiment used for all runs.
MLFLOW_SERVER_URL = 'http://localhost:5001/'
experiment_name = 'damage_detection_yolo'

# Explicit client bound to that server; used later for tags and the model registry.
client = mlflow.tracking.MlflowClient(tracking_uri=MLFLOW_SERVER_URL)

# Point the module-level mlflow API at the same server and select the experiment.
mlflow.set_tracking_uri(MLFLOW_SERVER_URL)
experiment = mlflow.set_experiment(experiment_name=experiment_name)

experiment  # bare expression: displayed as the cell output in the notebook

<Experiment: artifact_location='mlflow-artifacts:/805406121781784106', creation_time=1734295692503, experiment_id='805406121781784106', last_update_time=1734295692503, lifecycle_stage='active', name='damage_detection_yolo', tags={}>

## Load and Fine-tune YOLO Models

In [2]:
## Prepare MLFlow Yolo Models Tracking
from ultralytics import settings
import os

# NOTE(review): this bare expression has no effect. os.curdir is only the
# constant string for the current directory, and since it is not the last
# expression of the cell nothing is displayed. Looks like a leftover.
os.curdir

# Update a setting: turn on the "mlflow" entry of the Ultralytics settings so
# that training runs are logged to MLflow.
settings.update({"mlflow": True})
# Disabled: would pass the experiment name to the integration via the environment.
# os.environ["MLFLOW_EXPERIMENT_NAME"]=experiment_name

In [8]:
%%writefile damage_dataset.yaml

path: datasets/damage_yolo # dataset root dir
train: train # train images (relative to 'path')
val: valid # val images (relative to 'path')

# Classes (4 classes)
names:
    0: dent
    1: glass_break
    2: scratch
    3: smash

Overwriting damage_dataset.yaml


In [34]:
from ultralytics import YOLO

# Registered-model aliases used by the deployment helpers in this cell.
# Keys are the logical roles, values are the alias strings stored in MLflow.
# NOTE(review): "archived" is not referenced anywhere in the visible code.
VERSION_ALIASES = {
    "production": "production",
    "best_candidate": "best_candidate",
    "archived": "archived",
}

def train_and_log_to_mlflow(experiment, yolo_model,epochs):
    """Fine-tune a YOLO model on the damage dataset inside a new MLflow run.

    Args:
        experiment: MLflow experiment; its ``name`` is passed to Ultralytics
            as the training ``project``.
        yolo_model: Model name or weights path accepted by ``YOLO(...)``.
        epochs: Number of training epochs.

    Returns:
        None. The training result is not returned to the caller.
    """
    detector = YOLO(yolo_model)
    with mlflow.start_run() as active_run:
        # `project` / `name` carry the MLflow experiment name and run name
        # (per the original author's notes on how the integration uses them).
        train_options = {
            "project": experiment.name,
            "name": active_run.info.run_name,
            "data": "damage_dataset.yaml",
            "device": device,  # module-level value chosen in the first code cell
            "epochs": epochs,
            "imgsz": 640,
        }
        detector.train(**train_options)

def current_prod(experiment_name):
    """Return the most recently updated 'Production' version of a registered model.

    Args:
        experiment_name: Name of the registered model (this notebook registers
            the model under the experiment's name).

    Returns:
        The model version in stage 'Production' with the largest
        ``last_updated_timestamp``, or None when no version is in that stage.
    """
    registered = client.get_registered_model(experiment_name)
    in_production = [
        mv for mv in registered.latest_versions if mv.current_stage == 'Production'
    ]
    # Newest first; the negated key keeps the original tie order (stable sort).
    newest_first = sorted(in_production, key=lambda mv: -mv.last_updated_timestamp)
    return newest_first[0] if newest_first else None

def rc_candidates(experiment):
    """Collect the runs that are still eligible for promotion, best mAP50 first.

    A run's state is kept in its 'staging' tag:

    * no tag: a new run; it is tagged 'rc' and becomes a candidate;
    * 'rejected': the run already lost a comparison and is skipped;
    * any other value: the run stays a candidate as it is.

    Previously the rejected runs were appended as well and re-tagged 'rc', so
    a rejection never stuck. Runs that have no 'metrics/mAP50B' metric cannot
    be ranked; they are skipped instead of raising KeyError in the sort.

    Args:
        experiment: MLflow experiment whose runs are searched.

    Returns:
        List of runs sorted by 'metrics/mAP50B' in descending order
        (possibly empty).
    """
    metric = 'metrics/mAP50B'
    candidates = []
    for run in client.search_runs([experiment.experiment_id]):
        status = run.data.tags.get('staging')
        if status == 'rejected':
            continue
        if metric not in run.data.metrics:
            # Left untagged on purpose so the run is looked at again later.
            continue
        if status is None:
            client.set_tag(run.info.run_id, 'staging', 'rc')
        candidates.append(run)
    candidates.sort(key=lambda r: r.data.metrics[metric], reverse=True)
    return candidates

def roll_best_to_staging(experiment):
    """Promote the best candidate run to a new 'best_candidate' model version.

    The best run (first element of ``rc_candidates``) is compared with the
    'mAP50' tag of the current production version (0 when there is none).
    Every other candidate is tagged 'rejected'.

    Args:
        experiment: MLflow experiment to take the candidate runs from.

    Returns:
        The model version carrying the 'best_candidate' alias when the best
        run beats production; None when there are no candidates or the best
        run is not better (it is then tagged 'rejected' as well).
    """
    prod = current_prod(experiment.name)
    prod_mAP = float(prod.tags['mAP50']) if prod else 0

    ranked = rc_candidates(experiment)
    if not ranked:
        return None

    # Reject everything except the best run.
    best, *others = ranked
    for loser in others:
        client.set_tag(loser.info.run_id, 'staging', 'rejected')

    beats_production = best.data.metrics['metrics/mAP50B'] > prod_mAP
    if not beats_production:
        client.set_tag(best.info.run_id, 'staging', 'rejected')
        return None

    new_version = create_model_version(best)
    client.set_tag(best.info.run_id, 'staging', 'staging')
    alias = VERSION_ALIASES['best_candidate']
    client.set_registered_model_alias(new_version.name, alias, new_version.version)
    # Re-read through the alias so the returned version reflects the registry state.
    return client.get_model_version_by_alias(new_version.name, alias)

def create_model_version(run):
    """Register the best weights of a training run as a new model version.

    The version is tagged with the run's 'epochs' parameter and its
    'metrics/mAP50B' metric (stored under the tag 'mAP50').

    Args:
        run: MLflow run whose ``weights/best.pt`` artifact is registered.

    Returns:
        The model version returned by ``client.create_model_version``.
    """
    version_tags = {
        'epochs': run.data.params['epochs'],
        'mAP50': run.data.metrics['metrics/mAP50B'],
    }
    weights_uri = f"{run.info.artifact_uri}/weights/best.pt"
    # NOTE(review): the registered-model name comes from the module-level
    # `experiment`, not from `run`.
    return client.create_model_version(
        experiment.name, weights_uri, run.info.run_id, tags=version_tags
    )

def test_model_performance(model_version):
    """Download the weights of a model version and run YOLO validation on them.

    Args:
        model_version: Model version whose ``source`` points at the weights.

    Returns:
        The ``results_dict`` of the validation metrics.
    """
    weights_path = mlflow.artifacts.download_artifacts(model_version.source)
    candidate = YOLO(weights_path)
    # No dataset is passed to val(); presumably the one stored with the
    # trained weights is used - confirm.
    return candidate.val().results_dict

def prepdoduction_test(model_version):
    """Validate a candidate version and decide whether it may go to production.

    The candidate is validated with ``test_model_performance`` and its
    'metrics/mAP50(B)' value is stored on the version as the tag 'test_mAP50'.
    The value is then compared with the 'test_mAP50' tag of the current
    production version.

    Previously both branches returned True, so the check could never fail, and
    the production tag was compared with the float metric without conversion.

    Args:
        model_version: Candidate model version to validate.

    Returns:
        True when there is no production version, when production has no
        'test_mAP50' tag to compare with, or when the candidate's mAP50 is
        strictly higher than production's; False otherwise.
    """
    production = current_prod(model_version.name)
    candidate_mAP = test_model_performance(model_version)['metrics/mAP50(B)']
    client.set_model_version_tag(
        model_version.name, model_version.version, 'test_mAP50', candidate_mAP
    )
    if not production or 'test_mAP50' not in production.tags:
        # No baseline to compare against: let the candidate through.
        return True
    # Tag values are converted with float() before the numeric comparison.
    return bool(float(production.tags['test_mAP50']) < candidate_mAP)


def roll_to_production(model_version):
    """Mark a model version as the production model.

    Sets the 'production' alias on the version and moves it to the
    'Production' stage, archiving the versions that were in that stage.

    Args:
        model_version: Model version to promote.

    Returns:
        The same ``model_version`` object that was passed in.
    """
    model_name, number = model_version.name, model_version.version
    client.set_registered_model_alias(
        model_name, VERSION_ALIASES['production'], number
    )
    client.transition_model_version_stage(
        model_name, number, stage='Production', archive_existing_versions=True
    )
    return model_version

# Model staging->production cycle
def perform_deployment(experiment):
    """Run one staging -> production cycle for the experiment.

    Args:
        experiment: MLflow experiment whose runs are considered.

    Returns:
        True when a new version was rolled to production; False when there
        was no candidate to deploy or the pre-production test failed.
    """
    candidate = roll_best_to_staging(experiment)
    if candidate is None:
        print('Nothing to deploy')
        return False

    if not prepdoduction_test(candidate):
        print('Test failed')
        return False

    roll_to_production(candidate)
    return True


In [26]:
# Perform deployment: promote the best eligible run and roll it to production.
# The call returns True when a model version was deployed.
perform_deployment(experiment)

2024/12/16 16:53:36 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: damage_detection_yolo, version 9
  client.transition_model_version_stage(model_version.name, model_version.version, 'Production', True)


True

In [None]:
# Run the cycle again without new training runs: there is no new best
# candidate, so nothing is deployed and the call returns False.
perform_deployment(experiment)

Nothing to deploy


False

# Reload / Start Model API Service 