# MLflow Hands-On

In [15]:
import os
import warnings
import sys

import pandas as pd
import numpy as np

from sklearn.metrics import (
    root_mean_squared_error, 
    mean_absolute_error,
    r2_score
)
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet

import mlflow
import mlflow.sklearn

# 0. The Data

In [11]:
# Load the wine-quality CSV; the path is relative to the notebook's working directory.
data_path = "data/wine-quality.csv"
data = pd.read_csv(data_path)
# Preview 10 randomly chosen rows. No random_state is passed, so the rows shown differ between runs.
data.sample(10)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
4625,8.3,0.49,0.23,6.65,0.034,6.0,158.0,0.99344,3.05,0.48,11.2,5
3531,6.4,0.28,0.44,7.1,0.048,49.0,179.0,0.99528,3.15,0.48,9.2,5
2212,7.0,0.32,0.35,1.5,0.039,24.0,125.0,0.9918,3.17,0.64,12.2,6
670,7.8,0.42,0.26,9.2,0.058,34.0,199.0,0.9972,3.14,0.55,9.3,6
2864,5.3,0.16,0.39,1.0,0.028,40.0,101.0,0.99156,3.57,0.59,10.6,6
178,6.0,0.67,0.07,1.2,0.06,9.0,108.0,0.9931,3.11,0.35,8.7,4
73,8.6,0.23,0.46,1.0,0.054,9.0,72.0,0.9941,2.95,0.49,9.1,6
1828,7.3,0.17,0.23,6.3,0.051,35.0,240.0,0.9963,3.36,0.54,10.0,6
3403,8.8,0.27,0.25,5.0,0.024,52.0,99.0,0.9925,2.87,0.49,11.4,5
1341,7.6,0.54,0.23,2.0,0.029,13.0,151.0,0.9931,3.04,0.33,10.4,5


# 1. Tracking Experiments

- Command to run the MLflow tracking server locally:

```
mlflow server --backend-store-uri mlruns/ --default-artifact-root mlruns/ --host 0.0.0.0 --port 5000
```

In [12]:
# Point the MLflow client at the tracking server listening on port 5000.
# NOTE(review): 0.0.0.0 is normally a server bind address; as a client URL it may not
# be reachable on every platform — consider "http://127.0.0.1:5000" instead. TODO confirm.
remote_server_uri = "http://0.0.0.0:5000"
mlflow.set_tracking_uri(remote_server_uri)

In [13]:
# Display the tracking URI the client is currently configured with.
mlflow.get_tracking_uri()

'http://0.0.0.0:5000'

In [14]:
# Make "ElasticNet_Wine" the active experiment for the runs started below.
exp_name = "ElasticNet_Wine"
mlflow.set_experiment(exp_name)

2024/10/17 19:50:46 INFO mlflow.tracking.fluent: Experiment with name 'ElasticNet_Wine' does not exist. Creating a new experiment.


<Experiment: artifact_location='/home/ujtyagi/mle-training/mlflow/mlruns/178310088390928711', creation_time=1729174846927, experiment_id='178310088390928711', last_update_time=1729174846927, lifecycle_stage='active', name='ElasticNet_Wine', tags={}>

#### What do we track?

- Code Version
- Start & End Time
- Source
- Parameters
- Metrics
- Artifacts

In [16]:
def eval_metrics(actual, pred):
    """Score predictions against the ground truth.

    Parameters
    ----------
    actual : array-like
        True target values.
    pred : array-like
        Predicted target values.

    Returns
    -------
    tuple
        ``(rmse, mae, r2)`` as computed by ``root_mean_squared_error``,
        ``mean_absolute_error`` and ``r2_score`` respectively.
    """
    # Order matters: callers unpack the result as (rmse, mae, r2).
    scorers = (root_mean_squared_error, mean_absolute_error, r2_score)
    return tuple(score(actual, pred) for score in scorers)

In [21]:
def load_data(data_path):
    """Read a CSV and split it into train/test features and target.

    Parameters
    ----------
    data_path : str or path-like
        Location of a CSV file that contains a ``quality`` column.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)``. The ``X_*`` frames hold every
        column except ``quality``; the ``y_*`` values are single-column
        DataFrames holding ``quality``.

    Notes
    -----
    ``train_test_split`` is called without ``random_state``, so a
    reproducible split relies on the caller seeding NumPy's global RNG first.
    """
    df = pd.read_csv(data_path)
    # Split the frame loaded from `data_path` rather than any notebook-level
    # variable, so the function honours its argument and works on a fresh kernel.
    train_df, test_df = train_test_split(df)
    X_train = train_df.drop(columns='quality')
    X_test = test_df.drop(columns='quality')
    # Double brackets keep the targets as one-column DataFrames.
    y_train = train_df[['quality']]
    y_test = test_df[['quality']]
    return X_train, y_train, X_test, y_test

In [22]:
def train(alpha=0.5, l1_ratio=0.5):
    """Train an ElasticNet on the wine-quality data and log the run to MLflow.

    Parameters
    ----------
    alpha : float, default 0.5
        Passed to ``ElasticNet`` as ``alpha``.
    l1_ratio : float, default 0.5
        Passed to ``ElasticNet`` as ``l1_ratio``.

    Returns
    -------
    None
        Results are printed and recorded in a new MLflow run: the two
        hyperparameters, the ``rmse``/``mae``/``r2`` metrics, the input CSV as
        an artifact, and the fitted model under the artifact path ``"model"``.

    Notes
    -----
    Side effects beyond MLflow logging: all Python warnings are silenced for
    the rest of the session and NumPy's global RNG is reseeded to 40.
    """
    warnings.filterwarnings("ignore")
    # Seed the global RNG before loading so the train/test split is repeatable.
    np.random.seed(40)

    # Read Data
    data_path = "data/wine-quality.csv"
    X_train, y_train, X_test, y_test = load_data(data_path)

    # One MLflow run per call, so repeated calls show up as separate runs.
    with mlflow.start_run():
        # Train Model
        lr = ElasticNet(
            alpha=alpha,
            l1_ratio=l1_ratio,
            random_state=42
        )
        lr.fit(X_train, y_train)

        # Evaluate Metrics
        predicted_qualities = lr.predict(X_test)
        rmse, mae, r2 = eval_metrics(y_test, predicted_qualities)

        # The closing parenthesis was missing from this message.
        print(f"ElasticNet Model (alpha={alpha}, l1_ratio={l1_ratio})")
        print(f"RMSE: {rmse} MAE: {mae} R2: {r2}")

        mlflow.log_param(key='alpha', value=alpha)
        mlflow.log_param(key='l1_ratio', value=l1_ratio)

        mlflow.log_metric(key='rmse', value=rmse)
        mlflow.log_metrics({"mae":mae,"r2":r2})

        # Store the training CSV alongside the run.
        mlflow.log_artifact(data_path)
        print(f"Save to {mlflow.get_artifact_uri()}")
        mlflow.sklearn.log_model(lr,"model")

In [23]:
# Run 1: alpha=0.5, l1_ratio=0.5.
train(0.5, 0.5)

ElasticNet Model (alpha=0.5, l1_ratio=0.5
RMSE: 0.82224284975954 MAE: 0.6278761410160693 R2: 0.12678721972772689
Save to /home/ujtyagi/mle-training/mlflow/mlruns/178310088390928711/a897319064e4455489b2a6ee2cc726f1/artifacts


2024/10/17 20:16:15 INFO mlflow.tracking._tracking_service.client: 🏃 View run gaudy-mole-700 at: http://0.0.0.0:5000/#/experiments/178310088390928711/runs/a897319064e4455489b2a6ee2cc726f1.
2024/10/17 20:16:15 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://0.0.0.0:5000/#/experiments/178310088390928711.


In [24]:
# Run 2: alpha=0.2, l1_ratio=0.2.
train(0.2, 0.2)

ElasticNet Model (alpha=0.2, l1_ratio=0.2
RMSE: 0.7859129997062341 MAE: 0.6155290394093893 R2: 0.20224631822892103
Save to /home/ujtyagi/mle-training/mlflow/mlruns/178310088390928711/d32e443c133845fe853965afb77cf867/artifacts


2024/10/17 20:16:36 INFO mlflow.tracking._tracking_service.client: 🏃 View run debonair-grouse-929 at: http://0.0.0.0:5000/#/experiments/178310088390928711/runs/d32e443c133845fe853965afb77cf867.
2024/10/17 20:16:36 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://0.0.0.0:5000/#/experiments/178310088390928711.


In [25]:
# Run 3: alpha=0.1, l1_ratio=0.1.
train(0.1, 0.1)

ElasticNet Model (alpha=0.1, l1_ratio=0.1
RMSE: 0.7792546522251949 MAE: 0.6112547988118587 R2: 0.2157063843066196
Save to /home/ujtyagi/mle-training/mlflow/mlruns/178310088390928711/927eca3e89ca4f53814698c1eaea0a70/artifacts


2024/10/17 20:18:01 INFO mlflow.tracking._tracking_service.client: 🏃 View run overjoyed-swan-744 at: http://0.0.0.0:5000/#/experiments/178310088390928711/runs/927eca3e89ca4f53814698c1eaea0a70.
2024/10/17 20:18:01 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://0.0.0.0:5000/#/experiments/178310088390928711.
