In [2]:
import os

import pandas as pd
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, f1_score, precision_score,
                             recall_score)
from sklearn.model_selection import train_test_split

import mlflow

In [3]:
# Set MLflow tracking URI
# The call below is commented out, so this notebook does not set a tracking
# URI itself; uncomment it to point the client at a specific server.
# NOTE(review): the recorded run links point at http://mlflow.lab, so the URI
# was presumably supplied elsewhere (e.g. MLFLOW_TRACKING_URI) — confirm.
# mlflow.set_tracking_uri("http://mlflow.lab")  # or http://localhost:5001

In [4]:
# Select the experiment that subsequent runs are recorded under
mlflow.set_experiment(experiment_name="MLflow Quickstart")

<Experiment: artifact_location='s3://mlflow/2', creation_time=1765594362225, experiment_id='2', last_update_time=1765594362225, lifecycle_stage='active', name='MLflow Quickstart', tags={}>

In [5]:
# Enable autologging for scikit-learn
# With this switched on, a plain estimator `fit` call is tracked by MLflow
# without explicit logging calls (see the INFO message printed by the
# "train the model normally" cell further down).
mlflow.sklearn.autolog()



In [6]:
# Configure S3 credentials for artifact storage.
#
# Values already exported to the kernel's environment take precedence; the
# literals below are only fallbacks so the notebook still runs unconfigured.
# NOTE(review): these fallbacks look like local-lab MinIO values — confirm,
# and never replace them with real credentials in a committed notebook.
S3_ENV_DEFAULTS = {
    'AWS_ACCESS_KEY_ID': 'minioadmin',
    'AWS_SECRET_ACCESS_KEY': 'minioadmin123',
    'MLFLOW_S3_ENDPOINT_URL': 'http://minio-api.lab',  # or http://localhost:9000
}
# A loop (rather than bare setdefault expressions) keeps the cell from
# echoing a returned value as its output.
for env_name, env_default in S3_ENV_DEFAULTS.items():
    os.environ.setdefault(env_name, env_default)

In [7]:
# Fetch the Iris dataset and unpack its feature matrix and label vector
iris_bunch = datasets.load_iris()
X, y = iris_bunch.data, iris_bunch.target

In [8]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# identical across re-runs
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# Hyperparameters passed to LogisticRegression and logged to MLflow
params = dict(solver="lbfgs", max_iter=1000, random_state=8888)

In [10]:
# Just train the model normally
# No explicit MLflow calls appear here: with sklearn autologging enabled
# earlier, this fit is tracked in an automatically created run (see the
# "Created MLflow autologging run" message in the output).
lr = LogisticRegression(**params)
lr.fit(X_train, y_train)

2025/12/13 02:59:46 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '2b204b6a9dae4463939bbdd6f4d427a7', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


🏃 View run wistful-owl-533 at: http://mlflow.lab/#/experiments/2/runs/2b204b6a9dae4463939bbdd6f4d427a7
🧪 View experiment at: http://mlflow.lab/#/experiments/2


In [11]:
# Open an explicit MLflow run; everything logged below is attached to it
with mlflow.start_run():
    # Record the hyperparameter dictionary on the run
    mlflow.log_params(params=params)

    # Fit a fresh estimator inside the run
    lr = LogisticRegression(**params)
    lr.fit(X_train, y_train)

    # Store the fitted model; the first five training rows serve as the
    # input example used for signature inference
    model_info = mlflow.sklearn.log_model(
        lr,
        artifact_path="iris_model",
        input_example=X_train[0:5],
    )

    # Score the held-out split and log the resulting accuracy
    y_pred = lr.predict(X_test)
    accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
    mlflow.log_metric(key="accuracy", value=accuracy)

    # Tag the run with a short human-readable description of its purpose
    mlflow.set_tag(key="Training Info", value="Basic LR model for iris data")
    



🏃 View run suave-jay-460 at: http://mlflow.lab/#/experiments/2/runs/a9e3eb378ce449ee8c7033cba1f36638
🧪 View experiment at: http://mlflow.lab/#/experiments/2


In [None]:
# Reload the logged model through MLflow's generic pyfunc interface
loaded_model = mlflow.pyfunc.load_model(model_uri=model_info.model_uri)

# Score the held-out split with the reloaded model
predictions = loaded_model.predict(X_test)

iris_feature_names = datasets.load_iris().feature_names

# Tabulate the test features next to the true and predicted labels
result = pd.DataFrame(X_test, columns=iris_feature_names).assign(
    actual_class=y_test,
    predicted_class=predictions,
)

# Show the first four rows as the cell output
result.head(4)