In [None]:
import os
import shutil
import mlflow
import mlflow.sklearn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from loguru import logger

# Configure Loguru: send INFO-and-above records to a log file rotated at 1 MB.
logger.add("mlflow_training.log", rotation="1 MB", level="INFO")

# Set MLflow tracking URI, falling back to a local server address when the
# MLFLOW_TRACKING_URI environment variable is not set.
mlflow_uri = os.getenv("MLFLOW_TRACKING_URI", "http://localhost:5000")
mlflow.set_tracking_uri(mlflow_uri)

# Define the experiment under which the run below is recorded.
experiment_name = "iris_classification"
mlflow.set_experiment(experiment_name)
logger.info(f"Experiment set: {experiment_name}")

# Load dataset and split it with a fixed seed so the split is reproducible.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=42)

# Define hyperparameters (kept in variables so the same values are both used
# for training and logged to MLflow).
n_estimators = 100
random_state = 42

# Train model. Note: training happens before the MLflow run is opened, so only
# the logging/registration steps execute inside the run context.
model = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
model.fit(X_train, y_train)

# Start MLflow run; the context manager ends the run when the block exits.
with mlflow.start_run() as run:
    run_id = run.info.run_id
    logger.info(f"Run started: {run_id}")

    # Log hyperparameters
    mlflow.log_param("n_estimators", n_estimators)
    mlflow.log_param("random_state", random_state)
    logger.info(f"Logged hyperparameters: n_estimators={n_estimators}, random_state={random_state}")

    # Log metrics: accuracy on the training split and on the held-out split.
    train_accuracy = model.score(X_train, y_train)
    test_accuracy = model.score(X_test, y_test)

    mlflow.log_metric("train_accuracy", train_accuracy)
    mlflow.log_metric("test_accuracy", test_accuracy)
    logger.info(f"Logged metrics: train_accuracy={train_accuracy}, test_accuracy={test_accuracy}")

    # Log model under the artifact path "model" of this run. The registration
    # step below refers to it through the matching "runs:/<run_id>/model" URI.
    mlflow.sklearn.log_model(model, "model")
    logger.info("Model logged to MLflow")

    # Register the model that was just logged under the name "iris_classifier".
    result = mlflow.register_model(
        model_uri=f"runs:/{run_id}/model",
        name="iris_classifier"
    )
    logger.info(f"Model registered: {result.name}, version: {result.version}")

    # Save artifacts: write the feature names to a local file, then attach
    # that file to the run.
    artifact_path = "artifacts"
    os.makedirs(artifact_path, exist_ok=True)
    artifact_file = os.path.join(artifact_path, "iris_features.txt")
    # Explicit encoding so the file contents do not depend on the platform locale.
    with open(artifact_file, "w", encoding="utf-8") as f:
        f.write(str(iris.feature_names))

    mlflow.log_artifact(artifact_file)
    logger.info(f"Artifact logged: {artifact_file}")

logger.info("MLflow training run completed.")