# Access MLflow UI via APIs

In [1]:
from mlflow.tracking import MlflowClient
from mlflow.entities import ViewType
import mlflow
from datetime import datetime

## Get experiments

In [24]:
# Tracking store: a SQLite database file named mlflow.db, given as a relative path.
MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"

# Client bound to that store; the cells below use it for experiment, run and registry calls.
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

In [25]:
# List the experiments in the tracking store (no filter or ordering supplied).
client.search_experiments()

## Create new experiment

In [5]:
# Create an experiment called "demo_experiment".
# NOTE(review): re-running this cell presumably fails once the name already exists — confirm.
client.create_experiment(name="demo_experiment")

In [6]:
# Fetch up to five runs of experiment '1', best (lowest) RMSE first.
runs = client.search_runs(
    experiment_ids='1',  # NOTE(review): hard-coded experiment id — confirm it matches the intended experiment
    filter_string='metrics.rmse < 6.8',  # only runs whose logged rmse is below 6.8
    run_view_type=ViewType.ACTIVE_ONLY,  # active runs only
    max_results=5,
    order_by=['metrics.rmse ASC']  # ascending, so the smallest rmse comes first
)

In [7]:
# Report each matching run's id together with its RMSE, rounded to four decimals.
for run in runs:
    rmse = run.data.metrics['rmse']
    print(f"run id: {run.info.run_id}, rmse: {rmse:.4f}")

# Promote models to the registry

In [28]:
# Point the module-level mlflow API (used by mlflow.register_model below) at the same store as `client`.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [29]:
# Register the model logged by one specific run under a registry name.
run_id = "b9078223b5944af9ac8a0a6c57127cdb"  # NOTE(review): hard-coded run id — replace with a run from your own store
model_uri = f"runs:/{run_id}/model"  # artifact path "model" inside that run
name = "nyc_taxis_xgboost"  # registry name, reused by the cells below
mlflow.register_model(model_uri=model_uri, name=name)

## Get latest versions

Registered model 'nyc_taxis_xgboost' already exists. Creating a new version of this model...
Created version '2' of model 'nyc_taxis_xgboost'.

The MLflow UI will now show "Version 2" as the latest version of the model.

In [None]:
# Print the version number and stage of each entry returned for `name`.
# NOTE(review): model stages and get_latest_versions are reported as deprecated
# in recent MLflow releases — confirm against the installed version.
latest_versions = client.get_latest_versions(name=name)
for model_version in latest_versions:
    print(f"version: {model_version.version}, stage: {model_version.current_stage}")

## Update model stage
Assume we want to transition version 2 of the model to the "Staging" stage.

In [None]:
# Move version 2 of the registered model into the "Staging" stage.
client.transition_model_version_stage(
    name=name,
    version=2,  # NOTE(review): hard-coded; presumably the version created by the register_model cell — confirm
    stage="Staging",
    archive_existing_versions=False  # do not archive versions already in the target stage
)

## Annotate model

In [None]:
# Record the transition date in the description of version 2.
date = datetime.today().date()  # calendar date only, no time component
client.update_model_version(
    name=name,
    version=2,  # same hard-coded version as the stage transition
    description=f"The model version was transitioned to staging on {date}."
)

## Compare versions 

In [9]:
from sklearn.metrics import mean_squared_error
import pandas as pd


def read_dataframe(path):
    """Load a trip-record parquet file and derive the trip duration.

    Adds a ``duration`` column holding the minutes elapsed between
    ``lpep_pickup_datetime`` and ``lpep_dropoff_datetime``, keeps only trips
    strictly longer than 1 minute and strictly shorter than 60 minutes, and
    casts ``PULocationID`` / ``DOLocationID`` to strings.

    Args:
        path: Location of the parquet file, passed straight to
            ``pandas.read_parquet``.

    Returns:
        A new DataFrame containing only the retained trips.
    """
    df = pd.read_parquet(path)

    # Vectorised timedelta -> minutes; avoids one Python-level call per row.
    elapsed = df['lpep_dropoff_datetime'] - df['lpep_pickup_datetime']
    df['duration'] = elapsed.dt.total_seconds() / 60

    # Copy explicitly so the column assignment below writes to an independent
    # frame instead of a slice of the original (no SettingWithCopyWarning).
    df = df[(df['duration'] > 1.0) & (df['duration'] < 60.0)].copy()

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)
    return df

def preprocess(df, dv):
    """Build the model's feature matrix from a trips DataFrame.

    Combines the pickup and dropoff location IDs into a single ``PU_DO``
    feature, pairs it with ``trip_distance``, and hands the per-row records
    to ``dv.transform``.

    Note:
        The ``PU_DO`` column is added to ``df`` in place, so the caller's
        frame gains that column as a side effect.

    Args:
        df: Trips frame with ``PULocationID``, ``DOLocationID`` and
            ``trip_distance`` columns.
        dv: Object exposing ``transform`` that accepts a list of dicts
            (presumably a fitted DictVectorizer — confirm).

    Returns:
        Whatever ``dv.transform`` returns for the feature records.
    """
    pickup, dropoff = df['PULocationID'], df['DOLocationID']
    df['PU_DO'] = pickup + '_' + dropoff

    feature_columns = ['PU_DO', 'trip_distance']
    records = df[feature_columns].to_dict(orient='records')
    return dv.transform(records)


def test_model(name, stage, X_test, y_test):
    """Score the registered model in a given stage on a test set.

    Args:
        name: Registered model name.
        stage: Registry stage; used to build the ``models:/{name}/{stage}``
            URI that is loaded.
        X_test: Features passed to the loaded model's ``predict``.
        y_test: True target values to compare the predictions against.

    Returns:
        A dict ``{"rmse": value}`` with the root mean squared error.
    """
    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    y_pred = model.predict(X_test)
    # `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6;
    # taking the square root of the MSE gives the same RMSE on every version.
    mse = mean_squared_error(y_test, y_pred)
    return {"rmse": mse ** 0.5}

In [11]:
# Trip data file for 2024-03 (per the file name), used below as the test set.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
data_2403_path = "/Users/yihanzhou/PycharmProjects/mlops-zoomcamp/data/green_tripdata_2024-03.parquet"
df = read_dataframe(data_2403_path)

In [16]:
# Download the "preprocessor" artifact of run `run_id` into the current directory.
client.download_artifacts(run_id=run_id, path="preprocessor", dst_path=".")

In [17]:
import pickle

# Load the pickled preprocessor (presumably the file fetched by the
# download_artifacts cell — confirm).
# NOTE: pickle.load can execute arbitrary code; only load artifacts from a trusted store.
with open("preprocessor/preprocessor.b","rb") as f_in:
    dv = pickle.load(f_in)

In [19]:
# Build the test features by running the trips through the loaded preprocessor.
x_test = preprocess(df, dv)

In [20]:
# True trip durations (minutes, as computed in read_dataframe) as a plain array.
target = "duration"
y_test = df[target].values

In [31]:
# Time and score the model version currently in the "Production" stage.
%time test_model(name=name, stage="Production", X_test=x_test, y_test=y_test)

In [32]:
# Time and score the model version currently in the "Staging" stage for comparison.
%time test_model(name=name, stage="Staging", X_test=x_test, y_test=y_test)