In [1]:
from mlflow.tracking import MlflowClient

# Tracking and model-registry data are stored in the local SQLite database file `mlflow.db`.
MLFLOW_TRACKING_URI = 'sqlite:///mlflow.db'
# Client used by the cells below to query experiments/runs and to manage registered models.
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

In [2]:
# List the experiments known to the MLflow tracking store.
# NOTE(review): `list_experiments` is not available in newer MLflow releases
# (`search_experiments` replaces it there) — confirm against the pinned MLflow version.
client.list_experiments()

[<Experiment: artifact_location='./mlruns/0', experiment_id='0', lifecycle_stage='active', name='Default', tags={}>,
 <Experiment: artifact_location='./mlruns/1', experiment_id='1', lifecycle_stage='active', name='nyc-taxi-experimentc', tags={}>]

In [3]:
# Create the experiment from Python, or reuse it when it already exists, so that
# re-running this cell does not fail on a duplicate experiment name.
# The cell evaluates to the experiment id (a string such as '2').
experiment_name = "nyc-taxi-regressor-advance"
existing_experiment = client.get_experiment_by_name(experiment_name)
if existing_experiment is None:
    experiment_id = client.create_experiment(name=experiment_name)
else:
    experiment_id = existing_experiment.experiment_id
experiment_id

'2'

In [8]:
from mlflow.entities import ViewType

# The experiment id '1' is taken from the `client.list_experiments()` output shown earlier.
# Fetch at most five active runs whose RMSE is below 6.8, best (lowest) RMSE first.
runs = client.search_runs(
    experiment_ids='1',
    filter_string="metrics.rmse < 6.8",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=5,
    order_by=["metrics.rmse ASC"],
)

In [9]:
# Print the id and the RMSE (four decimals) of every run returned by the search.
for candidate in runs:
    rmse = candidate.data.metrics['rmse']
    print(f"run id :  {candidate.info.run_id}, rmse : {rmse:.4f}")

run id :  32bc6b6711534aaaa56eb1dce64e65e3, rmse : 6.2907
run id :  e483667a1ac3497bb2bc18fe374a649f, rmse : 6.2923
run id :  fe9eea2587ff4568b5cc785d88f4a981, rmse : 6.2974
run id :  a8fb39dba0004ebfa784e1d3d2e772f5, rmse : 6.3250
run id :  00d188964e10400da712413a04005960, rmse : 6.3282


In [29]:
import mlflow

# Point the fluent `mlflow` API at the same tracking URI the client uses.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

# Register the artifact logged under `model` by the chosen run.
# When the registered model name already exists, each execution adds a new version
# (see the log output below), so re-running this cell is not idempotent.
run_id = "fe9eea2587ff4568b5cc785d88f4a981"
model_uri = f"runs:/{run_id}/model"
mlflow.register_model(model_uri = model_uri, name ="nyc-taxi-regressor")

Registered model 'nyc-taxi-regressor' already exists. Creating a new version of this model...
2022/08/06 16:10:59 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: nyc-taxi-regressor, version 4
Created version '4' of model 'nyc-taxi-regressor'.


<ModelVersion: creation_timestamp=1659777059080, current_stage='None', description=None, last_updated_timestamp=1659777059080, name='nyc-taxi-regressor', run_id='fe9eea2587ff4568b5cc785d88f4a981', run_link=None, source='./mlruns/1/fe9eea2587ff4568b5cc785d88f4a981/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=4>

In [12]:
# Show the registered models together with their latest versions.
# NOTE(review): like `list_experiments`, this call may be unavailable in newer MLflow
# releases — confirm against the pinned MLflow version.
client.list_registered_models()

[<RegisteredModel: creation_timestamp=1659772795062, description='', last_updated_timestamp=1659773491239, latest_versions=[<ModelVersion: creation_timestamp=1659772795082, current_stage='Archived', description='', last_updated_timestamp=1659772904522, name='nyc-taxi-regressor', run_id='fe9eea2587ff4568b5cc785d88f4a981', run_link='', source='./mlruns/1/fe9eea2587ff4568b5cc785d88f4a981/artifacts/models_mlflow', status='READY', status_message=None, tags={'model': 'xgboost-regressor'}, user_id=None, version=1>,
  <ModelVersion: creation_timestamp=1659772836844, current_stage='Staging', description='', last_updated_timestamp=1659772890197, name='nyc-taxi-regressor', run_id='9225e3ab75e141daa16ccdbfb297ab30', run_link='', source='./mlruns/1/9225e3ab75e141daa16ccdbfb297ab30/artifacts/model', status='READY', status_message=None, tags={'model': 'gradient-boosting-regressor'}, user_id=None, version=2>,
  <ModelVersion: creation_timestamp=1659773491239, current_stage='None', description=None, la

In [30]:
# Show the latest version held in each stage of the registered model.
model_name = "nyc-taxi-regressor"
latest_versions = client.get_latest_versions(model_name)
for model_ver in latest_versions:
    print(f"version : {model_ver.version}, stage : {model_ver.current_stage}")

version : 1, stage : Archived
version : 2, stage : Production
version : 3, stage : Staging
version : 4, stage : None


In [33]:
# Move one model version to a new stage.
# `archive_existing_versions=False` leaves versions already in the target stage as they are.
model_version = 4
new_stage = "Staging"

client.transition_model_version_stage(
    name=model_name,
    version=model_version,
    stage=new_stage,
    archive_existing_versions=False,
)

<ModelVersion: creation_timestamp=1659777059080, current_stage='Staging', description=None, last_updated_timestamp=1659777090083, name='nyc-taxi-regressor', run_id='fe9eea2587ff4568b5cc785d88f4a981', run_link=None, source='./mlruns/1/fe9eea2587ff4568b5cc785d88f4a981/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=4>

In [34]:
from datetime import datetime

# Calendar date (local time) recorded in the description below.
date = datetime.today().date()

# Attach a human-readable description to the model version.
# The misspelling "transaitioned" was corrected, because this text is persisted
# in the model registry and shown to every reader of the model version.
client.update_model_version(
    name=model_name,
    version=model_version,
    description=f"The model version {model_version} was transitioned to {new_stage} on {date}",
)

<ModelVersion: creation_timestamp=1659777059080, current_stage='Staging', description='The model version 4 was transaitioned to Staging on 2022-08-06', last_updated_timestamp=1659777094406, name='nyc-taxi-regressor', run_id='fe9eea2587ff4568b5cc785d88f4a981', run_link=None, source='./mlruns/1/fe9eea2587ff4568b5cc785d88f4a981/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=4>

In [60]:
# Set version 4 to the Staging stage, without archiving other Staging versions.
client.transition_model_version_stage(
    name=model_name,
    version=4,
    stage="Staging",
    archive_existing_versions=False,
)

<ModelVersion: creation_timestamp=1659777059080, current_stage='Staging', description='The model version 4 was transaitioned to Staging on 2022-08-06', last_updated_timestamp=1659778197450, name='nyc-taxi-regressor', run_id='fe9eea2587ff4568b5cc785d88f4a981', run_link=None, source='./mlruns/1/fe9eea2587ff4568b5cc785d88f4a981/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=4>

In [50]:
# Retire version 3 by moving it to the Archived stage.
client.transition_model_version_stage(
    name=model_name,
    version=3,
    stage="Archived",
    archive_existing_versions=False,
)

<ModelVersion: creation_timestamp=1659773491239, current_stage='Archived', description='The model version 3 was transaitioned to Staging on 2022-08-06', last_updated_timestamp=1659777685124, name='nyc-taxi-regressor', run_id='32bc6b6711534aaaa56eb1dce64e65e3', run_link=None, source='./mlruns/1/32bc6b6711534aaaa56eb1dce64e65e3/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

### Testing the registered models

In [40]:
from sklearn.metrics import mean_squared_error
import pandas as pd

def read_dataframe(filename: str) -> pd.DataFrame:
    """Load a trip-record parquet file and prepare it for evaluation.

    Steps performed:
      * parse the pickup / drop-off timestamp columns,
      * add a ``duration`` column holding the trip length in minutes,
      * keep only trips lasting between 1 and 60 minutes (inclusive),
      * cast the ``PULocationID`` / ``DOLocationID`` columns to strings.

    Args:
        filename: Path of the parquet file, passed to ``pd.read_parquet``.

    Returns:
        A new DataFrame with the filtered rows and the added ``duration`` column.
    """
    df = pd.read_parquet(filename)
    df["lpep_dropoff_datetime"] = pd.to_datetime(df["lpep_dropoff_datetime"])
    df["lpep_pickup_datetime"] = pd.to_datetime(df["lpep_pickup_datetime"])

    # Vectorised conversion of the timedelta to minutes (no per-row Python lambda).
    trip_length = df["lpep_dropoff_datetime"] - df["lpep_pickup_datetime"]
    df["duration"] = trip_length.dt.total_seconds() / 60

    # `.copy()` detaches the filtered rows from the original frame, so the column
    # assignment below writes to an independent frame instead of a slice
    # (avoids pandas' SettingWithCopyWarning).
    df = df[df["duration"].between(1, 60)].copy()

    categorical = ["PULocationID", "DOLocationID"]
    df[categorical] = df[categorical].astype(str)

    return df

def preprocess(df, dv):
    """Encode trip records into the feature matrix produced by ``dv``.

    Builds the combined pickup/drop-off feature ``PU_DO`` and pairs it with
    ``trip_distance``, then hands the per-row dicts to ``dv.transform``.
    The input frame is left untouched: ``PU_DO`` is built on a separate frame
    rather than being written into ``df``.

    Args:
        df: Frame with ``PULocationID`` and ``DOLocationID`` columns that can be
            concatenated with ``"_"`` (i.e. strings) and a ``trip_distance`` column.
        dv: Object exposing ``transform(list_of_dicts)``
            (presumably a fitted DictVectorizer — confirm against the training code).

    Returns:
        Whatever ``dv.transform`` returns for the list of feature dicts.
    """
    categorical = ["PU_DO"]
    numerical = ["trip_distance"]

    # Selecting with a list and calling `assign` both return new frames, so the
    # caller's `df` is never modified.
    features = df[numerical].assign(
        PU_DO=df["PULocationID"] + "_" + df["DOLocationID"]
    )
    feature_dicts = features[categorical + numerical].to_dict(orient="records")
    return dv.transform(feature_dicts)

def test_model(name, stage, X_test, y_test):
    """Load a registered model by stage and score it on the given data.

    Args:
        name: Registered model name in the MLflow model registry.
        stage: Registry stage to load the model from, e.g. "Production" or "Staging".
        X_test: Features passed unchanged to the loaded model's ``predict``.
        y_test: True target values aligned with ``X_test``.

    Returns:
        dict: ``{"rmse": <root mean squared error of the predictions>}``.
    """
    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    y_pred = model.predict(X_test)
    # Take the square root explicitly instead of passing `squared=False`: that keyword
    # was deprecated and then removed from scikit-learn's `mean_squared_error`, whereas
    # this form works on old and new releases. For a single target column (as used in
    # this notebook) both forms give the same RMSE.
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    return {"rmse": rmse}

In [36]:
# Load the evaluation data (the file name indicates green-taxi trips for 2021-03).
df = read_dataframe("../data/green_tripdata_2021-03.parquet")

In [45]:
# Download the run's `preprocessor` artifact into the current working directory;
# the call returns the local path of the downloaded artifact (shown below).
client.download_artifacts(run_id = run_id, path = 'preprocessor', dst_path = '.')

'/Users/wasuratsoontronchai/Documents/GitHub/mlops-bootcamp/02-experiment-tracking/preprocessor'

In [46]:
import pickle

# NOTE(review): this reads the preprocessor from `../models/preprocessor/`, whereas the
# previous cell downloads the run's artifact into `./preprocessor` — confirm this is the
# intended file and that it matches the models evaluated below.
# `pickle.load` can execute arbitrary code from the file; only load artifacts you trust.
with open("../models/preprocessor/preprocessor.b", "rb") as f_in:
    dv = pickle.load(f_in)

In [47]:
# Feature matrix produced by the unpickled preprocessor `dv`.
X_test = preprocess(df, dv)

# Ground-truth trip durations in minutes, as computed by `read_dataframe`.
target = "duration"
y_test = df[target].values

In [63]:
# Time and score the model version currently in the Production stage.
%time test_model(name = model_name, stage = "Production", X_test=X_test, y_test = y_test)

CPU times: user 106 ms, sys: 2.74 ms, total: 109 ms
Wall time: 107 ms


{'rmse': 6.659623830022513}

In [68]:
# Time and score the model version currently in the Staging stage, for comparison with Production.
%time test_model(name = model_name, stage = "Staging", X_test=X_test, y_test = y_test)

CPU times: user 10.3 s, sys: 63 ms, total: 10.3 s
Wall time: 796 ms


{'rmse': 6.244794058492957}