In [36]:
import pandas as pd
from sklearn.metrics import mean_squared_error
import pickle

import mlflow
from mlflow.tracking import MlflowClient
from mlflow.entities import ViewType

MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"

client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

In [3]:
client.list_experiments()

[<Experiment: artifact_location='./mlruns/1', experiment_id='1', lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>]

In [4]:
client.create_experiment(name="new-test-experiment")
#returns experiment id

'2'

In [8]:
runs = client.search_runs(
    experiment_ids="1",
    filter_string="",
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=5,
    order_by=["metrics.rmse ASC"]
)

for run in runs:
    print("ID", run.info.run_id, "RMSE", run.data.metrics["rmse"])

ID a58990e184d9409387e4575e4b58f9d5 RMSE 6.281218009177687
ID 5869f907f90c43a5a9d3e82c70fa0cd5 RMSE 6.286869922882385
ID 51b5b9b91178465e97fa9d815950447c RMSE 6.291361331044779
ID 5690a67b23234a05ba8519f1487f4811 RMSE 6.29655046845938
ID 2d87bf062fc8401c83274ffee9336ddc RMSE 6.297031457288569


## Promote to Model Registry

In [14]:
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
model_name = "nyc-taxi-regressor"

In [11]:
run_id = "a58990e184d9409387e4575e4b58f9d5"
model_uri = f"runs:/{run_id}/model"

mlflow.register_model(model_uri=model_uri, name=model_name)

Registered model 'nyc-taxi-regressor' already exists. Creating a new version of this model...
2022/05/29 12:36:23 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: nyc-taxi-regressor, version 2
Created version '2' of model 'nyc-taxi-regressor'.


<ModelVersion: creation_timestamp=1653820583143, current_stage='None', description=None, last_updated_timestamp=1653820583143, name='nyc-taxi-regressor', run_id='a58990e184d9409387e4575e4b58f9d5', run_link=None, source='./mlruns/1/a58990e184d9409387e4575e4b58f9d5/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=2>

In [12]:
run_id = "5869f907f90c43a5a9d3e82c70fa0cd5"
model_uri = f"runs:/{run_id}/model"

mlflow.register_model(model_uri=model_uri, name=model_name)

Registered model 'nyc-taxi-regressor' already exists. Creating a new version of this model...
2022/05/29 12:37:10 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: nyc-taxi-regressor, version 3
Created version '3' of model 'nyc-taxi-regressor'.


<ModelVersion: creation_timestamp=1653820630536, current_stage='None', description=None, last_updated_timestamp=1653820630536, name='nyc-taxi-regressor', run_id='5869f907f90c43a5a9d3e82c70fa0cd5', run_link=None, source='./mlruns/1/5869f907f90c43a5a9d3e82c70fa0cd5/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

In [16]:
# Transition Model between stages

client.list_registered_models()

[<RegisteredModel: creation_timestamp=1653819465818, description='NAY taxi regressor for duration prediction', last_updated_timestamp=1653820630536, latest_versions=[<ModelVersion: creation_timestamp=1653819465853, current_stage='Production', description='', last_updated_timestamp=1653819554666, name='nyc-taxi-regressor', run_id='b28b262e25d54746afc299df2027c5d7', run_link='', source='./mlruns/1/b28b262e25d54746afc299df2027c5d7/artifacts/models_mlflow', status='READY', status_message=None, tags={}, user_id=None, version=1>,
  <ModelVersion: creation_timestamp=1653820630536, current_stage='None', description=None, last_updated_timestamp=1653820630536, name='nyc-taxi-regressor', run_id='5869f907f90c43a5a9d3e82c70fa0cd5', run_link=None, source='./mlruns/1/5869f907f90c43a5a9d3e82c70fa0cd5/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>], name='nyc-taxi-regressor', tags={}>]

In [23]:
versions = client.get_latest_versions(name=model_name)

for version in versions:
    print(f"version: {version.version}, current stage: {version.current_stage}")

version: 1, current stage: Production
version: 3, current stage: Staging


In [22]:
client.transition_model_version_stage(
    name=model_name,
    version=2,
    stage="Staging",
    archive_existing_versions=False
)

<ModelVersion: creation_timestamp=1653820583143, current_stage='Staging', description='The model version 2 was transitioned to Staging today.', last_updated_timestamp=1653821326517, name='nyc-taxi-regressor', run_id='a58990e184d9409387e4575e4b58f9d5', run_link=None, source='./mlruns/1/a58990e184d9409387e4575e4b58f9d5/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=2>

In [24]:
client.update_model_version(
    name=model_name,
    version=2,
    description="The model version 2 was transitioned to Staging today."
)

<ModelVersion: creation_timestamp=1653820583143, current_stage='Staging', description='The model version 2 was transitioned to Staging today.', last_updated_timestamp=1653821338050, name='nyc-taxi-regressor', run_id='a58990e184d9409387e4575e4b58f9d5', run_link=None, source='./mlruns/1/a58990e184d9409387e4575e4b58f9d5/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=2>

## Testing a Model whether it is best for Production

In [32]:
def read_dataframe(filename):
    df = pd.read_parquet(filename)

    # df.lpep_dropoff_datetime = pd.to_datetime(df.lpep_dropoff_datetime)
    # df.lpep_pickup_datetime = pd.to_datetime(df.lpep_pickup_datetime)

    df['duration'] = df.lpep_dropoff_datetime - df.lpep_pickup_datetime
    df.duration = df.duration.apply(lambda td: td.total_seconds() / 60)

    df = df[(df.duration >= 1) & (df.duration <= 60)]

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)
    
    return df


def preprocess(df, dv):
    df['PU_DO'] = df['PULocationID'] + '_' + df['DOLocationID']
    categorical = ['PU_DO']
    numerical = ['trip_distance']
    train_dicts = df[categorical + numerical].to_dict(orient='records')
    return dv.transform(train_dicts)


def test_model(name, stage, X_test, y_test):
    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    y_pred = model.predict(X_test)
    return {"rmse": mean_squared_error(y_test, y_pred, squared=False)}

In [34]:
data = read_dataframe("./data/green_tripdata_2021-03.parquet")
data.head(5)

Unnamed: 0,VendorID,lpep_pickup_datetime,lpep_dropoff_datetime,store_and_fwd_flag,RatecodeID,PULocationID,DOLocationID,passenger_count,trip_distance,fare_amount,extra,mta_tax,tip_amount,tolls_amount,ehail_fee,improvement_surcharge,total_amount,payment_type,trip_type,congestion_surcharge,duration
0,2,2021-03-01 00:05:42,2021-03-01 00:14:03,N,1.0,83,129,1.0,1.56,7.5,0.5,0.5,0.0,0.0,,0.3,8.8,1.0,1.0,0.0,8.35
1,2,2021-03-01 00:21:03,2021-03-01 00:26:17,N,1.0,243,235,1.0,0.96,6.0,0.5,0.5,0.0,0.0,,0.3,7.3,2.0,1.0,0.0,5.233333
2,2,2021-03-01 00:02:06,2021-03-01 00:22:26,N,1.0,75,242,1.0,9.93,28.0,0.5,0.5,2.0,0.0,,0.3,31.3,1.0,1.0,0.0,20.333333
3,2,2021-03-01 00:24:03,2021-03-01 00:31:43,N,1.0,242,208,1.0,2.57,9.5,0.5,0.5,0.0,0.0,,0.3,10.8,2.0,1.0,0.0,7.666667
4,1,2021-03-01 00:11:10,2021-03-01 00:14:46,N,1.0,41,151,1.0,0.8,5.0,0.5,0.5,1.85,0.0,,0.3,8.15,1.0,1.0,0.0,3.6


In [39]:
# download dv from mlflow
run_id="b28b262e25d54746afc299df2027c5d7" #run id where preprocessor is saved, version 1 model-
client.download_artifacts(run_id=run_id, path='preprocessor', dst_path='.')

'/Users/da.weber/Documents/training/mlops-camp/preprocessor'

In [40]:
with open("./preprocessor/preprocessor.b", "rb") as f_in:
    dv = pickle.load(f_in)

In [41]:
X_test = preprocess(data, dv)

In [42]:
target = "duration"
Y_test = data[target].values

In [43]:
# Test model from a specific stage
%time test_model(name=model_name, stage="Production", X_test=X_test, y_test=Y_test)

  from pandas import MultiIndex, Int64Index


CPU times: user 15.9 s, sys: 163 ms, total: 16.1 s
Wall time: 2.38 s


{'rmse': 6.738996605250057}

In [46]:
client.transition_model_version_stage(
    name=model_name,
    version=3,
    stage="Production",
    archive_existing_versions=True
)

<ModelVersion: creation_timestamp=1653820630536, current_stage='Production', description=None, last_updated_timestamp=1653822726186, name='nyc-taxi-regressor', run_id='5869f907f90c43a5a9d3e82c70fa0cd5', run_link=None, source='./mlruns/1/5869f907f90c43a5a9d3e82c70fa0cd5/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>