In [1]:
from mlflow.tracking import MlflowClient

# Address of the remote MLflow tracking server. Passed both to MlflowClient
# and to mlflow.set_tracking_uri in the cells below.
MLFLOW_TRACKING_URI = 'http://13.215.46.159:5000/'

In [2]:
# Low-level client bound to the remote tracking server.
client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

# `MlflowClient.list_experiments` is not available in MLflow 2.x, where
# `search_experiments` is its replacement. Use whichever the installed
# release provides so this cell runs on both old and new versions.
fetch_experiments = getattr(client, 'search_experiments', None) or client.list_experiments
fetch_experiments()

[<Experiment: artifact_location='s3://mlflow-artifacts-remote-1212/0', experiment_id='0', lifecycle_stage='active', name='Default', tags={}>,
 <Experiment: artifact_location='s3://mlflow-artifacts-remote-1212/2', experiment_id='2', lifecycle_stage='active', name='nyc-taxi-experiment', tags={}>]

In [3]:
import mlflow

# Point the module-level mlflow API (mlflow.register_model and
# mlflow.pyfunc.load_model are used further down) at the same server as `client`.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [4]:
# ID of the tracked run whose logged model is registered below; also reused
# later to download that run's dict_vectorizer artifact.
run_id = '8fa4fdbc841b4a0e9a2670080dbeabd4'
# `runs:/<run_id>/<artifact path>` URI addressing the `model` artifact of that run.
model_uri = f'runs:/{run_id}/model'

# Register the run's model in the registry under the name 'nyc-taxi-regressor'.
# NOTE(review): re-running this cell presumably creates an additional model
# version each time — confirm before re-executing.
mlflow.register_model(model_uri=model_uri, name='nyc-taxi-regressor')

Successfully registered model 'nyc-taxi-regressor'.
2022/06/22 20:46:09 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: nyc-taxi-regressor, version 1
Created version '1' of model 'nyc-taxi-regressor'.


<ModelVersion: creation_timestamp=1655945169111, current_stage='None', description='', last_updated_timestamp=1655945169111, name='nyc-taxi-regressor', run_id='8fa4fdbc841b4a0e9a2670080dbeabd4', run_link='', source='s3://mlflow-artifacts-remote-1212/2/8fa4fdbc841b4a0e9a2670080dbeabd4/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>

In [5]:
# Registry name of the model; reused by the stage-transition, description
# update and evaluation cells below.
model_name = 'nyc-taxi-regressor'

# Ask the registry for the latest version(s) of the model and print each
# version number together with the stage it is currently in.
latest = client.get_latest_versions(name=model_name)
for version in latest:
    print(f'version: {version.version}, stage: {version.current_stage}')

version: 1, stage: None


In [7]:
# Model version to promote and the stage it should be moved to. Both values
# are reused when the version description is updated in the next cell.
model_version = 1
new_stg = 'staging'
# Move the chosen version into the new stage.
# archive_existing_versions=False: do not ask MLflow to archive versions that
# are already in the target stage.
client.transition_model_version_stage(
    name=model_name,
    version=model_version,
    stage=new_stg,
    archive_existing_versions=False
)

<ModelVersion: creation_timestamp=1655945169111, current_stage='Staging', description='', last_updated_timestamp=1655945351876, name='nyc-taxi-regressor', run_id='8fa4fdbc841b4a0e9a2670080dbeabd4', run_link='', source='s3://mlflow-artifacts-remote-1212/2/8fa4fdbc841b4a0e9a2670080dbeabd4/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>

In [8]:
from datetime import datetime

# Calendar date on which this cell is executed; embedded in the description.
date = datetime.today().date()

# Record on the model version itself when, and to which stage, it was moved.
client.update_model_version(
    name=model_name,
    version=model_version,
    description=f'Model version {model_version} was transitioned to {new_stg} on {date}'
)

<ModelVersion: creation_timestamp=1655945169111, current_stage='Staging', description='Model version 1 was transitioned to staging on 2022-06-22', last_updated_timestamp=1655945455862, name='nyc-taxi-regressor', run_id='8fa4fdbc841b4a0e9a2670080dbeabd4', run_link='', source='s3://mlflow-artifacts-remote-1212/2/8fa4fdbc841b4a0e9a2670080dbeabd4/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>

Retrieve model from registry:


In [20]:
from sklearn.metrics import mean_squared_error
import pandas as pd
import pickle

def read_dataframe(filename):
    """Load a trip parquet file and prepare it for scoring.

    The pickup/drop-off timestamps are parsed, a ``duration`` column holding
    the trip length in minutes is added, trips shorter than 1 minute or
    longer than 60 minutes are dropped, and the pickup/drop-off location IDs
    are converted to strings.

    Args:
        filename: Location of the parquet file, in any form accepted by
            ``pd.read_parquet``.

    Returns:
        A ``pd.DataFrame`` with the original columns plus ``duration``,
        restricted to trips lasting between 1 and 60 minutes (inclusive).
    """
    df = pd.read_parquet(filename)

    df['lpep_dropoff_datetime'] = pd.to_datetime(df['lpep_dropoff_datetime'])
    df['lpep_pickup_datetime'] = pd.to_datetime(df['lpep_pickup_datetime'])

    # Vectorised timedelta -> minutes conversion (no per-row Python lambda).
    trip_length = df['lpep_dropoff_datetime'] - df['lpep_pickup_datetime']
    df['duration'] = trip_length.dt.total_seconds() / 60

    # Take an explicit copy of the filtered rows: assigning columns on the
    # bare boolean-mask slice raises pandas' SettingWithCopyWarning and leaves
    # it ambiguous whether the assignment below actually takes effect.
    df = df[(df['duration'] >= 1) & (df['duration'] <= 60)].copy()

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)

    return df


def preprocess(df, dv):
    """Turn a prepared trip frame into the feature matrix the model expects.

    Builds the combined ``PU_DO`` feature (pickup ID + ``'_'`` + drop-off ID),
    pairs it with ``trip_distance``, and passes the resulting list of
    per-row dicts to ``dv.transform``.

    The input frame is left untouched: ``PU_DO`` is added to a derived frame
    rather than written back into ``df``, so calling this function has no
    side effect on the caller's data (and cannot trigger pandas'
    SettingWithCopyWarning when ``df`` is itself a filtered slice).

    Args:
        df: DataFrame with string-valued ``PULocationID`` / ``DOLocationID``
            columns and a ``trip_distance`` column.
        dv: Object exposing ``transform(list_of_dicts)``; the notebook passes
            the vectorizer unpickled from ``dict_vectorizer.bin``.

    Returns:
        Whatever ``dv.transform`` returns for the feature dicts.
    """
    features = df.assign(PU_DO=df['PULocationID'] + '_' + df['DOLocationID'])
    records = features[['PU_DO', 'trip_distance']].to_dict(orient='records')
    return dv.transform(records)

def test_model(name, stage, X_test, y_test):
    """Score a registered model on held-out data and report its RMSE.

    Args:
        name: Name of the model in the MLflow model registry.
        stage: Registry stage to load the model from; interpolated into the
            ``models:/<name>/<stage>`` URI.
        X_test: Feature matrix passed unchanged to the model's ``predict``.
        y_test: True target values aligned with ``X_test``.

    Returns:
        ``{"rmse": <root mean squared error of the predictions>}``.
    """
    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    y_pred = model.predict(X_test)
    # The `squared=False` argument of mean_squared_error was deprecated in
    # scikit-learn 1.4 and subsequently removed, so rely only on the plain MSE
    # and take the root here. For a single target column (as used in this
    # notebook) this is exactly the value `squared=False` used to return.
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    return {"rmse": rmse}

In [17]:
# `path` is the relative path that was given to `.log_artifact` / `.log_model`
# when the run was logged. For the model itself the full location would be
# s3://mlflow-artifacts-remote-1212/2/8fa4fdbc841b4a0e9a2670080dbeabd4/artifacts/model/model.pkl
# but it does not need to be downloaded here: it is already registered and is
# loaded through mlflow.pyfunc.load_model inside test_model(). For reference,
# a direct download would look like:
# client.download_artifacts(run_id=run_id, path='model/model.pkl', dst_path='.')
from pathlib import Path

# MlflowClient.download_artifacts requires `dst_path` to already exist, so
# create it first; otherwise this cell fails on a fresh checkout.
Path('./model').mkdir(parents=True, exist_ok=True)
client.download_artifacts(run_id=run_id, path='dict_vectorizer.bin', dst_path='./model')

'/home/klang/mlops-notes/w4-deployment/web-service-mlflow/model/dict_vectorizer.bin'

In [21]:

# March 2021 green-taxi trips, cleaned by read_dataframe, serve as the evaluation set.
df = read_dataframe('../../data/green_tripdata_2021-03.parquet')

# Restore the vectorizer that was downloaded from the run's artifacts.
# NOTE(review): pickle.load executes arbitrary code from the file — only
# acceptable because the artifact comes from our own tracking server.
with open('model/dict_vectorizer.bin', 'rb') as vectorizer_file:
    dv = pickle.load(vectorizer_file)

In [23]:
# Feature matrix produced by the downloaded vectorizer, and the matching
# ground-truth trip durations (minutes) as a plain NumPy array.
X_test = preprocess(df, dv)
y_test = df['duration'].to_numpy()

In [24]:
# Load the registry model currently in the 'staging' stage and report its
# RMSE on the evaluation set built above.
test_model(name=model_name, stage='staging', X_test=X_test, y_test=y_test)

{'rmse': 6.549816636724069}