# Interact with MLflow using MlflowClient

In [None]:
from mlflow.tracking import MlflowClient

# Tracking store is a local SQLite database file; the client below is used for
# all experiment, run and model-registry queries in this notebook.
MLFLOW_TRACKING_URI = 'sqlite:///mlflow.db'
client = MlflowClient(tracking_uri= MLFLOW_TRACKING_URI)


In [3]:
# List the experiments known to the tracking store.
client.search_experiments()

[<Experiment: artifact_location='./mlruns/1', creation_time=1754419695541, experiment_id='1', last_update_time=1754419695541, lifecycle_stage='active', name='nyc-taxi', tags={}>,
 <Experiment: artifact_location='./mlruns/0', creation_time=1754418733731, experiment_id='0', last_update_time=1754418733731, lifecycle_stage='active', name='Default', tags={}>]

In [4]:
# Creating an experiment under a name that is already taken raises an error,
# which made this cell fail on every re-run. Look the experiment up first and
# only create it when it is missing; either way the cell displays its ID.
experiment_name = 'just-for-test'
existing_experiment = client.get_experiment_by_name(experiment_name)
(
    existing_experiment.experiment_id
    if existing_experiment is not None
    else client.create_experiment(name=experiment_name)
)

'2'

In [11]:
from mlflow.entities import ViewType

# Query experiment '1' for active runs whose RMSE is below 7, returning at most
# five of them ordered from lowest to highest RMSE.
search_kwargs = dict(
    experiment_ids='1',
    filter_string='metrics.rmse < 7',
    run_view_type=ViewType.ACTIVE_ONLY,
    max_results=5,
    order_by=['metrics.rmse ASC'],
)
runs = client.search_runs(**search_kwargs)

In [12]:
# Show the raw Run objects returned by the search (metrics, params and tags).
runs

[<Run: data=<RunData: metrics={'rmse': 6.411271041406116}, params={'learning_rate': '0.205',
  'max_depth': '17',
  'min_child_weright': '1.24',
  'objective': 'reg:linear',
  'reg_alpha': '0.2856789',
  'reg_lambda': '0.0042644',
  'seed': '42'}, tags={'mlflow.log-model.history': '[{"run_id": "995385cc53d44a12864148edf8068bda", '
                              '"artifact_path": "model_mlflow", '
                              '"utc_time_created": "2025-08-05 '
                              '20:49:31.790683", "flavors": {"python_function": '
                              '{"loader_module": "mlflow.xgboost", '
                              '"python_version": "3.9.12", "data": "model.xgb", '
                              '"env": "conda.yaml"}, "xgboost": {"xgb_version": '
                              '"2.1.4", "data": "model.xgb", "model_class": '
                              '"xgboost.core.Booster", "code": null}}, '
                              '"model_uuid": '
                       

In [13]:
# Compact summary: one line per run with its ID and RMSE rounded to 4 decimals.
for run in runs:
    print("run id:{}, rmse: {:.4f}".format(run.info.run_id, run.data.metrics['rmse']))

run id:995385cc53d44a12864148edf8068bda, rmse: 6.4113
run id:7d8fcbea9cbd45828a50ee59f0db0029, rmse: 6.4113
run id:5db0e6c566484f77a11017b084e065a5, rmse: 6.7423
run id:c6596e39d8354a65b78dfb18f6f21f0b, rmse: 6.9158


## Register model

In [16]:
import mlflow
# Same SQLite URI value as the one given to MlflowClient at the top of the
# notebook; here it is set for the module-level mlflow.* API used below.
MLFLOW_TRACKING_URI = 'sqlite:///mlflow.db'
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [17]:
# Register the artifact stored under 'model' in this run as a version of
# 'nyc-taxi-regressor'. When the registered model already exists, a new
# version is created (see the log output below).
# `run_id` is reused later to download the run's preprocessor artifact.
run_id = 'c6596e39d8354a65b78dfb18f6f21f0b'
model_uri = f'runs:/{run_id}/model'
mlflow.register_model(model_uri=model_uri, name = 'nyc-taxi-regressor')

Registered model 'nyc-taxi-regressor' already exists. Creating a new version of this model...
2025/08/06 20:20:42 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: nyc-taxi-regressor, version 3
Created version '3' of model 'nyc-taxi-regressor'.


<ModelVersion: creation_timestamp=1754511642693, current_stage='None', description=None, last_updated_timestamp=1754511642693, name='nyc-taxi-regressor', run_id='c6596e39d8354a65b78dfb18f6f21f0b', run_link=None, source='./mlruns/1/c6596e39d8354a65b78dfb18f6f21f0b/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

## Transition the model to a different stage

In [19]:
# Fetch the latest registered versions of the model and print the stage each
# one is currently in.
model_name = 'nyc-taxi-regressor'
latest_versions = client.get_latest_versions(name=model_name)

for version in latest_versions:
    print('version:{},stage:{}'.format(version.version, version.current_stage))


version:2,stage:Staging
version:3,stage:None


In [20]:
# Move version 3 into 'Staging'. archive_existing_versions=False is passed so
# versions already in that stage are not archived by this call.
# NOTE(review): the version number is hard-coded; it matches the version
# created by the register_model output above — confirm before re-running.
client.transition_model_version_stage(name=model_name,
                                      version=3,
                                      stage='Staging',
                                      archive_existing_versions=False)

<ModelVersion: creation_timestamp=1754511642693, current_stage='Staging', description=None, last_updated_timestamp=1754512003018, name='nyc-taxi-regressor', run_id='c6596e39d8354a65b78dfb18f6f21f0b', run_link=None, source='./mlruns/1/c6596e39d8354a65b78dfb18f6f21f0b/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

## Compare model performance between staging and production

In [None]:
from sklearn.metrics import mean_squared_error
import pandas as pd


def read_dataframe(filename):
    """Load a trip-record parquet file and prepare it for evaluation.

    The file must contain ``lpep_pickup_datetime``, ``lpep_dropoff_datetime``,
    ``PULocationID`` and ``DOLocationID`` columns. A ``duration`` column
    (dropoff minus pickup, in minutes) is added, only trips lasting between
    1 and 60 minutes inclusive are kept, and both location-ID columns are
    cast to strings.

    Parameters
    ----------
    filename : str or path-like
        Passed straight to ``pandas.read_parquet``.

    Returns
    -------
    pandas.DataFrame
        The filtered rows as an independent frame; the original index labels
        are kept (the index is not reset).
    """
    df = pd.read_parquet(filename)

    df['lpep_dropoff_datetime'] = pd.to_datetime(df['lpep_dropoff_datetime'])
    df['lpep_pickup_datetime'] = pd.to_datetime(df['lpep_pickup_datetime'])

    # Vectorised timedelta -> minutes conversion instead of a per-row lambda.
    trip_time = df['lpep_dropoff_datetime'] - df['lpep_pickup_datetime']
    df['duration'] = trip_time.dt.total_seconds() / 60

    # .copy() makes the filtered result an independent frame, so the column
    # assignment below does not write into a slice of the unfiltered data
    # (which triggers pandas' SettingWithCopyWarning).
    df = df[(df['duration'] >= 1) & (df['duration'] <= 60)].copy()

    categorical = ['PULocationID', 'DOLocationID']
    df[categorical] = df[categorical].astype(str)

    return df


def preprocess(df, dv):
    """Turn a prepared trip frame into the feature matrix produced by ``dv``.

    A combined pickup/dropoff feature ``PU_DO`` is written onto ``df`` itself
    (the caller's frame gains that column). The ``PU_DO`` and
    ``trip_distance`` columns are then converted to a list of per-row dicts
    and handed to ``dv.transform``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``PULocationID``, ``DOLocationID`` and ``trip_distance``.
    dv : object
        Anything exposing ``transform(list_of_dicts)``.

    Returns
    -------
    Whatever ``dv.transform`` returns.
    """
    feature_columns = ['PU_DO', 'trip_distance']

    # Deliberately assigned on the input frame, not on a copy.
    df['PU_DO'] = df['PULocationID'] + '_' + df['DOLocationID']

    records = df[feature_columns].to_dict(orient='records')
    return dv.transform(records)


def test_model(name, stage, X_test, y_test):
    """Score the registered model currently in ``stage`` on a test set.

    Parameters
    ----------
    name : str
        Registered model name.
    stage : str
        Registry stage to load from; both are interpolated into the
        ``models:/{name}/{stage}`` URI.
    X_test
        Feature matrix passed to the loaded model's ``predict``.
    y_test
        True target values.

    Returns
    -------
    dict
        ``{"rmse": <root mean squared error>}``.
    """
    model = mlflow.pyfunc.load_model(f"models:/{name}/{stage}")
    y_pred = model.predict(X_test)
    # mean_squared_error's `squared=False` flag was deprecated in scikit-learn
    # 1.4 and removed in 1.6. Taking the square root of the MSE gives the same
    # RMSE for this single-target case and works on every version.
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    return {"rmse": rmse}

In [24]:
# Load the February 2021 trips as the evaluation set, then download the
# 'preprocessor' artifact directory of the registered run into the current
# working directory (the call returns the local path, shown below).
df = read_dataframe('data/green_tripdata_2021-02.parquet')
client.download_artifacts(run_id=run_id, path='preprocessor',dst_path='.')


'/workspaces/MLops-practice/02-experiment-tracking/preprocessor'

In [26]:
import pickle

# Load the pickled preprocessor downloaded into ./preprocessor.
# NOTE(review): pickle.load executes arbitrary code from the file — only
# unpickle artifacts that come from a tracking store you trust.
with open("preprocessor/preprocessor.b",'rb') as f_in:
    dv = pickle.load(f_in)

In [27]:
# Build the feature matrix and target vector for evaluation.
# Note: preprocess() also adds a 'PU_DO' column to `df` as a side effect.
X_test = preprocess(df,dv)
target = 'duration'
y_test = df[target].values

In [29]:
# Time loading + scoring of the model in the 'Production' stage.
# NOTE(review): the stage listing printed earlier shows no Production version —
# confirm one exists in the registry before running this cell.
%time test_model(name= model_name, stage="Production", X_test = X_test, y_test = y_test)

CPU times: user 8.7 s, sys: 110 ms, total: 8.81 s
Wall time: 9.8 s


{'rmse': 6.411271041406116}

In [31]:
# Same evaluation for the model in the 'Staging' stage, for comparison.
%time test_model(name= model_name, stage="Staging", X_test = X_test, y_test = y_test)

CPU times: user 79.8 ms, sys: 3.84 ms, total: 83.6 ms
Wall time: 98.2 ms


{'rmse': 6.742303328497425}