## Model Registry

### Init

In [2]:
import mlflow
# Where the MLflow tracking server is expected to be listening, and the
# experiment that the runs in this notebook are logged under.
TRACKING_URI = 'http://localhost:5000'
EXPERIMENT_NAME = 'model-registry'

mlflow.set_tracking_uri(TRACKING_URI)
# Kept as the last expression so the cell displays the Experiment object.
mlflow.set_experiment(EXPERIMENT_NAME)

2024/12/24 11:56:21 INFO mlflow.tracking.fluent: Experiment with name 'model-registry' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/758777158917142144', creation_time=1735012581944, experiment_id='758777158917142144', last_update_time=1735012581944, lifecycle_stage='active', name='model-registry', tags={}>

In [4]:
# Shell command for starting the MLflow UI through uv. It is left commented
# out here; presumably it is meant to be run from a separate terminal so the
# tracking URI configured in this notebook has a server to talk to (confirm).
# !uv tool run mlflow ui

### Register model

In [3]:
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

import mlflow
import mlflow.sklearn

# Train a small random-forest regressor inside a single MLflow run, log its
# hyperparameters and test-set MSE, then log the fitted model and register it.
# `run` stays bound after the block so later cells can read `run.info.run_id`.
with mlflow.start_run() as run:
    # Synthetic regression data; the fixed seeds keep the split and fit repeatable.
    X, y = make_regression(n_features=4, n_informative=2, random_state=0, shuffle=False)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    params = {"max_depth": 2, "random_state": 42}
    model = RandomForestRegressor(**params)
    model.fit(X_train, y_train)

    # Log parameters and metrics using the MLflow APIs
    mlflow.log_params(params)

    y_pred = model.predict(X_test)
    mlflow.log_metrics({"mse": mean_squared_error(y_test, y_pred)})

    # Log the sklearn model and register it under `registered_model_name`
    # (the first time this runs, the registry creates version 1).
    mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path="sklearn-model",
        # A few rows are enough to illustrate the expected input; passing the
        # whole training matrix would store every training row with the model.
        input_example=X_train[:5],
        registered_model_name="sk-learn-random-forest-reg-model",
    )


Successfully registered model 'sk-learn-random-forest-reg-model'.
2024/12/24 11:56:40 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: sk-learn-random-forest-reg-model, version 1


🏃 View run secretive-bee-975 at: http://localhost:5000/#/experiments/758777158917142144/runs/ad9524f79bc34f94a5d247da2fa236fa
🧪 View experiment at: http://localhost:5000/#/experiments/758777158917142144


Created version '1' of model 'sk-learn-random-forest-reg-model'.


### `mlruns` Directory

- All model runs and artifacts are stored in the `mlruns` directory created automatically when you start logging
- Explore it at your leisure

### Amend model registration via mlflow UI

- The code above automatically registers the model for you!
- Feel free to change tags etc. via the UI under the `Models` tab

### Load Registered Model for Inference

- Now that we've registered a model, suppose we want to use it

In [5]:
import mlflow.sklearn
from sklearn.datasets import make_regression

# Identify the registered model to fetch. "latest" is passed as the version
# part of the URI instead of a fixed version number.
model_name = "sk-learn-random-forest-reg-model"
model_version = "latest"

# Registry URIs have the form models:/<name>/<version>
model_uri = "models:/{}/{}".format(model_name, model_version)
model = mlflow.sklearn.load_model(model_uri)

# Build the prediction input. These are the same make_regression arguments
# (including the seed) used in the training cell, so this reproduces that
# dataset rather than drawing unseen samples.
X_new, _ = make_regression(
    n_features=4,
    n_informative=2,
    random_state=0,
    shuffle=False,
)
y_pred_new = model.predict(X_new)

print(y_pred_new)

  latest = client.get_latest_versions(name, None if stage is None else [stage])
  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 79.74it/s]  

[ 16.36355607 -20.09258424   8.0136586    6.16919118  -1.81185423
   4.03116362 -24.95801449  68.78053495 -45.0766513   64.44760141
 -40.16931792 -25.54191065 -14.39985794 -38.0567874    8.05358765
 -25.73029816 -15.91990041 -10.99985266 -24.2475118  -32.70582446
  17.34781751  68.78053495  44.27341488  41.31593646  48.16602726
 -23.62019943  47.15590018  69.12741949  48.16602726  -0.26024544
 -28.49126919 -10.99985266  10.73067585 -10.61092056  -4.7324722
   2.76556278  58.93099448 -31.19567455 -35.55773052 -23.99366895
  48.16602726  13.34984948  12.56552213 -18.66808469 -32.70582446
 -39.30386685 -34.29680647  48.16602726 -33.40149961  20.35083862
 -15.0214084  -34.55064932  -2.28963784 -19.61227378   7.6979477
 -25.86538741 -11.95702358 -15.36598686   5.88539811 -30.23881739
 -25.47645531 -43.61170248 -43.7442754  -14.59055495 -40.16931792
 -32.70582446  -2.68114572  -5.39418041  16.15991316  -2.28963784
  41.662821    10.04512765  51.22797543 -23.09874036  10.04512765
  46.5774364




- There are 3 ways mlflow lets you load a registered model
    - Via Name and Version
    - Via local path (absolute or relative)
    - Via Run ID

#### Name and Version

In [6]:
# Load by registered name and version, using a models:/<name>/<version> URI.
# `model_name` and `model_version` come from the earlier inference cell.
model = mlflow.sklearn.load_model(
    "models:/{}/{}".format(model_name, model_version)
)

  latest = client.get_latest_versions(name, None if stage is None else [stage])
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 259.22it/s]


#### Name and Version, with version aliasing 

In [7]:
import mlflow.sklearn
from mlflow import MlflowClient

# Registry client used for the alias operations in this cell
client = MlflowClient()

model_name = "sk-learn-random-forest-reg-model"
model_version_alias = "the_best_model_ever"

# Point the alias at version "1" of the registered model
# (this appears to mirror a step that can also be done in the UI).
client.set_registered_model_alias(
    name=model_name,
    alias=model_version_alias,
    version="1",
)

# Look the model version up through its alias and show its tags
model_info = client.get_model_version_by_alias(
    name=model_name,
    alias=model_version_alias,
)
model_tags = model_info.tags
print(model_tags)

# Alias-based URIs use "@" instead of a version segment: models:/<name>@<alias>
model_uri = "models:/{}@{}".format(model_name, model_version_alias)
model = mlflow.sklearn.load_model(model_uri)

print(model)


{}


Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 161.65it/s]

RandomForestRegressor(max_depth=2, random_state=42)





#### Via Paths

In [None]:
# Illustrative only: both paths below are placeholders and must be replaced
# with the location of a model saved on this machine before running the cell.

# Absolute local path
mlflow.sklearn.load_model("/Users/me/path/to/local/model")

# Relative local path
mlflow.sklearn.load_model("relative/path/to/local/model")

#### Via Run ID

In [None]:
# Load the model straight from the run that logged it, using a
# runs:/<run_id>/<artifact_path> URI.
# `run` is the object bound by `with mlflow.start_run() as run` in the
# registration cell; "sklearn-model" is the artifact_path passed to
# log_model there.
mlflow_run_id = run.info.run_id
run_relative_path_to_model = "sklearn-model"
mlflow.sklearn.load_model(f"runs:/{mlflow_run_id}/{run_relative_path_to_model}")