In [1]:
import json
import os
import pathlib

import joblib
import mlflow
import pandas as pd
from dotenv import load_dotenv
from mlflow import MlflowClient

# Load variables from a .env file (python-dotenv) into the process environment;
# a later cell reads AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY via os.getenv.
load_dotenv()

True

In [None]:
# Root directory holding the DVC pipeline outputs (dataset, fitted model, CV
# results). The default is the original location; set PART2_DVC_DIR to run the
# notebook on another machine without editing this cell.
dvc_dir = pathlib.Path(
    os.getenv(
        "PART2_DVC_DIR",
        "/home/mle-user/mle_projects/project-sprint-1/mle-project-sprint-1-v001/part2_dvc",
    )
)
data_path = dvc_dir / "data" / "initial_data.csv"
model_path = dvc_dir / "models" / "fitted_model.pkl"
results_path = dvc_dir / "cv_results" / "cv_res.json"

# Report every missing input at once instead of failing on whichever loader
# happens to run first.
missing_inputs = [
    str(path) for path in (data_path, model_path, results_path) if not path.is_file()
]
if missing_inputs:
    raise FileNotFoundError(f"Missing input file(s): {', '.join(missing_inputs)}")

initial_df = pd.read_csv(data_path)

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model artifacts produced by a trusted pipeline.
with open(model_path, "rb") as fd:
    model = joblib.load(fd)

# JSON is UTF-8 by specification; do not depend on the platform default encoding.
with open(results_path, "r", encoding="utf-8") as fd:
    metrics = json.load(fd)

In [3]:
TRACKING_SERVER_HOST = "127.0.0.1"
TRACKING_SERVER_PORT = 5000

# The S3 credentials must already be present in the environment (load_dotenv()
# runs in the first cell). Re-assigning os.environ[k] = os.getenv(k) is a no-op
# when the variable exists and raises an opaque TypeError when it does not, so
# check explicitly and name the variable that is absent. Empty values are
# treated as missing because they cannot authenticate anyway.
missing_credentials = [
    name
    for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY")
    if not os.getenv(name)
]
if missing_credentials:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_credentials)
        + ". Define them in the environment or in the .env file."
    )

# Endpoint of the S3-compatible artifact storage used by MLflow.
os.environ["MLFLOW_S3_ENDPOINT_URL"] = "https://storage.yandexcloud.net"

# Tracking and model registry are served from the same MLflow server.
tracking_server_uri = f"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}"
mlflow.set_tracking_uri(tracking_server_uri)
mlflow.set_registry_uri(tracking_server_uri)

In [4]:
EXPERIMENT_NAME = "project_sprint_2_1"
RUN_NAME = "base_pipe_model"
REGISTRY_MODEL_NAME = "base_model"

# Split the loaded frame into features and the "target" column.
X = initial_df.drop("target", axis=1)
y = initial_df["target"]

# Model metadata: pinned requirements, plus a signature and input example
# inferred from the first ten rows.
pip_requirements = "../requirements.txt"
signature = mlflow.models.infer_signature(X[:10], y[:10])
input_example = X[:10]

# mlflow.create_experiment() raises when the name is already taken, which made
# this cell fail on every re-run. Reuse the existing experiment when there is
# one and create it only on the first run.
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
if experiment is None:
    experiment_id = mlflow.create_experiment(EXPERIMENT_NAME)
else:
    experiment_id = experiment.experiment_id

with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    # Keep the run ID in the notebook namespace; a later cell re-opens this run.
    run_id = run.info.run_id
    print(run_id, RUN_NAME)

    mlflow.log_metrics(metrics)

    # Store the dataset file itself under the "dataframe" artifact directory.
    mlflow.log_artifact(data_path, "dataframe")

    # Log the fitted model and register it as a new version of
    # REGISTRY_MODEL_NAME, waiting up to 60 seconds for the registration.
    model_info = mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path="models",
        registered_model_name=REGISTRY_MODEL_NAME,
        signature=signature,
        input_example=input_example,
        await_registration_for=60,
        pip_requirements=pip_requirements,
    )

  inputs = _infer_schema(model_input) if model_input is not None else None
  outputs = _infer_schema(model_output) if model_output is not None else None


d831908157f043b4945723bf936727f3 base_pipe_model


Registered model 'base_model' already exists. Creating a new version of this model...
2024/08/14 16:27:17 INFO mlflow.tracking._model_registry.client: Waiting up to 60 seconds for model version to finish creation. Model name: base_model, version 2
Created version '2' of model 'base_model'.


In [5]:
# Tag the registered model with the sprint it belongs to.
client = MlflowClient()

client.set_registered_model_tag(
    name=REGISTRY_MODEL_NAME,
    key="sprint",
    value="project_2_1",
)

In [None]:
# Add positive-valued counterparts of the negated scores stored in `metrics`.
# Both values are evaluated before the dict is modified, so a missing source
# key leaves `metrics` untouched.
metrics.update(
    test_mse=-metrics["test_neg_mean_squared_error"],
    test_mape=-metrics["test_neg_mean_absolute_percentage_error"],
)

In [7]:
# Re-open the run created by the model-logging cell (its ID is kept in
# `run_id`) instead of a pasted literal: a hard-coded ID goes stale as soon as
# that cell creates a new run, and the metrics/params would then be written to
# the old run.
with mlflow.start_run(run_id=run_id) as run:
    # Log the full metrics dict again so the derived test_mse / test_mape
    # values are recorded on the run as well.
    mlflow.log_metrics(metrics)
    # NOTE(review): assumes `model` is a Pipeline-like object whose second
    # step is the estimator whose hyperparameters should be logged -- confirm.
    mlflow.log_params(model.steps[1][1].get_params())
    print(run_id)

d831908157f043b4945723bf936727f3
