In [2]:
import argparse
import os
import pickle

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error


def load_pickle(filename: str):
    """Return the object deserialized from the pickle file at *filename*.

    The file is opened in binary mode and is closed again before the
    loaded object is handed back to the caller.

    NOTE: unpickling can execute arbitrary code, so only point this at
    files that come from a trusted source.
    """
    with open(filename, mode="rb") as handle:
        payload = pickle.load(handle)
    return payload


# Tracking URI handed to MLflow below: a SQLite database file (mlflow.db).
MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"
# Alternative kept for reference: an HTTP tracking server on localhost:5000.
# MLFLOW_TRACKING_URI = "http://127.0.0.1:5000/"
# Name of the MLflow experiment selected below.
exp_name = "nyc-taxi-homework"


# Configure MLflow when this code is executed: set the tracking URI first,
# then make `exp_name` the active experiment.
import mlflow
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(exp_name)


def fetch_logged_data(run_id):
    """Look up what was recorded for the MLflow run identified by *run_id*.

    Returns a 4-tuple ``(params, metrics, tags, artifacts)``:

    * ``params`` and ``metrics`` are taken unchanged from the run's data,
    * ``tags`` contains only the tags whose key does not start with
      ``"mlflow."``,
    * ``artifacts`` is the list of artifact paths listed under ``"model"``.
    """
    tracking_client = mlflow.tracking.MlflowClient()
    run_data = tracking_client.get_run(run_id).data

    # Keep only the tags outside the "mlflow." key prefix.
    user_tags = {}
    for key, value in run_data.tags.items():
        if key.startswith("mlflow."):
            continue
        user_tags[key] = value

    model_artifacts = [
        entry.path for entry in tracking_client.list_artifacts(run_id, "model")
    ]
    return run_data.params, run_data.metrics, user_tags, model_artifacts


def run(data_path):
    """Train a random forest inside one MLflow run and print its logged params.

    ``train.pkl`` and ``valid.pkl`` are loaded from *data_path*; each is
    unpacked into a ``(features, target)`` pair. scikit-learn autologging is
    switched on before the run starts. After the run has ended, the logged
    data is fetched back by run id and only the parameters are printed.
    Nothing is returned.
    """
    mlflow.sklearn.autolog()

    # The run handle gets its own name so it does not shadow this function.
    with mlflow.start_run() as active_run:
        # NOTE(review): the dataset/model variable names are deliberately
        # left as they were; autologging may derive metric keys from them
        # -- confirm before renaming.
        train_file = os.path.join(data_path, "train.pkl")
        X_train, y_train = load_pickle(train_file)
        valid_file = os.path.join(data_path, "valid.pkl")
        X_valid, y_valid = load_pickle(valid_file)

        rf = RandomForestRegressor(max_depth=10, random_state=0)
        rf.fit(X_train, y_train)
        y_pred = rf.predict(X_valid)

        # The result is not used further in this function.
        # NOTE(review): newer scikit-learn releases deprecate `squared=False`
        # in favour of `root_mean_squared_error` -- confirm against the
        # installed version.
        rmse = mean_squared_error(y_valid, y_pred, squared=False)

    finished_run_id = active_run.info.run_id
    logged_params, _metrics, _tags, _artifacts = fetch_logged_data(finished_run_id)
    print(logged_params)


# NOTE(review): the command-line entry point below is disabled -- presumably
# because this code is executed as a notebook cell rather than as a script;
# confirm before re-enabling. The data directory is hard-coded in the call
# at the bottom instead.
# if __name__ == '__main__':

#     parser = argparse.ArgumentParser()
#     parser.add_argument(
#         "--data_path",
#         default="./output",
#         help="the location where the processed NYC taxi trip data was saved."
#     )
#     args = parser.parse_args()

# Execute one training run against the data stored under ./output
# (the same value as the disabled --data_path default above).
run("./output")


{'bootstrap': 'True', 'ccp_alpha': '0.0', 'criterion': 'squared_error', 'max_depth': '10', 'max_features': '1.0', 'max_leaf_nodes': 'None', 'max_samples': 'None', 'min_impurity_decrease': '0.0', 'min_samples_leaf': '1', 'min_samples_split': '2', 'min_weight_fraction_leaf': '0.0', 'n_estimators': '100', 'n_jobs': 'None', 'oob_score': 'False', 'random_state': '0', 'verbose': '0', 'warm_start': 'False'}


### Q4

We want to manage the entire lifecycle of our ML model. In this step, you'll need to launch a tracking server. This way we will also have access to the model registry.

Your task is to:

* launch the tracking server on your local machine,
* select a SQLite db for the backend store and a folder called `artifacts` for the artifacts store.

You should keep the tracking server running to work on the next two exercises that use the server.

In addition to backend-store-uri, what else do you need to pass to properly configure the server?

* `default-artifact-root`
* `serve-artifacts`
* `artifacts-only`
* `artifacts-destination`