In [1]:
import sys

# Put the parent directory on the import path so the `src` package imported
# later can be found (presumably it lives one level above this notebook).
# The membership check keeps the cell idempotent: re-running it does not
# stack duplicate entries on sys.path.
if "../" not in sys.path:
    sys.path.append("../")

In [2]:
import os
import json

import numpy as np
import mlflow
from sklearn.ensemble import RandomForestRegressor

from src.utils import (
    get_metadata,
    mean_squared_error,
)

In [3]:
# Read the tracking credentials from a JSON file that sits next to the
# project rather than hard-coding secrets in the notebook.
# The encoding is pinned to UTF-8 so the read does not depend on the
# platform's default locale encoding.
with open("../credentials.json", "r", encoding="utf-8") as f:
    credentials = json.load(f)

In [4]:
# Export the tracking-server username and password as environment variables,
# which is where the MLflow client looks for them. The generator is consumed
# lazily, so each variable is set in order, one at a time.
os.environ.update(
    (var_name, credentials[var_name])
    for var_name in ("MLFLOW_TRACKING_USERNAME", "MLFLOW_TRACKING_PASSWORD")
)

## read dataset

In [5]:
# Fetch the metadata features already split into two (X, y) pairs.
# NOTE(review): the flag is called `train_valid_split`, so the second pair is
# presumably a validation split even though it is named "test" here — confirm.
train_split, test_split = get_metadata(train_valid_split=True)
X_train, y_train = train_split
X_test, y_test = test_split

In [6]:
# Sanity check: show the shapes of the training features and targets.
tuple(part.shape for part in (X_train, y_train))

((8920, 12), (8920, 1))

In [7]:
# Sanity check: show the shapes of the held-out features and targets.
tuple(part.shape for part in (X_test, y_test))

((992, 12), (992, 1))

## set model

In [8]:
# Hyperparameters of the baseline random forest, gathered in one mapping so
# they can be inspected or tweaked in a single place.
rf_params = {
    "n_estimators": 1500,
    "max_depth": 5,
    "min_samples_split": 3,
    "max_features": "sqrt",
    "random_state": 1234,  # fixed seed so repeated fits are reproducible
    "n_jobs": -1,  # let scikit-learn use every available core
}
model = RandomForestRegressor(**rf_params)

## run the mlflow experiment

In [9]:
# Select the experiment that the runs below are recorded under; the returned
# Experiment object is the cell's displayed output.
mlflow.set_experiment(experiment_name="pawpularity_test")

<Experiment: artifact_location='mlflow-artifacts:/8fae09594b404c81a887d135847b7937', experiment_id='3', lifecycle_stage='active', name='pawpularity_test', tags={}>

In [10]:
# Turn on MLflow's scikit-learn autologging before any run is started, so the
# estimator fit inside the run below is recorded without explicit log calls.
mlflow.sklearn.autolog()

In [11]:
# Fit the baseline model inside a named MLflow run and print the error on both
# splits.
with mlflow.start_run(run_name="sklearn_baseline"):
    # y_train is a column vector of shape (n_samples, 1). scikit-learn expects
    # a 1-D target for a single-output regressor and emits a
    # DataConversionWarning otherwise, so flatten it before fitting.
    model.fit(X_train, np.ravel(y_train))
    pred_train = model.predict(X_train)
    pred_test = model.predict(X_test)

    # NOTE(review): the targets are (n, 1) while the predictions are presumably
    # 1-D (n,). Confirm that src.utils.mean_squared_error aligns the shapes
    # rather than broadcasting them to (n, n).
    print(f"Train MSE: {mean_squared_error(y_train, pred_train)}")
    print(f"Test MSE: {mean_squared_error(y_test, pred_test)}")



Train MSE: 20.496544974575347
Test MSE: 20.612500504589608
