Import statements

In [3]:
%load_ext autoreload
%autoreload 2
import sys
import os
import pandas as pd
from datetime import datetime
import xgboost as xgb
from dotenv import load_dotenv
# Called here, ahead of the `src` imports further down.
# NOTE(review): presumably this loads the environment variables (e.g. MLflow
# credentials) that the project helpers read — confirm against src.experiment_utils.
load_dotenv() 
from sklearn.metrics import mean_absolute_error

# Put the parent of the current working directory on the import path so the
# `src` package imported below can be resolved from this notebook's location.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

# Project-local imports; these rely on the sys.path entry added above.
from src.config import TRANSFORMED_DATA_DIR
from src.data_utils import split_time_series_data
from src.experiment_utils import set_mlflow_tracking, log_model_to_mlflow


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Data loading and train/test split

In [4]:
# Load the transformed tabular dataset from the project's data directory.
# (A bare `df.head(5)` used to follow this line; only the last expression of a
# cell is displayed, so it was evaluated and discarded. Preview the frame in
# its own cell if needed.)
df = pd.read_parquet(TRANSFORMED_DATA_DIR / "tabular_data.parquet")

# Cutoff handed to split_time_series_data to divide the rows into train and
# test sets; the exact boundary handling lives in src.data_utils.
CUTOFF_DATE = datetime(2023, 9, 1, 0, 0, 0)

X_train, y_train, X_test, y_test = split_time_series_data(
    df,
    cutoff_date=CUTOFF_DATE,
    target_column="target",
)

# Print each split's shape, in the order X_train, y_train, X_test, y_test.
for _split in (X_train, y_train, X_test, y_test):
    print(_split.shape)

# Keep only the columns whose names start with "rides_" as model features.
past_ride_columns = [c for c in X_train.columns if c.startswith("rides_")]
if not past_ride_columns:
    # Fail here with a clear message instead of passing zero-column frames on
    # to model training.
    raise ValueError('No columns starting with "rides_" were found in X_train.')

X_train_only_numeric = X_train[past_ride_columns]
X_test_only_numeric = X_test[past_ride_columns]

(55900, 674)
(55900,)
(31720, 674)
(31720,)


XGBoost Model Predictions and Logging

In [5]:
# Fit an XGBoost regressor (max_depth=10) on the "rides_" feature columns.
model = xgb.XGBRegressor(max_depth=10)
model.fit(X_train_only_numeric, y_train)

# Predict on the test features and score with mean absolute error.
predictions = model.predict(X_test_only_numeric)
test_mae = mean_absolute_error(y_true=y_test, y_pred=predictions)
print(f"{test_mae:.4f}")

# Set up MLflow tracking, then log the fitted model together with its test MAE.
# The test feature frame is passed along with the model; the call is the last
# expression of the cell, so its return value is displayed.
mlflow = set_mlflow_tracking()
log_model_to_mlflow(
    model,
    X_test_only_numeric,
    "XGBoost",
    "mean_absolute_error",
    score=test_mae,
)

INFO:src.experiment_utils:MLflow tracking URI and credentials set.


3.4586


2025/03/03 16:32:47 INFO mlflow.tracking.fluent: Experiment with name 'XGBoost' does not exist. Creating a new experiment.
INFO:src.experiment_utils:Experiment set to: XGBoost
INFO:src.experiment_utils:Logged mean_absolute_error: 3.458620309829712
INFO:src.experiment_utils:Model signature inferred.
Successfully registered model 'XGBRegressor'.
2025/03/03 16:36:32 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: XGBRegressor, version 1
Created version '1' of model 'XGBRegressor'.
INFO:src.experiment_utils:Model logged with name: XGBRegressor


🏃 View run handsome-rook-348 at: https://dagshub.com/singhvarunnn789/CDA500P1.mlflow/#/experiments/3/runs/14eff0e88c0e4fdeb79c50cd58eacaf3
🧪 View experiment at: https://dagshub.com/singhvarunnn789/CDA500P1.mlflow/#/experiments/3


<mlflow.models.model.ModelInfo at 0x28ef3418200>