In [1]:
%load_ext autoreload
%autoreload 2

In [5]:
import pandas as pd
from src.config import TRANSFORMED_DATA_DIR
from prophet import Prophet
from sklearn.metrics import mean_absolute_error
from src.experiment_utils import set_mlflow_tracking
from dotenv import load_dotenv
import mlflow
import pickle 

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import sys
import os

# Add the parent directory to the Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

In [3]:
import pandas as pd
from src.config import TRANSFORMED_DATA_DIR

df = pd.read_parquet(TRANSFORMED_DATA_DIR / "tabular_data.parquet")

✅ Time-Based Features

week_day (Captures weekly patterns)

hour_of_day (Captures daily ride demand cycles)

month (Captures seasonal shifts)



✅ Better Trend Features

rolling_mean_7d: 7-day rolling mean of past rides (captures weekly trend)

rolling_mean_24h: 24-hour rolling mean (captures daily trend)

rolling_std_7d: 7-day rolling standard deviation (captures variability)

In [22]:
df['pickup_hour'] = pd.to_datetime(df['pickup_hour'])
df['week_day'] = df['pickup_hour'].dt.weekday
df['hour_of_day'] = df['pickup_hour'].dt.hour
df['month'] = df['pickup_hour'].dt.month

In [23]:
df['rolling_mean_7d'] = df['target'].rolling(window=7, min_periods=1).mean()
df['rolling_mean_24h'] = df['target'].rolling(window=24, min_periods=1).mean()
df['rolling_std_7d'] = df['target'].rolling(window=7, min_periods=1).std()

In [28]:
# Adding rolling window features
df['rolling_mean_7d'] = df['target'].rolling(window=7, min_periods=1).mean()
df['rolling_mean_24h'] = df['target'].rolling(window=24, min_periods=1).mean()
df['rolling_std_7d'] = df['target'].rolling(window=7, min_periods=1).std()

# Fill NaNs with 0 (or you can use forward fill: df['rolling_std_7d'].ffill())
df['rolling_std_7d'].fillna(0, inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['rolling_std_7d'].fillna(0, inplace=True)


In [29]:
df_prophet = df.rename(columns={'pickup_hour': 'ds', 'target': 'y'})

In [30]:
exog_features = ['week_day', 'hour_of_day', 'month', 'rolling_mean_7d', 'rolling_mean_24h', 'rolling_std_7d']
available_features = [feat for feat in exog_features if feat in df.columns]

In [31]:
train_size = int(len(df) * 0.85)
train, test = df_prophet.iloc[:train_size], df_prophet.iloc[train_size:]

In [32]:
model = Prophet(
    growth="linear",
    changepoint_prior_scale=0.5,  # More flexibility in trends
    seasonality_prior_scale=10.0,  # Give seasonality more importance
    weekly_seasonality=False,  # Manually adding it
    daily_seasonality=False,  # Manually adding it
)

# Manually add seasonality components with higher Fourier orders
model.add_seasonality(name="weekly", period=7, fourier_order=10)
model.add_seasonality(name="daily", period=24, fourier_order=8)

# Add regressors
for feature in available_features:
    model.add_regressor(feature)

# Fit the model
model.fit(train[['ds', 'y'] + available_features])


DEBUG:cmdstanpy:cmd: where.exe tbb.dll
cwd: None
DEBUG:cmdstanpy:TBB already found in load path
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: C:\Users\vidyu\AppData\Local\Temp\tmp_awtdk2v\mx_53a3d.json
DEBUG:cmdstanpy:input tempfile: C:\Users\vidyu\AppData\Local\Temp\tmp_awtdk2v\iz69pbzh.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['C:\\Users\\vidyu\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\prophet\\stan_model\\prophet_model.bin', 'random', 'seed=80173', 'data', 'file=C:\\Users\\vidyu\\AppData\\Local\\Temp\\tmp_awtdk2v\\mx_53a3d.json', 'init=C:\\Users\\vidyu\\AppData\\Local\\Temp\\tmp_awtdk2v\\iz69pbzh.json', 'output', 'file=C:\\Users\\vidyu\\AppData\\Local\\Temp\\tmp_awtdk2v\\prophet_model9dcecjl7\\prophet_model-20250223234906.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
23:49:06 - cmdstanpy - INFO 

<prophet.forecaster.Prophet at 0x1f8ff8b41d0>

In [33]:
# Forecasting
future = test[['ds'] + available_features]
forecast = model.predict(future)

# Get test predictions
test_predictions = forecast['yhat'].values
test_mae = mean_absolute_error(test['y'], test_predictions)

# Train MAE calculation using fitted values
train_forecast = model.predict(train[['ds'] + available_features])
train_mae = mean_absolute_error(train['y'], train_forecast['yhat'])

print(f"🚀 Optimized Prophet Train MAE: {train_mae}")
print(f"🚀 Optimized Prophet Test MAE: {test_mae}")

🚀 Optimized Prophet Train MAE: 20.722776731620133
🚀 Optimized Prophet Test MAE: 31.544080874069476
