In [2]:
%load_ext autoreload
%autoreload 2

import os
import sys
from datetime import timedelta

import hopsworks
import pandas as pd
import plotly.express as px

# Put the parent of the working directory on the import path so the local
# `src` package resolves; this must run before the `src` imports below.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

import src.config as config
from src.inference import get_feature_store, fetch_predictions


In [3]:
def fetch_hourly_rides(hours_back, reference_time="2025-04-30 00:00:00"):
    """Read hourly ride rows from the feature store, starting at a cutoff hour.

    The cutoff is ``reference_time`` minus ``hours_back`` hours, floored to the
    hour. Only rows whose ``pickup_hour`` is at or after the cutoff are read.

    Parameters
    ----------
    hours_back : int or float
        Length of the look-back window, in hours.
    reference_time : str, datetime or pandas.Timestamp, optional
        Anchor of the window. Naive values are interpreted as UTC; tz-aware
        values are converted to UTC. The default reproduces the fixed
        timestamp this function previously hard-coded.

    Returns
    -------
    The result of ``query.read()`` on the filtered feature-group query
    (rendered as a DataFrame in the notebook output).
    """
    anchor = pd.Timestamp(reference_time)
    # Normalise to UTC so the comparison against `pickup_hour` is tz-consistent.
    anchor = anchor.tz_localize("UTC") if anchor.tzinfo is None else anchor.tz_convert("UTC")
    cutoff_hour = (anchor - timedelta(hours=hours_back)).floor("h")

    feature_store = get_feature_store()
    rides_fg = feature_store.get_feature_group(
        name=config.FEATURE_GROUP_NAME,
        version=config.FEATURE_GROUP_VERSION,
    )

    # Only a lower bound is applied: rows later than the anchor are returned too.
    return rides_fg.select_all().filter(rides_fg.pickup_hour >= cutoff_hour).read()

# Look back 10,000 hours (roughly 417 days) from the function's reference
# timestamp so the actuals cover every hour that may have a prediction.
df_actual = fetch_hourly_rides(hours_back=10000)
df_actual


2025-05-10 13:34:53,741 INFO: Initializing external client
2025-05-10 13:34:53,742 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 13:34:55,654 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215665
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.90s) 


Unnamed: 0,pickup_hour,pickup_location_id,rides
0,2024-08-22 08:00:00+00:00,HB101,10
1,2024-08-17 02:00:00+00:00,JC066,0
2,2025-02-27 07:00:00+00:00,HB101,3
3,2024-05-25 20:00:00+00:00,JC066,15
4,2024-11-14 20:00:00+00:00,HB101,0
...,...,...,...
50836,2025-04-01 01:00:00+00:00,JC009,0
50837,2025-04-19 09:00:00+00:00,JC009,6
50838,2025-04-08 22:00:00+00:00,JC009,1
50839,2025-04-05 12:00:00+00:00,JC009,1


In [4]:
def fetch_recent_predictions(hours_back, reference_time="2025-04-30 00:00:00"):
    """Read stored model predictions from the feature store, starting at a cutoff hour.

    The cutoff is ``reference_time`` minus ``hours_back`` hours, floored to the
    hour. Only rows whose ``pickup_hour`` is at or after the cutoff are read.

    Parameters
    ----------
    hours_back : int or float
        Length of the look-back window, in hours.
    reference_time : str, datetime or pandas.Timestamp, optional
        Anchor of the window. Naive values are interpreted as UTC; tz-aware
        values are converted to UTC. The default reproduces the fixed
        timestamp this function previously hard-coded.

    Returns
    -------
    The result of ``query.read()`` on the filtered feature-group query
    (rendered as a DataFrame in the notebook output).
    """
    anchor = pd.Timestamp(reference_time)
    # Normalise to UTC so the comparison against `pickup_hour` is tz-consistent.
    anchor = anchor.tz_localize("UTC") if anchor.tzinfo is None else anchor.tz_convert("UTC")
    cutoff_hour = (anchor - timedelta(hours=hours_back)).floor("h")

    feature_store = get_feature_store()
    predictions_fg = feature_store.get_feature_group(
        name=config.FEATURE_GROUP_MODEL_PREDICTION,
        version=1,  # prediction feature group is pinned to version 1 here
    )

    # Only a lower bound is applied: predictions dated after the anchor are returned too.
    return predictions_fg.select_all().filter(predictions_fg.pickup_hour >= cutoff_hour).read()

# Use the same 10,000-hour look-back as the actuals so both frames are
# filtered with the same cutoff hour.
df_pred = fetch_recent_predictions(hours_back=10000)
df_pred


2025-05-10 13:35:36,347 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-10 13:35:36,360 INFO: Initializing external client
2025-05-10 13:35:36,361 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 13:35:37,509 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215665
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.39s) 


Unnamed: 0,pickup_location_id,predicted_demand,pickup_hour
0,HB105,2.0,2025-05-01 00:00:00+00:00
1,JC115,2.0,2025-05-01 00:00:00+00:00
2,HB101,3.0,2025-05-01 00:00:00+00:00
3,JC066,5.0,2025-05-01 00:00:00+00:00
4,HB102,5.0,2025-05-01 00:00:00+00:00
...,...,...,...
328,HB105,2.0,2025-03-12 11:00:00+00:00
329,JC115,0.0,2025-03-20 03:00:00+00:00
330,HB101,1.0,2025-03-03 20:00:00+00:00
331,JC115,0.0,2025-03-19 04:00:00+00:00


In [5]:
# Join actuals and predictions on location and hour. The join uses pandas'
# default `how` (inner), so only location-hours present in both frames remain.
# `absolute_error` is the per-row gap between forecast and observed rides.
merged_df = (
    df_actual
    .merge(df_pred, on=['pickup_location_id', 'pickup_hour'])
    .assign(
        absolute_error=lambda frame: (frame['predicted_demand'] - frame['rides']).abs()
    )
)
merged_df.head()


Unnamed: 0,pickup_hour,pickup_location_id,rides,predicted_demand,absolute_error
0,2025-03-31 15:00:00+00:00,HB101,4,4.0,0.0
1,2025-04-30 08:00:00+00:00,HB101,3,3.0,0.0
2,2025-04-29 09:00:00+00:00,HB105,1,2.0,1.0
3,2025-03-04 19:00:00+00:00,JC066,6,6.0,0.0
4,2025-03-20 03:00:00+00:00,HB102,0,0.0,0.0


In [6]:
# Average the row-level absolute errors within each pickup hour; the named
# aggregation yields a flat frame with columns `pickup_hour` and `MAE`.
mae_by_hour = merged_df.groupby('pickup_hour', as_index=False).agg(
    MAE=('absolute_error', 'mean')
)
mae_by_hour.head()


Unnamed: 0,pickup_hour,MAE
0,2025-02-28 00:00:00+00:00,0.6
1,2025-02-28 23:00:00+00:00,1.4
2,2025-03-01 22:00:00+00:00,0.4
3,2025-03-02 21:00:00+00:00,0.6
4,2025-03-03 20:00:00+00:00,1.6


In [7]:
# `px` (plotly.express) is imported with the other dependencies in the
# notebook's import cell rather than mid-notebook.

# Line chart of the hourly MAE over time; markers keep the individual hours
# visible where the series is sparse.
fig = px.line(
    mae_by_hour,
    x='pickup_hour',
    y='MAE',
    title='Mean Absolute Error (MAE) by Pickup Hour',
    labels={'pickup_hour': 'Pickup Hour', 'MAE': 'Mean Absolute Error'},
    markers=True
)

fig.show()


In [8]:
# Headline figure: the mean of the per-hour MAE values, so every hour counts
# equally regardless of how many rows it contributed.
overall_mae = mae_by_hour["MAE"].mean()
print("🔢 Average MAE across hours:", overall_mae)


🔢 Average MAE across hours: 1.635820895522388
