In [19]:
# Regularly fetch a new batch of features, compute predictions, and save them to the feature store
# 

In [20]:
%load_ext autoreload
%autoreload 2

import sys
import os
from datetime import timedelta
import pandas as pd

# Add src directory to path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

import src.config as config
from src.inference import get_feature_store


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [21]:
# Reference time is pinned to a constant instead of being read from the clock.
# NOTE(review): the original note said "April data is unavailable", yet the
# window computed below ends on 2025-04-30 — confirm the intended reason.
current_date = pd.Timestamp("2025-05-01 00:00:00", tz="UTC")

# Window bounds: from 90 days before the reference time up to one hour before it.
lookback = timedelta(days=90)
last_hour_offset = timedelta(hours=1)
fetch_data_from = current_date - lookback
fetch_data_to = current_date - last_hour_offset

print(f"⏳ Fetching data from {fetch_data_from} to {fetch_data_to}")


⏳ Fetching data from 2025-01-31 00:00:00+00:00 to 2025-04-30 23:00:00+00:00


In [23]:
# Look up the configured feature view in the feature store.
feature_store = get_feature_store()
feature_view = feature_store.get_feature_view(
    name=config.FEATURE_VIEW_NAME,
    version=config.FEATURE_VIEW_VERSION,
)

# Request one extra day on each side of the window; the exact bounds are
# enforced by the filter further down.
one_day = timedelta(days=1)
ts_data = feature_view.get_batch_data(
    start_time=fetch_data_from - one_day,
    end_time=fetch_data_to + one_day,
)

# Trim to [fetch_data_from, fetch_data_to] (both ends inclusive), order rows by
# station and hour, then drop the timezone so pickup_hour becomes tz-naive.
in_window = ts_data["pickup_hour"].between(fetch_data_from, fetch_data_to)
ts_data = (
    ts_data.loc[in_window]
    .sort_values(["pickup_location_id", "pickup_hour"])
    .reset_index(drop=True)
)
ts_data["pickup_hour"] = ts_data["pickup_hour"].dt.tz_localize(None)


2025-05-10 13:28:04,212 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-10 13:28:04,220 INFO: Initializing external client
2025-05-10 13:28:04,220 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 13:28:05,399 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215665
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.60s) 


In [24]:
# Display the filtered time series (sorted by pickup_location_id, then
# pickup_hour, with tz-naive timestamps) as a sanity check.
ts_data

Unnamed: 0,pickup_hour,pickup_location_id,rides
0,2025-01-31 00:00:00,HB101,0
1,2025-01-31 01:00:00,HB101,0
2,2025-01-31 02:00:00,HB101,0
3,2025-01-31 03:00:00,HB101,0
4,2025-01-31 04:00:00,HB101,0
...,...,...,...
11491,2025-04-30 19:00:00,JC115,4
11492,2025-04-30 20:00:00,JC115,7
11493,2025-04-30 21:00:00,JC115,1
11494,2025-04-30 22:00:00,JC115,0


In [25]:
from src.data_utils import transform_ts_data_info_features

# Sliding-window settings handed to the transform.
# 24 * 28 = 672; presumably 28 days of hourly rows per window — TODO confirm
# against transform_ts_data_info_features.
window_length = 24 * 28
# NOTE(review): a step of 23 (not 24) is kept exactly as in the original.
window_step = 23

features = transform_ts_data_info_features(
    ts_data,
    window_size=window_length,
    step_size=window_step,
)


In [26]:
from src.inference import get_model_predictions, load_model_from_registry

# Load the model through the project helper and run it over the features.
model = load_model_from_registry()
predictions = get_model_predictions(model, features)

# Copy pickup_hour over from the features frame. `.values` makes the
# assignment positional, so row i of `predictions` receives row i of
# `features` regardless of either frame's index.
# NOTE(review): this assumes get_model_predictions preserves row order — confirm.
feature_hours = features["pickup_hour"].values
predictions["pickup_hour"] = feature_hours
predictions


2025-05-10 13:28:49,981 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-10 13:28:49,996 INFO: Initializing external client
2025-05-10 13:28:49,997 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 13:28:51,178 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215665


Unnamed: 0,pickup_location_id,predicted_demand,pickup_hour
0,HB101,0,2025-02-28 00:00:00
1,HB101,5,2025-02-28 23:00:00
2,HB101,2,2025-03-01 22:00:00
3,HB101,2,2025-03-02 21:00:00
4,HB101,1,2025-03-03 20:00:00
...,...,...,...
322,JC115,5,2025-04-26 12:00:00
323,JC115,4,2025-04-27 11:00:00
324,JC115,3,2025-04-28 10:00:00
325,JC115,3,2025-04-29 09:00:00


In [27]:
# Authenticate against Hopsworks using the project name and API key held in
# the config module.
# NOTE(review): `project` is not referenced by any later cell visible here.
import hopsworks

project = hopsworks.login(
    api_key_value=config.HOPSWORKS_API_KEY,
    project=config.HOPSWORKS_PROJECT_NAME,
)

2025-05-10 13:29:22,380 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-10 13:29:22,387 INFO: Initializing external client
2025-05-10 13:29:22,389 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 13:29:23,561 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215665


In [28]:
# Fetch (or create, if it does not exist yet) the feature group that holds the
# model predictions: keyed by station and hour, with pickup_hour as event time.
prediction_store = get_feature_store()
feature_group = prediction_store.get_or_create_feature_group(
    name=config.FEATURE_GROUP_MODEL_PREDICTION,
    version=1,
    description="Predictions from Citi Ride LGBM Model",
    primary_key=["pickup_location_id", "pickup_hour"],
    event_time="pickup_hour",
)

# Coerce every column to an explicit dtype before uploading.
for column_name, target_dtype in (
    ("predicted_demand", "float"),
    ("pickup_location_id", str),
):
    predictions[column_name] = predictions[column_name].astype(target_dtype)
predictions["pickup_hour"] = pd.to_datetime(predictions["pickup_hour"])

# Upload the rows without waiting for the materialization job to finish.
# Kept as the cell's final expression so the returned value is displayed.
feature_group.insert(predictions, write_options={"wait_for_job": False})


2025-05-10 13:30:00,012 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-10 13:30:00,026 INFO: Initializing external client
2025-05-10 13:30:00,028 INFO: Base URL: https://c.app.hopsworks.ai:443






2025-05-10 13:30:01,232 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215665


Uploading Dataframe: 100.00% |██████████| Rows 327/327 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: citi_bike_model_prediction_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1215665/jobs/named/citi_bike_model_prediction_1_offline_fg_materialization/executions


(Job('citi_bike_model_prediction_1_offline_fg_materialization', 'SPARK'), None)

In [29]:
# Display the predictions frame again; predicted_demand now shows as float
# after the dtype casts applied before the insert.
predictions

Unnamed: 0,pickup_location_id,predicted_demand,pickup_hour
0,HB101,0.0,2025-02-28 00:00:00
1,HB101,5.0,2025-02-28 23:00:00
2,HB101,2.0,2025-03-01 22:00:00
3,HB101,2.0,2025-03-02 21:00:00
4,HB101,1.0,2025-03-03 20:00:00
...,...,...,...
322,JC115,5.0,2025-04-26 12:00:00
323,JC115,4.0,2025-04-27 11:00:00
324,JC115,3.0,2025-04-28 10:00:00
325,JC115,3.0,2025-04-29 09:00:00
