In [None]:
%load_ext autoreload
%autoreload 2

import sys
import os
from datetime import datetime, timedelta
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

import src.config as config
from src.inference import get_feature_store
from src.data_utils import transform_ts_data_info_features
from src.inference import load_model_from_registry
from src.inference import get_model_predictions
from src.inference import get_feature_store


In [2]:
%load_ext autoreload
%autoreload 2
import sys
import os
from datetime import datetime, timedelta
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
from sklearn.feature_selection import SelectKBest, f_regression
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Add the parent directory to the Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

import src.config as config
from src.inference import get_feature_store
from src.data_utils import transform_ts_data_info_features
from src.inference import load_batch_of_features_from_store
from src.inference import load_model_from_registry
from src.inference import get_model_predictions
from src.inference import BaselineModelPreviousHour

# Connect to Hopsworks and get the feature store
feature_store = get_feature_store()

# Retrieve the feature group with recent data
feature_group = feature_store.get_feature_group(
    name="recent_time_series_hourly_feature_group",
    version=1
)

# Create or retrieve the feature view for recent data
try:
    feature_store.create_feature_view(
        name="citi_bike_recent_hourly_feature_view",
        version=1,
        query=feature_group.select_all(),
    )
    print(f"Feature view 'citi_bike_recent_hourly_feature_view' (version 1) created successfully.")
except Exception as e:
    print(f"Error creating feature view: {e}")

feature_view = feature_store.get_feature_view(
    name="citi_bike_recent_hourly_feature_view",
    version=1
)

# Get the current datetime
current_date = pd.Timestamp.now(tz='Etc/UTC')
print(f"Current date: {current_date}")

# Read time-series data from the feature store
fetch_data_to = current_date.floor('h')  # Include the current hour
fetch_data_from = current_date - timedelta(days=1*29)
print(f"Fetching data from {fetch_data_from} to {fetch_data_to}")

ts_data = feature_view.get_batch_data(
    start_time=(fetch_data_from - timedelta(days=1)),
    end_time=(fetch_data_to + timedelta(days=1)),
)
ts_data = ts_data[ts_data.pickup_hour.between(fetch_data_from, fetch_data_to)]
ts_data.sort_values(["start_station_name", "pickup_hour"]).reset_index(drop=True)
ts_data.info()

ts_data["pickup_hour"] = ts_data["pickup_hour"].dt.tz_localize(None)
ts_data.info()

# Debugging: Print ts_data to inspect pickup_hour range
print("\nts_data (after filtering):")
print("Shape:", ts_data.shape)
print("pickup_hour range:", ts_data["pickup_hour"].min(), "to", ts_data["pickup_hour"].max())
print(ts_data)

# Transform the data into features
features = transform_ts_data_info_features(ts_data, window_size=24*28, step_size=23)

# Sort features by pickup_hour in descending order to ensure the most recent hour is selected
features.sort_values("pickup_hour", ascending=False, inplace=True)

# Filter features for the most recent hour
features_next_hour = features.groupby("start_station_name").first().reset_index()
recent_hour = features_next_hour["pickup_hour"].max()
print(f"\nMost recent hour in features_next_hour: {recent_hour}")

# Debugging: Print features_next_hour
print("\nfeatures_next_hour:")
print("Shape:", features_next_hour.shape)
print(features_next_hour)

# Define the list of models and corresponding feature group names
models = [
    "baseline_previous_hour",
    "lightgbm_28days_lags",
    "lightgbm_top10_features",
    "gradient_boosting_temporal_features",
    "lightgbm_enhanced_lags_cyclic_temporal_interactions"
]

prediction_feature_groups = {
    "baseline_previous_hour": "predictions_model_baseline",
    "lightgbm_28days_lags": "predictions_model_lgbm_28days",
    "lightgbm_top10_features": "predictions_model_lgbm_top10",
    "gradient_boosting_temporal_features": "predictions_model_gbt",
    "lightgbm_enhanced_lags_cyclic_temporal_interactions": "predictions_model_lgbm_enhanced"
}

# Make predictions with each model and save to separate feature groups
for model_name in models:
    print(f"\nProcessing model: {model_name}")
    
    # Load the model
    model = load_model_from_registry(model_name=model_name)
    
    # Make predictions for the most recent hour
    predictions = get_model_predictions(model, features_next_hour, model_name=model_name)
    predictions["pickup_hour"] = recent_hour  # Use the most recent actual hour
    predictions
    
    # Create or retrieve the feature group for this model's predictions
    feature_group = feature_store.get_or_create_feature_group(
        name=prediction_feature_groups[model_name],
        version=1,
        description=f"Predictions from {model_name} model",
        primary_key=["start_station_name", "pickup_hour"],
        event_time="pickup_hour",
    )
    
    # Insert the predictions into the feature group
    feature_group.insert(predictions, write_options={"wait_for_job": False})
    print(f"Saved predictions to feature group: {prediction_feature_groups[model_name]}")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
2025-05-10 04:52:18,053 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-10 04:52:18,059 INFO: Initializing external client
2025-05-10 04:52:18,059 INFO: Base URL: https://c.app.hopsworks.ai:443






2025-05-10 04:52:18,746 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Error creating feature view: Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/1225907/featurestores/1212511/featureview). Server response: 
HTTP code: 400, HTTP reason: Bad Request, body: b'{"errorCode":270179,"usrMsg":"Feature view: citi_bike_recent_hourly_feature_view, version: 1","errorMsg":"The provided feature view name and version already exists"}', error code: 270179, error msg: The provided feature view name and version already exists, user msg: Feature view: citi_bike_recent_hourly_feature_view, version: 1
Current date: 2025-05-10 08:52:21.159984+00:00
Fetching data from 2025-04-11 08:52:21.159984+00:00 to 2025-05-10 08:00:00+00:00
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.62s) 


Unnamed: 0,pickup_hour,start_station_name,rides
0,2025-04-11 09:00:00+00:00,11 Ave & W 41 St,16
1,2025-04-11 10:00:00+00:00,11 Ave & W 41 St,14
2,2025-04-11 11:00:00+00:00,11 Ave & W 41 St,7
3,2025-04-11 12:00:00+00:00,11 Ave & W 41 St,14
4,2025-04-11 13:00:00+00:00,11 Ave & W 41 St,12
...,...,...,...
2080,2025-05-10 03:00:00+00:00,W 31 St & 7 Ave,0
2081,2025-05-10 04:00:00+00:00,W 31 St & 7 Ave,1
2082,2025-05-10 05:00:00+00:00,W 31 St & 7 Ave,1
2083,2025-05-10 06:00:00+00:00,W 31 St & 7 Ave,10


<class 'pandas.core.frame.DataFrame'>
Index: 2085 entries, 0 to 2099
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype                  
---  ------              --------------  -----                  
 0   pickup_hour         2085 non-null   datetime64[us, Etc/UTC]
 1   start_station_name  2085 non-null   object                 
 2   rides               2085 non-null   int32                  
dtypes: datetime64[us, Etc/UTC](1), int32(1), object(1)
memory usage: 57.0+ KB
<class 'pandas.core.frame.DataFrame'>
Index: 2085 entries, 0 to 2099
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   pickup_hour         2085 non-null   datetime64[us]
 1   start_station_name  2085 non-null   object        
 2   rides               2085 non-null   int32         
dtypes: datetime64[us](1), int32(1), object(1)
memory usage: 57.0+ KB

ts_data (after filtering):
Shape: (2085, 3)



To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 04:52:24,413 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Downloading model artifact (0 dirs, 1 files)... DONE

Unnamed: 0,start_station_name,predicted_rides,pickup_hour
0,11 Ave & W 41 St,2,2025-05-10 07:00:00
1,W 21 St & 6 Ave,3,2025-05-10 07:00:00
2,W 31 St & 7 Ave,10,2025-05-10 07:00:00


Uploading Dataframe: 100.00% |██████████| Rows 3/3 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: predictions_model_baseline_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1225907/jobs/named/predictions_model_baseline_1_offline_fg_materialization/executions


(Job('predictions_model_baseline_1_offline_fg_materialization', 'SPARK'), None)

Saved predictions to feature group: predictions_model_baseline

Processing model: lightgbm_28days_lags
2025-05-10 04:52:35,476 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-10 04:52:35,487 INFO: Initializing external client
2025-05-10 04:52:35,487 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 04:52:36,139 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Downloading model artifact (0 dirs, 1 files)... DONE

Unnamed: 0,start_station_name,predicted_rides,pickup_hour
0,11 Ave & W 41 St,2.0,2025-05-10 07:00:00
1,W 21 St & 6 Ave,1.0,2025-05-10 07:00:00
2,W 31 St & 7 Ave,5.0,2025-05-10 07:00:00


Uploading Dataframe: 100.00% |██████████| Rows 3/3 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: predictions_model_lgbm_28days_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1225907/jobs/named/predictions_model_lgbm_28days_1_offline_fg_materialization/executions


(Job('predictions_model_lgbm_28days_1_offline_fg_materialization', 'SPARK'),
 None)

Saved predictions to feature group: predictions_model_lgbm_28days

Processing model: lightgbm_top10_features
2025-05-10 04:52:47,802 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-10 04:52:47,808 INFO: Initializing external client
2025-05-10 04:52:47,809 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 04:52:48,559 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Downloading model artifact (0 dirs, 1 files)... DONE

Unnamed: 0,start_station_name,predicted_rides,pickup_hour
0,11 Ave & W 41 St,6.0,2025-05-10 07:00:00
1,W 21 St & 6 Ave,3.0,2025-05-10 07:00:00
2,W 31 St & 7 Ave,9.0,2025-05-10 07:00:00


Uploading Dataframe: 100.00% |██████████| Rows 3/3 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: predictions_model_lgbm_top10_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1225907/jobs/named/predictions_model_lgbm_top10_1_offline_fg_materialization/executions


(Job('predictions_model_lgbm_top10_1_offline_fg_materialization', 'SPARK'),
 None)

Saved predictions to feature group: predictions_model_lgbm_top10

Processing model: gradient_boosting_temporal_features
2025-05-10 04:53:02,724 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-10 04:53:02,730 INFO: Initializing external client
2025-05-10 04:53:02,731 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 04:53:03,354 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Downloading model artifact (0 dirs, 1 files)... DONE

Unnamed: 0,start_station_name,predicted_rides,pickup_hour
0,11 Ave & W 41 St,7.0,2025-05-10 07:00:00
1,W 21 St & 6 Ave,4.0,2025-05-10 07:00:00
2,W 31 St & 7 Ave,7.0,2025-05-10 07:00:00


Uploading Dataframe: 100.00% |██████████| Rows 3/3 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: predictions_model_gbt_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1225907/jobs/named/predictions_model_gbt_1_offline_fg_materialization/executions


(Job('predictions_model_gbt_1_offline_fg_materialization', 'SPARK'), None)

Saved predictions to feature group: predictions_model_gbt

Processing model: lightgbm_enhanced_lags_cyclic_temporal_interactions
2025-05-10 04:53:14,396 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-10 04:53:14,403 INFO: Initializing external client
2025-05-10 04:53:14,404 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 04:53:15,077 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Downloading model artifact (0 dirs, 1 files)... DONE

Unnamed: 0,start_station_name,predicted_rides,pickup_hour
0,11 Ave & W 41 St,8.0,2025-05-10 07:00:00
1,W 21 St & 6 Ave,5.0,2025-05-10 07:00:00
2,W 31 St & 7 Ave,11.0,2025-05-10 07:00:00


Uploading Dataframe: 100.00% |██████████| Rows 3/3 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: predictions_model_lgbm_enhanced_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1225907/jobs/named/predictions_model_lgbm_enhanced_1_offline_fg_materialization/executions


(Job('predictions_model_lgbm_enhanced_1_offline_fg_materialization', 'SPARK'),
 None)

Saved predictions to feature group: predictions_model_lgbm_enhanced


In [26]:
%load_ext autoreload
%autoreload 2
import sys
import os
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display
from sklearn.feature_selection import SelectKBest, f_regression
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Add the parent directory to the Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

import src.config as config
from src.inference import get_feature_store
from src.data_utils import transform_ts_data_info_features
from src.inference import load_batch_of_features_from_store
from src.inference import load_model_from_registry
from src.inference import get_model_predictions
from src.inference import BaselineModelPreviousHour

# Connect to Hopsworks and get the feature store
feature_store = get_feature_store()

# Get the current datetime
current_date = pd.Timestamp.now(tz='Etc/UTC')
current_hour = current_date.floor('h')
print(f"Current date: {current_date}")
print(f"Current hour: {current_hour}")

# Define the range for the past 24 hours
hours_to_predict = 24  # 24 hours
window_size = 24 * 28  # 672 hours (28 days)
start_hour = current_hour - timedelta(hours=(hours_to_predict - 1))  # Go back 23 hours from current_hour
end_hour = current_hour  # Include the current hour

# Adjust start_hour to ensure we have enough historical data for the window_size
earliest_feature_hour = start_hour + timedelta(hours=window_size)
if earliest_feature_hour > current_hour:
    start_hour = current_hour - timedelta(hours=window_size + (hours_to_predict - 1))
print(f"Adjusted start_hour: {start_hour}")
print(f"End hour (before adjustment): {end_hour}")

# Fetch data from citi_bike_hourly_feature_group
# We need data for the past 24 hours plus the window size (28 days) to generate lagged features
fetch_data_to = end_hour
fetch_data_from = start_hour - timedelta(hours=window_size)  # Ensure enough history for lagging
print(f"Fetching data from {fetch_data_from} to {fetch_data_to}")

fg = feature_store.get_feature_group(
    name="citi_bike_hourly_feature_group",
    version=1
)

query = fg.select_all()
query = query.filter((fg.pickup_hour >= fetch_data_from) & (fg.pickup_hour <= fetch_data_to))
ts_data = query.read()
ts_data["pickup_hour"] = pd.to_datetime(ts_data["pickup_hour"]).dt.tz_localize(None)
ts_data.sort_values(["start_station_name", "pickup_hour"]).reset_index(drop=True)
ts_data.info()

# Debugging: Print ts_data to inspect pickup_hour range
print("\nts_data (after filtering):")
print("Shape:", ts_data.shape)
print("pickup_hour range:", ts_data["pickup_hour"].min(), "to", ts_data["pickup_hour"].max())
print(ts_data)

# Adjust end_hour to match the latest available data in ts_data
end_hour = ts_data["pickup_hour"].max()
print(f"Adjusted end_hour to match ts_data: {end_hour}")

# Transform the data into features using the updated transform_ts_data_info_features
# Use step_size=1 to generate features for every hour
features = transform_ts_data_info_features(ts_data, window_size=window_size, step_size=1)

# Sort features by pickup_hour in descending order to ensure the most recent hour is selected
features.sort_values("pickup_hour", ascending=False, inplace=True)

# Debugging: Print features to inspect pickup_hour range
print("\nfeatures:")
print("Shape:", features.shape)
print("pickup_hour range:", features["pickup_hour"].min(), "to", features["pickup_hour"].max())
print(features[["start_station_name", "pickup_hour", "rides_t-1"]])

# Define the model and corresponding feature group name
model_name = "baseline_previous_hour"
prediction_feature_group = "predictions_model_baseline"

# Define the expected data type for this model's predicted_rides
# baseline_previous_hour expects bigint (int64)
model_predicted_rides_type = "int64"

# Collect predictions for all hours
all_predictions = []

# Loop over the hours in the target range to generate predictions
for hour_offset in range(hours_to_predict):
    target_hour = (start_hour + timedelta(hours=hour_offset)).tz_localize(None)  # Make timezone-naive
    if target_hour > end_hour:
        print(f"Target hour {target_hour} exceeds available data ({end_hour}). Skipping.")
        continue
    print(f"\nGenerating predictions for hour: {target_hour}")
    
    # Filter features for the target hour
    features_for_hour = features[features["pickup_hour"] == target_hour]
    if features_for_hour.empty:
        print(f"No features found for {target_hour}. Skipping.")
        continue

    print(f"Processing model: {model_name} for {target_hour}")
    
    # Load the model
    model = load_model_from_registry(model_name=model_name)
    
    # Make predictions for this hour
    predictions = get_model_predictions(model, features_for_hour, model_name=model_name)
    predictions["pickup_hour"] = target_hour
    
    # Debugging: Inspect features_for_hour for NaN or inf
    print(f"Features for {model_name} at {target_hour}:")
    print(features_for_hour)
    print("Any NaN in features_for_hour:", features_for_hour.isna().any().any())
    print("Any inf in features_for_hour:", np.isinf(features_for_hour.select_dtypes(include=[np.number])).any().any())
    
    # Debugging: Inspect raw predictions for NaN or inf
    raw_predictions = model.predict(features_for_hour.drop(columns=["start_station_name", "pickup_hour"]))
    print(f"Raw predictions for {model_name} at {target_hour}:")
    print(raw_predictions)
    print("Any NaN in raw predictions:", np.isnan(raw_predictions).any())
    print("Any inf in raw predictions:", np.isinf(raw_predictions).any())
    
    # Replace NaN or inf values in predicted_rides with 0
    predictions["predicted_rides"] = predictions["predicted_rides"].replace([np.inf, -np.inf, np.nan], 0)
    
    # Convert predicted_rides to int64 to match feature group schema (bigint)
    predictions["predicted_rides"] = predictions["predicted_rides"].astype("int64")
    
    # Debugging: Print predictions to inspect data types
    print(f"Predictions for {model_name} at {target_hour}:")
    print(predictions)
    print("Data types in predictions:")
    print(predictions.dtypes)
    
    # Append predictions to the list
    all_predictions.append(predictions)

# Combine all predictions into a single DataFrame
if all_predictions:
    all_predictions_df = pd.concat(all_predictions, ignore_index=True)
else:
    raise ValueError("No predictions were generated for any hour.")

# Debugging: Print combined predictions
print("\nAll predictions combined:")
print(all_predictions_df)
print("Data types in all_predictions_df:")
print(all_predictions_df.dtypes)

# Create or retrieve the feature group for this model's predictions
# Use version=1 since this matches the original working schema
feature_group = feature_store.get_or_create_feature_group(
    name=prediction_feature_group,
    version=1,
    description=f"Predictions from {model_name} model",
    primary_key=["start_station_name", "pickup_hour"],
    event_time="pickup_hour",
)

# Insert all predictions into the feature group in a single batch
feature_group.insert(all_predictions_df, write_options={"wait_for_job": False})
print(f"Saved all predictions for {model_name} to feature group: {prediction_feature_group} (version 1)")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
2025-05-10 06:10:33,814 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-10 06:10:33,819 INFO: Initializing external client
2025-05-10 06:10:33,821 INFO: Base URL: https://c.app.hopsworks.ai:443






2025-05-10 06:10:34,518 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Current date: 2025-05-10 10:10:35.140489+00:00
Current hour: 2025-05-10 10:00:00+00:00
Adjusted start_hour: 2025-04-11 11:00:00+00:00
End hour (before adjustment): 2025-05-10 10:00:00+00:00
Fetching data from 2025-03-14 11:00:00+00:00 to 2025-05-10 10:00:00+00:00
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.56s) 


Unnamed: 0,pickup_hour,start_station_name,rides
0,2025-04-11 08:00:00,11 Ave & W 41 St,16
1,2025-04-11 09:00:00,11 Ave & W 41 St,16
2,2025-04-11 10:00:00,11 Ave & W 41 St,14
3,2025-04-11 11:00:00,11 Ave & W 41 St,7
4,2025-04-11 12:00:00,11 Ave & W 41 St,14
...,...,...,...
3355,2025-05-10 02:00:00,W 31 St & 7 Ave,1
3356,2025-05-10 03:00:00,W 31 St & 7 Ave,0
3357,2025-05-10 04:00:00,W 31 St & 7 Ave,1
3358,2025-05-10 05:00:00,W 31 St & 7 Ave,1


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3360 entries, 0 to 3359
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   pickup_hour         3360 non-null   datetime64[us]
 1   start_station_name  3360 non-null   object        
 2   rides               3360 non-null   int32         
dtypes: datetime64[us](1), int32(1), object(1)
memory usage: 65.8+ KB

ts_data (after filtering):
Shape: (3360, 3)
pickup_hour range: 2025-03-14 07:00:00 to 2025-05-10 06:00:00
             pickup_hour       start_station_name  rides
0    2025-03-14 12:00:00          W 21 St & 6 Ave     23
1    2025-03-27 11:00:00          W 21 St & 6 Ave     25
2    2025-03-16 02:00:00          8 Ave & W 31 St      2
3    2025-03-19 03:00:00  University Pl & E 14 St      0
4    2025-03-22 16:00:00          W 21 St & 6 Ave     28
...                  ...                      ...    ...
3355 2025-04-27 06:00:00         11 Av

ValueError: No predictions were generated for any hour.

In [25]:
%load_ext autoreload
%autoreload 2
import sys
import os
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display
from sklearn.feature_selection import SelectKBest, f_regression
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Add the parent directory to the Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

import src.config as config
from src.inference import get_feature_store
from src.data_utils import transform_ts_data_info_features
from src.inference import load_batch_of_features_from_store
from src.inference import load_model_from_registry
from src.inference import get_model_predictions
from src.inference import BaselineModelPreviousHour

# Connect to Hopsworks and get the feature store
feature_store = get_feature_store()

# Get the current datetime
current_date = pd.Timestamp.now(tz='Etc/UTC')
current_hour = current_date.floor('h')
print(f"Current date: {current_date}")
print(f"Current hour: {current_hour}")

# Define the range for the past 14 days (336 hours, including the current hour)
hours_to_predict = 14 * 24  # 336 hours
window_size = 24 * 28  # 672 hours (28 days)
start_hour = current_hour - timedelta(hours=(hours_to_predict - 1))  # Go back 335 hours from current_hour
end_hour = current_hour  # Include the current hour

# Adjust start_hour to ensure we have enough historical data for the window_size
earliest_feature_hour = start_hour + timedelta(hours=window_size)
if earliest_feature_hour > current_hour:
    start_hour = current_hour - timedelta(hours=window_size + (hours_to_predict - 1))
print(f"Adjusted start_hour: {start_hour}")
print(f"End hour (before adjustment): {end_hour}")

# Fetch data from citi_bike_hourly_feature_group
# We need data for the past 14 days plus the window size (28 days) to generate lagged features
fetch_data_to = end_hour
fetch_data_from = start_hour - timedelta(hours=window_size)  # Ensure enough history for lagging
print(f"Fetching data from {fetch_data_from} to {fetch_data_to}")

fg = feature_store.get_feature_group(
    name="citi_bike_hourly_feature_group",
    version=1
)

query = fg.select_all()
query = query.filter((fg.pickup_hour >= fetch_data_from) & (fg.pickup_hour <= fetch_data_to))
ts_data = query.read()
ts_data["pickup_hour"] = pd.to_datetime(ts_data["pickup_hour"]).dt.tz_localize(None)
ts_data.sort_values(["start_station_name", "pickup_hour"]).reset_index(drop=True)
ts_data.info()

# Debugging: Print ts_data to inspect pickup_hour range
print("\nts_data (after filtering):")
print("Shape:", ts_data.shape)
print("pickup_hour range:", ts_data["pickup_hour"].min(), "to", ts_data["pickup_hour"].max())
print(ts_data)

# Adjust end_hour to match the latest available data in ts_data
end_hour = ts_data["pickup_hour"].max()
print(f"Adjusted end_hour to match ts_data: {end_hour}")

# Transform the data into features using the updated transform_ts_data_info_features
# Use step_size=1 to generate features for every hour
features = transform_ts_data_info_features(ts_data, window_size=window_size, step_size=1)

# Sort features by pickup_hour in descending order to ensure the most recent hour is selected
features.sort_values("pickup_hour", ascending=False, inplace=True)

# Debugging: Print features to inspect pickup_hour range
print("\nfeatures:")
print("Shape:", features.shape)
print("pickup_hour range:", features["pickup_hour"].min(), "to", features["pickup_hour"].max())
print(features[["start_station_name", "pickup_hour", "rides_t-1"]])

# Define the model and corresponding feature group name
model_name = "baseline_previous_hour"
prediction_feature_group = "predictions_model_baseline"

# Define the expected data type for this model's predicted_rides
# baseline_previous_hour expects bigint (int64)
model_predicted_rides_type = "int64"

# Collect predictions for all hours
all_predictions = []

# Loop over the hours in the target range to generate predictions
for hour_offset in range(hours_to_predict):
    target_hour = (start_hour + timedelta(hours=hour_offset)).tz_localize(None)  # Make timezone-naive
    if target_hour > end_hour:
        print(f"Target hour {target_hour} exceeds available data ({end_hour}). Skipping.")
        continue
    print(f"\nGenerating predictions for hour: {target_hour}")
    
    # Filter features for the target hour
    features_for_hour = features[features["pickup_hour"] == target_hour]
    if features_for_hour.empty:
        print(f"No features found for {target_hour}. Skipping.")
        continue

    print(f"Processing model: {model_name} for {target_hour}")
    
    # Load the model
    model = load_model_from_registry(model_name=model_name)
    
    # Make predictions for this hour
    predictions = get_model_predictions(model, features_for_hour, model_name=model_name)
    predictions["pickup_hour"] = target_hour
    
    # Debugging: Inspect features_for_hour for NaN or inf
    print(f"Features for {model_name} at {target_hour}:")
    print(features_for_hour)
    print("Any NaN in features_for_hour:", features_for_hour.isna().any().any())
    print("Any inf in features_for_hour:", np.isinf(features_for_hour.select_dtypes(include=[np.number])).any().any())
    
    # Debugging: Inspect raw predictions for NaN or inf
    raw_predictions = model.predict(features_for_hour.drop(columns=["start_station_name", "pickup_hour"]))
    print(f"Raw predictions for {model_name} at {target_hour}:")
    print(raw_predictions)
    print("Any NaN in raw predictions:", np.isnan(raw_predictions).any())
    print("Any inf in raw predictions:", np.isinf(raw_predictions).any())
    
    # Replace NaN or inf values in predicted_rides with 0
    predictions["predicted_rides"] = predictions["predicted_rides"].replace([np.inf, -np.inf, np.nan], 0)
    
    # Convert predicted_rides to int64 to match feature group schema (bigint)
    predictions["predicted_rides"] = predictions["predicted_rides"].astype("int64")
    
    # Debugging: Print predictions to inspect data types
    print(f"Predictions for {model_name} at {target_hour}:")
    print(predictions)
    print("Data types in predictions:")
    print(predictions.dtypes)
    
    # Append predictions to the list
    all_predictions.append(predictions)

# Combine all predictions into a single DataFrame
if all_predictions:
    all_predictions_df = pd.concat(all_predictions, ignore_index=True)
else:
    raise ValueError("No predictions were generated for any hour.")

# Debugging: Print combined predictions
print("\nAll predictions combined:")
print(all_predictions_df)
print("Data types in all_predictions_df:")
print(all_predictions_df.dtypes)

# Create or retrieve the feature group for this model's predictions
# Use version=1 since this matches the original working schema
feature_group = feature_store.get_or_create_feature_group(
    name=prediction_feature_group,
    version=1,
    description=f"Predictions from {model_name} model",
    primary_key=["start_station_name", "pickup_hour"],
    event_time="pickup_hour",
)

# Insert all predictions into the feature group in a single batch
feature_group.insert(all_predictions_df, write_options={"wait_for_job": False})
print(f"Saved all predictions for {model_name} to feature group: {prediction_feature_group} (version 1)")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
2025-05-10 06:07:06,789 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-05-10 06:07:06,796 INFO: Initializing external client
2025-05-10 06:07:06,796 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 06:07:07,477 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Current date: 2025-05-10 10:07:08.115339+00:00
Current hour: 2025-05-10 10:00:00+00:00
Adjusted start_hour: 2025-03-29 11:00:00+00:00
End hour (before adjustment): 2025-05-10 10:00:00+00:00
Fetching data from 2025-03-01 11:00:00+00:00 to 2025-05-10 10:00:00+00:00
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.56s) 


Unnamed: 0,pickup_hour,start_station_name,rides
0,2025-04-11 08:00:00,11 Ave & W 41 St,16
1,2025-04-11 09:00:00,11 Ave & W 41 St,16
2,2025-04-11 10:00:00,11 Ave & W 41 St,14
3,2025-04-11 11:00:00,11 Ave & W 41 St,7
4,2025-04-11 12:00:00,11 Ave & W 41 St,14
...,...,...,...
4294,2025-05-10 02:00:00,W 31 St & 7 Ave,1
4295,2025-05-10 03:00:00,W 31 St & 7 Ave,0
4296,2025-05-10 04:00:00,W 31 St & 7 Ave,1
4297,2025-05-10 05:00:00,W 31 St & 7 Ave,1


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4299 entries, 0 to 4298
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   pickup_hour         4299 non-null   datetime64[us]
 1   start_station_name  4299 non-null   object        
 2   rides               4299 non-null   int32         
dtypes: datetime64[us](1), int32(1), object(1)
memory usage: 84.1+ KB

ts_data (after filtering):
Shape: (4299, 3)
pickup_hour range: 2025-03-01 06:00:00 to 2025-05-10 06:00:00
             pickup_hour start_station_name  rides
0    2025-03-14 12:00:00    W 21 St & 6 Ave     23
1    2025-03-12 17:00:00    W 21 St & 6 Ave     60
2    2025-03-06 03:00:00    W 21 St & 6 Ave      0
3    2025-03-27 11:00:00    W 21 St & 6 Ave     25
4    2025-03-05 03:00:00    W 21 St & 6 Ave      0
...                  ...                ...    ...
4294 2025-04-27 06:00:00   11 Ave & W 41 St      1
4295 2025-04-17 12:00:00    W



To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 06:07:10,912 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Features for baseline_previous_hour at 2025-03-29 11:00:00:
     rides_t-672  rides_t-671  rides_t-670  rides_t-669  rides_t-668  \
755           38           42           46           42           27   
887           19           23           44           29           28   
821           11            5           14           13           14   

     rides_t-667  rides_t-666  rides_t-665  rides_t-664  rides_t-663  ...  \
755           28           31           18           15           13  ...   
887           27           17           18           13            4  ...   
821           20           19           14            6            2  ...   

     rides_t-8  rides_t-7  rides_t-6  rides_t-5  rides_t-4  rides_t-3  \
755          5          2          1          1          1         13   
887          2          0          0          3          2 



To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 06:07:13,398 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Features for baseline_previous_hour at 2025-03-29 12:00:00:
     rides_t-672  rides_t-671  rides_t-670  rides_t-669  rides_t-668  \
886           23           44           29           28           27   
754           42           46           42           27           28   
820            5           14           13           14           20   

     rides_t-667  rides_t-666  rides_t-665  rides_t-664  rides_t-663  ...  \
886           17           18           13            4            3  ...   
754           31           18           15           13            5  ...   
820           19           14            6            2            4  ...   

     rides_t-8  rides_t-7  rides_t-6  rides_t-5  rides_t-4  rides_t-3  \
886          0          0          3          2          4         10   
754          2          1          1          1         13 



To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 06:07:15,791 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Features for baseline_previous_hour at 2025-03-29 13:00:00:
     rides_t-672  rides_t-671  rides_t-670  rides_t-669  rides_t-668  \
819           14           13           14           20           19   
885           44           29           28           27           17   
753           46           42           27           28           31   

     rides_t-667  rides_t-666  rides_t-665  rides_t-664  rides_t-663  ...  \
819           14            6            2            4            5  ...   
885           18           13            4            3            5  ...   
753           18           15           13            5            4  ...   

     rides_t-8  rides_t-7  rides_t-6  rides_t-5  rides_t-4  rides_t-3  \
819          0          7          0          4          3         14   
885          0          3          2          4         10 



To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 06:07:17,997 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Features for baseline_previous_hour at 2025-03-29 14:00:00:
     rides_t-672  rides_t-671  rides_t-670  rides_t-669  rides_t-668  \
884           29           28           27           17           18   
818           13           14           20           19           14   
752           42           27           28           31           18   

     rides_t-667  rides_t-666  rides_t-665  rides_t-664  rides_t-663  ...  \
884           13            4            3            5            3  ...   
818            6            2            4            5            0  ...   
752           15           13            5            4            2  ...   

     rides_t-8  rides_t-7  rides_t-6  rides_t-5  rides_t-4  rides_t-3  \
884          3          2          4         10         22         37   
818          7          0          4          3         14 



To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 06:07:20,466 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Features for baseline_previous_hour at 2025-03-29 15:00:00:
     rides_t-672  rides_t-671  rides_t-670  rides_t-669  rides_t-668  \
751           27           28           31           18           15   
883           28           27           17           18           13   
817           14           20           19           14            6   

     rides_t-667  rides_t-666  rides_t-665  rides_t-664  rides_t-663  ...  \
751           13            5            4            2            0  ...   
883            4            3            5            3            2  ...   
817            2            4            5            0            3  ...   

     rides_t-8  rides_t-7  rides_t-6  rides_t-5  rides_t-4  rides_t-3  \
751          1         13         27         24         32         37   
883          2          4         10         22         37 



To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 06:07:22,798 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Features for baseline_previous_hour at 2025-03-29 16:00:00:
     rides_t-672  rides_t-671  rides_t-670  rides_t-669  rides_t-668  \
816           20           19           14            6            2   
882           27           17           18           13            4   
750           28           31           18           15           13   

     rides_t-667  rides_t-666  rides_t-665  rides_t-664  rides_t-663  ...  \
816            4            5            0            3            1  ...   
882            3            5            3            2            3  ...   
750            5            4            2            0            0  ...   

     rides_t-8  rides_t-7  rides_t-6  rides_t-5  rides_t-4  rides_t-3  \
816          4          3         14         14         21         26   
882          4         10         22         37         31 



To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 06:07:24,965 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Features for baseline_previous_hour at 2025-03-29 17:00:00:
     rides_t-672  rides_t-671  rides_t-670  rides_t-669  rides_t-668  \
815           19           14            6            2            4   
881           17           18           13            4            3   
749           31           18           15           13            5   

     rides_t-667  rides_t-666  rides_t-665  rides_t-664  rides_t-663  ...  \
815            5            0            3            1            1  ...   
881            5            3            2            3            2  ...   
749            4            2            0            0            0  ...   

     rides_t-8  rides_t-7  rides_t-6  rides_t-5  rides_t-4  rides_t-3  \
815          3         14         14         21         26         22   
881         10         22         37         31         54 



To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 06:07:27,103 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Features for baseline_previous_hour at 2025-03-29 18:00:00:
     rides_t-672  rides_t-671  rides_t-670  rides_t-669  rides_t-668  \
880           18           13            4            3            5   
748           18           15           13            5            4   
814           14            6            2            4            5   

     rides_t-667  rides_t-666  rides_t-665  rides_t-664  rides_t-663  ...  \
880            3            2            3            2            0  ...   
748            2            0            0            0            2  ...   
814            0            3            1            1            1  ...   

     rides_t-8  rides_t-7  rides_t-6  rides_t-5  rides_t-4  rides_t-3  \
880         22         37         31         54         41         44   
748         24         32         37         47         45 



To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 06:07:29,295 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Features for baseline_previous_hour at 2025-03-29 19:00:00:
     rides_t-672  rides_t-671  rides_t-670  rides_t-669  rides_t-668  \
813            6            2            4            5            0   
879           13            4            3            5            3   
747           15           13            5            4            2   

     rides_t-667  rides_t-666  rides_t-665  rides_t-664  rides_t-663  ...  \
813            3            1            1            1            0  ...   
879            2            3            2            0            0  ...   
747            0            0            0            2            1  ...   

     rides_t-8  rides_t-7  rides_t-6  rides_t-5  rides_t-4  rides_t-3  \
813         14         21         26         22         19         21   
879         37         31         54         41         44 



To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 06:07:31,642 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Features for baseline_previous_hour at 2025-03-29 20:00:00:
     rides_t-672  rides_t-671  rides_t-670  rides_t-669  rides_t-668  \
878            4            3            5            3            2   
812            2            4            5            0            3   
746           13            5            4            2            0   

     rides_t-667  rides_t-666  rides_t-665  rides_t-664  rides_t-663  ...  \
878            3            2            0            0            2  ...   
812            1            1            1            0            0  ...   
746            0            0            2            1            1  ...   

     rides_t-8  rides_t-7  rides_t-6  rides_t-5  rides_t-4  rides_t-3  \
878         31         54         41         44         31         38   
812         21         26         22         19         21 



To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 06:07:33,863 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Features for baseline_previous_hour at 2025-03-29 21:00:00:
     rides_t-672  rides_t-671  rides_t-670  rides_t-669  rides_t-668  \
877            3            5            3            2            3   
745            5            4            2            0            0   
811            4            5            0            3            1   

     rides_t-667  rides_t-666  rides_t-665  rides_t-664  rides_t-663  ...  \
877            2            0            0            2            2  ...   
745            0            2            1            1            1  ...   
811            1            1            0            0            1  ...   

     rides_t-8  rides_t-7  rides_t-6  rides_t-5  rides_t-4  rides_t-3  \
877         54         41         44         31         38         20   
745         47         45         30         27         31 



To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 06:07:36,095 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Features for baseline_previous_hour at 2025-03-29 22:00:00:
     rides_t-672  rides_t-671  rides_t-670  rides_t-669  rides_t-668  \
810            5            0            3            1            1   
744            4            2            0            0            0   
876            5            3            2            3            2   

     rides_t-667  rides_t-666  rides_t-665  rides_t-664  rides_t-663  ...  \
810            1            0            0            1            4  ...   
744            2            1            1            1            2  ...   
876            0            0            2            2            2  ...   

     rides_t-8  rides_t-7  rides_t-6  rides_t-5  rides_t-4  rides_t-3  \
810         22         19         21         13         18         13   
744         45         30         27         31         23 



To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 06:07:38,244 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Features for baseline_previous_hour at 2025-03-29 23:00:00:
     rides_t-672  rides_t-671  rides_t-670  rides_t-669  rides_t-668  \
875            3            2            3            2            0   
743            2            0            0            0            2   
809            0            3            1            1            1   

     rides_t-667  rides_t-666  rides_t-665  rides_t-664  rides_t-663  ...  \
875            0            2            2            2            2  ...   
743            1            1            1            2            7  ...   
809            0            0            1            4            3  ...   

     rides_t-8  rides_t-7  rides_t-6  rides_t-5  rides_t-4  rides_t-3  \
875         44         31         38         20         11         11   
743         30         27         31         23         22 



To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 06:07:40,329 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Features for baseline_previous_hour at 2025-03-30 00:00:00:
     rides_t-672  rides_t-671  rides_t-670  rides_t-669  rides_t-668  \
742            0            0            0            2            1   
808            3            1            1            1            0   
874            2            3            2            0            0   

     rides_t-667  rides_t-666  rides_t-665  rides_t-664  rides_t-663  ...  \
742            1            1            2            7           13  ...   
808            0            1            4            3            2  ...   
874            2            2            2            2            2  ...   

     rides_t-8  rides_t-7  rides_t-6  rides_t-5  rides_t-4  rides_t-3  \
742         27         31         23         22         10         10   
808         21         13         18         13          5 



To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


KeyboardInterrupt: 

In [None]:
%load_ext autoreload
%autoreload 2
import sys
import os
from datetime import datetime, timedelta
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
from sklearn.feature_selection import SelectKBest, f_regression
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Add the parent directory to the Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

import src.config as config
from src.inference import get_feature_store
from src.data_utils import transform_ts_data_info_features
from src.inference import load_batch_of_features_from_store
from src.inference import load_model_from_registry
from src.inference import get_model_predictions
from src.inference import BaselineModelPreviousHour

# Connect to Hopsworks and get the feature store
feature_store = get_feature_store()

# Retrieve the feature group with recent data
feature_group = feature_store.get_feature_group(
    name="recent_time_series_hourly_feature_group",
    version=1
)

# Create or retrieve the feature view for recent data
try:
    feature_store.create_feature_view(
        name="citi_bike_recent_hourly_feature_view",
        version=1,
        query=feature_group.select_all(),
    )
    print(f"Feature view 'citi_bike_recent_hourly_feature_view' (version 1) created successfully.")
except Exception as e:
    print(f"Error creating feature view: {e}")

feature_view = feature_store.get_feature_view(
    name="citi_bike_recent_hourly_feature_view",
    version=1
)

# Get the current datetime
current_date = pd.Timestamp.now(tz='Etc/UTC')

# Read time-series data from the feature store
fetch_data_to = current_date - timedelta(hours=1)
fetch_data_from = current_date - timedelta(days=1*29)
print(f"Fetching data from {fetch_data_from} to {fetch_data_to}")

ts_data = feature_view.get_batch_data(
    start_time=(fetch_data_from - timedelta(days=1)),
    end_time=(fetch_data_to + timedelta(days=1)),
)
ts_data = ts_data[ts_data.pickup_hour.between(fetch_data_from, fetch_data_to)]
ts_data.sort_values(["start_station_name", "pickup_hour"]).reset_index(drop=True)
ts_data.info()

ts_data["pickup_hour"] = ts_data["pickup_hour"].dt.tz_localize(None)
ts_data.info()

# Transform the data into features
features = transform_ts_data_info_features(ts_data, window_size=24*28, step_size=23)

# Filter features for the most recent hour (next hour prediction)
features_next_hour = features.groupby("start_station_name").last().reset_index()
next_hour = features_next_hour["pickup_hour"].max()
print(f"Making predictions for the next hour: {next_hour}")

# Define the list of models
models = [
    "baseline_previous_hour",
    "lightgbm_28days_lags",
    "lightgbm_top10_features",
    "gradient_boosting_temporal_features",
    "lightgbm_enhanced_lags_cyclic_temporal_interactions"
]

# Map models to shorter feature group names
prediction_feature_groups = {
    "baseline_previous_hour": "predictions_model_baseline",
    "lightgbm_28days_lags": "predictions_model_lgbm_28days",
    "lightgbm_top10_features": "predictions_model_lgbm_top10",
    "gradient_boosting_temporal_features": "predictions_model_gbt",
    "lightgbm_enhanced_lags_cyclic_temporal_interactions": "predictions_model_lgbm_enhanced"
}

# Make predictions with each model and save to separate feature groups
for model_name in models:
    print(f"\nProcessing model: {model_name}")
    
    # Load the model
    model = load_model_from_registry(model_name=model_name)
    
    # Make predictions for the next hour
    predictions = get_model_predictions(model, features_next_hour, model_name=model_name)
    predictions["pickup_hour"] = current_date.ceil('h')
    predictions
    
    # Create or retrieve the feature group for this model's predictions
    feature_group = feature_store.get_or_create_feature_group(
        name=prediction_feature_groups[model_name],
        version=1,
        description=f"Predictions from {model_name} model",
        primary_key=["start_station_name", "pickup_hour"],
        event_time="pickup_hour",
    )
    
    # Insert the predictions into the feature group
    feature_group.insert(predictions, write_options={"wait_for_job": False})
    print(f"Saved predictions to feature group: {prediction_feature_groups[model_name]}")

In [None]:
%load_ext autoreload
%autoreload 2
import sys
import os
from datetime import datetime, timedelta
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
from sklearn.feature_selection import SelectKBest, f_regression
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Add the parent directory to the Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

import src.config as config
from src.inference import get_feature_store
from src.data_utils import transform_ts_data_info_features
from src.inference import load_batch_of_features_from_store
from src.inference import load_model_from_registry
from src.inference import get_model_predictions
from src.inference import BaselineModelPreviousHour

# Connect to Hopsworks and get the feature store
feature_store = get_feature_store()

# Retrieve the feature group with recent data
feature_group = feature_store.get_feature_group(
    name="recent_time_series_hourly_feature_group",
    version=1
)

# Create or retrieve the feature view for recent data
try:
    feature_store.create_feature_view(
        name="citi_bike_recent_hourly_feature_view",
        version=1,
        query=feature_group.select_all(),
    )
    print(f"Feature view 'citi_bike_recent_hourly_feature_view' (version 1) created successfully.")
except Exception as e:
    print(f"Error creating feature view: {e}")

feature_view = feature_store.get_feature_view(
    name="citi_bike_recent_hourly_feature_view",
    version=1
)

# Get the current datetime
current_date = pd.Timestamp.now(tz='Etc/UTC')

# Read time-series data from the feature store
fetch_data_to = current_date - timedelta(hours=1)
fetch_data_from = current_date - timedelta(days=1*29)
print(f"Fetching data from {fetch_data_from} to {fetch_data_to}")

ts_data = feature_view.get_batch_data(
    start_time=(fetch_data_from - timedelta(days=1)),
    end_time=(fetch_data_to + timedelta(days=1)),
)
ts_data = ts_data[ts_data.pickup_hour.between(fetch_data_from, fetch_data_to)]
ts_data.sort_values(["start_station_name", "pickup_hour"]).reset_index(drop=True)
ts_data.info()

ts_data["pickup_hour"] = ts_data["pickup_hour"].dt.tz_localize(None)
ts_data.info()

# Transform the data into features
features = transform_ts_data_info_features(ts_data, window_size=24*28, step_size=23)

# Filter features for the most recent hour (next hour prediction)
features_next_hour = features.groupby("start_station_name").last().reset_index()
recent_hour = features_next_hour["pickup_hour"].max()
print(f"Making predictions for the most recent hour: {recent_hour}")

# Define the list of models and corresponding feature group names
models = [
    "baseline_previous_hour",
    "lightgbm_28days_lags",
    "lightgbm_top10_features",
    "gradient_boosting_temporal_features",
    "lightgbm_enhanced_lags_cyclic_temporal_interactions"
]

prediction_feature_groups = {
    "baseline_previous_hour": "predictions_model_baseline",
    "lightgbm_28days_lags": "predictions_model_lgbm_28days",
    "lightgbm_top10_features": "predictions_model_lgbm_top10",
    "gradient_boosting_temporal_features": "predictions_model_gbt",
    "lightgbm_enhanced_lags_cyclic_temporal_interactions": "predictions_model_lgbm_enhanced"
}

# Make predictions with each model and save to separate feature groups
for model_name in models:
    print(f"\nProcessing model: {model_name}")
    
    # Load the model
    model = load_model_from_registry(model_name=model_name)
    
    # Make predictions for the most recent hour
    predictions = get_model_predictions(model, features_next_hour, model_name=model_name)
    predictions["pickup_hour"] = recent_hour  # Use the most recent actual hour
    predictions
    
    # Create or retrieve the feature group for this model's predictions
    feature_group = feature_store.get_or_create_feature_group(
        name=prediction_feature_groups[model_name],
        version=1,
        description=f"Predictions from {model_name} model",
        primary_key=["start_station_name", "pickup_hour"],
        event_time="pickup_hour",
    )
    
    # Insert the predictions into the feature group
    feature_group.insert(predictions, write_options={"wait_for_job": False})
    print(f"Saved predictions to feature group: {prediction_feature_groups[model_name]}")

In [None]:
%load_ext autoreload
%autoreload 2
import sys
import os
from datetime import datetime, timedelta
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
from sklearn.feature_selection import SelectKBest, f_regression
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Add the parent directory to the Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

import src.config as config
from src.inference import get_feature_store
from src.data_utils import transform_ts_data_info_features
from src.inference import load_batch_of_features_from_store
from src.inference import load_model_from_registry
from src.inference import get_model_predictions
from src.inference import BaselineModelPreviousHour

# Connect to Hopsworks and get the feature store
feature_store = get_feature_store()

# Retrieve the feature group with recent data
feature_group = feature_store.get_feature_group(
    name="recent_time_series_hourly_feature_group",
    version=1
)

# Create or retrieve the feature view for recent data
try:
    feature_store.create_feature_view(
        name="citi_bike_recent_hourly_feature_view",
        version=1,
        query=feature_group.select_all(),
    )
    print(f"Feature view 'citi_bike_recent_hourly_feature_view' (version 1) created successfully.")
except Exception as e:
    print(f"Error creating feature view: {e}")

feature_view = feature_store.get_feature_view(
    name="citi_bike_recent_hourly_feature_view",
    version=1
)

# Get the current datetime
current_date = pd.Timestamp.now(tz='Etc/UTC')
print(f"Current date: {current_date}")

# Read time-series data from the feature store
fetch_data_to = current_date.floor('h')  # Include the current hour
fetch_data_from = current_date - timedelta(days=1*29)
print(f"Fetching data from {fetch_data_from} to {fetch_data_to}")

ts_data = feature_view.get_batch_data(
    start_time=(fetch_data_from - timedelta(days=1)),
    end_time=(fetch_data_to + timedelta(days=1)),
)
ts_data = ts_data[ts_data.pickup_hour.between(fetch_data_from, fetch_data_to)]
ts_data.sort_values(["start_station_name", "pickup_hour"]).reset_index(drop=True)
ts_data.info()

ts_data["pickup_hour"] = ts_data["pickup_hour"].dt.tz_localize(None)
ts_data.info()

# Debugging: Print ts_data to inspect pickup_hour range
print("\nts_data (after filtering):")
print("Shape:", ts_data.shape)
print("pickup_hour range:", ts_data["pickup_hour"].min(), "to", ts_data["pickup_hour"].max())
print(ts_data)

# Transform the data into features
features = transform_ts_data_info_features(ts_data, window_size=24*28, step_size=23)

# Filter features for the most recent hour
features_next_hour = features.groupby("start_station_name").last().reset_index()
recent_hour = features_next_hour["pickup_hour"].max()
print(f"\nMost recent hour in features_next_hour: {recent_hour}")

# Debugging: Print features_next_hour
print("\nfeatures_next_hour:")
print("Shape:", features_next_hour.shape)
print(features_next_hour)

# Define the list of models and corresponding feature group names
models = [
    "baseline_previous_hour",
    "lightgbm_28days_lags",
    "lightgbm_top10_features",
    "gradient_boosting_temporal_features",
    "lightgbm_enhanced_lags_cyclic_temporal_interactions"
]

prediction_feature_groups = {
    "baseline_previous_hour": "predictions_model_baseline",
    "lightgbm_28days_lags": "predictions_model_lgbm_28days",
    "lightgbm_top10_features": "predictions_model_lgbm_top10",
    "gradient_boosting_temporal_features": "predictions_model_gbt",
    "lightgbm_enhanced_lags_cyclic_temporal_interactions": "predictions_model_lgbm_enhanced"
}

# Make predictions with each model and save to separate feature groups
for model_name in models:
    print(f"\nProcessing model: {model_name}")
    
    # Load the model
    model = load_model_from_registry(model_name=model_name)
    
    # Make predictions for the most recent hour
    predictions = get_model_predictions(model, features_next_hour, model_name=model_name)
    predictions["pickup_hour"] = recent_hour  # Use the most recent actual hour
    predictions
    
    # Create or retrieve the feature group for this model's predictions
    feature_group = feature_store.get_or_create_feature_group(
        name=prediction_feature_groups[model_name],
        version=1,
        description=f"Predictions from {model_name} model",
        primary_key=["start_station_name", "pickup_hour"],
        event_time="pickup_hour",
    )
    
    # Insert the predictions into the feature group
    feature_group.insert(predictions, write_options={"wait_for_job": False})
    print(f"Saved predictions to feature group: {prediction_feature_groups[model_name]}")

In [None]:
%load_ext autoreload
%autoreload 2
import sys
import os
from datetime import datetime, timedelta
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
from sklearn.feature_selection import SelectKBest, f_regression
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Add the parent directory to the Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

import src.config as config
from src.inference import get_feature_store
from src.data_utils import transform_ts_data_info_features
from src.inference import load_batch_of_features_from_store
from src.inference import load_model_from_registry
from src.inference import get_model_predictions
from src.inference import BaselineModelPreviousHour

# Connect to Hopsworks and get the feature store
feature_store = get_feature_store()

# Retrieve the feature group with recent data
feature_group = feature_store.get_feature_group(
    name="recent_time_series_hourly_feature_group",
    version=1
)

# Create or retrieve the feature view for recent data
try:
    feature_store.create_feature_view(
        name="citi_bike_recent_hourly_feature_view",
        version=1,
        query=feature_group.select_all(),
    )
    print(f"Feature view 'citi_bike_recent_hourly_feature_view' (version 1) created successfully.")
except Exception as e:
    print(f"Error creating feature view: {e}")

feature_view = feature_store.get_feature_view(
    name="citi_bike_recent_hourly_feature_view",
    version=1
)

# Get the current datetime
current_date = pd.Timestamp.now(tz='Etc/UTC')
print(f"Current date: {current_date}")

# Read time-series data from the feature store
fetch_data_to = current_date.floor('h')  # Include the current hour
fetch_data_from = current_date - timedelta(days=1*29)
print(f"Fetching data from {fetch_data_from} to {fetch_data_to}")

ts_data = feature_view.get_batch_data(
    start_time=(fetch_data_from - timedelta(days=1)),
    end_time=(fetch_data_to + timedelta(days=1)),
)
ts_data = ts_data[ts_data.pickup_hour.between(fetch_data_from, fetch_data_to)]
ts_data.sort_values(["start_station_name", "pickup_hour"]).reset_index(drop=True)
ts_data.info()

ts_data["pickup_hour"] = ts_data["pickup_hour"].dt.tz_localize(None)
ts_data.info()

# Debugging: Print ts_data to inspect pickup_hour range
print("\nts_data (after filtering):")
print("Shape:", ts_data.shape)
print("pickup_hour range:", ts_data["pickup_hour"].min(), "to", ts_data["pickup_hour"].max())
print(ts_data)

# Transform the data into features
features = transform_ts_data_info_features(ts_data, window_size=24*28, step_size=23)

# Sort features by pickup_hour in descending order to ensure the most recent hour is selected
features.sort_values("pickup_hour", ascending=False, inplace=True)

# Filter features for the most recent hour
features_next_hour = features.groupby("start_station_name").first().reset_index()
recent_hour = features_next_hour["pickup_hour"].max()
print(f"\nMost recent hour in features_next_hour: {recent_hour}")

# Debugging: Print features_next_hour
print("\nfeatures_next_hour:")
print("Shape:", features_next_hour.shape)
print(features_next_hour)

# Define the list of models and corresponding feature group names
models = [
    "baseline_previous_hour",
    "lightgbm_28days_lags",
    "lightgbm_top10_features",
    "gradient_boosting_temporal_features",
    "lightgbm_enhanced_lags_cyclic_temporal_interactions"
]

prediction_feature_groups = {
    "baseline_previous_hour": "predictions_model_baseline",
    "lightgbm_28days_lags": "predictions_model_lgbm_28days",
    "lightgbm_top10_features": "predictions_model_lgbm_top10",
    "gradient_boosting_temporal_features": "predictions_model_gbt",
    "lightgbm_enhanced_lags_cyclic_temporal_interactions": "predictions_model_lgbm_enhanced"
}

# Make predictions with each model and save to separate feature groups
for model_name in models:
    print(f"\nProcessing model: {model_name}")
    
    # Load the model
    model = load_model_from_registry(model_name=model_name)
    
    # Make predictions for the most recent hour
    predictions = get_model_predictions(model, features_next_hour, model_name=model_name)
    predictions["pickup_hour"] = recent_hour  # Use the most recent actual hour
    predictions
    
    # Create or retrieve the feature group for this model's predictions
    feature_group = feature_store.get_or_create_feature_group(
        name=prediction_feature_groups[model_name],
        version=1,
        description=f"Predictions from {model_name} model",
        primary_key=["start_station_name", "pickup_hour"],
        event_time="pickup_hour",
    )
    
    # Insert the predictions into the feature group
    feature_group.insert(predictions, write_options={"wait_for_job": False})
    print(f"Saved predictions to feature group: {prediction_feature_groups[model_name]}")

In [None]:
%load_ext autoreload
%autoreload 2
import sys
import os
from datetime import datetime, timedelta
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
from sklearn.feature_selection import SelectKBest, f_regression
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Add the parent directory to the Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

import src.config as config
from src.inference import get_feature_store
from src.data_utils import transform_ts_data_info_features
from src.inference import load_batch_of_features_from_store
from src.inference import load_model_from_registry
from src.inference import get_model_predictions
from src.inference import BaselineModelPreviousHour

# Connect to Hopsworks and get the feature store
feature_store = get_feature_store()

# Retrieve the feature group with recent data
feature_group = feature_store.get_feature_group(
    name="recent_time_series_hourly_feature_group",
    version=1
)

# Create or retrieve the feature view for recent data
try:
    feature_store.create_feature_view(
        name="citi_bike_recent_hourly_feature_view",
        version=1,
        query=feature_group.select_all(),
    )
    print(f"Feature view 'citi_bike_recent_hourly_feature_view' (version 1) created successfully.")
except Exception as e:
    print(f"Error creating feature view: {e}")

feature_view = feature_store.get_feature_view(
    name="citi_bike_recent_hourly_feature_view",
    version=1
)

# Get the current datetime
current_date = pd.Timestamp.now(tz='Etc/UTC')
print(f"Current date: {current_date}")

# Read time-series data from the feature store
fetch_data_to = current_date.floor('h')  # Include the current hour
fetch_data_from = current_date - timedelta(days=1*29)
print(f"Fetching data from {fetch_data_from} to {fetch_data_to}")

ts_data = feature_view.get_batch_data(
    start_time=(fetch_data_from - timedelta(days=1)),
    end_time=(fetch_data_to + timedelta(days=1)),
)
ts_data = ts_data[ts_data.pickup_hour.between(fetch_data_from, fetch_data_to)]
ts_data.sort_values(["start_station_name", "pickup_hour"]).reset_index(drop=True)
ts_data.info()

ts_data["pickup_hour"] = ts_data["pickup_hour"].dt.tz_localize(None)
ts_data.info()

# Debugging: Print ts_data to inspect pickup_hour range
print("\nts_data (after filtering):")
print("Shape:", ts_data.shape)
print("pickup_hour range:", ts_data["pickup_hour"].min(), "to", ts_data["pickup_hour"].max())
print(ts_data)

# Transform the data into features
features = transform_ts_data_info_features(ts_data, window_size=24*28, step_size=23)

# Debugging: Inspect features before sorting
print("\nfeatures (before sorting):")
print("Shape:", features.shape)
print("pickup_hour range:", features["pickup_hour"].min(), "to", features["pickup_hour"].max())
print(features[["start_station_name", "pickup_hour", "rides_t-1"]])

# Sort features by pickup_hour in descending order to ensure the most recent hour is selected
features.sort_values("pickup_hour", ascending=False, inplace=True)

# Debugging: Inspect features after sorting
print("\nfeatures (after sorting):")
print("Shape:", features.shape)
print("pickup_hour range:", features["pickup_hour"].min(), "to", features["pickup_hour"].max())
print(features[["start_station_name", "pickup_hour", "rides_t-1"]])

# Filter features for the most recent hour
features_next_hour = features.groupby("start_station_name").first().reset_index()
recent_hour = features_next_hour["pickup_hour"].max()
print(f"\nMost recent hour in features_next_hour: {recent_hour}")

# Debugging: Print features_next_hour
print("\nfeatures_next_hour:")
print("Shape:", features_next_hour.shape)
print(features_next_hour)

# Define the list of models and corresponding feature group names
models = [
    "baseline_previous_hour",
    "lightgbm_28days_lags",
    "lightgbm_top10_features",
    "gradient_boosting_temporal_features",
    "lightgbm_enhanced_lags_cyclic_temporal_interactions"
]

prediction_feature_groups = {
    "baseline_previous_hour": "predictions_model_baseline",
    "lightgbm_28days_lags": "predictions_model_lgbm_28days",
    "lightgbm_top10_features": "predictions_model_lgbm_top10",
    "gradient_boosting_temporal_features": "predictions_model_gbt",
    "lightgbm_enhanced_lags_cyclic_temporal_interactions": "predictions_model_lgbm_enhanced"
}

# Make predictions with each model and save to separate feature groups
for model_name in models:
    print(f"\nProcessing model: {model_name}")
    
    # Load the model
    model = load_model_from_registry(model_name=model_name)
    
    # Make predictions for the most recent hour
    predictions = get_model_predictions(model, features_next_hour, model_name=model_name)
    predictions["pickup_hour"] = recent_hour  # Use the most recent actual hour
    predictions
    
    # Create or retrieve the feature group for this model's predictions
    feature_group = feature_store.get_or_create_feature_group(
        name=prediction_feature_groups[model_name],
        version=1,
        description=f"Predictions from {model_name} model",
        primary_key=["start_station_name", "pickup_hour"],
        event_time="pickup_hour",
    )
    
    # Insert the predictions into the feature group
    feature_group.insert(predictions, write_options={"wait_for_job": False})
    print(f"Saved predictions to feature group: {prediction_feature_groups[model_name]}")

In [None]:
%load_ext autoreload
%autoreload 2
import sys
import os
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Add the parent directory to the Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

import src.config as config
from src.inference import get_feature_store
from src.inference import load_model_from_registry
from src.inference import get_model_predictions
from src.inference import BaselineModelPreviousHour

# Define the corrected transform_ts_data_info_features function inline
def transform_ts_data_info_features(
    df, feature_col="rides", window_size=12, step_size=1
):
    if df.empty:
        raise ValueError("Input DataFrame is empty. Cannot transform into features.")

    station_names = df["start_station_name"].unique()
    transformed_data = []

    for station_name in station_names:
        try:
            station_data = df[df["start_station_name"] == station_name].reset_index(drop=True)
            # Explicitly sort by pickup_hour to ensure chronological order
            station_data.sort_values("pickup_hour", inplace=True)

            values = station_data[feature_col].values
            times = station_data["pickup_hour"].values

            if len(values) <= window_size:
                raise ValueError("Not enough data to create even one window.")

            rows = []
            # Adjust the loop to include the most recent window
            # Start from the last possible index and work backwards
            end_idx = len(values) - window_size - 1
            if end_idx < 0:
                end_idx = 0  # Ensure at least one window if possible
            for i in range(end_idx, -1, -step_size):
                features = values[i : i + window_size]
                target_time = times[i + window_size]
                row = np.append(features, [station_name, target_time])
                rows.append(row)

            feature_columns = [f"{feature_col}_t-{window_size - i}" for i in range(window_size)]
            all_columns = feature_columns + ["start_station_name", "pickup_hour"]
            transformed_df = pd.DataFrame(rows, columns=all_columns)
            transformed_data.append(transformed_df)

        except ValueError as e:
            print(f"Skipping station_name {station_name}: {str(e)}")

    if not transformed_data:
        raise ValueError("No data could be transformed.")

    final_df = pd.concat(transformed_data, ignore_index=True)
    return final_df

# Connect to Hopsworks and get the feature store
feature_store = get_feature_store()

# Retrieve the feature group with recent data
feature_group = feature_store.get_feature_group(
    name="recent_time_series_hourly_feature_group",
    version=1
)

# Create or retrieve the feature view for recent data
try:
    feature_store.create_feature_view(
        name="citi_bike_recent_hourly_feature_view",
        version=1,
        query=feature_group.select_all(),
    )
    print(f"Feature view 'citi_bike_recent_hourly_feature_view' (version 1) created successfully.")
except Exception as e:
    print(f"Error creating feature view: {e}")

feature_view = feature_store.get_feature_view(
    name="citi_bike_recent_hourly_feature_view",
    version=1
)

# Get the current datetime
current_date = pd.Timestamp.now(tz='Etc/UTC')
print(f"Current date: {current_date}")

# Read time-series data from the feature store
fetch_data_to = current_date.floor('h')  # Include the current hour
fetch_data_from = current_date - timedelta(days=1*29)
print(f"Fetching data from {fetch_data_from} to {fetch_data_to}")

ts_data = feature_view.get_batch_data(
    start_time=(fetch_data_from - timedelta(days=1)),
    end_time=(fetch_data_to + timedelta(days=1)),
)
ts_data = ts_data[ts_data.pickup_hour.between(fetch_data_from, fetch_data_to)]
ts_data.sort_values(["start_station_name", "pickup_hour"]).reset_index(drop=True)
ts_data.info()

ts_data["pickup_hour"] = ts_data["pickup_hour"].dt.tz_localize(None)
ts_data.info()

# Debugging: Print ts_data to inspect pickup_hour range
print("\nts_data (after filtering):")
print("Shape:", ts_data.shape)
print("pickup_hour range:", ts_data["pickup_hour"].min(), "to", ts_data["pickup_hour"].max())
print(ts_data)

# Transform the data into features using the inline function
features = transform_ts_data_info_features(ts_data, window_size=24*28, step_size=23)

# Sort features by pickup_hour in descending order to ensure the most recent hour is selected
features.sort_values("pickup_hour", ascending=False, inplace=True)

# Debugging: Inspect features after sorting
print("\nfeatures (after sorting):")
print("Shape:", features.shape)
print("pickup_hour range:", features["pickup_hour"].min(), "to", features["pickup_hour"].max())
print(features[["start_station_name", "pickup_hour", "rides_t-1"]])

# Filter features for the most recent hour
features_next_hour = features.groupby("start_station_name").first().reset_index()
recent_hour = features_next_hour["pickup_hour"].max()
print(f"\nMost recent hour in features_next_hour: {recent_hour}")

# Debugging: Print features_next_hour
print("\nfeatures_next_hour:")
print("Shape:", features_next_hour.shape)
print(features_next_hour)

# Define the list of models and corresponding feature group names
models = [
    "baseline_previous_hour",
    "lightgbm_28days_lags",
    "lightgbm_top10_features",
    "gradient_boosting_temporal_features",
    "lightgbm_enhanced_lags_cyclic_temporal_interactions"
]

prediction_feature_groups = {
    "baseline_previous_hour": "predictions_model_baseline",
    "lightgbm_28days_lags": "predictions_model_lgbm_28days",
    "lightgbm_top10_features": "predictions_model_lgbm_top10",
    "gradient_boosting_temporal_features": "predictions_model_gbt",
    "lightgbm_enhanced_lags_cyclic_temporal_interactions": "predictions_model_lgbm_enhanced"
}

# Make predictions with each model and save to separate feature groups
for model_name in models:
    print(f"\nProcessing model: {model_name}")
    
    # Load the model
    model = load_model_from_registry(model_name=model_name)
    
    # Make predictions for the most recent hour
    predictions = get_model_predictions(model, features_next_hour, model_name=model_name)
    predictions["pickup_hour"] = recent_hour  # Use the most recent actual hour
    predictions
    
    # Create or retrieve the feature group for this model's predictions
    feature_group = feature_store.get_or_create_feature_group(
        name=prediction_feature_groups[model_name],
        version=1,
        description=f"Predictions from {model_name} model",
        primary_key=["start_station_name", "pickup_hour"],
        event_time="pickup_hour",
    )
    
    # Insert the predictions into the feature group
    feature_group.insert(predictions, write_options={"wait_for_job": False})
    print(f"Saved predictions to feature group: {prediction_feature_groups[model_name]}")

In [None]:
# Get the current datetime64[us, Etc/UTC]  
current_date = pd.Timestamp.now(tz='Etc/UTC')
feature_store = get_feature_store()

# read time-series data from the feature store
fetch_data_to = current_date - timedelta(hours=1)
fetch_data_from = current_date - timedelta(days=1*29)
print(f"Fetching data from {fetch_data_from} to {fetch_data_to}")
feature_view = feature_store.get_feature_view(
    name=config.FEATURE_VIEW_NAME, version=config.FEATURE_VIEW_VERSION
)

ts_data = feature_view.get_batch_data(
    start_time=(fetch_data_from - timedelta(days=1)),
    end_time=(fetch_data_to + timedelta(days=1)),
)
ts_data = ts_data[ts_data.pickup_hour.between(fetch_data_from, fetch_data_to)]
ts_data.sort_values(["pickup_location_id", "pickup_hour"]).reset_index(drop=True)
ts_data["pickup_hour"] = ts_data["pickup_hour"].dt.tz_localize(None)

features = transform_ts_data_info_features(ts_data, window_size=24*28, step_size=23)

In [None]:
model = load_model_from_registry()
predictions = get_model_predictions(model, features)
predictions["pickup_hour"] = current_date.ceil('h')
predictions

In [None]:
feature_group = get_feature_store().get_or_create_feature_group(
    name=config.FEATURE_GROUP_MODEL_PREDICTION,
    version=1,
    description="Predictions from LGBM Model",
    primary_key=["pickup_location_id", "pickup_hour"],
    event_time="pickup_hour",
)
feature_group.insert(predictions, write_options={"wait_for_job": False})