In [None]:
%load_ext autoreload
%autoreload 2
import sys
import os
from datetime import datetime, timedelta
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

import src.config as config
from src.inference import get_feature_store
from src.data_utils import transform_ts_data_info_features
from src.inference import load_batch_of_features_from_store
from src.inference import load_model_from_registry
from src.inference import get_model_predictions

In [1]:
%load_ext autoreload
%autoreload 2
import sys
import os
from datetime import datetime, timedelta
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
from sklearn.feature_selection import SelectKBest, f_regression
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Add the parent directory to the Python path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

import src.config as config
from src.inference import get_feature_store
from src.data_utils import transform_ts_data_info_features
from src.inference import load_batch_of_features_from_store
from src.inference import load_model_from_registry
from src.inference import get_model_predictions
from src.inference import BaselineModelPreviousHour

# Connect to Hopsworks and get the feature store
feature_store = get_feature_store()

# Retrieve the feature group with recent data
feature_group = feature_store.get_feature_group(
    name="recent_time_series_hourly_feature_group",
    version=1
)

# Create or retrieve the feature view for recent data
try:
    feature_store.create_feature_view(
        name="citi_bike_recent_hourly_feature_view",
        version=1,
        query=feature_group.select_all(),
    )
    print(f"Feature view 'citi_bike_recent_hourly_feature_view' (version 1) created successfully.")
except Exception as e:
    print(f"Error creating feature view: {e}")

feature_view = feature_store.get_feature_view(
    name="citi_bike_recent_hourly_feature_view",
    version=1
)

# Get the current datetime
current_date = pd.Timestamp.now(tz='Etc/UTC')

# Read time-series data from the feature store
fetch_data_to = current_date - timedelta(hours=1)
fetch_data_from = current_date - timedelta(days=1*29)
print(f"Fetching data from {fetch_data_from} to {fetch_data_to}")

ts_data = feature_view.get_batch_data(
    start_time=(fetch_data_from - timedelta(days=1)),
    end_time=(fetch_data_to + timedelta(days=1)),
)
ts_data = ts_data[ts_data.pickup_hour.between(fetch_data_from, fetch_data_to)]
ts_data.sort_values(["start_station_name", "pickup_hour"]).reset_index(drop=True)
ts_data.info()

ts_data["pickup_hour"] = ts_data["pickup_hour"].dt.tz_localize(None)
ts_data.info()

# Transform the data into features
features = transform_ts_data_info_features(ts_data, window_size=24*28, step_size=23)
features.head()

# Load features using the updated load_batch_of_features_from_store
features = load_batch_of_features_from_store(current_date)
current_date
features.head()

# Filter features for the most recent hour (next hour prediction)
# Since features are sorted by start_station_name and pickup_hour, the last 3 rows (for 3 stations) correspond to the most recent hour
features_next_hour = features.groupby("start_station_name").last().reset_index()
next_hour = features_next_hour["pickup_hour"].max()
print(f"Making predictions for the next hour: {next_hour}")

# Define the list of model names
model_names = [
    "baseline_previous_hour",
    "lightgbm_28days_lags",
    "lightgbm_top10_features",
    "gradient_boosting_temporal_features",
    "lightgbm_enhanced_lags_cyclic_temporal_interactions"
]

# Create a dropdown widget for model selection
model_dropdown = widgets.Dropdown(
    options=model_names,
    value=model_names[0],  # Default to the first model
    description='Select Model:',
    style={'description_width': 'initial'}
)

# Create an output widget to display predictions
output = widgets.Output()

# Define a function to handle model selection and prediction
def on_model_change(change):
    with output:
        output.clear_output()
        selected_model_name = change['new']
        print(f"Selected model: {selected_model_name}")
        
        # Load the selected model
        model = load_model_from_registry(model_name=selected_model_name)
        
        # Make predictions for the next hour
        predictions = get_model_predictions(model, features_next_hour, model_name=selected_model_name)
        predictions
        
        # Display the predictions sorted by predicted rides
        predictions_sorted = predictions.sort_values("predicted_rides", ascending=False)
        print(f"Predictions for the next hour ({next_hour}) using {selected_model_name}:")
        print(predictions_sorted)

# Connect the dropdown to the prediction function
model_dropdown.observe(on_model_change, names='value')

# Display the dropdown and output
display(model_dropdown)
display(output)

# Trigger the initial prediction with the default model
on_model_change({'new': model_dropdown.value})

2025-05-10 03:05:52,423 INFO: Initializing external client
2025-05-10 03:05:52,424 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 03:05:53,326 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Error creating feature view: Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/1225907/featurestores/1212511/featureview). Server response: 
HTTP code: 400, HTTP reason: Bad Request, body: b'{"errorCode":270179,"usrMsg":"Feature view: citi_bike_recent_hourly_feature_view, version: 1","errorMsg":"The provided feature view name and version already exists"}', error code: 270179, error msg: The provided feature view name and version already exists, user msg: Feature view: citi_bike_recent_hourly_feature_view, version: 1
Fetching data from 2025-04-11 07:05:55.458723+00:00 to 2025-05-10 06:05:55.458723+00:00
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.78s) 
<class 'pandas.core.frame.DataFrame'>
Index: 2085 entries, 0 to 2096
Data columns (total 3 columns):
 #   Column              Non-Nu



To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2025-05-10 03:05:58,390 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1225907
Error creating feature view: Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/1225907/featurestores/1212511/featureview). Server response: 
HTTP code: 400, HTTP reason: Bad Request, body: b'{"errorCode":270179,"usrMsg":"Feature view: citi_bike_recent_hourly_feature_view, version: 1","errorMsg":"The provided feature view name and version already exists"}', error code: 270179, error msg: The provided feature view name and version already exists, user msg: Feature view: citi_bike_recent_hourly_feature_view, version: 1
Fetching data from 2025-04-11 07:05:55.458723+00:00 to 2025-05-10 06:05:55.458723+00:00
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.61s) 
Making predictions for the next hour: 2025-05-09 08:00:00


Dropdown(description='Select Model:', options=('baseline_previous_hour', 'lightgbm_28days_lags', 'lightgbm_top…

Output()

In [None]:
# Get the current datetime64[us, Etc/UTC]  
current_date = pd.Timestamp.now(tz='Etc/UTC')
feature_store = get_feature_store()

# read time-series data from the feature store
fetch_data_to = current_date - timedelta(hours=1)
fetch_data_from = current_date - timedelta(days=1*29)
print(f"Fetching data from {fetch_data_from} to {fetch_data_to}")
feature_view = feature_store.get_feature_view(
    name=config.FEATURE_VIEW_NAME, version=config.FEATURE_VIEW_VERSION
)

ts_data = feature_view.get_batch_data(
    start_time=(fetch_data_from - timedelta(days=1)),
    end_time=(fetch_data_to + timedelta(days=1)),
)
ts_data = ts_data[ts_data.pickup_hour.between(fetch_data_from, fetch_data_to)]
ts_data.sort_values(["pickup_location_id", "pickup_hour"]).reset_index(drop=True)
ts_data.info()

In [None]:
ts_data["pickup_hour"] = ts_data["pickup_hour"].dt.tz_localize(None)
ts_data.info()
features = transform_ts_data_info_features(ts_data, window_size=24*28, step_size=23)
features.head()
current_date = pd.Timestamp.now(tz='Etc/UTC')
features = load_batch_of_features_from_store(current_date)
current_date
features.head()

In [None]:
model = load_model_from_registry()

In [None]:
predictions = get_model_predictions(model, features)
predictions
predictions.sort_values("predicted_demand", ascending=False).head(10)["pickup_location_id"].values