In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os

# Add the parent directory of the current working directory to the Python
# path so the `src` package imported below can be resolved.
# Guarded so that re-running this cell does not append duplicate entries.
_parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)
import src.config as config

In [3]:
from src.inference import get_feature_store, fetch_predictions
import pandas as pd
from datetime import datetime, timedelta, timezone


def fetch_days_data(days, lag_days=365, version=1):
    """Read the feature group and keep a `days`-long window of rows.

    The window is ``[now - (lag_days + days), now - lag_days]`` with both
    bounds inclusive, where ``now`` is the current UTC time. With the default
    ``lag_days=365`` this is the same window, shifted one year back, that the
    original hard-coded implementation used.

    Args:
        days: Length of the window, in days.
        lag_days: How many days before now the window ends. Defaults to 365.
        version: Feature group version to read. Defaults to 1.

    Returns:
        The rows of the feature group whose ``pickup_hour`` falls inside the
        window (result of boolean-mask indexing on the frame that was read).
    """
    current_date = pd.to_datetime(datetime.now(timezone.utc))
    fetch_data_from = current_date - timedelta(days=(lag_days + days))
    fetch_data_to = current_date - timedelta(days=lag_days)
    # Echo the window bounds so the notebook output records what was fetched.
    print(fetch_data_from, fetch_data_to)

    fs = get_feature_store()
    fg = fs.get_feature_group(
        name=config.FEATURE_GROUP_NAME,
        version=version,
    )

    # The whole feature group is read and then filtered client-side.
    # NOTE(review): the bounds are timezone-aware (UTC), so this presumably
    # expects `pickup_hour` to be timezone-aware as well - confirm.
    df = fg.select_all().read()
    cond = (df["pickup_hour"] >= fetch_data_from) & (df["pickup_hour"] <= fetch_data_to)
    return df[cond]

In [4]:
# Fetch a 180-day window of rows via fetch_days_data; the window ends
# 365 days before the current UTC time.
ts_data = fetch_days_data(180)

2023-09-06 00:59:12.637109+00:00 2024-03-04 00:59:12.637109+00:00
2025-03-03 19:59:12,642 INFO: Initializing external client
2025-03-03 19:59:12,643 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-03 19:59:14,514 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215665
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (14.50s) 


In [5]:
from src.data_utils import transform_ts_data_info_features_and_target

# Split the fetched rows into `features` and `targets` with the project helper.
# window_size = 24 * 28 = 672; presumably 28 days of hourly rows - TODO confirm.
features, targets = transform_ts_data_info_features_and_target(
    ts_data,
    window_size=24 * 28,
    step_size=23,
)

In [6]:
from src.pipeline_utils import get_pipeline
# Build the project's pipeline and fit it on the full `features`/`targets`
# produced above (no hold-out split is made in this notebook).
pipeline = get_pipeline()
pipeline.fit(features, targets)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.299176 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 171646
[LightGBM] [Info] Number of data points in the train set: 24180, number of used features: 675
[LightGBM] [Info] Start training from score 16.799628


In [7]:
from sklearn.metrics import mean_absolute_error
# Predict on the same `features` the pipeline was fitted on, so these are
# in-sample predictions.
predictions = pipeline.predict(features)

In [8]:
# NOTE(review): `predictions` were produced from the data the pipeline was
# fitted on, so despite its name `test_mae` is an in-sample (training) error,
# not a held-out test error.
test_mae = mean_absolute_error(targets, predictions)
print(f"{test_mae:.4f}")

2.9953


In [9]:
from src.inference import load_metrics_from_registry 

# Load metrics from the model registry using the helper's default arguments.
metric = load_metrics_from_registry()

2025-03-03 20:00:12,958 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-03-03 20:00:12,974 INFO: Initializing external client
2025-03-03 20:00:12,975 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-03 20:00:14,095 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215665


In [10]:
# Display the metrics loaded from the registry.
metric

{'test_mae': 1.935489834251996}

In [11]:
from src.inference import load_model_from_registry
# Load a model from the registry using the helper's default arguments.
model = load_model_from_registry()

2025-03-03 20:00:18,640 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-03-03 20:00:18,656 INFO: Initializing external client
2025-03-03 20:00:18,657 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-03 20:00:19,881 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215665
Downloading model artifact (0 dirs, 1 files)... DONE

In [12]:
import joblib  
import src.config
# Serialize the fitted pipeline with joblib to "lgb_model.pkl" inside
# config.MODELS_DIR.
joblib.dump(pipeline, config.MODELS_DIR / "lgb_model.pkl")

['C:\\Users\\Yaseen\\Desktop\\courses\\UB\\Spring25\\CDA500\\sp25_taxi-main\\models\\lgb_model.pkl']

In [13]:
from hsml.schema import Schema
from hsml.model_schema import ModelSchema

# Build the registry schema objects from the `features` (input) and
# `targets` (output) used for fitting.
input_schema, output_schema = Schema(features), Schema(targets)
model_schema = ModelSchema(
    input_schema=input_schema,
    output_schema=output_schema,
)

In [15]:
import hopsworks
# Log in to Hopsworks; no credentials are passed explicitly here.
project = hopsworks.login()

2025-03-03 22:27:10,401 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-03-03 22:27:10,501 INFO: Initializing external client
2025-03-03 22:27:10,505 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-03 22:27:12,271 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215665


In [16]:
# Get the project's model registry handle and display it.
model_registry = project.get_model_registry()
model_registry

ModelRegistry(project: 'ml_ops_project')

In [17]:
# Create a new registry entry for the freshly fitted pipeline.
# The name comes from config.MODEL_NAME, the same setting used later to list
# the registered versions, instead of a duplicated string literal.
# NOTE(review): `test_mae` is whatever the earlier cell computed - check that
# it is really a held-out metric before relying on it in the registry.
modelv2 = model_registry.sklearn.create_model(
    name=config.MODEL_NAME,
    metrics={"test_mae": test_mae},
    description="LightGBM regressor V2",
    # A randomly sampled row of `features` serves as the input example.
    input_example=features.sample(),
    model_schema=model_schema,
)

In [18]:
# Upload the pickled pipeline written earlier with joblib. The path is derived
# from config.MODELS_DIR rather than a hard-coded absolute local path, so the
# notebook runs on any machine or checkout location.
modelv2.save(str(config.MODELS_DIR / "lgb_model.pkl"))

  0%|          | 0/6 [00:00<?, ?it/s]

Uploading: 0.000%|          | 0/317450 elapsed<00:00 remaining<?

Uploading: 0.000%|          | 0/2049 elapsed<00:00 remaining<?

Uploading: 0.000%|          | 0/51418 elapsed<00:00 remaining<?

Model created, explore it at https://c.app.hopsworks.ai:443/p/1215665/models/taxi_demand_predictor_next_hour/3


Model(name: 'taxi_demand_predictor_next_hour', version: 3)

In [19]:
from src.inference import load_model_from_registry
# NOTE(review): -1 presumably selects the latest registered version - confirm
# against src.inference.load_model_from_registry.
model = load_model_from_registry(-1)

2025-03-03 22:28:36,464 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-03-03 22:28:36,483 INFO: Initializing external client
2025-03-03 22:28:36,484 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-03 22:28:37,971 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215665
Downloading model artifact (0 dirs, 1 files)... DONE

In [20]:
# List every registered version of the model named by config.MODEL_NAME.
models = model_registry.get_models(name=config.MODEL_NAME)

In [21]:
# Display the registered model versions.
models

[Model(name: 'taxi_demand_predictor_next_hour', version: 1),
 Model(name: 'taxi_demand_predictor_next_hour', version: 2),
 Model(name: 'taxi_demand_predictor_next_hour', version: 3)]

In [22]:
# Pick the entry with the highest version number. The lambda parameter is
# named `candidate` so it is not confused with the notebook-level `model`.
max(models, key=lambda candidate: candidate.version)


Model(name: 'taxi_demand_predictor_next_hour', version: 3)

In [23]:
# Re-read the registry metrics after the new model version has been saved.
load_metrics_from_registry()

2025-03-03 22:28:49,775 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-03-03 22:28:49,792 INFO: Initializing external client
2025-03-03 22:28:49,793 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-03-03 22:28:50,986 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1215665


{'test_mae': 2.9952564894590528}