In [2]:
import sys
from pathlib import Path

# Make the project root importable so `src/` can be imported from this notebook.
# The root is resolved as the parent of the current working directory, so the
# kernel is expected to run from the notebook's own folder.
PROJECT_ROOT = str(Path("..").resolve())
# Guard the append: re-running this cell must not stack duplicate entries
# onto sys.path.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)


In [3]:
import pandas as pd
from pathlib import Path
from src.data_utils import transform_ts_data_info_features_and_target_loop

In [4]:

# --- Step 1: read the hourly time series from its parquet file ---
ts_source = "../data/transformed/top_3_hourly_timeseries.parquet"
ts_data = pd.read_parquet(ts_source)

In [5]:
# --- Step 2: reshape the series into sliding-window features and targets ---
hours_per_day = 24
window_hours = hours_per_day * 28  # look back over the past 28 days
stride_hours = hours_per_day       # move the window forward one day at a time

features, targets = transform_ts_data_info_features_and_target_loop(
    df=ts_data,
    feature_col="rides",
    location_col="start_station_id",
    time_col="start_hour",
    window_size=window_hours,
    step_size=stride_hours,
)

In [6]:
# --- Step 3: attach the target as an extra column on a copy of the features ---
# `assign` returns a new frame, so `features` itself is left unmodified.
tabular_data = features.assign(target=targets)

In [7]:
# --- Step 4: persist the combined table as parquet ---
output_dir = Path("..", "data", "transformed")
output_dir.mkdir(parents=True, exist_ok=True)  # create the folder tree if it is missing
tabular_path = output_dir.joinpath("tabular_data.parquet")

# The DataFrame index is not written to the file (index=False).
tabular_data.to_parquet(tabular_path, engine="pyarrow", index=False)
print(f"✅ Tabular dataset saved to: {tabular_path}")

✅ Tabular dataset saved to: ..\data\transformed\tabular_data.parquet
