In [1]:
import pandas as pd
from pathlib import Path
import pyarrow.parquet as pq

# Period selecting which processed time-series file is loaded.
year = 2023
month = 1

# The file name embeds the year and the zero-padded month,
# e.g. ts_data_2023_01.parquet, under ../data/processed.
processed_dir = Path("..") / "data" / "processed"
path = processed_dir / f"ts_data_{year}_{month:02}.parquet"

# Read with pyarrow first, then convert the Arrow table to a pandas DataFrame.
table = pq.read_table(path)
ts_data = table.to_pandas()

# Preview the first rows as the cell output.
ts_data.head()


Unnamed: 0,pickup_hour,pickup_location_id,rides
0,2023-01-01 00:00:00,HB102,2
1,2023-01-01 01:00:00,HB102,8
2,2023-01-01 02:00:00,HB102,2
3,2023-01-01 03:00:00,HB102,1
4,2023-01-01 04:00:00,HB102,1


In [2]:
# Preview the first 36 rows recorded for pickup location "JC115".
ts_data.loc[ts_data["pickup_location_id"] == "JC115"].head(36)


Unnamed: 0,pickup_hour,pickup_location_id,rides
1488,2023-01-01 00:00:00,JC115,0
1489,2023-01-01 01:00:00,JC115,1
1490,2023-01-01 02:00:00,JC115,7
1491,2023-01-01 03:00:00,JC115,1
1492,2023-01-01 04:00:00,JC115,3
1493,2023-01-01 05:00:00,JC115,1
1494,2023-01-01 06:00:00,JC115,0
1495,2023-01-01 07:00:00,JC115,1
1496,2023-01-01 08:00:00,JC115,0
1497,2023-01-01 09:00:00,JC115,1


In [3]:
import pandas as pd
import numpy as np

def transform_time_series_to_tabular(df, location_id, feature_col="rides", window_size=12, step_size=1):
    """Turn one location's series into a sliding-window features/target table.

    Rows of ``df`` whose ``pickup_location_id`` equals ``location_id`` are
    selected and the values of ``feature_col`` are read in the order they
    appear in ``df``. This function does not sort, so the caller is expected
    to pass data that is already in chronological order.

    Each output row holds ``window_size`` consecutive values as features and
    the value immediately following them as the target. Consecutive windows
    start ``step_size`` positions apart.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``pickup_location_id`` column and ``feature_col``.
    location_id
        Value of ``pickup_location_id`` to select.
    feature_col : str, default "rides"
        Column holding the series values.
    window_size : int, default 12
        Number of lagged values per row; must be >= 1.
    step_size : int, default 1
        Offset between the starts of consecutive windows; must be >= 1.

    Returns
    -------
    pandas.DataFrame
        Columns ``feature_1`` .. ``feature_{window_size}`` followed by
        ``target``; one row per window.

    Raises
    ------
    ValueError
        If ``window_size`` or ``step_size`` is smaller than 1, or if the
        selected location has ``window_size`` or fewer observations (which
        includes a location that is not present at all).
    """
    # Reject degenerate parameters up front: a negative step would otherwise
    # silently yield an empty table, and a non-positive window would produce
    # malformed or feature-less rows.
    if window_size < 1:
        raise ValueError("window_size must be a positive integer.")
    if step_size < 1:
        raise ValueError("step_size must be a positive integer.")

    location_data = df[df["pickup_location_id"] == location_id].reset_index(drop=True)
    values = location_data[feature_col].to_numpy()

    if len(values) <= window_size:
        raise ValueError("Not enough data to create even one window of features and target.")

    # Start index of every window. The last start leaves exactly one value
    # after the window to serve as the target.
    starts = np.arange(0, len(values) - window_size, step_size)
    # Offsets 0..window_size: the first window_size are the features and the
    # final one is the target, so a single gather builds the whole table.
    offsets = np.arange(window_size + 1)
    windows = values[starts[:, None] + offsets]

    column_names = [f"feature_{i+1}" for i in range(window_size)] + ["target"]
    transformed_df = pd.DataFrame(windows, columns=column_names)

    return transformed_df


In [4]:
# Build the windowed table for location "JC115": 24 lagged values per row,
# with windows advancing one position at a time.
features_targets = transform_time_series_to_tabular(
    ts_data,
    location_id="JC115",
    feature_col="rides",
    window_size=24,
    step_size=1,
)


In [5]:
# Show the windowed features/target table as the cell output.
features_targets

Unnamed: 0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,...,feature_16,feature_17,feature_18,feature_19,feature_20,feature_21,feature_22,feature_23,feature_24,target
0,0,1,7,1,3,1,0,1,0,1,...,8,5,5,11,7,2,5,2,1,2
1,1,7,1,3,1,0,1,0,1,0,...,5,5,11,7,2,5,2,1,2,1
2,7,1,3,1,0,1,0,1,0,3,...,5,11,7,2,5,2,1,2,1,1
3,1,3,1,0,1,0,1,0,3,0,...,11,7,2,5,2,1,2,1,1,0
4,3,1,0,1,0,1,0,3,0,4,...,7,2,5,2,1,2,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
715,14,5,7,0,1,1,1,0,0,0,...,3,1,1,1,6,8,10,20,31,14
716,5,7,0,1,1,1,0,0,0,2,...,1,1,1,6,8,10,20,31,14,7
717,7,0,1,1,1,0,0,0,2,2,...,1,1,6,8,10,20,31,14,7,8
718,0,1,1,1,0,0,0,2,2,0,...,1,6,8,10,20,31,14,7,8,2
