In [1]:
import pandas as pd
from pathlib import Path

# Step 1: Load the 12 monthly Parquet files (May 2023 through April 2024)
raw_dir = Path("..") / "data" / "raw"

# (year, month) pairs in chronological order: 8 months of 2023, 4 of 2024
periods = [(2023, m) for m in range(5, 13)] + [(2024, m) for m in range(1, 5)]

all_dfs = [
    pd.read_parquet(raw_dir / f"rides_{year}_{month:02}.parquet")
    for year, month in periods
]

# Stack the monthly frames into a single table with a fresh 0..n-1 index
df_all = pd.concat(all_dfs, ignore_index=True)

# Step 2: Count rides per (station id, station name) pair, busiest first
station_keys = ["start_station_id", "start_station_name"]
ride_counts = df_all.groupby(station_keys).size().reset_index(name="total_rides")
top_stations = ride_counts.sort_values("total_rides", ascending=False)

# Step 3: Keep the three busiest start stations
top_3 = top_stations.iloc[:3]
print(top_3)


     start_station_id       start_station_name  total_rides
1163          6140.05          W 21 St & 6 Ave        67968
1557          6822.09          1 Ave & E 68 St        57100
1061          5905.14  University Pl & E 14 St        56148
