In [1]:
import pandas as pd
from pathlib import Path

def load_gtfs_for_date(date_str, base_path="../data/raw"):
    day_path = Path(base_path) / date_str
    gtfs = {}

    files = ["routes", "trips", "shapes", "calendar_dates"]
    for name in files:
        file_path = day_path / f"{name}.txt"
        if file_path.exists():
            gtfs[name] = pd.read_csv(file_path)
        else:
            print(f"⚠️ Figyelmeztetés: {file_path} nem található!")

    return gtfs


In [2]:
gtfs_20250518 = load_gtfs_for_date("20250518")
gtfs_20250518["routes"].head()


  gtfs[name] = pd.read_csv(file_path)


Unnamed: 0,agency_id,route_id,route_short_name,route_long_name,route_type,route_desc,route_color,route_text_color,route_sort_order
0,BKK,50,5,,3,"Pasaréti tér / Rákospalota, Kossuth utca",009EE3,FFFFFF,20
1,BKK,70,7,,3,"Albertfalva vasútállomás / Újpalota, Nyírpalot...",009EE3,FFFFFF,24
2,BKK,75,7E,,3,"Blaha Lujza tér M / Újpalota, Nyírpalota út",009EE3,FFFFFF,25
3,BKK,78,7G,,3,"Cinkotai autóbuszgarázs / Újpalota, Nyírpalota út",009EE3,FFFFFF,26
4,BKK,85,8E,,3,"Kelenföld vasútállomás M / Újpalota, Nyírpalot...",009EE3,FFFFFF,27


In [4]:
for key in ["routes", "trips", "shapes", "calendar_dates"]:
    if key not in gtfs_20250518:
        print(f"Hiányzik: {key}")
    else:
        print(f"{key}: {len(gtfs_20250518[key])} rows")


routes: 373 rows
trips: 274082 rows
shapes: 644306 rows
calendar_dates: 11762 rows


In [9]:
import pandas as pd

def filter_gtfs_for_date(gtfs_data: dict, date: int):
    # 1. Aktív szolgáltatások (calendar_dates alapján)
    active_service_ids = gtfs_data["calendar_dates"].query(
        "date == @date and exception_type == 1"
    )["service_id"].unique()

    # 2. Aktív trip-ek (ezek aznap közlekednek)
    trips_active = gtfs_data["trips"].query("service_id in @active_service_ids").copy()

    # 3. Érvényes shape_id-k (az útvonalak)
    active_shape_ids = trips_active["shape_id"].dropna().unique()
    shapes_active = gtfs_data["shapes"].query("shape_id in @active_shape_ids").copy()

    # 4. Érvényes route_id-k (az aktív járatok)
    active_route_ids = trips_active["route_id"].unique()
    routes_active = gtfs_data["routes"].query("route_id in @active_route_ids").copy()

    # 5. Összekapcsolt DataFrame (ha kell egyben)
    trips_routes = trips_active.merge(routes_active, on="route_id", suffixes=("_trip", "_route"))

    return {
        "trips": trips_active,
        "shapes": shapes_active,
        "routes": routes_active,
        "trips_routes": trips_routes,
    }


In [10]:
filtered = filter_gtfs_for_date(gtfs_20250518, 20250518)

In [15]:
filtered['trips_routes']

Unnamed: 0,route_id,trip_id,service_id,trip_headsign,direction_id,block_id,shape_id,wheelchair_accessible,bikes_allowed,agency_id,route_short_name,route_long_name,route_type,route_desc,route_color,route_text_color,route_sort_order
0,3140,C925331008,C92533AVVVK-0031,"Káp.megyer, Megyeri út",0,C92533_3140_24_23,PE55,1.0,1.0,BKK,14,,0,"Lehel tér M / Káposztásmegyer, Megyeri út",FFD800,000000,35
1,3140,C925331011,C92533AVVVK-0031,Lehel u. / Róbert K. krt.,1,C92533_3140_24_24,UX35,1.0,1.0,BKK,14,,0,"Lehel tér M / Káposztásmegyer, Megyeri út",FFD800,000000,35
2,3140,C925331015,C92533AVVVK-0031,"Káp.megyer, Megyeri út",0,C92533_3140_3_28,PE55,2.0,1.0,BKK,14,,0,"Lehel tér M / Káposztásmegyer, Megyeri út",FFD800,000000,35
3,3140,C925331018,C92533AVVVK-0031,Tél utca / Pozsonyi utca,1,C92533_3140_3_29,PE49,2.0,1.0,BKK,14,,0,"Lehel tér M / Káposztásmegyer, Megyeri út",FFD800,000000,35
4,3140,C92533102,C92533AVVVK-0031,Lehel tér M,1,C92533_3140_1_3,PE56,2.0,1.0,BKK,14,,0,"Lehel tér M / Káposztásmegyer, Megyeri út",FFD800,000000,35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17131,H9,H55077_44,51678,Örs vezér tere,1,,44,,1.0,HEV,H9,,109,Örs vezér tere / Csömör,ED6E86,FFFFFF,8
17132,H9,H55081_43,51679,Csömör,0,,43,,1.0,HEV,H9,,109,Örs vezér tere / Csömör,ED6E86,FFFFFF,8
17133,H9,H55125_43,51692,Csömör,0,,43,,1.0,HEV,H9,,109,Örs vezér tere / Csömör,ED6E86,FFFFFF,8
17134,H9,H55176_44,51678,Örs vezér tere,1,,44,,1.0,HEV,H9,,109,Örs vezér tere / Csömör,ED6E86,FFFFFF,8


In [20]:
import folium

def plot_route_on_map(filtered_data, route_short_name):
    # 1. Keresd meg a route_id-t a route_short_name alapján
    route_df = filtered_data["routes"]
    matched_routes = route_df[route_df["route_short_name"] == str(route_short_name)]

    if matched_routes.empty:
        print("Nincs ilyen route_short_name aznap.")
        return None

    map_obj = folium.Map(location=[47.4979, 19.0402], zoom_start=12)

    # 2. Vegyük végig a shape_id-ket, és rajzoljuk meg őket
    shapes_df = filtered_data["shapes"]
    trips_df = filtered_data["trips"]

    for _, route_row in matched_routes.iterrows():
        route_id = route_row["route_id"]
        trips = trips_df[trips_df["route_id"] == route_id]

        for shape_id in trips["shape_id"].unique():
            shape = shapes_df[shapes_df["shape_id"] == shape_id].sort_values("shape_pt_sequence")
            if shape.empty:
                continue

            latlons = list(zip(shape["shape_pt_lat"], shape["shape_pt_lon"]))

            folium.PolyLine(
                latlons,
                color=f"#{route_row['route_color']}",
                weight=4,
                opacity=0.9,
                tooltip=f"{route_short_name} ({route_id})"
            ).add_to(map_obj)

    return map_obj


map_47 = plot_route_on_map(filtered, 47)
map_47


Nincs ilyen route_short_name aznap.


# Structure

In [28]:
import pandas as pd
import os

date_folder = '../data/raw/'
date = '20250518'

routes_path = os.path.join(date_folder, date, 'routes.txt')
trips_path = os.path.join(date_folder, date, 'trips.txt')

routes_df = pd.read_csv(routes_path)
trips_df = pd.read_csv(trips_path)

print("Routes:", routes_df.shape)
print("Trips:", trips_df.shape)


Routes: (373, 9)
Trips: (274082, 9)


  trips_df = pd.read_csv(trips_path)


In [34]:
from datetime import datetime

# Ha nincs még route_versions táblánk, hozzuk létre üresként
route_versions = pd.DataFrame(columns=['route_version_id', 'route_id', 'shape_id', 'route_color', 'created_at'])

def create_route_versions(routes_df, trips_df, current_date):
    # A trips-ből a leggyakoribb shape_id-k kiválasztása route_id-nként
    shape_modes = trips_df.groupby('route_id')['shape_id'].agg(lambda x: x.mode().iloc[0] if not x.mode().empty else None).reset_index()

    # Csatlakozás routes-hoz, hogy legyen route_color is
    merged = routes_df.merge(shape_modes, on='route_id', how='left')

    # Készítsük el a verziókat
    versions = []
    next_version_id = 1

    for _, row in merged.iterrows():
        versions.append({
            'route_version_id': next_version_id,
            'route_id': row['route_id'],
            'shape_id': row['shape_id'],
            'route_color': row['route_color'],
            'created_at': current_date
        })
        next_version_id += 1

    return pd.DataFrame(versions)

current_date = '2025-05-18'  # vagy pl. datetime.now().strftime('%Y-%m-%d')
route_versions = create_route_versions(routes_df, trips_df, date)


In [35]:
route_versions.head(10)

Unnamed: 0,route_version_id,route_id,shape_id,route_color,created_at
0,1,50,VI96,009EE3,20250518
1,2,70,BM97,009EE3,20250518
2,3,75,Z084,009EE3,20250518
3,4,78,V014,009EE3,20250518
4,5,85,JK11,009EE3,20250518
5,6,90,QT07,009EE3,20250518
6,7,100,LV63,009EE3,20250518
7,8,105,AJJ1,009EE3,20250518
8,9,110,KC77,009EE3,20250518
9,10,130,TW43,009EE3,20250518


In [36]:
def create_daily_snapshots(route_versions, trips_df, current_date):
    snapshots = []
    next_snapshot_id = 1

    # Mely route_id-k szerepelnek a trips-ben az adott napon? (az aktívak)
    active_routes = set(trips_df['route_id'].unique())

    for _, row in route_versions.iterrows():
        route_id = row['route_id']
        snapshots.append({
            'snapshot_id': next_snapshot_id,
            'date': current_date,
            'route_version_id': row['route_version_id'],
            'active': route_id in active_routes
        })
        next_snapshot_id += 1

    return pd.DataFrame(snapshots)

daily_snapshots = create_daily_snapshots(route_versions, trips_df, date)


In [37]:
daily_snapshots.head(10)

Unnamed: 0,snapshot_id,date,route_version_id,active
0,1,20250518,1,True
1,2,20250518,2,True
2,3,20250518,3,True
3,4,20250518,4,True
4,5,20250518,5,True
5,6,20250518,6,True
6,7,20250518,7,True
7,8,20250518,8,True
8,9,20250518,9,True
9,10,20250518,10,True
