# Imports

In [1]:
import pandas as pd
import numpy as np
import os

# Structure

In [2]:
def define_paths(data_folder=None, date=None):
    if data_folder:
        shapes_df_path = os.path.join(data_folder, 'shapes.csv')
        routes_df_path = os.path.join(data_folder, 'routes.csv')
        route_versions_df_path = os.path.join(data_folder, 'route_versions.csv')
        shape_variants_df_path = os.path.join(data_folder, 'shape_variants.csv')
        shape_variant_activations_df_path = os.path.join(data_folder, 'shape_variant_activations.csv')

        temporary_changes_df_path = os.path.join(data_folder, 'temporary_changes.csv')
        return shapes_df_path, routes_df_path, route_versions_df_path, shape_variants_df_path, shape_variant_activations_df_path, temporary_changes_df_path

    if date:
        date_folder = '../data/raw/'
        routes_path = os.path.join(date_folder, date, 'routes.txt')
        trips_path = os.path.join(date_folder, date, 'trips.txt')
        shapes_path = os.path.join(date_folder, date, 'shapes.txt')
        calendar_path = os.path.join(date_folder, date, 'calendar.txt')
        calendar_dates_path = os.path.join(date_folder, date, 'calendar_dates.txt')

        return routes_path, trips_path, shapes_path, calendar_path, calendar_dates_path

    raise ValueError("Either data_folder or date must be provided.")

In [3]:
def load_txt_data(date, print_shapes=False):
    routes_path, trips_path, shapes_path, calendar_path, calendar_dates_path = define_paths(date=date)
    routes_txt = pd.read_csv(routes_path)
    trips_txt = pd.read_csv(trips_path)
    shapes_txt = pd.read_csv(shapes_path)
    calendar_dates_txt = pd.read_csv(calendar_dates_path)

    if print_shapes:
        print("Routes:", routes_txt.shape)
        print("Trips:", trips_txt.shape)
        print("Shapes:", shapes_txt.shape)
        print("Calendar Dates:", calendar_dates_txt.shape)

    try:  # Check if the file exists
        calendar_txt = pd.read_csv(calendar_path)
    except FileNotFoundError:
        # Make empty dataframes for the first time
        print("Calendar file not found. Creating empty dataframe.")
        calendar_txt = pd.DataFrame(columns=['service_id', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday', 'start_date', 'end_date'])
    
    return routes_txt, trips_txt, shapes_txt, calendar_txt, calendar_dates_txt

In [4]:
date = '20131018'
routes_txt, trips_txt, shapes_txt, calendar_txt, calendar_dates_txt = load_txt_data(date)

In [5]:
def load_df_data(data_folder):
    shapes_df_path, routes_df_path, route_versions_df_path, shape_variants_df_path, shape_variant_activations_df_path, temporary_changes_df_path = define_paths(data_folder=data_folder)

    try:
        shapes_df = pd.read_csv(shapes_df_path)
        routes_df = pd.read_csv(routes_df_path)
        route_versions_df = pd.read_csv(route_versions_df_path, parse_dates=['valid_from', 'valid_to'])
        shape_variants_df = pd.read_csv(shape_variants_df_path)
        shape_variant_activations_df = pd.read_csv(shape_variant_activations_df_path)
        temporary_changes_df = pd.read_csv(temporary_changes_df_path)
    except FileNotFoundError:
        # Make empty dataframes for the first time
        ### shapes_df ###
        shapes_df = pd.DataFrame(columns=[
            "shape_id", "shape_pt_lat", "shape_pt_lon", "shape_pt_sequence", "shape_dist_traveled", "shape_bkk_ref"
        ])

        ### routes_df ###
        routes_df = pd.DataFrame(columns=[
            "route_id", "agency_id", "route_short_name", "route_type", "route_color", "route_text_color"
        ])

        ### route_versions_df ###
        route_versions_df = pd.DataFrame(columns=[
            "version_id", "route_id", "direction_id", "route_long_name", "route_desc",
            "valid_from", "valid_to", "main_shape_id", "trip_headsign",
            "parent_version_id", "note"
        ])
        # valid_from and valid_to be converted to datetime
        route_versions_df['valid_from'] = pd.to_datetime(route_versions_df['valid_from'])
        route_versions_df['valid_to'] = pd.to_datetime(route_versions_df['valid_to'])
        
        ### shape_variants_df ###
        shape_variants_df = pd.DataFrame(columns=[
            "shape_variant_id", "version_id", "shape_id", "is_main", "note"
        ])

        ### shape_variant_activations_df ###
        shape_variant_activations_df = pd.DataFrame(columns=[
            "date", "shape_variant_id"
        ])

        ### temporary_changes_df ###
        temporary_changes_df = pd.DataFrame(columns=[
            "detour_id", "route_id", "start_date", "end_date", "affects_version_id", "description"
        ])
        # Save
        shapes_df.to_csv(shapes_df_path, index=False)
        routes_df.to_csv(routes_df_path, index=False)
        route_versions_df.to_csv(route_versions_df_path, index=False)
        shape_variant_activations_df.to_csv(shape_variant_activations_df_path, index=False)
        temporary_changes_df.to_csv(temporary_changes_df_path, index=False)

    return shapes_df, routes_df, route_versions_df, shape_variants_df, shape_variant_activations_df, temporary_changes_df

In [6]:
data_folder = '../data/processed/'
shapes_df, routes_df, route_versions_df, shape_variants_df, shape_variant_activations_df, temporary_changes_df = load_df_data(data_folder)

In [7]:
trips_df = trips_txt.copy()
calendar_df = calendar_txt.copy()
calendar_dates_df = calendar_dates_txt.copy()

## Update routes_df

In [8]:
date_dt = pd.to_datetime(date, format="%Y%m%d")

# Dátum tartomány szűrése
calendar_df['start_date'] = pd.to_datetime(calendar_df['start_date'], format="%Y%m%d")
calendar_df['end_date'] = pd.to_datetime(calendar_df['end_date'], format="%Y%m%d")

# Nap neve (pl. 'friday')
day = date_dt.strftime('%A').lower()

# Egyszerű szűrés egy lépésben
valid_service_ids = calendar_df[
    (calendar_df['start_date'] <= date_dt) &
    (calendar_df['end_date'] >= date_dt) &
    (calendar_df[day] == 1)
]["service_id"].tolist()
valid_service_ids

['A87571AHPGPP-021',
 'A88120APPCsZ-011',
 'A88303AHPHA-0011',
 'A88327AHPSzGy011',
 'A88584AHPSzGy011',
 'A88866APPGPP-021',
 'A88957APPCsZ-011',
 'A88991APPKP-0011',
 'A89527APPKP-0011',
 'A91905AHPKZ-0011',
 'A92340AHPCsZ-031',
 'A92429APPPG-0011',
 'A92884APPPG-0011',
 'A93049APPKP-0021',
 'A93473APPKZ-0011',
 'A93584APPHA-0011',
 'A93967APPPG-0011',
 'A94562APPPG-0011',
 'A94762AHPHA-0021',
 'A94769APPCsZ-011',
 'A94963APPHA-0011',
 'A94975APPCsZ-051',
 'A94975APPCsZ-061',
 'A95090APPGER-011',
 'A95129AHPKZ-0011',
 'A95149APPMA-0011',
 'A95551APPCsZ-051',
 'A95576AHPKZ-0011',
 'A95586APPKZ-0011',
 'A95633AHPKZ-0011',
 'A95640APPMA-0021',
 'A95720APPPG-0011',
 'A95730APPHA-0061',
 'A95797AHPHA-0021',
 'A95818APPSRM-021',
 'A95868APPPG-0061',
 'A95886APPCsZ-011',
 'A95886APPCsZ-021',
 'A96075APPHA-0011',
 'A96236APPHA-0011',
 'A96245AHPBA0011',
 'A96307APPBA0011',
 'A96311AHPHA-0031',
 'A96320AHPKM-0011',
 'A96395APPPG-0041',
 'A96405APPPG-0031',
 'A96435APPSRM-021',
 'A96690TJHPSzM

In [9]:
calendar_dates_df[(calendar_dates_df["date"] == int(date)) & (calendar_dates_df["exception_type"] == 2)]

Unnamed: 0,service_id,date,exception_type
1888,A98178AHPCsZ-041,20131018,2
3632,B01134APPCsZ-021,20131018,2
3682,B01188AHPKM-0011,20131018,2
4127,B01326RA1PPHZS-011,20131018,2
4131,B01331RA1PPPG-0011,20131018,2


In [10]:
remove = calendar_dates_df[(calendar_dates_df["date"] == int(date)) & (calendar_dates_df["exception_type"] == 2)]["service_id"]
remove_set = set(remove)
filtered_service_ids = [sid for sid in valid_service_ids if sid not in remove_set]

In [11]:
len(valid_service_ids), len(remove_set), len(filtered_service_ids)

(246, 5, 241)

In [12]:
trips2routes_df = trips_df[trips_df['service_id'].isin(valid_service_ids)]
trips2routes_df = trips2routes_df[["service_id", "route_id", "shape_id", "trip_headsign", "direction_id", "trips_bkk_ref"]]
trips2routes_df = trips2routes_df[trips2routes_df["trips_bkk_ref"].notna()]
trips2routes_df = trips2routes_df.groupby(["service_id", "route_id", "shape_id", "trip_headsign", "direction_id"]).count().reset_index()
trips2routes_df = trips2routes_df.rename(columns={"shape_id" : "main_shape_id"})
trips2routes_df


Unnamed: 0,service_id,route_id,main_shape_id,trip_headsign,direction_id,trips_bkk_ref
0,A87571AHPGPP-021,6100,1225,Gödöllő,0,46
1,A87571AHPGPP-021,6100,1226,Örs vezér tere M+H,1,49
2,A87571AHPGPP-021,6130,5145,Cinkota H,0,26
3,A87571AHPGPP-021,6130,5146,Örs vezér tere M+H,1,26
4,A87571AHPGPP-021,6150,1227,"Csömör, 2. vágány",0,25
...,...,...,...,...,...,...
566,B01842RHPKZ-0011,0641,R798,Hűvösvölgy,1,30
567,B01842RHPKZ-0011,1640,R828,"Solymár, PEMÜ",0,14
568,B01842RHPKZ-0011,1640,R829,Hűvösvölgy,1,26
569,B01842RHPKZ-0011,2640,R312,Hűvösvölgy,1,18


In [13]:
trips2routes_df.sort_values("trips_bkk_ref", ascending=True)

Unnamed: 0,service_id,route_id,main_shape_id,trip_headsign,direction_id,trips_bkk_ref
228,A99031APSBYR-011,9920,V839,Gödöllő H,0,1
229,A99031APSBYR-011,9920,V840,Cinkotai autóbuszgarázs,1,1
28,A92429APPPG-0011,9340,T572,Nyugati pályaudvar M,1,1
167,A97535APPPG-0051,1725,T967,Kosztolányi Dezső tér,1,1
246,A99505APPPG-0011,9430,C224,Békásmegyer H,1,2
...,...,...,...,...,...,...
233,A99312APPKP-0011,5200,1112,Örs vezér tere M+H,0,291
232,A99312APPKP-0011,5200,1111,Déli pályaudvar M,1,291
19,A88991APPKP-0011,5300,1113,Újpest-Központ M,1,293
219,A98862APPSRM-021,5100,1110,Mexikói út M,0,393


In [14]:
latest_routes_df = pd.merge(
    #routes_txt[["route_id", "route_long_name" ,"route_desc"]],
    routes_txt,
    trips2routes_df[["route_id", "main_shape_id", "trip_headsign", "direction_id"]],
    on="route_id",
    how="inner",)

In [15]:
latest_routes_df

Unnamed: 0,route_id,agency_id,route_short_name,route_long_name,route_desc,route_type,route_color,route_text_color,main_shape_id,trip_headsign,direction_id
0,MP52,BKK,M2,,Széll Kálmán tér M / Örs vezér tere M+H,3,1188FF,000000,V040,Örs vezér tere M+H,0
1,MP52,BKK,M2,,Széll Kálmán tér M / Örs vezér tere M+H,3,1188FF,000000,V041,Széll Kálmán tér M,1
2,MP525,BKK,M2E,,Széll Kálmán tér M / Örs vezér tere M+H,3,1188FF,000000,V038,Örs vezér tere M+H,0
3,MP525,BKK,M2E,,Széll Kálmán tér M / Örs vezér tere M+H,3,1188FF,000000,V039,Széll Kálmán tér M,1
4,VP01,BKK,1V,,Bécsi út (Vörösvári út) / Lehel utca/Róbert Ká...,3,1188FF,000000,X032,Lehel utca/Róbert Károly körút,0
...,...,...,...,...,...,...,...,...,...,...,...
566,9960,BKK,996,,Cinkotai autóbuszgarázs / Újpest-Központ M,3,333333,FFFFFF,C583,Cinkotai autóbuszgarázs,0
567,9961,BKK,996A,,Cinkotai autóbuszgarázs / Újpest-Központ M,3,333333,FFFFFF,C182,Újpest-Központ M,1
568,9961,BKK,996A,,Cinkotai autóbuszgarázs / Újpest-Központ M,3,333333,FFFFFF,C584,Cinkotai autóbuszgarázs,0
569,9990,BKK,999,,Határ út M / Dél-pesti autóbuszgarázs,3,333333,FFFFFF,Q605,Dél-pesti autóbuszgarázs,0


In [16]:
routes_df

Unnamed: 0,route_id,agency_id,route_short_name,route_type,route_color,route_text_color


In [17]:
def update_routes_df(routes_df, latest_routes_df):
    # Use relevant columns, without route_desc
    cols_to_use = [col for col in routes_df.columns]

    # Select new rows - rows whats route_id is not in routes_df 
    new_routes = latest_routes_df[~latest_routes_df["route_id"].isin(routes_df["route_id"])][cols_to_use]
    
    # Concatenate new routes
    updated_routes_df = pd.concat([routes_df, new_routes], ignore_index=True)

    # Check for duplicates
    duplicates = updated_routes_df[updated_routes_df.duplicated(subset="route_id", keep=False)]

    if not duplicates.empty:
        print(f"Warning: There are {duplicates['route_id'].nunique()} duplicated route_id(s) in routes_df!")
        print("Duplicated route_id(s):")
        print(duplicates['route_id'].unique())
    else:
        print("No duplicate route_id found in routes_df.")

    return updated_routes_df

def save_routes(routes_df, data_folder):
    _, routes_df_path, _, _, _, _ = define_paths(data_folder=data_folder)
    routes_df.to_csv(routes_df_path, index=False)
    print(f"routes_df saved to {routes_df_path}")

In [18]:
updated_routes_df = update_routes_df(routes_df, latest_routes_df)
save_routes(updated_routes_df, data_folder)

Duplicated route_id(s):
['MP52' 'MP525' 'VP01' 'VP02' 'VP101' 'VP42' 'VP61' '0050' '0070' '0071'
 '0075' '0080' '0090' '0110' '0130' '0131' '0150' '0160' '0161' '0205'
 '0210' '0211' '0220' '0221' '0230' '0235' '0250' '0260' '0270' '0290'
 '0300' '0301' '0320' '0330' '0340' '0350' '0360' '0380' '0390' '0400'
 '0405' '0440' '0530' '0580' '0630' '0640' '0641' '0650' '0660' '0680'
 '0710' '0850' '0855' '0860' '0870' '0871' '0880' '0910' '0930' '0931'
 '0950' '0960' '0985' '0990' '1020' '1030' '1040' '1041' '1050' '1060'
 '1075' '1110' '1120' '1130' '1131' '1140' '1160' '1180' '1190' '1200'
 '1210' '1220' '1230' '1231' '1240' '1250' '1260' '1261' '1280' '1290'
 '1310' '1330' '1335' '1340' '1380' '1390' '1400' '1407' '1410' '1470'
 '1480' '1490' '1500' '1505' '1510' '1520' '1530' '1550' '1560' '1570'
 '1590' '1600' '1611' '1620' '1621' '1640' '1650' '1660' '1661' '1685'
 '1695' '1700' '1725' '1740' '1750' '1765' '1780' '1790' '1810' '1820'
 '1830' '1840' '1850' '1860' '1870' '1910' '1935' '

In [19]:
updated_routes_df

Unnamed: 0,route_id,agency_id,route_short_name,route_type,route_color,route_text_color
0,MP52,BKK,M2,3,1188FF,000000
1,MP52,BKK,M2,3,1188FF,000000
2,MP525,BKK,M2E,3,1188FF,000000
3,MP525,BKK,M2E,3,1188FF,000000
4,VP01,BKK,1V,3,1188FF,000000
...,...,...,...,...,...,...
566,9960,BKK,996,3,333333,FFFFFF
567,9961,BKK,996A,3,333333,FFFFFF
568,9961,BKK,996A,3,333333,FFFFFF
569,9990,BKK,999,3,333333,FFFFFF


## Update route_versions_df

In [20]:
route_versions_df

Unnamed: 0,version_id,route_id,direction_id,route_long_name,route_desc,valid_from,valid_to,main_shape_id,trip_headsign,parent_version_id,note


In [21]:
def version_exists(current_versions, row):
    return (
        ((current_versions["route_id"] == row["route_id"]) &
         (current_versions["direction_id"] == row["direction_id"]) &
         (current_versions["main_shape_id"] == row["main_shape_id"]) &
         (current_versions["trip_headsign"] == row["trip_headsign"]))
        .any()
    )

def update_route_versions(route_versions_df, latest_routes_df, date):
    route_versions_copy_df = route_versions_df.copy()
    # version_id starting point
    START_VERSION_ID = 100_000

    # If the file is empty
    if route_versions_df.empty:
        next_version_id = START_VERSION_ID
    else:
        next_version_id = route_versions_df["version_id"].max() + 1

    # Prepare new versions
    #trips_grouped = trips_txt.groupby(['route_id', 'shape_id', 'trip_headsign', 'direction_id']).count()
    #trips_grouped = trips_grouped.sort_values(by=['route_id', 'service_id'], ascending=[True, False])
    #trips_grouped = trips_grouped.groupby('route_id').head(2).sort_values(by=['route_id', 'direction_id']).reset_index()
    #trips_grouped = trips_grouped[["route_id", "shape_id", "trip_headsign", "direction_id"]]

    # Create a new versions dataframe
    #new_versions_df = pd.merge(trips_grouped, routes_txt[["route_id", "route_long_name", "route_desc"]], on="route_id")
    new_versions_df = latest_routes_df.copy()[["route_id", "main_shape_id", "trip_headsign", "direction_id", "route_desc"]]
    new_versions_df["valid_from"] = pd.to_datetime(date)  # az adott GTFS snapshot dátuma
    new_versions_df["valid_to"] = pd.NaT
    new_versions_df["parent_version_id"] = np.nan
    new_versions_df["note"] = np.nan

    # Define the current versions
    current_versions = route_versions_df[route_versions_df["valid_to"].isna()]

    # Let only the new versions
    ##new_versions_filtered = new_versions_df[~new_versions_df.apply(version_exists, axis=1)].copy()
    new_versions_filtered = new_versions_df[~new_versions_df.apply(lambda row: version_exists(row, current_versions), axis=1)].copy()

    # Update the previous versions valid_to date
    for _, row in new_versions_filtered.iterrows():
        mask = (
            (route_versions_df["route_id"] == row["route_id"]) &
            (route_versions_df["valid_to"].isna())
        )
        route_versions_copy_df.loc[mask, "valid_to"] = row["valid_from"] - pd.Timedelta(days=1)

    new_versions_filtered["version_id"] = range(next_version_id, next_version_id + len(new_versions_filtered))

    # Concat
    extended_route_versions_df = pd.concat([route_versions_copy_df, new_versions_filtered], ignore_index=True)

    return extended_route_versions_df

def save_route_versions(route_versions_df, data_folder):
    _, _, route_versions_df_path, _, _, _ = define_paths(data_folder=data_folder)
    route_versions_df.to_csv(route_versions_df_path, index=False)
    print(f"routes_df saved to {route_versions_df_path}")

In [22]:
extended_route_versions_df = update_route_versions(route_versions_df, latest_routes_df, date)
save_route_versions(extended_route_versions_df, data_folder)
extended_route_versions_df

routes_df saved to ../data/processed/route_versions.csv


Unnamed: 0,version_id,route_id,direction_id,route_long_name,route_desc,valid_from,valid_to,main_shape_id,trip_headsign,parent_version_id,note
0,100000,MP52,0,,Széll Kálmán tér M / Örs vezér tere M+H,2013-10-18,NaT,V040,Örs vezér tere M+H,,
1,100001,MP52,1,,Széll Kálmán tér M / Örs vezér tere M+H,2013-10-18,NaT,V041,Széll Kálmán tér M,,
2,100002,MP525,0,,Széll Kálmán tér M / Örs vezér tere M+H,2013-10-18,NaT,V038,Örs vezér tere M+H,,
3,100003,MP525,1,,Széll Kálmán tér M / Örs vezér tere M+H,2013-10-18,NaT,V039,Széll Kálmán tér M,,
4,100004,VP01,0,,Bécsi út (Vörösvári út) / Lehel utca/Róbert Ká...,2013-10-18,NaT,X032,Lehel utca/Róbert Károly körút,,
...,...,...,...,...,...,...,...,...,...,...,...
566,100566,9960,0,,Cinkotai autóbuszgarázs / Újpest-Központ M,2013-10-18,NaT,C583,Cinkotai autóbuszgarázs,,
567,100567,9961,1,,Cinkotai autóbuszgarázs / Újpest-Központ M,2013-10-18,NaT,C182,Újpest-Központ M,,
568,100568,9961,0,,Cinkotai autóbuszgarázs / Újpest-Központ M,2013-10-18,NaT,C584,Cinkotai autóbuszgarázs,,
569,100569,9990,0,,Határ út M / Dél-pesti autóbuszgarázs,2013-10-18,NaT,Q605,Dél-pesti autóbuszgarázs,,


## OLD Update route_versions_df

In [None]:
def version_exists(current_versions, row):
    return (
        ((current_versions["route_id"] == row["route_id"]) &
         (current_versions["direction_id"] == row["direction_id"]) &
         (current_versions["shape_id"] == row["shape_id"]) &
         (current_versions["trip_headsign"] == row["trip_headsign"]))
        .any()
    )

def update_route_versions(route_versions_df, trips_txt, routes_txt, date):
    route_versions_copy_df = route_versions_df.copy()
    # version_id starting point
    START_VERSION_ID = 100_000

    # If the file is empty
    if route_versions_df.empty:
        next_version_id = START_VERSION_ID
    else:
        next_version_id = route_versions_df["version_id"].max() + 1

    # Prepare new versions
    trips_grouped = trips_txt.groupby(['route_id', 'shape_id', 'trip_headsign', 'direction_id']).count()
    trips_grouped = trips_grouped.sort_values(by=['route_id', 'service_id'], ascending=[True, False])
    trips_grouped = trips_grouped.groupby('route_id').head(2).sort_values(by=['route_id', 'direction_id']).reset_index()
    trips_grouped = trips_grouped[["route_id", "shape_id", "trip_headsign", "direction_id"]]

    # Create a new versions dataframe
    new_versions_df = pd.merge(trips_grouped, routes_txt[["route_id", "route_long_name", "route_desc"]], on="route_id")
    new_versions_df["valid_from"] = pd.to_datetime(date)  # az adott GTFS snapshot dátuma
    new_versions_df["valid_to"] = pd.NaT
    new_versions_df["parent_version_id"] = np.nan
    new_versions_df["note"] = np.nan

    # Define the current versions
    current_versions = route_versions_df[route_versions_df["valid_to"].isna()]

    # Let only the new versions
    ##new_versions_filtered = new_versions_df[~new_versions_df.apply(version_exists, axis=1)].copy()
    new_versions_filtered = new_versions_df[~new_versions_df.apply(lambda row: version_exists(row, current_versions), axis=1)].copy()

    # Update the previous versions valid_to date
    for _, row in new_versions_filtered.iterrows():
        mask = (
            (route_versions_df["route_id"] == row["route_id"]) &
            (route_versions_df["valid_to"].isna())
        )
        route_versions_copy_df.loc[mask, "valid_to"] = row["valid_from"] - pd.Timedelta(days=1)

    new_versions_filtered["version_id"] = range(next_version_id, next_version_id + len(new_versions_filtered))

    # Concat
    extended_route_versions_df = pd.concat([route_versions_copy_df, new_versions_filtered], ignore_index=True)

    return extended_route_versions_df

def save_route_versions(route_versions_df, data_folder):
    _, route_versions_df_path, _, _, _ = define_paths(data_folder=data_folder)
    route_versions_df.to_csv(route_versions_df_path, index=False)
    print(f"routes_df saved to {route_versions_df_path}")

In [None]:
extended_route_versions_df = update_route_versions(route_versions_df, trips_txt, routes_txt, date)
save_route_versions(extended_route_versions_df, data_folder)
extended_route_versions_df.head(4)

routes_df saved to ../data/processed/route_versions.csv


Unnamed: 0,version_id,route_id,direction_id,route_long_name,route_desc,valid_from,valid_to,shape_id,trip_headsign,parent_version_id,note
0,100000,50,0,,"Pasaréti tér / Rákospalota, Kossuth utca",2013-10-18,NaT,R952,"Rákospalota, Kossuth utca",,
1,100001,50,1,,"Pasaréti tér / Rákospalota, Kossuth utca",2013-10-18,NaT,U961,Pasaréti tér,,
2,100002,70,0,,"Albertfalva vasútállomás / Újpalota, Nyírpalot...",2013-10-18,NaT,S704,"Újpalota, Nyírpalota út",,
3,100003,70,1,,"Albertfalva vasútállomás / Újpalota, Nyírpalot...",2013-10-18,NaT,S705,Albertfalva vasútállomás,,
