In [1]:
import pandas as pd
import folium
from datetime import datetime, timedelta
import ipywidgets as widgets
from IPython.display import display, clear_output
import numpy as np
import os

def find_project_root():
    """Find project root by looking for the data folder."""
    current_dir = os.getcwd()
    while current_dir != os.path.dirname(current_dir):  # Until filesystem root
        data_path = os.path.join(current_dir, 'data')
        if os.path.exists(data_path):
            return current_dir
        current_dir = os.path.dirname(current_dir)
    return os.getcwd()

class BudapestRouteVisualizer:
    def __init__(self, data_folder=None):
        """
        Initialize the visualizer with the Budapest time travel data structure
        
        If data_folder is None, automatically detect the project root and use data/processed/
        """
        if data_folder is None:
            project_root = find_project_root()
            data_folder = os.path.join(project_root, 'data', 'processed')
        
        self.data_folder = data_folder
        print(f"Using data folder: {data_folder}")
        
        # Load the CSV files
        self.routes = pd.read_csv(os.path.join(data_folder, 'routes.csv'))
        self.route_versions = pd.read_csv(os.path.join(data_folder, 'route_versions.csv'))
        self.shapes = pd.read_csv(os.path.join(data_folder, 'shapes.csv'))
        
        # Optional files that might not exist
        try:
            self.shape_variants = pd.read_csv(os.path.join(data_folder, 'shape_variants.csv'))
        except FileNotFoundError:
            self.shape_variants = pd.DataFrame()
            
        try:
            self.shape_variant_activations = pd.read_csv(os.path.join(data_folder, 'shape_variant_activations.csv'))
        except FileNotFoundError:
            self.shape_variant_activations = pd.DataFrame()
        
        # Convert date columns - try different date formats
        date_columns = ['start_date', 'end_date', 'activation_date', 'deactivation_date', 
                       'from_date', 'to_date', 'valid_from', 'valid_to', 'date_start', 'date_end']
        for df in [self.route_versions, self.shape_variant_activations]:
            if not df.empty:
                for col in date_columns:
                    if col in df.columns:
                        df[col] = pd.to_datetime(df[col], errors='coerce')
        
        # Setup widgets
        self.setup_widgets()
        
        print(f"Loaded data:")
        print(f"  Routes: {len(self.routes)} entries")
        print(f"  Route versions: {len(self.route_versions)} entries") 
        print(f"  Shapes: {len(self.shapes)} entries")
        print(f"  Shape variants: {len(self.shape_variants)} entries")
        print(f"  Shape variant activations: {len(self.shape_variant_activations)} entries")
        
    def setup_widgets(self):
        """Setup interactive widgets"""
        # Initialize map bounds for zoom preservation
        self.current_bounds = None
        self.current_zoom = 12
        
        # Route selector - handle different possible column names
        route_short_col = 'route_short_name' if 'route_short_name' in self.routes.columns else 'short_name'
        route_long_col = 'route_long_name' if 'route_long_name' in self.routes.columns else 'long_name'
        route_id_col = 'route_id' if 'route_id' in self.routes.columns else 'id'
        
        route_options = []
        for _, row in self.routes.iterrows():
            route_id = row[route_id_col]
            short_name = row.get(route_short_col, str(route_id))
            long_name = row.get(route_long_col, '')
            
            display_name = f"{short_name}" + (f" - {long_name}" if long_name else "")
            route_options.append((display_name, route_id))
        
        self.route_selector = widgets.Dropdown(
            options=route_options,
            value=route_options[0][1] if route_options else None,
            description='Route:',
            style={'description_width': 'initial'}
        )
        
        # Date slider (will be updated when route is selected)
        self.date_slider = widgets.SelectionSlider(
            options=[datetime.now().date()],
            value=datetime.now().date(),
            description='Date:',
            disabled=False,
            continuous_update=False,
            orientation='horizontal',
            readout=True,
            style={'description_width': 'initial'}
        )
        
        # Set up event handlers
        self.route_selector.observe(self.on_route_change, names='value')
        self.date_slider.observe(self.on_date_change, names='value')
        
        # Initialize with first route
        if route_options:
            self.on_route_change({'new': route_options[0][1]})
    
    def get_route_lifetime(self, route_id):
        """Get the lifetime dates for a specific route"""
        route_data = self.route_versions[self.route_versions['route_id'] == route_id]
        if route_data.empty:
            return None, None
        
        # Handle different possible date column names
        start_col = None
        end_col = None
        
        possible_start_cols = ['start_date', 'from_date', 'valid_from', 'date_start']
        possible_end_cols = ['end_date', 'to_date', 'valid_to', 'date_end']
        
        for col in possible_start_cols:
            if col in route_data.columns:
                start_col = col
                break
                
        for col in possible_end_cols:
            if col in route_data.columns:
                end_col = col
                break
        
        if not start_col or not end_col:
            return None, None
        
        start_date = route_data[start_col].min()
        end_date = route_data[end_col].max()
        return start_date, end_date
    
    def get_available_dates(self, route_id):
        """Get all dates when the route had different configurations"""
        route_data = self.route_versions[self.route_versions['route_id'] == route_id]
        
        # Handle different possible date column names
        start_col = None
        end_col = None
        
        possible_start_cols = ['start_date', 'from_date', 'valid_from', 'date_start']
        possible_end_cols = ['end_date', 'to_date', 'valid_to', 'date_end']
        
        for col in possible_start_cols:
            if col in route_data.columns:
                start_col = col
                break
                
        for col in possible_end_cols:
            if col in route_data.columns:
                end_col = col
                break
        
        if not start_col or not end_col:
            print(f"Warning: Could not find date columns in route_versions")
            print(f"Available columns: {list(route_data.columns)}")
            return [datetime.now().date()]
        
        dates = set()
        for _, row in route_data.iterrows():
            current_date = row[start_col]
            end_date = row[end_col]
            
            if pd.isna(current_date) or pd.isna(end_date):
                continue
                
            # Add key dates: start, end, and some intermediate dates
            dates.add(current_date.date())
            dates.add(end_date.date())
            
            # Add monthly intervals for long-running routes
            while current_date < end_date:
                dates.add(current_date.date())
                current_date += timedelta(days=30)
        
        return sorted(list(dates)) if dates else [datetime.now().date()]
    
    def get_main_shape_for_date(self, route_id, target_date):
        """Get the main_shape_id for a specific route on a specific date"""
        route_data = self.route_versions[self.route_versions['route_id'] == route_id]
        
        # Handle different possible date column names
        start_col = None
        end_col = None
        
        possible_start_cols = ['start_date', 'from_date', 'valid_from', 'date_start']
        possible_end_cols = ['end_date', 'to_date', 'valid_to', 'date_end']
        
        for col in possible_start_cols:
            if col in route_data.columns:
                start_col = col
                break
                
        for col in possible_end_cols:
            if col in route_data.columns:
                end_col = col
                break
        
        if not start_col or not end_col:
            print(f"Warning: Could not find date columns in route_versions")
            return None
        
        # Convert target_date to datetime if it's a date
        if hasattr(target_date, 'date'):
            target_date = target_date
        else:
            target_date = datetime.combine(target_date, datetime.min.time())
        
        # Find the version active on the target date
        active_version = route_data[
            (route_data[start_col] <= target_date) & 
            (route_data[end_col] >= target_date)
        ]
        
        if not active_version.empty:
            # Handle different possible shape column names
            shape_col = 'main_shape_id'
            if 'main_shape_id' not in active_version.columns:
                possible_shape_cols = ['shape_id', 'main_shape', 'shape']
                for col in possible_shape_cols:
                    if col in active_version.columns:
                        shape_col = col
                        break
            
            if shape_col in active_version.columns:
                return active_version.iloc[0][shape_col]
        return None
    
    def get_shape_coordinates(self, shape_id):
        """Get coordinates for a specific shape"""
        if shape_id is None:
            return []
        
        shape_data = self.shapes[self.shapes['shape_id'] == shape_id].sort_values('shape_pt_sequence')
        coordinates = [(row['shape_pt_lat'], row['shape_pt_lon']) for _, row in shape_data.iterrows()]
        return coordinates
    
    def create_map(self, route_id, target_date):
        """Create a folium map for the route on the specified date"""
        # Get main shape for the date
        main_shape_id = self.get_main_shape_for_date(route_id, target_date)
        coordinates = self.get_shape_coordinates(main_shape_id)
        
        if not coordinates:
            # Create empty map centered on Budapest
            m = folium.Map(location=[47.4979, 19.0402], zoom_start=12)
            folium.Marker(
                [47.4979, 19.0402],
                popup="No route data available for this date",
                icon=folium.Icon(color='red')
            ).add_to(m)
            return m
        
        # Calculate center point
        center_lat = sum(coord[0] for coord in coordinates) / len(coordinates)
        center_lon = sum(coord[1] for coord in coordinates) / len(coordinates)
        
        # Create map with preserved zoom level
        if self.current_bounds:
            m = folium.Map(location=[center_lat, center_lon])
            m.fit_bounds(self.current_bounds)
        else:
            m = folium.Map(location=[center_lat, center_lon], zoom_start=self.current_zoom)
        
        # Get route info
        route_info = self.routes[self.routes['route_id'] == route_id]
        if route_info.empty:
            route_name = f"Route {route_id}"
        else:
            route_info = route_info.iloc[0]
            # Handle different possible column names
            short_name = route_info.get('route_short_name', route_info.get('short_name', str(route_id)))
            long_name = route_info.get('route_long_name', route_info.get('long_name', ''))
            route_name = f"{short_name}" + (f" - {long_name}" if long_name else "")
        
        # Add route line
        folium.PolyLine(
            coordinates,
            color='blue',
            weight=4,
            opacity=0.8,
            popup=f"{route_name} on {target_date}"
        ).add_to(m)
        
        # Add start and end markers
        if len(coordinates) >= 2:
            folium.Marker(
                coordinates[0],
                popup=f"Start: {route_name}",
                icon=folium.Icon(color='green', icon='play')
            ).add_to(m)
            
            folium.Marker(
                coordinates[-1],
                popup=f"End: {route_name}",
                icon=folium.Icon(color='red', icon='stop')
            ).add_to(m)
        
        return m
    
    def on_route_change(self, change):
        """Handle route selection change"""
        route_id = change['new']
        
        # Update date slider options
        available_dates = self.get_available_dates(route_id)
        if available_dates:
            self.date_slider.options = available_dates
            self.date_slider.value = available_dates[0]
        
        # Update visualization
        self.update_visualization()
    
    def on_date_change(self, change):
        """Handle date selection change"""
        # Store current map bounds before updating
        self.store_current_bounds()
        self.update_visualization()
    
    def store_current_bounds(self):
        """Store current map bounds (would need to be implemented with JavaScript callback in real notebook)"""
        # In a real implementation, you would use JavaScript callbacks to get current map bounds
        # For now, we'll preserve the zoom level
        pass
    
    def update_visualization(self):
        """Update the map visualization"""
        route_id = self.route_selector.value
        target_date = self.date_slider.value
        
        if route_id is None:
            return
        
        # Create and display map
        map_obj = self.create_map(route_id, target_date)
        
        # Display route lifetime info
        start_date, end_date = self.get_route_lifetime(route_id)
        route_info = self.routes[self.routes['route_id'] == route_id]
        
        if not route_info.empty:
            route_info = route_info.iloc[0]
            short_name = route_info.get('route_short_name', route_info.get('short_name', str(route_id)))
            long_name = route_info.get('route_long_name', route_info.get('long_name', ''))
            route_name = f"{short_name}" + (f" - {long_name}" if long_name else "")
        else:
            route_name = f"Route {route_id}"
        
        print(f"Route: {route_name}")
        if start_date and end_date:
            print(f"Lifetime: {start_date.date()} to {end_date.date()}")
        print(f"Currently showing: {target_date}")
        
        # Show main shape info
        main_shape_id = self.get_main_shape_for_date(route_id, target_date)
        if main_shape_id:
            print(f"Main shape ID: {main_shape_id}")
        
        return map_obj
    
    def display(self):
        """Display the interactive visualization"""
        display(widgets.VBox([
            self.route_selector,
            self.date_slider
        ]))
        
        # Create output widget for the map
        out = widgets.Output()
        display(out)
        
        def update_display(*args):
            with out:
                clear_output(wait=True)
                map_obj = self.update_visualization()
                if map_obj:
                    display(map_obj)
        
        # Connect the update function
        self.route_selector.observe(update_display, names='value')
        self.date_slider.observe(update_display, names='value')
        
        # Initial display
        update_display()

# Usage example with automatic folder detection:
"""
# Initialize the visualizer (automatically finds data folder)
visualizer = BudapestRouteVisualizer()

# Display the interactive visualization
visualizer.display()

# To explore what routes are available:
print("Available routes:")
print(visualizer.routes.head(10))
"""

# Debug function to explore your data structure
def explore_budapest_data(data_folder=None):
    """
    Function to explore the actual structure of your Budapest data
    Run this first to understand your data columns and format
    """
    if data_folder is None:
        project_root = find_project_root()
        data_folder = os.path.join(project_root, 'data', 'processed')
    
    print(f"Exploring data in: {data_folder}")
    
    files = ['routes.csv', 'route_versions.csv', 'shapes.csv', 
             'shape_variants.csv', 'shape_variant_activations.csv']
    
    for file in files:
        try:
            file_path = os.path.join(data_folder, file)
            df = pd.read_csv(file_path)
            print(f"\n=== {file} ===")
            print(f"Shape: {df.shape}")
            print(f"Columns: {list(df.columns)}")
            print("Sample data:")
            print(df.head(2))
            if 'route_id' in df.columns:
                print(f"Unique route_ids: {sorted(df['route_id'].unique())}")
        except Exception as e:
            print(f"Could not read {file}: {e}")

print("Budapest Route Visualizer ready!")
print("IMPORTANT: First run explore_budapest_data() to see your column names")
print("Then use:")
print("1. explore_budapest_data()")
print("2. visualizer = BudapestRouteVisualizer()")
print("3. visualizer.display()")

# Run this first to see your data structure:
print("\n" + "="*50)
print("Running data exploration...")
explore_budapest_data()

# Example data creation for testing (remove this when using real data)
def create_sample_data():
    """Create sample data for testing"""
    
    # Sample routes data
    routes_data = {
        'route_id': [1, 2, 3],
        'route_short_name': ['6', '4', '1'],
        'route_long_name': ['Tram 6', 'Tram 4', 'Metro M1'],
        'route_type': [0, 0, 1]  # 0=tram, 1=metro
    }
    
    # Sample route versions data
    route_versions_data = {
        'route_id': [1, 1, 2, 2, 3],
        'version_id': [1, 2, 1, 2, 1],
        'start_date': ['2020-01-01', '2022-06-01', '2020-01-01', '2021-03-01', '2020-01-01'],
        'end_date': ['2022-05-31', '2024-12-31', '2021-02-28', '2024-12-31', '2024-12-31'],
        'main_shape_id': [101, 102, 201, 202, 301]
    }
    
    # Sample shapes data (simplified coordinates around Budapest)
    shapes_data = {
        'shape_id': [101] * 5 + [102] * 5 + [201] * 4 + [202] * 4 + [301] * 6,
        'shape_pt_lat': [
            # Shape 101 (Tram 6 old route)
            47.500, 47.505, 47.510, 47.515, 47.520,
            # Shape 102 (Tram 6 new route) 
            47.500, 47.503, 47.508, 47.518, 47.525,
            # Shape 201 (Tram 4 old)
            47.490, 47.495, 47.500, 47.505,
            # Shape 202 (Tram 4 new)
            47.490, 47.493, 47.502, 47.508,
            # Shape 301 (Metro M1)
            47.495, 47.498, 47.501, 47.504, 47.507, 47.510
        ],
        'shape_pt_lon': [
            # Shape 101
            19.040, 19.045, 19.050, 19.055, 19.060,
            # Shape 102
            19.040, 19.043, 19.048, 19.058, 19.065,
            # Shape 201
            19.030, 19.035, 19.040, 19.045,
            # Shape 202
            19.030, 19.033, 19.042, 19.048,
            # Shape 301
            19.050, 19.053, 19.056, 19.059, 19.062, 19.065
        ],
        'shape_pt_sequence': (
            list(range(1, 6)) + list(range(1, 6)) + 
            list(range(1, 5)) + list(range(1, 5)) + 
            list(range(1, 7))
        )
    }
    
    # Create DataFrames and save to CSV
    pd.DataFrame(routes_data).to_csv('sample_routes.csv', index=False)
    pd.DataFrame(route_versions_data).to_csv('sample_route_versions.csv', index=False)
    pd.DataFrame(shapes_data).to_csv('sample_shapes.csv', index=False)
    
    print("Sample data created: sample_routes.csv, sample_route_versions.csv, sample_shapes.csv")
    print("You can now test the visualizer with:")
    print("""
visualizer = BudapestRouteVisualizer(
    'sample_routes.csv',
    'sample_route_versions.csv', 
    'sample_shapes.csv'
)
visualizer.display()
    """)

# Create sample data for testing
#create_sample_data()

Budapest Route Visualizer ready!
IMPORTANT: First run explore_budapest_data() to see your column names
Then use:
1. explore_budapest_data()
2. visualizer = BudapestRouteVisualizer()
3. visualizer.display()

Running data exploration...
Exploring data in: c:\Users\User\Documents\Projects\budapest_time_travel\data\processed

=== routes.csv ===
Shape: (810, 6)
Columns: ['route_id', 'agency_id', 'route_short_name', 'route_type', 'route_color', 'route_text_color']
Sample data:
  route_id agency_id route_short_name  route_type route_color route_text_color
0     MP52       BKK               M2           3      1188FF           000000
1     MP52       BKK               M2           3      1188FF           000000
Unique route_ids: ['0050', '0070', '0071', '0075', '0078', '0080', '0081', '0090', '0110', '0130', '0131', '0150', '0160', '0161', '0205', '0210', '0211', '0220', '0221', '0230', '0235', '0250', '0260', '0270', '0290', '0300', '0301', '0302', '0310', '0320', '0330', '0340', '0350', '036

  df = pd.read_csv(file_path)


In [2]:
# Explore your data structure first
explore_budapest_data()

# Create the visualizer (automatically finds your data)
visualizer = BudapestRouteVisualizer()

# Display the interactive visualization
visualizer.display()

Exploring data in: c:\Users\User\Documents\Projects\budapest_time_travel\data\processed

=== routes.csv ===
Shape: (810, 6)
Columns: ['route_id', 'agency_id', 'route_short_name', 'route_type', 'route_color', 'route_text_color']
Sample data:
  route_id agency_id route_short_name  route_type route_color route_text_color
0     MP52       BKK               M2           3      1188FF           000000
1     MP52       BKK               M2           3      1188FF           000000
Unique route_ids: ['0050', '0070', '0071', '0075', '0078', '0080', '0081', '0090', '0110', '0130', '0131', '0150', '0160', '0161', '0205', '0210', '0211', '0220', '0221', '0230', '0235', '0250', '0260', '0270', '0290', '0300', '0301', '0302', '0310', '0320', '0330', '0340', '0350', '0360', '0362', '0380', '0382', '0390', '0400', '0405', '0437', '0440', '0450', '0460', '0530', '0540', '0550', '0570', '0580', '0581', '0630', '0640', '0641', '0650', '0660', '0665', '0670', '0680', '0681', '0710', '0845', '0850', '0855',

  df = pd.read_csv(file_path)



=== shapes.csv ===
Shape: (546076, 6)
Columns: ['shape_id', 'shape_pt_lat', 'shape_pt_lon', 'shape_pt_sequence', 'shape_dist_traveled', 'shape_bkk_ref']
Sample data:
  shape_id  shape_pt_lat  shape_pt_lon  shape_pt_sequence  \
0     0028     47.504701     19.136875             200002   
1     0028     47.505142     19.136703             200003   

   shape_dist_traveled shape_bkk_ref  
0                  0.0         14400  
1                 50.0         14400  

=== shape_variants.csv ===
Shape: (4205, 6)
Columns: ['shape_variant_id', 'version_id', 'shape_id', 'trip_headsign', 'is_main', 'note']
Sample data:
   shape_variant_id  version_id shape_id              trip_headsign  is_main  \
0            100000      100016     R952  Rákospalota, Kossuth utca        1   
1            100001      100016     U197  Rákospalota, Kossuth utca        0   

   note  
0   NaN  
1   NaN  

=== shape_variant_activations.csv ===
Shape: (295976, 3)
Columns: ['date', 'shape_variant_id', 'exception_type

  self.shapes = pd.read_csv(os.path.join(data_folder, 'shapes.csv'))


Route: M2
Lifetime: 2013-10-12 to NaT
Currently showing: 2025-07-02
Loaded data:
  Routes: 810 entries
  Route versions: 1556 entries
  Shapes: 546076 entries
  Shape variants: 4205 entries
  Shape variant activations: 295976 entries


VBox(children=(Dropdown(description='Route:', options=(('M2', 'MP52'), ('M2', 'MP52'), ('M2E', 'MP525'), ('M2E…

Output()

In [1]:
# Route Visualization - Interactive Timeline
# This notebook visualizes how transit routes change over time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import ipywidgets as widgets
from IPython.display import display, clear_output
from datetime import datetime, timedelta
import os

# =============================================================================
# SETUP DATA PATHS (Choose one of these methods)
# =============================================================================

# Method 1: Auto-detect project root
def find_project_root():
    """Find project root by looking for the data folder."""
    current_dir = os.getcwd()
    while current_dir != os.path.dirname(current_dir):  # Until filesystem root
        data_path = os.path.join(current_dir, 'data')
        if os.path.exists(data_path):
            return current_dir
        current_dir = os.path.dirname(current_dir)
    return os.getcwd()

# Method 2: Manual path setup (uncomment and adjust if Method 1 doesn't work)
# project_root = r"C:\Users\User\Documents\Projects\budapest_time_travel"  # Your actual path
# data_folder = os.path.join(project_root, 'data', 'processed')

# Use auto-detection
project_root = find_project_root()
data_folder = os.path.join(project_root, 'data', 'processed')

print(f"📁 Project root: {project_root}")
print(f"📊 Data folder: {data_folder}")
print(f"✅ Data folder exists: {os.path.exists(data_folder)}")

# =============================================================================
# 1. DATA LOADING
# =============================================================================

print("\n📊 Loading transit data...")

# Check if data files exist
required_files = ['routes.csv', 'route_versions.csv', 'shapes.csv']
missing_files = []

for file in required_files:
    file_path = os.path.join(data_folder, file)
    if not os.path.exists(file_path):
        missing_files.append(file)
    else:
        print(f"✅ Found {file}")

if missing_files:
    print(f"❌ Missing files: {missing_files}")
    print("Please make sure you have processed data in the correct location.")
    print(f"Expected location: {data_folder}")
else:
    print("✅ All required files found!")

# Load the datasets
try:
    routes_df = pd.read_csv(os.path.join(data_folder, 'routes.csv'))
    route_versions_df = pd.read_csv(os.path.join(data_folder, 'route_versions.csv'))
    shapes_df = pd.read_csv(os.path.join(data_folder, 'shapes.csv'))
    
    # Convert dates
    route_versions_df['valid_from'] = pd.to_datetime(route_versions_df['valid_from'])
    route_versions_df['valid_to'] = pd.to_datetime(route_versions_df['valid_to'])
    
    print(f"✅ Loaded {len(routes_df)} routes, {len(route_versions_df)} route versions, {len(shapes_df)} shape points")
    
except Exception as e:
    print(f"❌ Error loading data: {e}")
    print("Please check your data files and paths.")
    
# Quick data preview
print(f"\n📋 Data Preview:")
print(f"Routes sample:")
if 'routes_df' in locals():
    print(routes_df.head()[['route_id', 'route_short_name', 'route_type']])
    
print(f"\nRoute versions sample:")
if 'route_versions_df' in locals():
    print(route_versions_df.head()[['route_id', 'valid_from', 'valid_to', 'main_shape_id']])

# =============================================================================
# 2. ROUTE ANALYSIS FUNCTIONS
# =============================================================================

def get_route_info(route_id):
    """Get basic information about a route."""
    route_info = routes_df[routes_df['route_id'] == route_id]
    if route_info.empty:
        return None
    
    route_versions = route_versions_df[route_versions_df['route_id'] == route_id].copy()
    route_versions = route_versions.sort_values('valid_from')
    
    # Merge with routes to get route_short_name for each version
    route_versions = route_versions.merge(
        routes_df[['route_id', 'route_short_name']], 
        on='route_id', 
        how='left'
    )
    
    # Calculate actual end dates
    route_versions['actual_end'] = route_versions['valid_to'].fillna(pd.Timestamp.now())
    
    info = {
        'route_id': route_id,
        'route_short_name': route_info.iloc[0]['route_short_name'],
        'route_type': route_info.iloc[0]['route_type'],
        'total_versions': len(route_versions),
        'first_date': route_versions['valid_from'].min(),
        'last_date': route_versions['actual_end'].max(),
        'total_days': (route_versions['actual_end'].max() - route_versions['valid_from'].min()).days,
        'versions': route_versions
    }
    
    return info

def get_active_version_on_date(route_id, target_date):
    """Get the active route version for a specific date."""
    target_date = pd.to_datetime(target_date)
    
    route_versions = route_versions_df[route_versions_df['route_id'] == route_id].copy()
    
    # Merge with routes to get route_short_name
    route_versions = route_versions.merge(
        routes_df[['route_id', 'route_short_name']], 
        on='route_id', 
        how='left'
    )
    
    # Find version active on target date
    active_versions = route_versions[
        (route_versions['valid_from'] <= target_date) & 
        ((route_versions['valid_to'].isna()) | (route_versions['valid_to'] >= target_date))
    ]
    
    if active_versions.empty:
        return None
    
    # Return the most recent version if multiple found
    return active_versions.sort_values('valid_from').iloc[-1]

def get_shape_coordinates(shape_id):
    """Get coordinates for a shape."""
    shape_points = shapes_df[shapes_df['shape_id'] == shape_id].copy()
    if shape_points.empty:
        return None
    
    shape_points = shape_points.sort_values('shape_pt_sequence')
    return shape_points[['shape_pt_lat', 'shape_pt_lon', 'shape_pt_sequence']]

# =============================================================================
# 3. ROUTE SELECTION WIDGET
# =============================================================================

print("\n🚇 Available routes:")

# Get unique routes with their names
available_routes = routes_df[['route_id', 'route_short_name', 'route_type']].copy()
available_routes = available_routes.sort_values('route_short_name')

# Create route options for dropdown
route_options = []
for _, row in available_routes.iterrows():
    route_type_name = {0: 'Tram', 1: 'Subway', 2: 'Rail', 3: 'Bus', 4: 'Ferry'}.get(row['route_type'], 'Other')
    label = f"{row['route_short_name']} ({route_type_name}) - ID: {row['route_id']}"
    route_options.append((label, row['route_id']))

print(f"Found {len(route_options)} routes")

# =============================================================================
# 4. INTERACTIVE VISUALIZATION CLASS
# =============================================================================

class RouteTimelineVisualizer:
    def __init__(self):
        self.current_route_id = None
        self.route_info = None
        self.fig = None
        
    def setup_widgets(self, default_route_id=None):
        """Setup interactive widgets."""
        
        # Route selection dropdown
        if default_route_id and default_route_id in [opt[1] for opt in route_options]:
            default_route = default_route_id
        else:
            default_route = route_options[0][1] if route_options else None
            
        self.route_selector = widgets.Dropdown(
            options=route_options,
            value=default_route,
            description='Route:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='400px')
        )
        
        # Date slider (will be updated when route is selected)
        self.date_slider = widgets.SelectionSlider(
            options=[datetime.now().strftime('%Y-%m-%d')],
            value=datetime.now().strftime('%Y-%m-%d'),
            description='Date:',
            disabled=True,
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='600px')
        )
        
        # Info display
        self.info_output = widgets.Output()
        
        # Map display
        self.map_output = widgets.Output()
        
        # Wire up the interactions
        self.route_selector.observe(self.on_route_change, names='value')
        self.date_slider.observe(self.on_date_change, names='value')
        
        # Initialize with default route
        if default_route:
            self.on_route_change({'new': default_route})
    
    def on_route_change(self, change):
        """Handle route selection change."""
        route_id = change['new']
        self.current_route_id = route_id
        self.route_info = get_route_info(route_id)
        
        if self.route_info is None:
            with self.info_output:
                clear_output(wait=True)
                print(f"❌ Route {route_id} not found")
            return
        
        # Update info display
        with self.info_output:
            clear_output(wait=True)
            self.display_route_info()
        
        # Update date slider (this will trigger map update via date change)
        self.update_date_slider()
    
    def update_date_slider(self):
        """Update date slider based on route lifetime."""
        if self.route_info is None:
            return
        
        # Generate date options (every week for long periods, daily for short)
        start_date = self.route_info['first_date']
        end_date = self.route_info['last_date']
        total_days = (end_date - start_date).days
        
        if total_days > 365:
            # Weekly intervals for long periods
            date_range = pd.date_range(start_date, end_date, freq='W')
        elif total_days > 90:
            # Every 3 days for medium periods
            date_range = pd.date_range(start_date, end_date, freq='3D')
        else:
            # Daily for short periods
            date_range = pd.date_range(start_date, end_date, freq='D')
        
        date_options = [d.strftime('%Y-%m-%d') for d in date_range]
        
        if date_options:
            self.date_slider.options = date_options
            self.date_slider.value = date_options[0]
            self.date_slider.disabled = False
        else:
            self.date_slider.disabled = True
    
    def on_date_change(self, change):
        """Handle date slider change."""
        self.update_map()
    
    def display_route_info(self):
        """Display route information."""
        info = self.route_info
        print(f"🚇 Route {info['route_short_name']} (ID: {info['route_id']})")
        print(f"📅 Lifetime: {info['first_date'].strftime('%Y-%m-%d')} to {info['last_date'].strftime('%Y-%m-%d')}")
        print(f"⏰ Duration: {info['total_days']} days")
        print(f"🔄 Total versions: {info['total_versions']}")
        
        print(f"\n📋 Version History:")
        for _, version in info['versions'].iterrows():
            start = version['valid_from'].strftime('%Y-%m-%d')
            end = version['valid_to'].strftime('%Y-%m-%d') if pd.notna(version['valid_to']) else 'Present'
            # Use route_short_name instead of route_long_name
            route_name = version.get('route_short_name', 'Unknown')
            print(f"  • {start} to {end}: {route_name} (Shape: {version['main_shape_id']})")
    
    def update_map(self):
        """Update the map display."""
        if self.route_info is None:
            return
        
        target_date = self.date_slider.value
        active_version = get_active_version_on_date(self.current_route_id, target_date)
        
        with self.map_output:
            clear_output(wait=True)
            
            if active_version is None:
                print(f"❌ No active version found for {target_date}")
                return
            
            # Get shape coordinates
            shape_coords = get_shape_coordinates(active_version['main_shape_id'])
            
            if shape_coords is None or shape_coords.empty:
                print(f"❌ No shape data found for shape {active_version['main_shape_id']}")
                return
            
            # Create the map
            self.create_route_map(shape_coords, active_version, target_date)
    
    def create_route_map(self, shape_coords, version_info, date):
        """Create interactive map showing the route using modern Plotly."""
        
        # Prepare data for line plot
        line_data = shape_coords.copy()
        line_data['route_name'] = self.route_info['route_short_name']
        
        # Get route short name for the title
        route_short_name = version_info.get('route_short_name', self.route_info['route_short_name'])
        
        # Create the modern map using line_map instead of deprecated line_mapbox
        fig = px.line_map(
            line_data,
            lat='shape_pt_lat',
            lon='shape_pt_lon',
            title=f"Route {route_short_name} on {date}<br>" +
                  f"Shape ID: {version_info['main_shape_id']}",
            height=500,
            zoom=12
        )
        
        # Add start/end markers
        start_point = shape_coords.iloc[0]
        end_point = shape_coords.iloc[-1]
        
        fig.add_trace(go.Scattermap(
            mode="markers",
            lon=[start_point['shape_pt_lon'], end_point['shape_pt_lon']],
            lat=[start_point['shape_pt_lat'], end_point['shape_pt_lat']],
            marker=dict(
                size=12,
                color=['green', 'red'],
                symbol=['circle', 'square']
            ),
            text=['Start', 'End'],
            name="Route Endpoints",
            hovertemplate="<b>%{text}</b><br>" +
                         "Lat: %{lat:.6f}<br>" +
                         "Lon: %{lon:.6f}<br>" +
                         "<extra></extra>"
        ))
        
        # Style the route line
        fig.update_traces(
            line=dict(width=4, color='red'),
            selector=dict(mode='lines')
        )
        
        # Center the map
        center_lat = shape_coords['shape_pt_lat'].mean()
        center_lon = shape_coords['shape_pt_lon'].mean()
        
        fig.update_layout(
            map=dict(
                center=dict(lat=center_lat, lon=center_lon),
                zoom=12
            ),
            showlegend=True
        )
        
        fig.show()
    
    def display(self):
        """Display the complete widget interface."""
        print("🗺️ Interactive Route Timeline Visualizer")
        print("=" * 50)
        
        # Layout the widgets
        route_selection = widgets.HBox([
            widgets.VBox([
                widgets.HTML("<b>1. Select Route:</b>"),
                self.route_selector
            ]),
        ])
        
        date_selection = widgets.HBox([
            widgets.VBox([
                widgets.HTML("<b>2. Select Date:</b>"),
                self.date_slider
            ]),
        ])
        
        map_section = widgets.VBox([
            widgets.HTML("<b>3. Route Visualization:</b>"),
            self.map_output
        ])
        
        info_section = widgets.VBox([
            widgets.HTML("<b>4. Route Information:</b>"),
            self.info_output
        ])
        
        display(widgets.VBox([
            route_selection,
            date_selection, 
            map_section,
            info_section
        ]))

# =============================================================================
# 5. CREATE AND DISPLAY THE VISUALIZER
# =============================================================================

print("\n🎯 Creating interactive visualizer...")

# Create the visualizer
visualizer = RouteTimelineVisualizer()

# Setup with route 6 (tram 6) as default if it exists
default_route = '6' if '6' in routes_df['route_id'].values else None
visualizer.setup_widgets(default_route)

print("✅ Visualizer ready!")
print("\nInstructions:")
print("1. Select a route from the dropdown")
print("2. Use the date slider to see how the route changed over time")
print("3. The map will update to show the main shape for each date")

# Display the interactive interface
visualizer.display()

📁 Project root: c:\Users\User\Documents\Projects\budapest_time_travel
📊 Data folder: c:\Users\User\Documents\Projects\budapest_time_travel\data\processed
✅ Data folder exists: True

📊 Loading transit data...
✅ Found routes.csv
✅ Found route_versions.csv
✅ Found shapes.csv
✅ All required files found!
✅ Loaded 810 routes, 1556 route versions, 546076 shape points

📋 Data Preview:
Routes sample:
  route_id route_short_name  route_type
0     MP52               M2           3
1     MP52               M2           3
2    MP525              M2E           3
3    MP525              M2E           3
4     VP01               1V           3

Route versions sample:
  route_id valid_from   valid_to main_shape_id
0     0050 2013-10-11        NaT          R952
1     0050 2013-10-11        NaT          U961
2     0070 2013-10-11 2014-04-06          S704
3     0070 2014-04-07        NaT          Y207
4     0070 2013-10-11 2014-04-06          S705

🚇 Available routes:
Found 810 routes

🎯 Creating interacti

  shapes_df = pd.read_csv(os.path.join(data_folder, 'shapes.csv'))


✅ Visualizer ready!

Instructions:
1. Select a route from the dropdown
2. Use the date slider to see how the route changed over time
3. The map will update to show the main shape for each date
🗺️ Interactive Route Timeline Visualizer


VBox(children=(HBox(children=(VBox(children=(HTML(value='<b>1. Select Route:</b>'), Dropdown(description='Rout…

In [1]:
# Route Visualization - Interactive Timeline
# This notebook visualizes how transit routes change over time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import ipywidgets as widgets
from IPython.display import display, clear_output
from datetime import datetime, timedelta
import os

# =============================================================================
# SETUP DATA PATHS (Choose one of these methods)
# =============================================================================

# Method 1: Auto-detect project root
def find_project_root():
    """Find project root by looking for the data folder."""
    current_dir = os.getcwd()
    while current_dir != os.path.dirname(current_dir):  # Until filesystem root
        data_path = os.path.join(current_dir, 'data')
        if os.path.exists(data_path):
            return current_dir
        current_dir = os.path.dirname(current_dir)
    return os.getcwd()

# Method 2: Manual path setup (uncomment and adjust if Method 1 doesn't work)
# project_root = r"C:\Users\User\Documents\Projects\budapest_time_travel"  # Your actual path
# data_folder = os.path.join(project_root, 'data', 'processed')

# Use auto-detection
project_root = find_project_root()
data_folder = os.path.join(project_root, 'data', 'processed')

print(f"📁 Project root: {project_root}")
print(f"📊 Data folder: {data_folder}")
print(f"✅ Data folder exists: {os.path.exists(data_folder)}")

# =============================================================================
# 1. DATA LOADING
# =============================================================================

print("\n📊 Loading transit data...")

# Check if data files exist
required_files = ['routes.csv', 'route_versions.csv', 'shapes.csv']
missing_files = []

for file in required_files:
    file_path = os.path.join(data_folder, file)
    if not os.path.exists(file_path):
        missing_files.append(file)
    else:
        print(f"✅ Found {file}")

if missing_files:
    print(f"❌ Missing files: {missing_files}")
    print("Please make sure you have processed data in the correct location.")
    print(f"Expected location: {data_folder}")
else:
    print("✅ All required files found!")

# Load the datasets
try:
    routes_df = pd.read_csv(os.path.join(data_folder, 'routes.csv'))
    route_versions_df = pd.read_csv(os.path.join(data_folder, 'route_versions.csv'))
    shapes_df = pd.read_csv(os.path.join(data_folder, 'shapes.csv'))
    
    # Convert dates
    route_versions_df['valid_from'] = pd.to_datetime(route_versions_df['valid_from'])
    route_versions_df['valid_to'] = pd.to_datetime(route_versions_df['valid_to'])
    
    print(f"✅ Loaded {len(routes_df)} routes, {len(route_versions_df)} route versions, {len(shapes_df)} shape points")
    
except Exception as e:
    print(f"❌ Error loading data: {e}")
    print("Please check your data files and paths.")
    
# Quick data preview
print(f"\n📋 Data Preview:")
print(f"Routes sample:")
if 'routes_df' in locals():
    print(routes_df.head()[['route_id', 'route_short_name', 'route_type']])
    
print(f"\nRoute versions sample:")
if 'route_versions_df' in locals():
    print(route_versions_df.head()[['route_id', 'valid_from', 'valid_to', 'main_shape_id']])

# =============================================================================
# 2. ROUTE ANALYSIS FUNCTIONS
# =============================================================================

def get_route_info(route_id):
    """Get basic information about a route."""
    route_info = routes_df[routes_df['route_id'] == route_id]
    if route_info.empty:
        return None
    
    route_versions = route_versions_df[route_versions_df['route_id'] == route_id].copy()
    route_versions = route_versions.sort_values('valid_from')
    
    # Calculate actual end dates
    route_versions['actual_end'] = route_versions['valid_to'].fillna(pd.Timestamp.now())
    
    info = {
        'route_id': route_id,
        'route_short_name': route_info.iloc[0]['route_short_name'],
        'route_type': route_info.iloc[0]['route_type'],
        'total_versions': len(route_versions),
        'first_date': route_versions['valid_from'].min(),
        'last_date': route_versions['actual_end'].max(),
        'total_days': (route_versions['actual_end'].max() - route_versions['valid_from'].min()).days,
        'versions': route_versions
    }
    
    return info

def get_active_version_on_date(route_id, target_date):
    """Get the active route version for a specific date."""
    target_date = pd.to_datetime(target_date)
    
    route_versions = route_versions_df[route_versions_df['route_id'] == route_id].copy()
    
    # Find version active on target date
    active_versions = route_versions[
        (route_versions['valid_from'] <= target_date) & 
        ((route_versions['valid_to'].isna()) | (route_versions['valid_to'] >= target_date))
    ]
    
    if active_versions.empty:
        return None
    
    # Return the most recent version if multiple found
    return active_versions.sort_values('valid_from').iloc[-1]

def get_shape_coordinates(shape_id):
    """Get coordinates for a shape."""
    shape_points = shapes_df[shapes_df['shape_id'] == shape_id].copy()
    if shape_points.empty:
        return None
    
    shape_points = shape_points.sort_values('shape_pt_sequence')
    return shape_points[['shape_pt_lat', 'shape_pt_lon', 'shape_pt_sequence']]

# =============================================================================
# 3. ROUTE SELECTION WIDGET
# =============================================================================

print("\n🚇 Available routes:")

# Get unique routes with their names
available_routes = routes_df[['route_id', 'route_short_name', 'route_type']].copy()
available_routes = available_routes.sort_values('route_short_name')

# Create route options for dropdown
route_options = []
for _, row in available_routes.iterrows():
    route_type_name = {0: 'Tram', 1: 'Subway', 2: 'Rail', 3: 'Bus', 4: 'Ferry'}.get(row['route_type'], 'Other')
    label = f"{row['route_short_name']} ({route_type_name}) - ID: {row['route_id']}"
    route_options.append((label, row['route_id']))

print(f"Found {len(route_options)} routes")

# =============================================================================
# 4. INTERACTIVE VISUALIZATION CLASS
# =============================================================================

class RouteTimelineVisualizer:
    def __init__(self):
        self.current_route_id = None
        self.route_info = None
        self.fig = None
        
    def setup_widgets(self, default_route_id=None):
        """Setup interactive widgets."""
        
        # Route selection dropdown
        if default_route_id and default_route_id in [opt[1] for opt in route_options]:
            default_route = default_route_id
        else:
            default_route = route_options[0][1] if route_options else None
            
        self.route_selector = widgets.Dropdown(
            options=route_options,
            value=default_route,
            description='Route:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='400px')
        )
        
        # Date slider (will be updated when route is selected)
        self.date_slider = widgets.SelectionSlider(
            options=[datetime.now().strftime('%Y-%m-%d')],
            value=datetime.now().strftime('%Y-%m-%d'),
            description='Date:',
            disabled=True,
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='600px')
        )
        
        # Info display
        self.info_output = widgets.Output()
        
        # Map display
        self.map_output = widgets.Output()
        
        # Wire up the interactions
        self.route_selector.observe(self.on_route_change, names='value')
        self.date_slider.observe(self.on_date_change, names='value')
        
        # Initialize with default route
        if default_route:
            self.on_route_change({'new': default_route})
    
    def on_route_change(self, change):
        """Handle route selection change."""
        route_id = change['new']
        self.current_route_id = route_id
        self.route_info = get_route_info(route_id)
        
        if self.route_info is None:
            with self.info_output:
                clear_output(wait=True)
                print(f"❌ Route {route_id} not found")
            return
        
        # Update info display
        with self.info_output:
            clear_output(wait=True)
            self.display_route_info()
        
        # Update date slider
        self.update_date_slider()
        
        # Update map with current date
        self.update_map()
    
    def update_date_slider(self):
        """Update date slider based on route lifetime."""
        if self.route_info is None:
            return
        
        # Generate date options (every week for long periods, daily for short)
        start_date = self.route_info['first_date']
        end_date = self.route_info['last_date']
        total_days = (end_date - start_date).days
        
        if total_days > 365:
            # Weekly intervals for long periods
            date_range = pd.date_range(start_date, end_date, freq='W')
        elif total_days > 90:
            # Every 3 days for medium periods
            date_range = pd.date_range(start_date, end_date, freq='3D')
        else:
            # Daily for short periods
            date_range = pd.date_range(start_date, end_date, freq='D')
        
        date_options = [d.strftime('%Y-%m-%d') for d in date_range]
        
        if date_options:
            self.date_slider.options = date_options
            self.date_slider.value = date_options[0]
            self.date_slider.disabled = False
        else:
            self.date_slider.disabled = True
    
    def on_date_change(self, change):
        """Handle date slider change."""
        self.update_map()
    
    def display_route_info(self):
        """Display route information."""
        info = self.route_info
        print(f"🚇 Route {info['route_short_name']} (ID: {info['route_id']})")
        print(f"📅 Lifetime: {info['first_date'].strftime('%Y-%m-%d')} to {info['last_date'].strftime('%Y-%m-%d')}")
        print(f"⏰ Duration: {info['total_days']} days")
        print(f"🔄 Total versions: {info['total_versions']}")
        
        print(f"\n📋 Version History:")
        for _, version in info['versions'].iterrows():
            start = version['valid_from'].strftime('%Y-%m-%d')
            end = version['valid_to'].strftime('%Y-%m-%d') if pd.notna(version['valid_to']) else 'Present'
            print(f"  • {start} to {end}: {version['route_short_name']} (Shape: {version['main_shape_id']})")
    
    def update_map(self):
        """Update the map display."""
        if self.route_info is None:
            return
        
        target_date = self.date_slider.value
        active_version = get_active_version_on_date(self.current_route_id, target_date)
        
        with self.map_output:
            clear_output(wait=True)
            
            if active_version is None:
                print(f"❌ No active version found for {target_date}")
                return
            
            # Get shape coordinates
            shape_coords = get_shape_coordinates(active_version['main_shape_id'])
            
            if shape_coords is None or shape_coords.empty:
                print(f"❌ No shape data found for shape {active_version['main_shape_id']}")
                return
            
            # Create the map
            self.create_route_map(shape_coords, active_version, target_date)
    
    def create_route_map(self, shape_coords, version_info, date):
        """Create interactive map showing the route using modern Plotly."""
        
        # Prepare data for line plot
        line_data = shape_coords.copy()
        line_data['route_name'] = self.route_info['route_short_name']
        
        # Create the modern map
        fig = px.line_mapbox(
            line_data,
            lat='shape_pt_lat',
            lon='shape_pt_lon',
            title=f"Route {self.route_info['route_short_name']} on {date}<br>" +
                  f"Version: {version_info['route_long_name']}<br>" +
                  f"Shape ID: {version_info['main_shape_id']}",
            mapbox_style="open-street-map",
            height=500,
            zoom=12
        )
        
        # Add start/end markers
        start_point = shape_coords.iloc[0]
        end_point = shape_coords.iloc[-1]
        
        fig.add_trace(go.Scatter(
            mode="markers",
            x=[start_point['shape_pt_lon'], end_point['shape_pt_lon']],
            y=[start_point['shape_pt_lat'], end_point['shape_pt_lat']],
            marker=dict(
                size=12,
                color=['green', 'red'],
                symbol=['circle', 'square']
            ),
            text=['Start', 'End'],
            name="Route Endpoints",
            hovertemplate="<b>%{text}</b><br>" +
                         "Lat: %{y:.6f}<br>" +
                         "Lon: %{x:.6f}<br>" +
                         "<extra></extra>"
        ))
        
        # Style the route line
        fig.update_traces(
            line=dict(width=4, color='red'),
            selector=dict(mode='lines')
        )
        
        # Center the map
        center_lat = shape_coords['shape_pt_lat'].mean()
        center_lon = shape_coords['shape_pt_lon'].mean()
        
        fig.update_layout(
            mapbox=dict(
                center=dict(lat=center_lat, lon=center_lon),
                zoom=12
            ),
            showlegend=True
        )
        
        fig.show()
    
    def display(self):
        """Display the complete widget interface."""
        print("🗺️ Interactive Route Timeline Visualizer")
        print("=" * 50)
        
        # Layout the widgets
        route_selection = widgets.HBox([
            widgets.VBox([
                widgets.HTML("<b>1. Select Route:</b>"),
                self.route_selector
            ]),
        ])
        
        date_selection = widgets.HBox([
            widgets.VBox([
                widgets.HTML("<b>2. Select Date:</b>"),
                self.date_slider
            ]),
        ])
        
        map_section = widgets.VBox([
            widgets.HTML("<b>3. Route Visualization:</b>"),
            self.map_output
        ])
        
        info_section = widgets.VBox([
            widgets.HTML("<b>4. Route Information:</b>"),
            self.info_output
        ])
        
        display(widgets.VBox([
            route_selection,
            date_selection, 
            map_section,
            info_section
        ]))

# =============================================================================
# 5. CREATE AND DISPLAY THE VISUALIZER
# =============================================================================

print("\n🎯 Creating interactive visualizer...")

# Create the visualizer
visualizer = RouteTimelineVisualizer()

# Setup with route 6 (tram 6) as default if it exists
default_route = '6' if '6' in routes_df['route_id'].values else None
visualizer.setup_widgets(default_route)

print("✅ Visualizer ready!")
print("\nInstructions:")
print("1. Select a route from the dropdown")
print("2. Use the date slider to see how the route changed over time")
print("3. The map will update to show the main shape for each date")

# Display the interactive interface
visualizer.display()

📁 Project root: c:\Users\User\Documents\Projects\budapest_time_travel
📊 Data folder: c:\Users\User\Documents\Projects\budapest_time_travel\data\processed
✅ Data folder exists: True

📊 Loading transit data...
✅ Found routes.csv
✅ Found route_versions.csv
✅ Found shapes.csv
✅ All required files found!
✅ Loaded 810 routes, 1556 route versions, 546076 shape points

📋 Data Preview:
Routes sample:
  route_id route_short_name  route_type
0     MP52               M2           3
1     MP52               M2           3
2    MP525              M2E           3
3    MP525              M2E           3
4     VP01               1V           3

Route versions sample:
  route_id valid_from   valid_to main_shape_id
0     0050 2013-10-11        NaT          R952
1     0050 2013-10-11        NaT          U961
2     0070 2013-10-11 2014-04-06          S704
3     0070 2014-04-07        NaT          Y207
4     0070 2013-10-11 2014-04-06          S705

🚇 Available routes:
Found 810 routes

🎯 Creating interacti

  shapes_df = pd.read_csv(os.path.join(data_folder, 'shapes.csv'))


✅ Visualizer ready!

Instructions:
1. Select a route from the dropdown
2. Use the date slider to see how the route changed over time
3. The map will update to show the main shape for each date
🗺️ Interactive Route Timeline Visualizer


VBox(children=(HBox(children=(VBox(children=(HTML(value='<b>1. Select Route:</b>'), Dropdown(description='Rout…

In [2]:
# Route Visualization - Interactive Timeline
# This notebook visualizes how transit routes change over time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import ipywidgets as widgets
from IPython.display import display, clear_output
from datetime import datetime, timedelta
import os

# =============================================================================
# SETUP DATA PATHS (Choose one of these methods)
# =============================================================================

# Method 1: Auto-detect project root
def find_project_root():
    """Find project root by looking for the data folder."""
    current_dir = os.getcwd()
    while current_dir != os.path.dirname(current_dir):  # Until filesystem root
        data_path = os.path.join(current_dir, 'data')
        if os.path.exists(data_path):
            return current_dir
        current_dir = os.path.dirname(current_dir)
    return os.getcwd()

# Method 2: Manual path setup (uncomment and adjust if Method 1 doesn't work)
# project_root = r"C:\Users\User\Documents\Projects\budapest_time_travel"  # Your actual path
# data_folder = os.path.join(project_root, 'data', 'processed')

# Use auto-detection
project_root = find_project_root()
data_folder = os.path.join(project_root, 'data', 'processed')

print(f"📁 Project root: {project_root}")
print(f"📊 Data folder: {data_folder}")
print(f"✅ Data folder exists: {os.path.exists(data_folder)}")

# =============================================================================
# 1. DATA LOADING
# =============================================================================

print("\n📊 Loading transit data...")

# Check if data files exist
required_files = ['routes.csv', 'route_versions.csv', 'shapes.csv']
missing_files = []

for file in required_files:
    file_path = os.path.join(data_folder, file)
    if not os.path.exists(file_path):
        missing_files.append(file)
    else:
        print(f"✅ Found {file}")

if missing_files:
    print(f"❌ Missing files: {missing_files}")
    print("Please make sure you have processed data in the correct location.")
    print(f"Expected location: {data_folder}")
else:
    print("✅ All required files found!")

# Load the datasets
try:
    routes_df = pd.read_csv(os.path.join(data_folder, 'routes.csv'))
    route_versions_df = pd.read_csv(os.path.join(data_folder, 'route_versions.csv'))
    shapes_df = pd.read_csv(os.path.join(data_folder, 'shapes.csv'))
    
    # Convert dates
    route_versions_df['valid_from'] = pd.to_datetime(route_versions_df['valid_from'])
    route_versions_df['valid_to'] = pd.to_datetime(route_versions_df['valid_to'])
    
    print(f"✅ Loaded {len(routes_df)} routes, {len(route_versions_df)} route versions, {len(shapes_df)} shape points")
    
except Exception as e:
    print(f"❌ Error loading data: {e}")
    print("Please check your data files and paths.")
    
# Quick data preview
print(f"\n📋 Data Preview:")
print(f"Routes sample:")
if 'routes_df' in locals():
    print(routes_df.head()[['route_id', 'route_short_name', 'route_type']])
    
print(f"\nRoute versions sample:")
if 'route_versions_df' in locals():
    print(route_versions_df.head()[['route_id', 'valid_from', 'valid_to', 'main_shape_id']])

# =============================================================================
# 2. ROUTE ANALYSIS FUNCTIONS
# =============================================================================

def get_route_info(route_id):
    """Get basic information about a route."""
    route_info = routes_df[routes_df['route_id'] == route_id]
    if route_info.empty:
        return None
    
    route_versions = route_versions_df[route_versions_df['route_id'] == route_id].copy()
    route_versions = route_versions.sort_values('valid_from')
    
    # Calculate actual end dates
    route_versions['actual_end'] = route_versions['valid_to'].fillna(pd.Timestamp.now())
    
    info = {
        'route_id': route_id,
        'route_short_name': route_info.iloc[0]['route_short_name'],
        'route_type': route_info.iloc[0]['route_type'],
        'total_versions': len(route_versions),
        'first_date': route_versions['valid_from'].min(),
        'last_date': route_versions['actual_end'].max(),
        'total_days': (route_versions['actual_end'].max() - route_versions['valid_from'].min()).days,
        'versions': route_versions
    }
    
    return info

def get_active_version_on_date(route_id, target_date):
    """Get the active route version for a specific date."""
    target_date = pd.to_datetime(target_date)
    
    route_versions = route_versions_df[route_versions_df['route_id'] == route_id].copy()
    
    # Find version active on target date
    active_versions = route_versions[
        (route_versions['valid_from'] <= target_date) & 
        ((route_versions['valid_to'].isna()) | (route_versions['valid_to'] >= target_date))
    ]
    
    if active_versions.empty:
        return None
    
    # Return the most recent version if multiple found
    return active_versions.sort_values('valid_from').iloc[-1]

def get_shape_coordinates(shape_id):
    """Get coordinates for a shape."""
    shape_points = shapes_df[shapes_df['shape_id'] == shape_id].copy()
    if shape_points.empty:
        return None
    
    shape_points = shape_points.sort_values('shape_pt_sequence')
    return shape_points[['shape_pt_lat', 'shape_pt_lon', 'shape_pt_sequence']]

# =============================================================================
# 3. ROUTE SELECTION WIDGET
# =============================================================================

print("\n🚇 Available routes:")

# Get unique routes with their names
available_routes = routes_df[['route_id', 'route_short_name', 'route_type']].copy()
available_routes = available_routes.sort_values('route_short_name')

# Create route options for dropdown
route_options = []
for _, row in available_routes.iterrows():
    route_type_name = {0: 'Tram', 1: 'Subway', 2: 'Rail', 3: 'Bus', 4: 'Ferry'}.get(row['route_type'], 'Other')
    label = f"{row['route_short_name']} ({route_type_name}) - ID: {row['route_id']}"
    route_options.append((label, row['route_id']))

print(f"Found {len(route_options)} routes")

# =============================================================================
# 4. INTERACTIVE VISUALIZATION CLASS
# =============================================================================

class RouteTimelineVisualizer:
    def __init__(self):
        self.current_route_id = None
        self.route_info = None
        self.fig = None
        self.current_zoom = 12  # Default zoom level
        self.current_center = None  # Will store current map center
        
    def setup_widgets(self, default_route_id=None):
        """Setup interactive widgets."""
        
        # Route selection dropdown
        if default_route_id and default_route_id in [opt[1] for opt in route_options]:
            default_route = default_route_id
        else:
            default_route = route_options[0][1] if route_options else None
            
        self.route_selector = widgets.Dropdown(
            options=route_options,
            value=default_route,
            description='Route:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='400px')
        )
        
        # Date slider (will be updated when route is selected)
        self.date_slider = widgets.SelectionSlider(
            options=[datetime.now().strftime('%Y-%m-%d')],
            value=datetime.now().strftime('%Y-%m-%d'),
            description='Date:',
            disabled=True,
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='600px')
        )
        
        # Info display
        self.info_output = widgets.Output()
        
        # Map display
        self.map_output = widgets.Output()
        
        # Wire up the interactions
        self.route_selector.observe(self.on_route_change, names='value')
        self.date_slider.observe(self.on_date_change, names='value')
        
        # Initialize with default route
        if default_route:
            self.on_route_change({'new': default_route})
    
    def on_route_change(self, change):
        """Handle route selection change."""
        route_id = change['new']
        self.current_route_id = route_id
        self.route_info = get_route_info(route_id)
        
        # Reset zoom and center for new route
        self.current_zoom = 12
        self.current_center = None
        
        if self.route_info is None:
            with self.info_output:
                clear_output(wait=True)
                print(f"❌ Route {route_id} not found")
            return
        
        # Update info display
        with self.info_output:
            clear_output(wait=True)
            self.display_route_info()
        
        # Update date slider
        self.update_date_slider()
        
        # Update map with current date
        self.update_map()
    
    def update_date_slider(self):
        """Update date slider based on route lifetime."""
        if self.route_info is None:
            return
        
        # Generate date options (every week for long periods, daily for short)
        start_date = self.route_info['first_date']
        end_date = self.route_info['last_date']
        total_days = (end_date - start_date).days
        
        if total_days > 365:
            # Weekly intervals for long periods
            date_range = pd.date_range(start_date, end_date, freq='W')
        elif total_days > 90:
            # Every 3 days for medium periods
            date_range = pd.date_range(start_date, end_date, freq='3D')
        else:
            # Daily for short periods
            date_range = pd.date_range(start_date, end_date, freq='D')
        
        date_options = [d.strftime('%Y-%m-%d') for d in date_range]
        
        if date_options:
            self.date_slider.options = date_options
            self.date_slider.value = date_options[0]
            self.date_slider.disabled = False
        else:
            self.date_slider.disabled = True
    
    def on_date_change(self, change):
        """Handle date slider change."""
        self.update_map()
    
    def display_route_info(self):
        """Display route information."""
        info = self.route_info
        print(f"🚇 Route {info['route_short_name']} (ID: {info['route_id']})")
        print(f"📅 Lifetime: {info['first_date'].strftime('%Y-%m-%d')} to {info['last_date'].strftime('%Y-%m-%d')}")
        print(f"⏰ Duration: {info['total_days']} days")
        print(f"🔄 Total versions: {info['total_versions']}")
        
        print(f"\n📋 Version History:")
        for _, version in info['versions'].iterrows():
            start = version['valid_from'].strftime('%Y-%m-%d')
            end = version['valid_to'].strftime('%Y-%m-%d') if pd.notna(version['valid_to']) else 'Present'
            print(f"  • {start} to {end}: {version['route_long_name']} (Shape: {version['main_shape_id']})")
    
    def update_map(self):
        """Update the map display."""
        if self.route_info is None:
            return
        
        target_date = self.date_slider.value
        active_version = get_active_version_on_date(self.current_route_id, target_date)
        
        with self.map_output:
            clear_output(wait=True)
            
            if active_version is None:
                print(f"❌ No active version found for {target_date}")
                return
            
            # Get shape coordinates
            shape_coords = get_shape_coordinates(active_version['main_shape_id'])
            
            if shape_coords is None or shape_coords.empty:
                print(f"❌ No shape data found for shape {active_version['main_shape_id']}")
                return
            
            # Create the map
            self.create_route_map(shape_coords, active_version, target_date)
    
    def create_route_map(self, shape_coords, version_info, date):
        """Create interactive map showing the route using modern Plotly."""
        
        # Prepare data for line plot
        line_data = shape_coords.copy()
        line_data['route_name'] = self.route_info['route_short_name']
        
        # Calculate center point if not already set
        center_lat = shape_coords['shape_pt_lat'].mean()
        center_lon = shape_coords['shape_pt_lon'].mean()
        
        # Use stored center and zoom, or calculate defaults
        if self.current_center is None:
            self.current_center = {'lat': center_lat, 'lon': center_lon}
        
        # Create the modern map using the latest line_map function
        fig = px.line_map(
            line_data,
            lat='shape_pt_lat',
            lon='shape_pt_lon',
            title=f"Route {self.route_info['route_short_name']} on {date}<br>" +
                  f"Version: {version_info['route_long_name']}<br>" +
                  f"Shape ID: {version_info['main_shape_id']}",
            height=500,
            zoom=self.current_zoom  # Use stored zoom level
        )
        
        # Add start/end markers
        start_point = shape_coords.iloc[0]
        end_point = shape_coords.iloc[-1]
        
        fig.add_trace(go.Scatter(
            mode="markers",
            x=[start_point['shape_pt_lon'], end_point['shape_pt_lon']],
            y=[start_point['shape_pt_lat'], end_point['shape_pt_lat']],
            marker=dict(
                size=12,
                color=['green', 'red'],
                symbol=['circle', 'square']
            ),
            text=['Start', 'End'],
            name="Route Endpoints",
            hovertemplate="<b>%{text}</b><br>" +
                         "Lat: %{y:.6f}<br>" +
                         "Lon: %{x:.6f}<br>" +
                         "<extra></extra>"
        ))
        
        # Style the route line
        fig.update_traces(
            line=dict(width=4, color='red'),
            selector=dict(mode='lines')
        )
        
        # Apply stored center and zoom
        fig.update_layout(
            map=dict(
                center=self.current_center,  # Use stored center
                zoom=self.current_zoom,      # Use stored zoom
                style="open-street-map"
            ),
            showlegend=True
        )
        
        # Store reference to current figure for zoom tracking
        self.fig = fig
        
        # Add JavaScript callback to capture zoom/pan changes
        # Note: This is a limitation of Jupyter - we can't easily capture map interactions
        # But we can provide instructions to users
        
        fig.show()
        
        # Display zoom instructions
        print(f"💡 Current zoom level: {self.current_zoom}")
        print(f"📍 To maintain zoom: After zooming/panning, use the zoom controls below")
        
        # Add zoom control buttons
        self.display_zoom_controls()
    
    def display_zoom_controls(self):
        """Display manual zoom control buttons."""
        
        zoom_in_btn = widgets.Button(
            description="Zoom In (+)", 
            button_style='info',
            layout=widgets.Layout(width='100px')
        )
        zoom_out_btn = widgets.Button(
            description="Zoom Out (-)", 
            button_style='info',
            layout=widgets.Layout(width='100px')
        )
        reset_btn = widgets.Button(
            description="Reset View", 
            button_style='warning',
            layout=widgets.Layout(width='100px')
        )
        
        zoom_slider = widgets.IntSlider(
            value=self.current_zoom,
            min=8,
            max=18,
            step=1,
            description='Zoom:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='300px')
        )
        
        # Wire up the zoom controls
        def on_zoom_in(b):
            self.current_zoom = min(18, self.current_zoom + 1)
            zoom_slider.value = self.current_zoom
            self.update_map()
        
        def on_zoom_out(b):
            self.current_zoom = max(8, self.current_zoom - 1)
            zoom_slider.value = self.current_zoom
            self.update_map()
        
        def on_reset(b):
            self.current_zoom = 12
            self.current_center = None  # Will recalculate
            zoom_slider.value = self.current_zoom
            self.update_map()
        
        def on_zoom_change(change):
            self.current_zoom = change['new']
            self.update_map()
        
        zoom_in_btn.on_click(on_zoom_in)
        zoom_out_btn.on_click(on_zoom_out)
        reset_btn.on_click(on_reset)
        zoom_slider.observe(on_zoom_change, names='value')
        
        # Display the controls
        zoom_controls = widgets.HBox([
            zoom_out_btn, zoom_in_btn, reset_btn, zoom_slider
        ])
        
        display(widgets.VBox([
            widgets.HTML("<b>🔍 Zoom Controls:</b>"),
            zoom_controls
        ]))
    
    def display(self):
        """Display the complete widget interface."""
        print("🗺️ Interactive Route Timeline Visualizer")
        print("=" * 50)
        
        # Layout the widgets
        route_selection = widgets.HBox([
            widgets.VBox([
                widgets.HTML("<b>1. Select Route:</b>"),
                self.route_selector
            ]),
        ])
        
        date_selection = widgets.HBox([
            widgets.VBox([
                widgets.HTML("<b>2. Select Date:</b>"),
                self.date_slider
            ]),
        ])
        
        map_section = widgets.VBox([
            widgets.HTML("<b>3. Route Visualization:</b>"),
            self.map_output
        ])
        
        info_section = widgets.VBox([
            widgets.HTML("<b>4. Route Information:</b>"),
            self.info_output
        ])
        
        display(widgets.VBox([
            route_selection,
            date_selection, 
            map_section,
            info_section
        ]))

# =============================================================================
# 5. CREATE AND DISPLAY THE VISUALIZER
# =============================================================================

print("\n🎯 Creating interactive visualizer...")

# Create the visualizer
visualizer = RouteTimelineVisualizer()

# Setup with route 6 (tram 6) as default if it exists
default_route = '6' if '6' in routes_df['route_id'].values else None
visualizer.setup_widgets(default_route)

print("✅ Visualizer ready!")
print("\nInstructions:")
print("1. Select a route from the dropdown")
print("2. Use the date slider to see how the route changed over time")
print("3. The map will update to show the main shape for each date")

# Display the interactive interface
visualizer.display()

📁 Project root: c:\Users\User\Documents\Projects\budapest_time_travel
📊 Data folder: c:\Users\User\Documents\Projects\budapest_time_travel\data\processed
✅ Data folder exists: True

📊 Loading transit data...
✅ Found routes.csv
✅ Found route_versions.csv
✅ Found shapes.csv
✅ All required files found!
✅ Loaded 810 routes, 1556 route versions, 546076 shape points

📋 Data Preview:
Routes sample:
  route_id route_short_name  route_type
0     MP52               M2           3
1     MP52               M2           3
2    MP525              M2E           3
3    MP525              M2E           3
4     VP01               1V           3

Route versions sample:
  route_id valid_from   valid_to main_shape_id
0     0050 2013-10-11        NaT          R952
1     0050 2013-10-11        NaT          U961
2     0070 2013-10-11 2014-04-06          S704
3     0070 2014-04-07        NaT          Y207
4     0070 2013-10-11 2014-04-06          S705

🚇 Available routes:
Found 810 routes

🎯 Creating interacti


Columns (5) have mixed types. Specify dtype option on import or set low_memory=False.



✅ Visualizer ready!

Instructions:
1. Select a route from the dropdown
2. Use the date slider to see how the route changed over time
3. The map will update to show the main shape for each date
🗺️ Interactive Route Timeline Visualizer


VBox(children=(HBox(children=(VBox(children=(HTML(value='<b>1. Select Route:</b>'), Dropdown(description='Rout…

## Check details

In [2]:
# Display route versions for tram 4
print("🚋 Route Versions for Tram 4")
print("=" * 50)

# Filter for route 4
route_4_versions = route_versions_df[route_versions_df['route_id'] == '3040'].copy()

if route_4_versions.empty:
    print("❌ No route versions found for route '4'")
    print("Available route IDs:", sorted(route_versions_df['route_id'].unique()))
else:
    # Sort by date
    route_4_versions = route_4_versions.sort_values('valid_from')
    
    print(f"Found {len(route_4_versions)} versions for route 4:")
    print()
    
    # Display in a nice format
    for i, (_, version) in enumerate(route_4_versions.iterrows(), 1):
        start_date = version['valid_from'].strftime('%Y-%m-%d')
        end_date = version['valid_to'].strftime('%Y-%m-%d') if pd.notna(version['valid_to']) else 'Present'
        
        print(f"📅 Version {i}:")
        print(f"   Period: {start_date} to {end_date}")
        print(f"   Route name: {version['route_long_name']}")
        print(f"   Description: {version.get('route_desc', 'N/A')}")
        print(f"   Main shape: {version['main_shape_id']}")
        print(f"   Direction: {version['direction_id']}")
        print(f"   Trip headsign: {version.get('trip_headsign', 'N/A')}")
        print()

# Also display as a DataFrame for easy viewing
print("📊 Complete table view:")
if not route_4_versions.empty:
    # Select relevant columns and format dates
    display_columns = ['valid_from', 'valid_to', 'route_long_name', 'main_shape_id', 
                      'direction_id', 'trip_headsign']
    
    display_df = route_4_versions[display_columns].copy()
    display_df['valid_from'] = display_df['valid_from'].dt.strftime('%Y-%m-%d')
    display_df['valid_to'] = display_df['valid_to'].dt.strftime('%Y-%m-%d').replace('NaT', 'Present')
    
    display(display_df)

🚋 Route Versions for Tram 4
Found 2 versions for route 4:

📅 Version 1:
   Period: 2013-10-11 to Present
   Route name: nan
   Description: Széll Kálmán tér M / Újbuda-központ
   Main shape: 0976
   Direction: 1
   Trip headsign: Széll Kálmán tér M

📅 Version 2:
   Period: 2013-10-14 to Present
   Route name: nan
   Description: Széll Kálmán tér M / Újbuda-központ
   Main shape: 0977
   Direction: 0
   Trip headsign: Újbuda-központ

📊 Complete table view:


Unnamed: 0,valid_from,valid_to,route_long_name,main_shape_id,direction_id,trip_headsign
432,2013-10-11,,,976,1,Széll Kálmán tér M
431,2013-10-14,,,977,0,Újbuda-központ


In [3]:
# Route Version Debugging - Complete Analysis
# Run this in your new notebook to debug route version issues

import pandas as pd
import numpy as np
import os
from datetime import datetime, timedelta

# =============================================================================
# 1. SETUP - Load Your Data
# =============================================================================

# Auto-detect project root
def find_project_root():
    current_dir = os.getcwd()
    while current_dir != os.path.dirname(current_dir):
        data_path = os.path.join(current_dir, 'data')
        if os.path.exists(data_path):
            return current_dir
        current_dir = os.path.dirname(current_dir)
    return os.getcwd()

project_root = find_project_root()
data_folder = os.path.join(project_root, 'data', 'processed')

print(f"📁 Loading data from: {data_folder}")

# Load the data
route_versions_df = pd.read_csv(os.path.join(data_folder, 'route_versions.csv'))
route_versions_df['valid_from'] = pd.to_datetime(route_versions_df['valid_from'])
route_versions_df['valid_to'] = pd.to_datetime(route_versions_df['valid_to'])

routes_df = pd.read_csv(os.path.join(data_folder, 'routes.csv'))

print(f"✅ Loaded {len(route_versions_df)} route versions")

# =============================================================================
# 2. DEBUGGING FUNCTIONS
# =============================================================================

def analyze_route_versions(route_id, show_details=True):
    """Comprehensive analysis of route versions for debugging."""
    
    print(f"🔍 ANALYZING ROUTE {route_id}")
    print("=" * 60)
    
    # Get all versions for this route
    versions = route_versions_df[route_versions_df['route_id'] == route_id].copy()
    
    if versions.empty:
        print(f"❌ No versions found for route {route_id}")
        return None
    
    # Sort by direction, then by date
    versions = versions.sort_values(['direction_id', 'valid_from'])
    
    print(f"📊 Total versions: {len(versions)}")
    print(f"📊 Directions: {sorted(versions['direction_id'].unique())}")
    print(f"📊 Date range: {versions['valid_from'].min()} to {versions['valid_to'].max()}")
    
    # Analyze by direction
    for direction in sorted(versions['direction_id'].unique()):
        direction_versions = versions[versions['direction_id'] == direction].copy()
        direction_versions = direction_versions.sort_values('valid_from')
        
        print(f"\n🚇 DIRECTION {direction}")
        print("-" * 40)
        print(f"Versions: {len(direction_versions)}")
        
        if show_details:
            for i, (_, version) in enumerate(direction_versions.iterrows(), 1):
                start = version['valid_from'].strftime('%Y-%m-%d')
                end = version['valid_to'].strftime('%Y-%m-%d') if pd.notna(version['valid_to']) else 'Present'
                route_name = version['route_long_name'] if pd.notna(version['route_long_name']) else 'nan'
                
                print(f"  {i}. {start} to {end}: {route_name} (Shape: {version['main_shape_id']})")
        
        # Check for overlaps in this direction
        overlaps = find_overlaps_in_direction(direction_versions)
        if overlaps:
            print(f"⚠️  Found {len(overlaps)} overlaps in direction {direction}:")
            for overlap in overlaps:
                print(f"     • {overlap}")
        else:
            print(f"✅ No overlaps in direction {direction}")
    
    return versions

def find_overlaps_in_direction(direction_versions):
    """Find overlapping date ranges within a single direction."""
    overlaps = []
    
    sorted_versions = direction_versions.sort_values('valid_from')
    
    for i in range(len(sorted_versions) - 1):
        current = sorted_versions.iloc[i]
        next_version = sorted_versions.iloc[i + 1]
        
        current_end = current['valid_to']
        next_start = next_version['valid_from']
        
        # Check for overlap
        if pd.notna(current_end) and current_end >= next_start:
            overlap_desc = f"Version {i+1} ends {current_end.strftime('%Y-%m-%d')}, Version {i+2} starts {next_start.strftime('%Y-%m-%d')}"
            overlaps.append(overlap_desc)
        
        # Check for impossible dates (end before start)
        if pd.notna(current_end) and current_end < current['valid_from']:
            impossible_desc = f"Version {i+1}: End date ({current_end.strftime('%Y-%m-%d')}) before start date ({current['valid_from'].strftime('%Y-%m-%d')})"
            overlaps.append(impossible_desc)
    
    return overlaps

def find_problematic_routes(max_routes=10):
    """Find routes with the most problems."""
    
    print("🔍 FINDING PROBLEMATIC ROUTES")
    print("=" * 50)
    
    problems = []
    
    for route_id in route_versions_df['route_id'].unique():
        route_versions = route_versions_df[route_versions_df['route_id'] == route_id]
        
        issues = []
        
        # Check for NaN route names
        nan_names = route_versions['route_long_name'].isna().sum()
        if nan_names > 0:
            issues.append(f"{nan_names} NaN route names")
        
        # Check for overlaps by direction
        for direction in route_versions['direction_id'].unique():
            direction_versions = route_versions[route_versions['direction_id'] == direction]
            overlaps = find_overlaps_in_direction(direction_versions)
            if overlaps:
                issues.append(f"{len(overlaps)} overlaps in direction {direction}")
        
        # Check for too many versions (might indicate processing issues)
        if len(route_versions) > 10:
            issues.append(f"{len(route_versions)} versions (many)")
        
        if issues:
            problems.append({
                'route_id': route_id,
                'issues': issues,
                'severity': len(issues)
            })
    
    # Sort by severity
    problems.sort(key=lambda x: x['severity'], reverse=True)
    
    print(f"Found {len(problems)} routes with issues:")
    print()
    
    for i, problem in enumerate(problems[:max_routes], 1):
        print(f"{i}. Route {problem['route_id']}:")
        for issue in problem['issues']:
            print(f"   • {issue}")
        print()
    
    return problems

def suggest_fixes_for_route(route_id):
    """Suggest specific fixes for a route."""
    
    print(f"🔧 SUGGESTED FIXES FOR ROUTE {route_id}")
    print("=" * 50)
    
    versions = route_versions_df[route_versions_df['route_id'] == route_id].copy()
    
    if versions.empty:
        print("No versions found.")
        return
    
    fixes = []
    
    # Check for NaN route names
    nan_count = versions['route_long_name'].isna().sum()
    if nan_count > 0:
        fixes.append(f"Fix {nan_count} NaN route names - check source GTFS data")
    
    # Check overlaps by direction
    for direction in versions['direction_id'].unique():
        direction_versions = versions[versions['direction_id'] == direction].sort_values('valid_from')
        
        for i in range(len(direction_versions) - 1):
            current = direction_versions.iloc[i]
            next_version = direction_versions.iloc[i + 1]
            
            current_end = current['valid_to']
            next_start = next_version['valid_from']
            
            if pd.notna(current_end) and current_end >= next_start:
                # Suggest fix
                suggested_end = next_start - pd.Timedelta(days=1)
                fixes.append(f"Direction {direction}: Set version {i+1} end date to {suggested_end.strftime('%Y-%m-%d')}")
    
    if fixes:
        print("Suggested fixes:")
        for i, fix in enumerate(fixes, 1):
            print(f"{i}. {fix}")
    else:
        print("✅ No fixes needed!")
    
    return fixes

# =============================================================================
# 3. RUN THE ANALYSIS
# =============================================================================

print("🚀 Starting Route Version Analysis...")
print()

# Find the most problematic routes
problematic_routes = find_problematic_routes(5)

print("\n" + "="*60)
print("DETAILED ANALYSIS OF TOP PROBLEM ROUTES")
print("="*60)

# Analyze the top 3 problematic routes in detail
for i, problem in enumerate(problematic_routes[:3]):
    route_id = problem['route_id']
    print(f"\n📋 DETAILED ANALYSIS #{i+1}")
    analyze_route_versions(route_id, show_details=True)
    print()
    suggest_fixes_for_route(route_id)
    print("\n" + "-"*60)

# =============================================================================
# 4. SPECIFIC ROUTE ANALYSIS (CUSTOMIZE THIS)
# =============================================================================

print("\n🎯 ANALYZE SPECIFIC ROUTE")
print("="*40)

# Change this to analyze any specific route
ROUTE_TO_ANALYZE = '3010'  # Change this to your problematic route

print(f"Analyzing route {ROUTE_TO_ANALYZE}:")
analyze_route_versions(ROUTE_TO_ANALYZE, show_details=True)
suggest_fixes_for_route(ROUTE_TO_ANALYZE)

# =============================================================================
# 5. QUICK STATS
# =============================================================================

print("\n📊 OVERALL STATISTICS")
print("="*30)

total_routes = route_versions_df['route_id'].nunique()
total_versions = len(route_versions_df)
nan_names = route_versions_df['route_long_name'].isna().sum()

print(f"Total routes: {total_routes}")
print(f"Total versions: {total_versions}")
print(f"Versions with NaN names: {nan_names} ({nan_names/total_versions*100:.1f}%)")
print(f"Routes with problems: {len(problematic_routes)} ({len(problematic_routes)/total_routes*100:.1f}%)")

# Date range analysis
print(f"Date range: {route_versions_df['valid_from'].min()} to {route_versions_df['valid_to'].max()}")

print("\n💡 NEXT STEPS:")
print("1. Check the source GTFS data for routes with many NaN names")
print("2. Investigate the date processing logic for overlapping ranges")
print("3. Consider reprocessing problematic dates with debugging enabled")
print("4. Check if multiple GTFS sources are being mixed")

📁 Loading data from: c:\Users\User\Documents\Projects\budapest_time_travel\data\processed
✅ Loaded 871 route versions
🚀 Starting Route Version Analysis...

🔍 FINDING PROBLEMATIC ROUTES
Found 349 routes with issues:

1. Route 0920:
   • 13 NaN route names
   • 3 overlaps in direction 0
   • 2 overlaps in direction 1
   • 13 versions (many)

2. Route 2182:
   • 6 NaN route names
   • 1 overlaps in direction 0
   • 1 overlaps in direction 1

3. Route 0362:
   • 4 NaN route names
   • 1 overlaps in direction 0

4. Route 1620:
   • 6 NaN route names
   • 1 overlaps in direction 1

5. Route 1660:
   • 7 NaN route names
   • 1 overlaps in direction 0


DETAILED ANALYSIS OF TOP PROBLEM ROUTES

📋 DETAILED ANALYSIS #1
🔍 ANALYZING ROUTE 0920
📊 Total versions: 13
📊 Directions: [np.int64(0), np.int64(1)]
📊 Date range: 2013-10-12 00:00:00 to 2013-12-29 00:00:00

🚇 DIRECTION 0
----------------------------------------
Versions: 8
  1. 2013-10-12 to 2013-11-01: nan (Shape: O287)
  2. 2013-11-02 to 2013

In [3]:
# Route Visualization - Interactive Timeline
# This notebook visualizes how transit routes change over time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import ipywidgets as widgets
from IPython.display import display, clear_output
from datetime import datetime, timedelta
import os

# =============================================================================
# SETUP DATA PATHS (Choose one of these methods)
# =============================================================================

# Method 1: Auto-detect project root
def find_project_root():
    """Find project root by looking for the data folder."""
    current_dir = os.getcwd()
    while current_dir != os.path.dirname(current_dir):  # Until filesystem root
        data_path = os.path.join(current_dir, 'data')
        if os.path.exists(data_path):
            return current_dir
        current_dir = os.path.dirname(current_dir)
    return os.getcwd()

# Method 2: Manual path setup (uncomment and adjust if Method 1 doesn't work)
# project_root = r"C:\Users\User\Documents\Projects\budapest_time_travel"  # Your actual path
# data_folder = os.path.join(project_root, 'data', 'processed')

# Use auto-detection
project_root = find_project_root()
data_folder = os.path.join(project_root, 'data', 'processed')

print(f"📁 Project root: {project_root}")
print(f"📊 Data folder: {data_folder}")
print(f"✅ Data folder exists: {os.path.exists(data_folder)}")

# =============================================================================
# 1. DATA LOADING
# =============================================================================

print("\n📊 Loading transit data...")

# Check if data files exist
required_files = ['routes.csv', 'route_versions.csv', 'shapes.csv']
missing_files = []

for file in required_files:
    file_path = os.path.join(data_folder, file)
    if not os.path.exists(file_path):
        missing_files.append(file)
    else:
        print(f"✅ Found {file}")

if missing_files:
    print(f"❌ Missing files: {missing_files}")
    print("Please make sure you have processed data in the correct location.")
    print(f"Expected location: {data_folder}")
else:
    print("✅ All required files found!")

# Load the datasets
try:
    routes_df = pd.read_csv(os.path.join(data_folder, 'routes.csv'))
    route_versions_df = pd.read_csv(os.path.join(data_folder, 'route_versions.csv'))
    shapes_df = pd.read_csv(os.path.join(data_folder, 'shapes.csv'))
    
    # Convert dates
    route_versions_df['valid_from'] = pd.to_datetime(route_versions_df['valid_from'])
    route_versions_df['valid_to'] = pd.to_datetime(route_versions_df['valid_to'])
    
    print(f"✅ Loaded {len(routes_df)} routes, {len(route_versions_df)} route versions, {len(shapes_df)} shape points")
    
except Exception as e:
    print(f"❌ Error loading data: {e}")
    print("Please check your data files and paths.")
    
# Quick data preview
print(f"\n📋 Data Preview:")
print(f"Routes sample:")
if 'routes_df' in locals():
    print(routes_df.head()[['route_id', 'route_short_name', 'route_type']])
    
print(f"\nRoute versions sample:")
if 'route_versions_df' in locals():
    print(route_versions_df.head()[['route_id', 'valid_from', 'valid_to', 'main_shape_id']])

# =============================================================================
# 2. ROUTE ANALYSIS FUNCTIONS
# =============================================================================

def get_route_info(route_id):
    """Get basic information about a route."""
    route_info = routes_df[routes_df['route_id'] == route_id]
    if route_info.empty:
        return None
    
    route_versions = route_versions_df[route_versions_df['route_id'] == route_id].copy()
    route_versions = route_versions.sort_values('valid_from')
    
    # Calculate actual end dates
    route_versions['actual_end'] = route_versions['valid_to'].fillna(pd.Timestamp.now())
    
    info = {
        'route_id': route_id,
        'route_short_name': route_info.iloc[0]['route_short_name'],
        'route_type': route_info.iloc[0]['route_type'],
        'total_versions': len(route_versions),
        'first_date': route_versions['valid_from'].min(),
        'last_date': route_versions['actual_end'].max(),
        'total_days': (route_versions['actual_end'].max() - route_versions['valid_from'].min()).days,
        'versions': route_versions
    }
    
    return info

def get_active_version_on_date(route_id, target_date):
    """Get the active route version for a specific date."""
    target_date = pd.to_datetime(target_date)
    
    route_versions = route_versions_df[route_versions_df['route_id'] == route_id].copy()
    
    # Find version active on target date
    active_versions = route_versions[
        (route_versions['valid_from'] <= target_date) & 
        ((route_versions['valid_to'].isna()) | (route_versions['valid_to'] >= target_date))
    ]
    
    if active_versions.empty:
        return None
    
    # Return the most recent version if multiple found
    return active_versions.sort_values('valid_from').iloc[-1]

def get_shape_coordinates(shape_id):
    """Get coordinates for a shape."""
    shape_points = shapes_df[shapes_df['shape_id'] == shape_id].copy()
    if shape_points.empty:
        return None
    
    shape_points = shape_points.sort_values('shape_pt_sequence')
    return shape_points[['shape_pt_lat', 'shape_pt_lon', 'shape_pt_sequence']]

# =============================================================================
# 3. ROUTE SELECTION WIDGET
# =============================================================================

print("\n🚇 Available routes:")

# Get unique routes with their names
available_routes = routes_df[['route_id', 'route_short_name', 'route_type']].copy()
available_routes = available_routes.sort_values('route_short_name')

# Create route options for dropdown
route_options = []
for _, row in available_routes.iterrows():
    route_type_name = {0: 'Tram', 1: 'Subway', 2: 'Rail', 3: 'Bus', 4: 'Ferry'}.get(row['route_type'], 'Other')
    label = f"{row['route_short_name']} ({route_type_name}) - ID: {row['route_id']}"
    route_options.append((label, row['route_id']))

print(f"Found {len(route_options)} routes")

# =============================================================================
# 4. INTERACTIVE VISUALIZATION CLASS
# =============================================================================

class RouteTimelineVisualizer:
    def __init__(self):
        self.current_route_id = None
        self.route_info = None
        self.fig = None
        self.current_zoom = 12  # Default zoom level
        self.current_center = None  # Will store current map center
        self.zoom_controls_displayed = False  # Track if zoom controls are already shown
        
    def setup_widgets(self, default_route_id=None):
        """Setup interactive widgets."""
        
        # Route selection dropdown
        if default_route_id and default_route_id in [opt[1] for opt in route_options]:
            default_route = default_route_id
        else:
            default_route = route_options[0][1] if route_options else None
            
        self.route_selector = widgets.Dropdown(
            options=route_options,
            value=default_route,
            description='Route:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='400px')
        )
        
        # Date slider (will be updated when route is selected)
        self.date_slider = widgets.SelectionSlider(
            options=[datetime.now().strftime('%Y-%m-%d')],
            value=datetime.now().strftime('%Y-%m-%d'),
            description='Date:',
            disabled=True,
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='600px')
        )
        
        # Info display
        self.info_output = widgets.Output()
        
        # Map display
        self.map_output = widgets.Output()
        
        # Wire up the interactions AFTER setting up the initial state
        # but don't initialize automatically
        self.route_selector.observe(self.on_route_change, names='value')
        self.date_slider.observe(self.on_date_change, names='value')
    
    def on_route_change(self, change):
        """Handle route selection change."""
        route_id = change['new']
        self.current_route_id = route_id
        self.route_info = get_route_info(route_id)
        
        # Reset zoom and center for new route
        self.current_zoom = 12
        self.current_center = None
        self.zoom_controls_displayed = False  # Reset zoom controls for new route
        
        if self.route_info is None:
            with self.info_output:
                clear_output(wait=True)
                print(f"❌ Route {route_id} not found")
            return
        
        # Update info display
        with self.info_output:
            clear_output(wait=True)
            self.display_route_info()
        
        # Update date slider (this will trigger map update via date change)
        self.update_date_slider()
    
    def _update_date_slider_silent(self):
        """Update date slider without triggering observer."""
        if self.route_info is None:
            return
        
        # Temporarily remove observer
        self.date_slider.unobserve(self.on_date_change, names='value')
        
        # Generate date options
        start_date = self.route_info['first_date']
        end_date = self.route_info['last_date']
        total_days = (end_date - start_date).days
        
        if total_days > 365:
            date_range = pd.date_range(start_date, end_date, freq='W')
        elif total_days > 90:
            date_range = pd.date_range(start_date, end_date, freq='3D')
        else:
            date_range = pd.date_range(start_date, end_date, freq='D')
        
        date_options = [d.strftime('%Y-%m-%d') for d in date_range]
        
        if date_options:
            self.date_slider.options = date_options
            self.date_slider.value = date_options[0]
            self.date_slider.disabled = False
        else:
            self.date_slider.disabled = True
        
        # Re-add observer
        self.date_slider.observe(self.on_date_change, names='value')
    
    def update_date_slider(self):
        """Update date slider based on route lifetime."""
        if self.route_info is None:
            return
        
        # Generate date options (every week for long periods, daily for short)
        start_date = self.route_info['first_date']
        end_date = self.route_info['last_date']
        total_days = (end_date - start_date).days
        
        if total_days > 365:
            # Weekly intervals for long periods
            date_range = pd.date_range(start_date, end_date, freq='W')
        elif total_days > 90:
            # Every 3 days for medium periods
            date_range = pd.date_range(start_date, end_date, freq='3D')
        else:
            # Daily for short periods
            date_range = pd.date_range(start_date, end_date, freq='D')
        
        date_options = [d.strftime('%Y-%m-%d') for d in date_range]
        
        if date_options:
            self.date_slider.options = date_options
            self.date_slider.value = date_options[0]
            self.date_slider.disabled = False
        else:
            self.date_slider.disabled = True
    
    def on_date_change(self, change):
        """Handle date slider change."""
        self.update_map()
    
    def display_route_info(self):
        """Display route information."""
        info = self.route_info
        print(f"🚇 Route {info['route_short_name']} (ID: {info['route_id']})")
        print(f"📅 Lifetime: {info['first_date'].strftime('%Y-%m-%d')} to {info['last_date'].strftime('%Y-%m-%d')}")
        print(f"⏰ Duration: {info['total_days']} days")
        print(f"🔄 Total versions: {info['total_versions']}")
        
        print(f"\n📋 Version History:")
        for _, version in info['versions'].iterrows():
            start = version['valid_from'].strftime('%Y-%m-%d')
            end = version['valid_to'].strftime('%Y-%m-%d') if pd.notna(version['valid_to']) else 'Present'
            # CHANGE: Use route_short_name instead of route_long_name
            print(f"  • {start} to {end}: {info['route_short_name']} (Shape: {version['main_shape_id']})")
    
    def update_map(self):
        """Update the map display."""
        if self.route_info is None:
            return
        
        target_date = self.date_slider.value
        active_version = get_active_version_on_date(self.current_route_id, target_date)
        
        with self.map_output:
            clear_output(wait=True)
            
            if active_version is None:
                print(f"❌ No active version found for {target_date}")
                return
            
            # Get shape coordinates
            shape_coords = get_shape_coordinates(active_version['main_shape_id'])
            
            if shape_coords is None or shape_coords.empty:
                print(f"❌ No shape data found for shape {active_version['main_shape_id']}")
                return
            
            # Create the map
            self.create_route_map(shape_coords, active_version, target_date)
    
    def create_route_map(self, shape_coords, version_info, date):
        """Create interactive map showing the route using modern Plotly."""
        
        # Prepare data for line plot
        line_data = shape_coords.copy()
        line_data['route_name'] = self.route_info['route_short_name']
        
        # Calculate center point if not already set
        center_lat = shape_coords['shape_pt_lat'].mean()
        center_lon = shape_coords['shape_pt_lon'].mean()
        
        # Use stored center and zoom, or calculate defaults
        if self.current_center is None:
            self.current_center = {'lat': center_lat, 'lon': center_lon}
        
        # CHANGE: Create the modern map using the latest line_map function instead of line_mapbox
        fig = px.line_map(
            line_data,
            lat='shape_pt_lat',
            lon='shape_pt_lon',
            # CHANGE: Simplified title without route_long_name
            title=f"Route {self.route_info['route_short_name']} on {date}<br>" +
                  f"Shape ID: {version_info['main_shape_id']}",
            height=500,
            zoom=self.current_zoom  # Use stored zoom level
        )
        
        # Add start/end markers
        start_point = shape_coords.iloc[0]
        end_point = shape_coords.iloc[-1]
        
        # CHANGE: Use Scattermap instead of Scatter for the new map API
        fig.add_trace(go.Scattermap(
            mode="markers",
            lon=[start_point['shape_pt_lon'], end_point['shape_pt_lon']],
            lat=[start_point['shape_pt_lat'], end_point['shape_pt_lat']],
            marker=dict(
                size=12,
                color=['green', 'red'],
                symbol=['circle', 'square']
            ),
            text=['Start', 'End'],
            name="Route Endpoints",
            # CHANGE: Update hover template for lat/lon instead of x/y
            hovertemplate="<b>%{text}</b><br>" +
                         "Lat: %{lat:.6f}<br>" +
                         "Lon: %{lon:.6f}<br>" +
                         "<extra></extra>"
        ))
        
        # Style the route line
        fig.update_traces(
            line=dict(width=4, color='red'),
            selector=dict(mode='lines')
        )
        
        # Apply stored center and zoom
        fig.update_layout(
            map=dict(
                center=self.current_center,  # Use stored center
                zoom=self.current_zoom,      # Use stored zoom
                style="open-street-map"
            ),
            showlegend=True
        )
        
        # Store reference to current figure for zoom tracking
        self.fig = fig
        
        # Add JavaScript callback to capture zoom/pan changes
        # Note: This is a limitation of Jupyter - we can't easily capture map interactions
        # But we can provide instructions to users
        
        fig.show()
        
        # Only display zoom instructions and controls once per route
        if not self.zoom_controls_displayed:
            # Display zoom instructions
            print(f"💡 Current zoom level: {self.current_zoom}")
            print(f"📍 To maintain zoom: After zooming/panning, use the zoom controls below")
            
            # Add zoom control buttons
            self.display_zoom_controls()
            self.zoom_controls_displayed = True
    
    def display_zoom_controls(self):
        """Display manual zoom control buttons."""
        
        zoom_in_btn = widgets.Button(
            description="Zoom In (+)", 
            button_style='info',
            layout=widgets.Layout(width='100px')
        )
        zoom_out_btn = widgets.Button(
            description="Zoom Out (-)", 
            button_style='info',
            layout=widgets.Layout(width='100px')
        )
        reset_btn = widgets.Button(
            description="Reset View", 
            button_style='warning',
            layout=widgets.Layout(width='100px')
        )
        
        zoom_slider = widgets.IntSlider(
            value=self.current_zoom,
            min=8,
            max=18,
            step=1,
            description='Zoom:',
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='300px')
        )
        
        # Wire up the zoom controls
        def on_zoom_in(b):
            self.current_zoom = min(18, self.current_zoom + 1)
            zoom_slider.value = self.current_zoom
            self.update_map()
        
        def on_zoom_out(b):
            self.current_zoom = max(8, self.current_zoom - 1)
            zoom_slider.value = self.current_zoom
            self.update_map()
        
        def on_reset(b):
            self.current_zoom = 12
            self.current_center = None  # Will recalculate
            zoom_slider.value = self.current_zoom
            self.update_map()
        
        def on_zoom_change(change):
            self.current_zoom = change['new']
            self.update_map()
        
        zoom_in_btn.on_click(on_zoom_in)
        zoom_out_btn.on_click(on_zoom_out)
        reset_btn.on_click(on_reset)
        zoom_slider.observe(on_zoom_change, names='value')
        
        # Display the controls
        zoom_controls = widgets.HBox([
            zoom_out_btn, zoom_in_btn, reset_btn, zoom_slider
        ])
        
        # Display controls in the map output area
        with self.map_output:
            display(widgets.VBox([
                widgets.HTML("<b>🔍 Zoom Controls:</b>"),
                zoom_controls
            ]))
    
    def display(self):
        """Display the complete widget interface."""
        print("🗺️ Interactive Route Timeline Visualizer")
        print("=" * 50)
        
        # Layout the widgets
        route_selection = widgets.HBox([
            widgets.VBox([
                widgets.HTML("<b>1. Select Route:</b>"),
                self.route_selector
            ]),
        ])
        
        date_selection = widgets.HBox([
            widgets.VBox([
                widgets.HTML("<b>2. Select Date:</b>"),
                self.date_slider
            ]),
        ])
        
        map_section = widgets.VBox([
            widgets.HTML("<b>3. Route Visualization:</b>"),
            self.map_output
        ])
        
        info_section = widgets.VBox([
            widgets.HTML("<b>4. Route Information:</b>"),
            self.info_output
        ])
        
        display(widgets.VBox([
            route_selection,
            date_selection, 
            map_section,
            info_section
        ]))

# =============================================================================
# 5. CREATE AND DISPLAY THE VISUALIZER
# =============================================================================

print("\n🎯 Creating interactive visualizer...")

# Create the visualizer
visualizer = RouteTimelineVisualizer()

# Setup with route 6 (tram 6) as default if it exists
default_route = '6' if '6' in routes_df['route_id'].values else None
visualizer.setup_widgets(default_route)

print("✅ Visualizer ready!")
print("\nInstructions:")
print("1. Select a route from the dropdown")
print("2. Use the date slider to see how the route changed over time")
print("3. The map will update to show the main shape for each date")

# Display the interactive interface
visualizer.display()

📁 Project root: c:\Users\User\Documents\Projects\budapest_time_travel
📊 Data folder: c:\Users\User\Documents\Projects\budapest_time_travel\data\processed
✅ Data folder exists: True

📊 Loading transit data...
✅ Found routes.csv
✅ Found route_versions.csv
✅ Found shapes.csv
✅ All required files found!
✅ Loaded 810 routes, 1556 route versions, 546076 shape points

📋 Data Preview:
Routes sample:
  route_id route_short_name  route_type
0     MP52               M2           3
1     MP52               M2           3
2    MP525              M2E           3
3    MP525              M2E           3
4     VP01               1V           3

Route versions sample:
  route_id valid_from   valid_to main_shape_id
0     0050 2013-10-11        NaT          R952
1     0050 2013-10-11        NaT          U961
2     0070 2013-10-11 2014-04-06          S704
3     0070 2014-04-07        NaT          Y207
4     0070 2013-10-11 2014-04-06          S705

🚇 Available routes:
Found 810 routes

🎯 Creating interacti


Columns (5) have mixed types. Specify dtype option on import or set low_memory=False.



VBox(children=(HBox(children=(VBox(children=(HTML(value='<b>1. Select Route:</b>'), Dropdown(description='Rout…