# Galway Public Transport Accessibility Score Analysis

### Import Libraries

In [2]:
# ! pip install langchain openai pandas tabulate
# # ! pip install langchain-community
# ! pip install langchain-ollama
# !pip install thefuzz 
# !pip install python-Levenshtein
# !pip install osmnx

# ! pip install geopandas 
# !pip install selenium

# !pip uninstall numpy -y



In [1]:
data_dir = '/Users/njindal/Documents/aic2025/data'
code_dir = '/Users/njindal/Documents/aic2025/code'
artifact_dir = '/Users/njindal/Documents/aic2025/artifacts'

# Libraries
import pandas as pd
pd.set_option('display.max_colwidth', None) # display full column width
import os
import requests
from IPython.display import display
import pandas as pd
import numpy as np
import zipfile
import shutil # For removing the directory if needed for a clean re-extract
from selenium import webdriver # Used for scraping bus timetables from buseireann.ie
import time
# !pip install thefuzz 
# !pip install python-Levenshtein
import re # For regular expressions
from thefuzz import process, fuzz
import networkx as nx

from math import radians, sin, cos, sqrt, atan2
import osmnx as ox
import matplotlib
import matplotlib.patheffects as patheffects
import matplotlib.pyplot as plt
import geopandas as gpd
import logging
import warnings
import matplotlib.patheffects as path_effects
import pickle
from langchain_ollama import ChatOllama




# References for Data 
# https://libguides.ucd.ie/gisguide/findspatialdata 
# https://download.geofabrik.de/europe/ireland-and-northern-ireland.html
# https://galway-bus.apis.ie/gstoptimes/#g-stop-time-schema
# https://tilburgsciencehub.com/topics/visualization/data-visualization/graphs-charts/grammar-of-graphics-ggplot2/
# https://python.langchain.com/docs/integrations/chat/ollama/
# https://react-lm.github.io/ 

**In this notebook, we perform:**

- Data Collection, Validation, Cleaning & Transformation

- Exploratory Data Analysis (EDA) & POI Definition: Perform EDA and define Points of Interest (POIs)

- Network Graph Construction & Public Transport Mapping: Combine graph construction with the mapping of POIs and bus stops

- Accessibility Metric Definition, Score Calculation & Analysis 

- Visualization of POIs on Galway Map


### 1. Galway Bus API - Bus Stops

Reference: https://galway-bus.apis.ie

Attributes: stop_id, stop_name, stop_lat, stop_lon, direction, and route_id

In [None]:
# Define the path for the CSV file
data_dir = '/Users/njindal/Documents/aic2025/data' 
output_filename = 'gstops_df_v1.csv'
output_path = os.path.join(data_dir, output_filename)

# Try to load the DataFrame from CSV 
if os.path.exists(output_path):
    print(f"Loading existing gstops_df_v1 from: {output_path}")
    # Read the CSV and set the first column as the index
    gstops_df_v1 = pd.read_csv(output_path, index_col=0)
    print(f"Loaded DataFrame with shape: {gstops_df_v1.shape}")
else:
    print(f"File not found at {output_path}. Fetching data from API...")
    route_ids = [401, 402, 404, 405, 407, 409, 410, 411, 412, 414]
    all_stops = []

    for route_id in route_ids:
        url = f"https://galway-bus.apis.ie/api/groute/{route_id}"
        try:
            response = requests.get(url)
            if response.status_code == 200:
                data = response.json()
                results = data.get('results', [])
                if isinstance(results, dict): # results may be a dict or a list
                    results = [results]
                if not results:
                    print(f"Route id {route_id} information not available from API.")
                    continue
                for direction_info in results:
                    direction_id = direction_info.get('direction_id')
                    for stop in direction_info.get('g_stops', []):
                        stop_row = {
                            'stop_id': stop['stop_id'],
                            'stop_name': stop['stop_name'],
                            'stop_lat': stop['stop_lat'],
                            'stop_lon': stop['stop_lon'],
                            'direction': direction_id,
                            'route_id': route_id
                        }
                        all_stops.append(stop_row)
            else:
                print(f"Route id {route_id} information not available from API (HTTP {response.status_code}).")
        except Exception as e:
            print(f"Error fetching data for route id {route_id} from API: {e}")

    if all_stops: # to check if any stops were collected
        gstops_df_v1_from_api = pd.DataFrame(all_stops) # create a temporary DataFrame

        # --- Create custom 'BS' indices ---
        print("\nCreating custom 'BS' indices for new API data...")
        if all(col in gstops_df_v1_from_api.columns for col in ['route_id', 'direction', 'stop_id']):
            gstops_df_v1_sorted = gstops_df_v1_from_api.sort_values(by=['route_id', 'direction', 'stop_id']).reset_index(drop=True)
        else:
            print("Warning: Columns for sorting ('route_id', 'direction', 'stop_id') not all present. Indexing based on current order.")
            gstops_df_v1_sorted = gstops_df_v1_from_api.reset_index(drop=True)

        bus_stop_indices = [f'BS{i+1}' for i in range(len(gstops_df_v1_sorted))]
        gstops_df_v1_sorted.index = bus_stop_indices
        gstops_df_v1 = gstops_df_v1_sorted 
        # --- End of custom 'BS' indices creation ---

        # save the DataFrame with the index
        gstops_df_v1.to_csv(output_path, index=True)
        print(f"gstops_df_v1 fetched from API, custom 'BS' indices created, and saved to {output_path}.")
        print(f"Shape of new gstops_df_v1: {gstops_df_v1.shape}")

    else:
        print("No stop data collected from API. gstops_df_v1 is empty.")
        # Create an empty DataFrame with expected columns if no data was fetched
        gstops_df_v1 = pd.DataFrame(columns=['stop_id', 'stop_name', 'stop_lat', 'stop_lon', 'direction', 'route_id'])
        gstops_df_v1.index.name = 'bs_index' 


print("\n--- gstops_df_v1 Final State ---")
if not gstops_df_v1.empty:
    print(gstops_df_v1.head())
else:
    print("gstops_df_v1 is empty.")
print(f"Shape of final gstops_df_v1: {gstops_df_v1.shape}")
print(f"Index of final gstops_df_v1: {gstops_df_v1.index}")

### 2. Bus Eireann routes-and-timetables

Reference: https://www.buseireann.ie/routes-and-timetables/401

In [None]:
route_ids = [401, 402, 404, 405, 407, 409, 410, 411, 412, 414]
all_timetables_df = pd.DataFrame()
output_csv_filename = 'all_bus_eirean_timetables.csv'
output_csv_path = os.path.join(data_dir, output_csv_filename) 

# 1. Determining loading/scraping path
if os.path.exists(output_csv_path):
    print(f"Loading existing timetable data from: {output_csv_path}")
    all_timetables_df = pd.read_csv(output_csv_path)
    print(f"Loaded DataFrame with shape: {all_timetables_df.shape}")
else:
    print(f"File not found at {output_csv_path}. Proceeding with web scraping...")
    driver = None # Initialize driver
    try:
        print("Attempting to set up Chrome WebDriver...")
        driver = webdriver.Chrome() # setup if chromedriver is in PATH
        print("WebDriver setup successful.")

        for route_id in route_ids:
            url = f"https://www.buseireann.ie/routes-and-timetables/{route_id}"
            print(f"Processing route: {route_id} from URL: {url}")
            driver.get(url)
            print(f"Opened URL: {url}")

            time.sleep(5) 

            print(f"Attempting to get current table for route {route_id}...")
            page_html = driver.page_source

            try:
                list_of_dataframes = pd.read_html(page_html)
                if list_of_dataframes:
                    print(f"Found {len(list_of_dataframes)} table(s) for route {route_id}.")
                    timetable_df = list_of_dataframes[0]  
                    timetable_df['route_id'] = route_id
                    print(f"Extracted DataFrame for route {route_id}:")
                    all_timetables_df = pd.concat([all_timetables_df, timetable_df], ignore_index=True)
                else:
                    print(f"No tables found for route {route_id} on page: {url}")
            except ValueError as ve:
                print(f"No tables found by pandas for route {route_id} (pd.read_html error: {ve})")
            except Exception as table_ex:
                print(f"Could not parse tables for route {route_id}: {table_ex}")
        
        # Save the scraped data
        if not all_timetables_df.empty:
            all_timetables_df.columns = [col.replace('.', '_') for col in all_timetables_df.columns]
            all_timetables_df.to_csv(output_csv_path, index=False)
            print(f"Combined timetable scraped and saved to {output_csv_path}")
        else:
            print("No timetable data was extracted during scraping.")

    except Exception as e:
        print(f"An error occurred during Selenium operations: {e}")
    finally:
        if driver:
            print("Closing WebDriver.")
            driver.quit()


print("\n--- Processed Timetable Data ---")
if not all_timetables_df.empty:
    print(f"Total rows in final DataFrame: {len(all_timetables_df)}")
    print("\nUnique route_ids in DataFrame:", all_timetables_df['route_id'].unique())
else:
    print("No timetable data available.")


if not all_timetables_df.empty:
    required_columns = ['route_id', 'ROUTE'] 
    
    # Check if these columns exist
    available_columns = [col for col in required_columns if col in all_timetables_df.columns]
    
    if len(available_columns) == len(required_columns):
        bus_timetables = all_timetables_df[available_columns].copy() # use .copy() to avoid SettingWithCopyWarning error
        print("\n--- bus_timetables DataFrame ---")
        print(f"Shape of bus_timetables: {bus_timetables.shape}")
    else:
        print(f"\nError: Not all required columns ({required_columns}) found in all_timetables_df.")
        print(f"Available columns: {list(all_timetables_df.columns)}")
        bus_timetables = pd.DataFrame() # Create an empty DataFrame
else:
    print("\nall_timetables_df is empty. Cannot create bus_timetables.")
    bus_timetables = pd.DataFrame() # Create an empty DataFrame

if 'bus_timetables' in locals() and not bus_timetables.empty:
    print("\nAdding 'stop_order_on_route' column (assuming pre-sorted data within each route_id)...")
    
    if not bus_timetables.groupby('route_id').ngroup().is_monotonic_increasing:
         print("Sorting by 'route_id' to ensure contiguous groups for cumcount...")
         bus_timetables = bus_timetables.sort_values(by='route_id', kind='mergesort').reset_index(drop=True)
    
    bus_timetables['stop_order_on_route'] = bus_timetables.groupby('route_id').cumcount()
    
    print("'stop_order_on_route' column added.")
    print("\n--- bus_timetables DataFrame with stop_order_on_route ---")
    print(f"Shape of bus_timetables: {bus_timetables.shape}")
    

    unique_routes_to_sample = bus_timetables['route_id'].unique()
    if len(unique_routes_to_sample) > 0:
        sample_route_id = unique_routes_to_sample[0]
        print(f"\nSample for route_id '{sample_route_id}':")
        print(bus_timetables[bus_timetables['route_id'] == sample_route_id][['ROUTE', 'route_id', 'stop_order_on_route']].head(10))
        if len(bus_timetables[bus_timetables['route_id'] == sample_route_id]) > 10:
            print("...")
            print(bus_timetables[bus_timetables['route_id'] == sample_route_id][['ROUTE', 'route_id', 'stop_order_on_route']].tail(5))

    if len(unique_routes_to_sample) > 1:
        sample_route_id_2 = unique_routes_to_sample[1]
        print(f"\nSample for route_id '{sample_route_id_2}':")
        print(bus_timetables[bus_timetables['route_id'] == sample_route_id_2][['ROUTE', 'route_id', 'stop_order_on_route']].head(10))

else:
    print("\nbus_timetables DataFrame is not defined or is empty. Cannot add 'stop_order_on_route'.")

display(bus_timetables[bus_timetables['route_id'] == 401])

### 3. Galway Bus API - Bus Routes

**Attributes:**

- route_long_name: Full name of the route

- g_trip_headsign: Destination displayed on the bus

- route_id: Unique identifier for the route

- route_short_name: Short route number (e.g. 401, 402)

- direction_id: Direction of travel (0 or 1)

- first_stop_id: ID of the first stop

- last_stop_id: ID of the last stop

- first_stop_name: Name of the first stop

- last_stop_name: Name of the last stop

- num_stops: Total number of stops on the route

In [None]:
# Define the path for the CSV file for gvariations_df_v1
gvariations_output_filename = 'gvariations_df_v1.csv'
gvariations_output_path = os.path.join(data_dir, gvariations_output_filename) # Ensure data_dir is defined

# Try to load the DataFrame from CSV first
if os.path.exists(gvariations_output_path):
    print(f"Loading existing gvariations_df_v1 from: {gvariations_output_path}")
    # Read the CSV and set the first column as the index
    gvariations_df_v1 = pd.read_csv(gvariations_output_path, index_col=0)
    print(f"Loaded gvariations_df_v1 DataFrame with shape: {gvariations_df_v1.shape}")
else:
    print(f"File not found at {gvariations_output_path}. Fetching gvariations data from API...")
    route_ids = [401, 402, 404, 405, 407, 409, 410, 411, 412, 414]
    all_variations = []

    for route_id in route_ids:
        url = f"https://galway-bus.apis.ie/api/groute/{route_id}"
        try:
            response = requests.get(url)
            if response.status_code == 200:
                data = response.json()
                results = data.get('results', [])
                if isinstance(results, dict): # results may be a dict or a list
                    results = [results]
                if not results:
                    print(f"Route id {route_id} (variations) - information not available from API.")
                    continue
                for direction_info in results:
                    route_long_name = direction_info.get('route_long_name')
                    g_trip_headsign = direction_info.get('g_trip_headsign')
                    route_short_name = direction_info.get('route_short_name')
                    direction_id = direction_info.get('direction_id')
                    for variation in direction_info.get('g_route_variations', []):
                        row = {
                            'route_long_name': route_long_name,
                            'g_trip_headsign': g_trip_headsign,
                            'route_id': route_id, # Queried route_id
                            'route_short_name': route_short_name,
                            'direction_id': direction_id,
                            'variation_route_id': variation.get('route_id'), # ID from g_route_variations
                            'first_stop_id': variation.get('first_stop_id'),
                            'last_stop_id': variation.get('last_stop_id'),
                            'first_stop_name': variation.get('first_stop_name'),
                            'last_stop_name': variation.get('last_stop_name'),
                            'num_stops': variation.get('num_stops')
                        }
                        all_variations.append(row)
            else:
                print(f"Route id {route_id} (variations) - information not available from API (HTTP {response.status_code}).")
        except Exception as e:
            print(f"Error fetching variations data for route id {route_id} from API: {e}")

    if all_variations: # Check if any variations were collected
        gvariations_df_from_api = pd.DataFrame(all_variations)

        # --- Create custom 'BR' indices ---
        print("\nCreating custom 'BR' indices for new API gvariations_data...")
        # Define sorting columns 
        sort_columns = ['route_id', 'direction_id', 'variation_route_id', 'first_stop_id']
        if all(col in gvariations_df_from_api.columns for col in sort_columns):
            gvariations_df_sorted = gvariations_df_from_api.sort_values(by=sort_columns).reset_index(drop=True)
        else:
            print(f"Warning: Not all columns for sorting ({sort_columns}) present in gvariations. Indexing based on current order.")
            gvariations_df_sorted = gvariations_df_from_api.reset_index(drop=True)

        bus_route_variation_indices = [f'BR{i+1}' for i in range(len(gvariations_df_sorted))]
        gvariations_df_sorted.index = bus_route_variation_indices
        gvariations_df_v1 = gvariations_df_sorted 
        # --- End of custom 'BR' indices creation ---


        # Save the DataFrame WITH THE INDEX
        gvariations_df_v1.to_csv(gvariations_output_path, index=True)
        print(f"gvariations_df_v1 fetched from API, custom 'BR' indices created, and saved to {gvariations_output_path}.")
        print(f"Shape of new gvariations_df_v1: {gvariations_df_v1.shape}")
    else:
        print("No route variation data collected from API. gvariations_df_v1 is empty.")
        gvariations_df_v1 = pd.DataFrame(columns=[
            'route_long_name', 'g_trip_headsign', 'route_id', 'route_short_name',
            'direction_id', 'variation_route_id', 'first_stop_id', 'last_stop_id',
            'first_stop_name', 'last_stop_name', 'num_stops'
        ])
        gvariations_df_v1.index.name = 'br_index' 


print("\n--- gvariations_df_v1 Final State ---")
if not gvariations_df_v1.empty:
    display(gvariations_df_v1.head())
else:
    print("gvariations_df_v1 is empty.")
print(f"Shape of final gvariations_df_v1: {gvariations_df_v1.shape}")
print(f"Index of final gvariations_df_v1: {gvariations_df_v1.index}")

### 4. Download the Ireland and Northern Ireland Shape Files

https://libguides.ucd.ie/gisguide/findspatialdata  

https://download.geofabrik.de/europe/ireland-and-northern-ireland.html 

In [None]:
# --- Configuration for Geofabrik Download ---
geofabrik_url = "https://download.geofabrik.de/europe/ireland-and-northern-ireland-latest-free.shp.zip"
download_target_dir = data_dir # Or your desired common data directory
zip_filename = os.path.join(download_target_dir, "ireland-and-northern-ireland-latest-free.shp.zip")
extracted_shapefile_dir = os.path.join(download_target_dir, "ireland-and-northern-ireland-latest-free.shp")


# --- Download and Unzip Logic ---
def download_and_extract_osm_data(url, zip_path, extract_to_path):
    """Downloads and extracts OSM shapefile data if not already present."""
    try:
        # Check if the final extracted directory already exists and has files (e.g., roads.shp)
        expected_roads_shp = os.path.join(extract_to_path, 'gis_osm_roads_free_1.shp')
        if os.path.exists(expected_roads_shp):
            print(f"Shapefile data already found at: {extract_to_path}")
            return True

        # If not fully extracted, or zip file is missing, proceed to download
        if not os.path.exists(zip_path):
            print(f"Downloading OSM data from {url} to {zip_path}...")
            response = requests.get(url, stream=True)
            response.raise_for_status() # Will raise an HTTP error if the HTTP request returned an unsuccessful status code
            with open(zip_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            print("Download complete.")
        else:
            print(f"Zip file already exists at {zip_path}. Proceeding to extraction.")


        print(f"Extracting {zip_path} to {extract_to_path}...")
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_to_path)
        print("Extraction complete.")
        
        return True

    except requests.exceptions.RequestException as e:
        print(f"Error downloading file: {e}")
        return False
    except zipfile.BadZipFile:
        print(f"Error: Downloaded file at {zip_path} is not a valid zip file or is corrupted.")
        if os.path.exists(zip_path):
            os.remove(zip_path)
        return False
    except Exception as e:
        print(f"An unexpected error occurred during download/extraction: {e}")
        return False


if download_and_extract_osm_data(geofabrik_url, zip_filename, extracted_shapefile_dir):
    print("OSM data is ready.")
    shapefile_base_dir = extracted_shapefile_dir
    print(f"Shapefiles will be loaded from: {shapefile_base_dir}")
else:
    print("Failed to prepare OSM data. Please check the errors. Exiting or using fallback.")
    shapefile_base_dir = extracted_shapefile_dir 


### 5. Rahoon-Portershed Public Transport Accessibility Map for Galway - [1]

In [None]:
# Configure osmnx settings and logging
# ox.config(log_console=True, use_cache=False)
logging.basicConfig(level=logging.INFO)
warnings.filterwarnings("ignore")

# --- Configuration ---
place_name = "Galway, Ireland"
shapefile_base_dir = '/Users/njindal/Documents/aic2025/data/ireland-and-northern-ireland-latest-free.shp' 

shapefile_layers = {
    'roads': 'gis_osm_roads_free_1.shp',
    'water_poly': 'gis_osm_water_a_free_1.shp',
    'railways': 'gis_osm_railways_free_1.shp',
    'waterways': 'gis_osm_waterways_free_1.shp',
    'landuse': 'gis_osm_landuse_a_free_1.shp',
    'buildings': 'gis_osm_buildings_a_free_1.shp',
    'places_poly': 'gis_osm_places_a_free_1.shp'
}

print(f"\n--- Processing Data for: {place_name} ---")
print(f"Using Shapefile directory: {shapefile_base_dir}")

try:
    # --- *** GET GALWAY BOUNDARY *** ---
    print("\nFetching boundary for Galway...")
    boundary_gdf = ox.geocode_to_gdf(place_name).to_crs("EPSG:4326")
    if boundary_gdf.empty:
        raise ValueError(f"Could not geocode '{place_name}'.")
    print(f"Boundary fetched. CRS set to: {boundary_gdf.crs}")



    # --- *** LOAD IRELAND SHAPEFILES & CLIP TO GALWAY BOUNDARY *** ---
    print("\nLoading and clipping Ireland-wide layers to Galway boundary...")
    galway_gdfs = {}
    for layer_name, shp_filename in shapefile_layers.items():
        shp_path = os.path.join(shapefile_base_dir, shp_filename)
        print(f"--- Processing layer: {layer_name} ---")
        if not os.path.exists(shp_path):
            print(f"*** WARNING: Shapefile not found: {shp_path} - Skipping layer '{layer_name}' ***")
            continue
        try:
            ireland_layer_gdf = gpd.read_file(shp_path)
            if ireland_layer_gdf.crs != boundary_gdf.crs:
                ireland_layer_gdf = ireland_layer_gdf.to_crs(boundary_gdf.crs)
            clipped_gdf = gpd.clip(ireland_layer_gdf, boundary_gdf, keep_geom_type=True)
            if not clipped_gdf.empty:
                galway_gdfs[layer_name] = clipped_gdf
            else:
                print(f"Note: No features found for layer '{layer_name}'.")
        except Exception as e:
            print(f"*** ERROR processing layer '{layer_name}': {e} ***")



    # --- *** PREPARE BUS STOP GEODATAFRAME FROM GSTOPS_DF_V1 *** --- 
    print("\nPreparing Galway Bus Stop data from gstops_df_v1...")
    bus_stops_gdf = None
    if 'gstops_df_v1' in locals() and isinstance(gstops_df_v1, pd.DataFrame) and not gstops_df_v1.empty:
        # Ensure 'stop_lat' and 'stop_lon' columns exist
        if 'stop_lat' in gstops_df_v1.columns and 'stop_lon' in gstops_df_v1.columns:
            try:
                # Drop rows with invalid (NaN) coordinates before creating GeoDataFrame
                temp_stops_df = gstops_df_v1.dropna(subset=['stop_lat', 'stop_lon']).copy()
                
                if not temp_stops_df.empty:
                    bus_stops_gdf = gpd.GeoDataFrame(
                        temp_stops_df,
                        geometry=gpd.points_from_xy(temp_stops_df['stop_lon'], temp_stops_df['stop_lat']),
                        crs="EPSG:4326"  
                    )
                    print(f"Created GeoDataFrame 'bus_stops_gdf' with {len(bus_stops_gdf)} stops from gstops_df_v1.")
                    # Reproject if CRS doesn't match the boundary CRS
                    if bus_stops_gdf.crs != boundary_gdf.crs:
                        print(f"Reprojecting bus stops GDF to {boundary_gdf.crs}...");
                        bus_stops_gdf = bus_stops_gdf.to_crs(boundary_gdf.crs)
                        print("Reprojection complete.")
                else:
                    print("Warning: No valid coordinates found in gstops_df_v1 after cleaning.")
            except Exception as e:
                print(f"*** ERROR converting gstops_df_v1 data: {e} ***")
                bus_stops_gdf = None
        else:
            print("Warning: 'stop_lat' or 'stop_lon' columns not found in gstops_df_v1.")
    else:
        print("Warning: 'gstops_df_v1' DataFrame not found or is empty. Please load it first.")


    # --- *** PREPARE BUS ROUTES GEODATAFRAME FROM gvariations_df_v1 *** --- 
    bus_routes_gdf = None # Initialize

    if 'gvariations_df_v1' in locals() and isinstance(gvariations_df_v1, pd.DataFrame) and not gvariations_df_v1.empty and \
    'bus_stops_gdf' in locals() and isinstance(bus_stops_gdf, gpd.GeoDataFrame) and not bus_stops_gdf.empty:

        print("\nEnriching gvariations_df_v1 with first/last stop Point geometries...")
        
        bus_routes_gdf = gvariations_df_v1.copy()
        
        if 'stop_id' in bus_stops_gdf.columns and 'geometry' in bus_stops_gdf.columns:
            
            # --- Handle duplicate stop_ids in bus_stops_gdf to get a unique map ---
            # A single physical stop_id has one location, regardless of how many route directions use it.
            # We keep the first occurrence of each stop_id to get its unique geometry.
            bus_stops_gdf_unique_locations = bus_stops_gdf.drop_duplicates(subset=['stop_id'], keep='first')
            
            # Create the mapping series from this de-duplicated DataFrame
            stop_id_to_point_geometry = bus_stops_gdf_unique_locations.set_index('stop_id')['geometry']
            
        else:
            print("Error: 'stop_id' or 'geometry' column not found in bus_stops_gdf. Cannot map stop Point geometries.")
            stop_id_to_point_geometry = pd.Series(dtype='object') 

        # Map first stop Point geometry
        bus_routes_gdf['first_stop_point'] = bus_routes_gdf['first_stop_id'].map(stop_id_to_point_geometry)
        
        # Map last stop Point geometry
        bus_routes_gdf['last_stop_point'] = bus_routes_gdf['last_stop_id'].map(stop_id_to_point_geometry)
        
        num_first_stops_mapped = bus_routes_gdf['first_stop_point'].notna().sum()
        num_last_stops_mapped = bus_routes_gdf['last_stop_point'].notna().sum()
        
        print(f"Successfully mapped Point geometry for {num_first_stops_mapped} first stops.")
        print(f"Successfully mapped Point geometry for {num_last_stops_mapped} last stops.")

        # Check if any mappings failed (resulting in NaNs)
        if bus_routes_gdf['first_stop_point'].isnull().any() or bus_routes_gdf['last_stop_point'].isnull().any():
            print("Warning: Some first/last stop points could not be mapped (resulting in NaNs).")
        

        print("\n--- bus_routes_gdf (with Point geometries) ---")
        # Display relevant columns to check the mapping
        display_cols = ['first_stop_id', 'first_stop_point', 'last_stop_id', 'last_stop_point']
        # Add other columns from gvariations_df_v1 if they provide context
        if 'route_id' in bus_routes_gdf.columns: display_cols.insert(0, 'route_id')
        if 'direction_id' in bus_routes_gdf.columns: display_cols.insert(1, 'direction_id')

        print(bus_routes_gdf[display_cols].head())
        print(f"Shape of bus_routes_gdf: {bus_routes_gdf.shape}")

    else:
        print("\nPrerequisite DataFrames ('gvariations_df_v1' or 'bus_stops_gdf') not available or empty. Cannot create bus_routes_gdf.")


      # --- *** CREATE PLACE SUMMARY DATAFRAME *** ---
    print("\nCreating DataFrame for Galway Place Names and Coordinates...")
    galway_places_summary_df = None # Initialize
    if 'places_poly' in galway_gdfs and not galway_gdfs['places_poly'].empty:
        places_data = []
        # Check if the 'name' column exists
        if 'name' not in galway_gdfs['places_poly'].columns:
            print("Warning: 'name' column not found in places_poly layer. Cannot extract place names.")
        else:
            # Iterate through valid polygons with names
            for idx, row in galway_gdfs['places_poly'][galway_gdfs['places_poly']['name'].notna() & galway_gdfs['places_poly'].geometry.is_valid].iterrows():
                place_name_val = row['name']; geometry = row.geometry; rep_point = None
                # Get representative point (or centroid as fallback)
                if hasattr(geometry, 'representative_point'):
                    try: rep_point = geometry.representative_point()
                    except Exception: rep_point = geometry.centroid # Fallback if representative_point fails
                else: rep_point = geometry.centroid # Fallback if method doesn't exist
                # Append if point is valid
                if rep_point and rep_point.is_valid:
                    places_data.append({'place_name': place_name_val,'latitude': rep_point.y,'longitude': rep_point.x})
            # Create DataFrame if data was extracted
            if places_data:
                galway_places_summary_df = pd.DataFrame(places_data)
                print(f"Created DataFrame 'galway_places_summary_df' with {len(galway_places_summary_df)} places.")
                print(galway_places_summary_df.head())
            else: print("No valid places with names found to create summary DataFrame.")
    else: print("Clipped 'places_poly' GeoDataFrame not found or is empty.")

    galway_places_summary_df1 = None # Initialize

    if 'galway_places_summary_df' in locals() and isinstance(galway_places_summary_df, pd.DataFrame) and not galway_places_summary_df.empty:
        galway_places_summary_df1 = galway_places_summary_df.copy()
        if 'place_name' in galway_places_summary_df1.columns:
            galway_places_summary_df1 = galway_places_summary_df1.sort_values('place_name').reset_index(drop=True)
        else:
            print("Warning: 'place_name' column not found for sorting. Index will be based on current order.")

        # Create custom indices starting with 'P'
        place_indices = [f'P{i+1}' for i in range(len(galway_places_summary_df1))]
        galway_places_summary_df1.index = place_indices

        print("\nCreated DataFrame 'galway_places_summary_df1' with custom 'P' indices:")
        print(f"Number of places: {len(galway_places_summary_df1)}")
        print("\nFirst few rows of 'galway_places_summary_df1':")
        print(galway_places_summary_df1.head())
    else:
        print("Cannot create 'galway_places_summary_df1' as 'galway_places_summary_df' is not available or is empty.")
    # --- *** END PLACES SECTION *** ---



# --- *** CHECK RAHOON PLACE ID FOR PLOTTING *** ---
    rahoon_place_id = None # To store the 'P' index if Rahoon is found
    if 'galway_places_summary_df1' in locals() and isinstance(galway_places_summary_df1, pd.DataFrame) and not galway_places_summary_df1.empty:
        if 'place_name' in galway_places_summary_df1.columns:
            # Search for 'Rahoon' in the 'place_name' column 
            rahoon_search_results = galway_places_summary_df1[galway_places_summary_df1['place_name'].str.contains('Rahoon', case=False, na=False)]

            if not rahoon_search_results.empty:
                print(f"\n--- Found 'Rahoon' in galway_places_summary_df1 ---")
                rahoon_place_data = rahoon_search_results.iloc[0]
                rahoon_place_id = rahoon_place_data.name 
                print(f"Place Name: {rahoon_place_data['place_name']}")
                print(f"Index (ID): {rahoon_place_id}")
                print(f"Latitude: {rahoon_place_data['latitude']}")
                print(f"Longitude: {rahoon_place_data['longitude']}")
            else:
                print("\nPlace name containing 'Rahoon' not found in galway_places_summary_df1.")
        else:
            print("\n'place_name' column not found in galway_places_summary_df1.")
    else:
        print("\nDataFrame 'galway_places_summary_df1' not available for searching 'Rahoon'.")




# --- *** CREATE BUILDINGS SUMMARY DATAFRAME *** ---
    print("\nCreating DataFrame for Galway Buildings with Type and Coordinates...")
    galway_buildings_summary_df = None # Initialize
    if 'buildings' in galway_gdfs and not galway_gdfs['buildings'].empty:
        buildings_data = []

        # Check what columns are available in the buildings layer
        print(f"Available columns in buildings layer: {galway_gdfs['buildings'].columns.tolist()}")

        # Extract building info - name, osm_id, and type (typically in fclass or type column)
        for idx, row in galway_gdfs['buildings'][galway_gdfs['buildings'].geometry.is_valid].iterrows():
            osm_id = row.get('osm_id', None)
            name = row.get('name', None)
            building_type = None
            for type_col in ['fclass', 'type', 'building']:
                if type_col in row and row[type_col] is not None:
                    building_type = row[type_col]; break
            try:
                centroid = row.geometry.centroid
                if centroid and centroid.is_valid:
                    buildings_data.append({
                        'building_name': name, 'osm_id': osm_id, 'building_type': building_type,
                        'latitude': centroid.y, 'longitude': centroid.x
                    })
            except Exception as e: print(f"Error calculating centroid for building {osm_id}: {e}")

        if buildings_data:
            galway_buildings_summary_df = pd.DataFrame(buildings_data)
            print(f"Created DataFrame 'galway_buildings_summary_df' with {len(galway_buildings_summary_df)} buildings.")
            print(galway_buildings_summary_df.head())
        else: print("No valid building data found to create summary DataFrame.")
    else: print("Clipped 'buildings' GeoDataFrame not found or is empty.")

    # --- *** REFINE BUILDING SUMMARY DATAFRAME *** ---
    galway_buildings_summary_df1 = None # Initialize
    if galway_buildings_summary_df is not None:
        galway_buildings_summary_df1 = galway_buildings_summary_df[galway_buildings_summary_df['building_name'].notnull()].copy()
        galway_buildings_summary_df1 = galway_buildings_summary_df1.sort_values('building_name')
        building_indices = [f'B{i+1}' for i in range(len(galway_buildings_summary_df1))]
        galway_buildings_summary_df1.index = building_indices
        print("\nCreated filtered DataFrame 'galway_buildings_summary_df1' with named buildings:")
        print(f"Number of named buildings: {len(galway_buildings_summary_df1)}")
        print("\nFirst few rows of filtered DataFrame:")
        print(galway_buildings_summary_df1.head())
    else: print("Cannot create filtered DataFrame as galway_buildings_summary_df is None")
    # --- *** END BUILDINGS SECTION *** ---


    # --- *** PLOTTING CLIPPED GALWAY DATA *** ---
    print("\nPlotting clipped Galway map layers...")
    fig, ax = plt.subplots(figsize=(18, 18), facecolor='white', dpi=250)

    # Define base colors
    color_water = '#a8dff5'; color_land = '#f2f4f6'; color_parks = '#cceac4'
    color_buildings_osm = '#d8cabc' 
    color_roads = '#aaaaaa'; color_rail = '#a0a0a0';color_place_text = '#36454F'  
    
    # Define bus stop color
    color_bus_stops_blue = '#1E90FF' 

    # Set background
    ax.set_facecolor(color_land)

    # Define approximate z-orders
    zorder_landuse=1; zorder_water_poly=2; zorder_parks=3; zorder_buildings_layer=4 # General buildings layer
    zorder_waterways=5; zorder_railways=6; zorder_roads=7;
    zorder_bus_stops_plot = 8    # Z-order for general bus stops
    zorder_place_text = 9        # Z-order for general place name labels

    # Z-orders for the specific B422 building highlight - Portershed
    zorder_building_b422_point = 10  
    zorder_building_b422_text = 11  

    # Z-orders for the specific 'Rahoon' place highlight
    zorder_rahoon_place_point = 10 
    zorder_rahoon_place_text = 11  


    zorder_boundary = 12   # Boundary should be having highest zorder to frame everything
    

    # Plot base layers
    if 'landuse' in galway_gdfs: galway_gdfs['landuse'].plot(ax=ax, column='fclass', categorical=True, cmap='Pastel2', alpha=0.4, zorder=zorder_landuse)
    if 'water_poly' in galway_gdfs: galway_gdfs['water_poly'].plot(ax=ax, color=color_water, edgecolor='none', zorder=zorder_water_poly)
    if 'landuse' in galway_gdfs and 'fclass' in galway_gdfs['landuse'].columns:
        parks_gdf = galway_gdfs['landuse'][galway_gdfs['landuse']['fclass'] == 'park']
        if not parks_gdf.empty: parks_gdf.plot(ax=ax, color=color_parks, edgecolor='none', zorder=zorder_parks)
    if 'buildings' in galway_gdfs: galway_gdfs['buildings'].plot(ax=ax, facecolor=color_buildings_osm, alpha=0.7, lw=0.5, edgecolor=color_buildings_osm, zorder=zorder_buildings_layer)
    if 'waterways' in galway_gdfs: galway_gdfs['waterways'].plot(ax=ax, color=color_water, linewidth=1.0, zorder=zorder_waterways)
    if 'railways' in galway_gdfs:
        galway_gdfs['railways'].plot(ax=ax, color='#ffffff', linewidth=2.0, linestyle='-', zorder=zorder_railways)
        galway_gdfs['railways'].plot(ax=ax, color=color_rail, linewidth=1.0, linestyle='-', zorder=zorder_railways + 0.1)
    if 'roads' in galway_gdfs: galway_gdfs['roads'].plot(ax=ax, color=color_roads, linewidth=0.8, zorder=zorder_roads)

    # --- Plot ALL Bus Stops from gstops_df_v1 ---
    if bus_stops_gdf is not None and not bus_stops_gdf.empty:
        bus_stops_gdf.plot(
            ax=ax,
            color=color_bus_stops_blue, 
            marker='o',
            markersize=15,             
            edgecolor='black',        
            linewidth=0.5,
            alpha=0.9,
            zorder=zorder_bus_stops_plot, 
            label='Bus Stops (All)'
        )
        print(f"Plotted {len(bus_stops_gdf)} bus stops from gstops_df_v1 as blue dots.")
    else:
        print("No bus stops from gstops_df_v1 to plot.")


    # --- Plot Place Names (No Circles) ---
    if galway_places_summary_df is not None and not galway_places_summary_df.empty:
        print(f"Plotting {len(galway_places_summary_df)} place names...")
        plotted_place_names_map = set()
        for idx, row in galway_places_summary_df.iterrows():
            label = row['place_name']; point_x = row['longitude']; point_y = row['latitude']
            if label not in plotted_place_names_map:
                ax.text(point_x, point_y + 0.0002, label, fontsize=8, color=color_place_text,
                        ha='center', va='bottom', zorder=zorder_place_text, fontweight='normal',
                        path_effects=[matplotlib.patheffects.withStroke(linewidth=1, foreground='w')])
                plotted_place_names_map.add(label)
        print("Place names plotted.")

    # --- *** PLOT B422 BUILDING - PORTERSHED *** ---
    if 'galway_buildings_summary_df1' in locals() and galway_buildings_summary_df1 is not None and not galway_buildings_summary_df1.empty:
        building_point_color = '#FF5733' # Orange
        building_text_color = '#000000'  # Black
        plotted_b422 = False
        # Ensure B422 exists in your dataframe's index
        if 'B422' in galway_buildings_summary_df1.index:
            row = galway_buildings_summary_df1.loc['B422']
            point_x = row['longitude']
            point_y = row['latitude']
            building_name = row['building_name']
            
            # Plot orange circle for B422
            plt.scatter(point_x, point_y, s=60, color=building_point_color, edgecolor='black', # Increased size (s=60)
                        linewidth=1, alpha=0.9, zorder=zorder_building_b422_point, label=f'Building: {building_name}')
            
            # Plot name label for B422
            ax.text(point_x, point_y + 0.0003, building_name, fontsize=7, color=building_text_color, 
                    ha='center', va='bottom', zorder=zorder_building_b422_text, fontweight='bold',
                    path_effects=[matplotlib.patheffects.withStroke(linewidth=1, foreground='white')])
            plotted_b422 = True
            print(f"Plotted orange circle and name label for building B422 ('{building_name}').")
        else:
            print("Building B422 not found in the DataFrame 'galway_buildings_summary_df1'.")
    else:
        print("DataFrame 'galway_buildings_summary_df1' not available for plotting B422.")
    # --- *** END OF B422 PLOTTING CODE *** ---   



    # --- *** PLOT SPECIFIC PLACE 'RAHOON' *** ---
    if 'rahoon_place_id' in locals() and rahoon_place_id is not None and \
       'galway_places_summary_df1' in locals() and galway_places_summary_df1 is not None and \
       not galway_places_summary_df1.empty:

        if rahoon_place_id in galway_places_summary_df1.index:
            place_row = galway_places_summary_df1.loc[rahoon_place_id]
            point_x = place_row['longitude']
            point_y = place_row['latitude']
            place_name_label = place_row['place_name'] 

            place_point_color = '#9400D3' # Dark Violet 
            place_text_color = '#000000'   # Black

            # Plot distinct circle for 'Rahoon'
            plt.scatter(point_x, point_y, s=70, color=place_point_color, edgecolor='black', 
                        linewidth=1, alpha=0.9, zorder=zorder_rahoon_place_point, label=f'Place: {place_name_label}')

            # Plot name label for 'Rahoon'
            ax.text(point_x, point_y + 0.00035, place_name_label, fontsize=7.5, color=place_text_color,
                    ha='center', va='bottom', zorder=zorder_rahoon_place_text, fontweight='bold',
                    path_effects=[matplotlib.patheffects.withStroke(linewidth=1, foreground='white')])
            print(f"Plotted distinct circle and name label for place: '{place_name_label}' (ID: {rahoon_place_id}).")
        else:
            print(f"Place with ID '{rahoon_place_id}' (expected to be Rahoon) not found in galway_places_summary_df1.index for plotting.")
    else:
        print("Rahoon was not identified or 'galway_places_summary_df1' is not available for plotting specific place.")
    # --- *** END OF 'RAHOON' PLOTTING CODE *** ---


    # Plot boundary outline for context last
    boundary_gdf.plot(ax=ax, facecolor='none', edgecolor='black', linewidth=0.5, linestyle='--', zorder=zorder_boundary)

    # --- Set Map Bounds ---
    if 'roads' in galway_gdfs and not galway_gdfs['roads'].empty:
        minx, miny, maxx, maxy = galway_gdfs['roads'].total_bounds
    else:
        minx, miny, maxx, maxy = boundary_gdf.total_bounds
    margin_factor = 0.02
    margin_x = (maxx - minx) * margin_factor
    margin_y = (maxy - miny) * margin_factor
    ax.set_xlim(minx - margin_x, maxx + margin_x)
    ax.set_ylim(miny - margin_y, maxy + margin_y)
    ax.set_aspect('equal', adjustable='box')

    # Final plot adjustments
    ax.set_title(f"Galway Map with Bus Stops (from gstops_df_v1)", color='black', fontsize=16)
    plt.legend(loc='upper right') # add a legend
    plt.axis('off')
    plt.tight_layout()
    plt.show()
 

except FileNotFoundError as e:
    print(f"\n--- File Error ---\n{e}\nPlease ensure file paths are correct.")
except ImportError as e:
    print(f"\n--- Import Error Occurred ---\nError: {e}\nPlease ensure required libraries are installed.")
except ValueError as e:
    print(f"\n--- Value Error ---\n{e}")
except Exception as e:
    print(f"\n--- An Unexpected Error Occurred ---\nError: {e}")
    import traceback
    traceback.print_exc()

### 6. Stop and Route Name Standardization

In [None]:
# --- Abbreviation Expansion and Parentheses Removal Function ---
def expand_abbr_and_remove_paren(text_to_process):
    if pd.isna(text_to_process):
        return text_to_process

    current_text = str(text_to_process) 
    
    abbreviations = {
        r'\brd\b': 'road',
        r'\bst\b': 'street',
        r'\blwr\b': 'lower',
        r'\bav\b': 'avenue',
        r'\bave\b': 'avenue',
        r'\bopp\b': 'opposite',
        r'\bind est\b': 'industrial estate',
    }
    
    expanded_text = current_text
    for abbr, expansion in abbreviations.items():
        expanded_text = re.sub(abbr, expansion, expanded_text)
    
    text_after_paren_open_removed = re.sub(r'\s*\(\s*', ' ', expanded_text) 
    text_after_paren_close_removed = re.sub(r'\s*\)\s*', ' ', text_after_paren_open_removed)
    
    # Consolidate multiple spaces into one
    final_text = re.sub(r'\s+', ' ', text_after_paren_close_removed).strip()
        
    return final_text

# --- Prepare bus_stops_gdf ---
if 'bus_stops_gdf' in locals() and isinstance(bus_stops_gdf, pd.DataFrame) and not bus_stops_gdf.empty:
    if 'stop_name' in bus_stops_gdf.columns:    
        bus_stops_gdf['stop_name_norm_explicit'] = bus_stops_gdf['stop_name'].astype(str).str.lower().str.strip()
        bus_stops_gdf['stop_name_norm_expanded'] = bus_stops_gdf['stop_name_norm_explicit'].apply(expand_abbr_and_remove_paren)
        print("bus_stops_gdf prepared.")
    else:
        print("Error: 'stop_name' column not found in bus_stops_gdf.")
else:
    print("Error: bus_stops_gdf is not defined or is empty.")

# --- Prepare bus_timetables ---
if 'bus_timetables' in locals() and isinstance(bus_timetables, pd.DataFrame) and not bus_timetables.empty:
    if 'ROUTE' in bus_timetables.columns: 
        bus_timetables['ROUTE_norm_explicit'] = bus_timetables['ROUTE'].astype(str).str.lower().str.strip()
        bus_timetables['ROUTE_norm_expanded'] = bus_timetables['ROUTE_norm_explicit'].apply(expand_abbr_and_remove_paren)
        
        print("bus_timetables prepared.")
    else:
        print("Error: 'ROUTE' column not found in bus_timetables.")
else:
    print("\nError: bus_timetables DataFrame is not defined or is empty.")


### 7. Bus Stop Matching and Route Mapping  


In [None]:
# --- MATCHING FUNCTION (Exact -> Fuzzy -> Token Overlap) ---
def find_stop_id_expanded_match(route_norm_expanded_to_match, stops_df):
    if pd.isna(route_norm_expanded_to_match):
        return None, "Input ROUTE_norm_expanded is NaN", None, np.nan, np.nan

    if not isinstance(stops_df, pd.DataFrame) or stops_df.empty:
        return None, "stops_df (bus_stops_gdf) is invalid or empty", None, np.nan, np.nan

    required_cols = ['stop_name_norm_expanded', 'stop_id', 'stop_name']
    if not all(col in stops_df.columns for col in required_cols):
        missing = [col for col in required_cols if col not in stops_df.columns]
        return None, f"Missing required columns in stops_df: {missing}", None, np.nan, np.nan

    matched_stop_id = None
    match_method = "No Initial Match" # Default if no stages match
    matched_original_stop_name_in_gdf = None
    fuzz_ratio_score = np.nan
    fuzz_wratio_score = np.nan

    # --- Stage 1: Exact match ---
    exact_match_gdf = stops_df[stops_df['stop_name_norm_expanded'] == route_norm_expanded_to_match]
    if not exact_match_gdf.empty:
        matched_stop_id = exact_match_gdf.iloc[0]['stop_id']
        matched_original_stop_name_in_gdf = exact_match_gdf.iloc[0]['stop_name']
        match_method = "Exact Expanded Name" + (" (Multiple GDF matches, took first)" if len(exact_match_gdf) > 1 else "")
        fuzz_ratio_score, fuzz_wratio_score = 100, 100
        return matched_stop_id, match_method, matched_original_stop_name_in_gdf, fuzz_ratio_score, fuzz_wratio_score

    # --- Stage 2: Fuzzy match ---
    choices = stops_df['stop_name_norm_expanded'].dropna().tolist()
    if choices:
        best_match_wratio_tuple = process.extractOne(route_norm_expanded_to_match, choices, scorer=fuzz.WRatio, score_cutoff=88)
        if best_match_wratio_tuple:
            best_matched_expanded_name_in_gdf = best_match_wratio_tuple[0]
            fuzz_wratio_score = best_match_wratio_tuple[1]
            fuzz_ratio_score = fuzz.ratio(route_norm_expanded_to_match, best_matched_expanded_name_in_gdf)
            gdf_row_for_best_match = stops_df[stops_df['stop_name_norm_expanded'] == best_matched_expanded_name_in_gdf]
            if not gdf_row_for_best_match.empty:
                matched_stop_id = gdf_row_for_best_match.iloc[0]['stop_id']
                matched_original_stop_name_in_gdf = gdf_row_for_best_match.iloc[0]['stop_name']
                match_method = f"Fuzzy Expanded Name (WRatio: {fuzz_wratio_score:.0f})"
                return matched_stop_id, match_method, matched_original_stop_name_in_gdf, fuzz_ratio_score, fuzz_wratio_score
            else: # Should be rare
                match_method = "Fuzzy Match Found but GDF Row Missing"
                return None, match_method, None, fuzz_ratio_score, fuzz_wratio_score
    
    # --- Stage 3: Token-based Overlap Match (if no exact or fuzzy match) ---
    if matched_stop_id is None and isinstance(route_norm_expanded_to_match, str) and len(route_norm_expanded_to_match.strip()) > 0:
        route_tokens = set(route_norm_expanded_to_match.split())
        if not route_tokens:
             return None, "No Final Match (Empty Route Tokens)", None, np.nan, np.nan

        best_overlap_score = 0
        candidate_stop_id = None
        candidate_original_name = None
        best_gdf_name_for_token_match = None # For tie-breaking

        # Minimum common tokens required for this type of match
        min_required_common_tokens = 1 

        for index, row in stops_df.iterrows():
            gdf_stop_name_expanded = row['stop_name_norm_expanded']
            if pd.isna(gdf_stop_name_expanded):
                continue
            
            gdf_tokens = set(gdf_stop_name_expanded.split())
            if not gdf_tokens:
                continue
                
            common_tokens = route_tokens.intersection(gdf_tokens)
            current_overlap_score = len(common_tokens)
            
            if current_overlap_score >= min_required_common_tokens:
                if current_overlap_score > best_overlap_score:
                    best_overlap_score = current_overlap_score
                    candidate_stop_id = row['stop_id']
                    candidate_original_name = row['stop_name']
                    best_gdf_name_for_token_match = gdf_stop_name_expanded
                elif current_overlap_score == best_overlap_score:
                    # Tie-breaking: prefer shorter GDF name if overlap score is the same
                    if candidate_original_name is None or (best_gdf_name_for_token_match and len(gdf_stop_name_expanded) < len(best_gdf_name_for_token_match)):
                        candidate_stop_id = row['stop_id']
                        candidate_original_name = row['stop_name']
                        best_gdf_name_for_token_match = gdf_stop_name_expanded
                            
        if candidate_stop_id is not None:
            matched_stop_id = candidate_stop_id
            matched_original_stop_name_in_gdf = candidate_original_name
            match_method = f"Token Overlap Match (Score: {best_overlap_score})"
            # Fuzzy scores are not applicable here, remain np.nan
            return matched_stop_id, match_method, matched_original_stop_name_in_gdf, np.nan, np.nan

    # If no match from any stage
    final_match_method = "No Final Match" if match_method == "No Initial Match" else match_method
    return None, final_match_method, None, np.nan, np.nan


# --- APPLYING THE MATCHING TO bus_timetables ---
if ('bus_timetables' in locals() and isinstance(bus_timetables, pd.DataFrame) and not bus_timetables.empty and
    'ROUTE_norm_expanded' in bus_timetables.columns and
    'bus_stops_gdf' in locals() and isinstance(bus_stops_gdf, pd.DataFrame) and not bus_stops_gdf.empty and
    'stop_name_norm_expanded' in bus_stops_gdf.columns and 'stop_name' in bus_stops_gdf.columns and 'stop_id' in bus_stops_gdf.columns):

    print("\nMapping ROUTE_norm_expanded to stop_id (Exact -> Fuzzy -> Token Overlap)...")
    
    match_results_tuples = bus_timetables['ROUTE_norm_expanded'].apply(
        lambda x: find_stop_id_expanded_match(x, bus_stops_gdf.copy()) # Pass a copy of bus_stops_gdf
    )
    
    bus_timetables['stop_id_mapped'] = [res[0] for res in match_results_tuples]
    bus_timetables['match_method'] = [res[1] for res in match_results_tuples]
    bus_timetables['matched_stop_name_in_gdf'] = [res[2] for res in match_results_tuples]
    bus_timetables['fuzz_ratio_score'] = [res[3] for res in match_results_tuples] 
    bus_timetables['fuzz_wratio_score'] = [res[4] for res in match_results_tuples]

    print("Mapping complete.")
    print("\n--- bus_timetables with mapped stop_id (Exact -> Fuzzy -> Token Overlap) ---")
    display_cols = ['ROUTE', 'ROUTE_norm_expanded', 'stop_id_mapped', 'match_method', 'matched_stop_name_in_gdf', 'fuzz_ratio_score', 'fuzz_wratio_score']
    if 'route_id' in bus_timetables.columns: # Add route_id if it exists
        if 'ROUTE_norm_expanded' in display_cols and 'route_id' not in display_cols:
             display_cols.insert(display_cols.index('ROUTE_norm_expanded') + 1, 'route_id')
        elif 'route_id' not in display_cols:
            display_cols.append('route_id')
            
    final_display_cols = [col for col in display_cols if col in bus_timetables.columns]
    
    print("\nMatch Method Distribution:")
    
    unmapped_count = bus_timetables['stop_id_mapped'].isna().sum()
    print(f"\nNumber of ROUTEs not mapped to a stop_id: {unmapped_count} out of {len(bus_timetables)}")
    
    if unmapped_count > 0:
        print("Sample of unmapped ROUTEs:")
        unmapped_sample_cols = ['ROUTE', 'ROUTE_norm_expanded', 'match_method']
        if 'route_id' in bus_timetables.columns: unmapped_sample_cols.append('route_id')
        unmapped_sample_cols = [col for col in unmapped_sample_cols if col in bus_timetables.columns]
        if unmapped_sample_cols:
            display(bus_timetables[bus_timetables['stop_id_mapped'].isna()][unmapped_sample_cols].head(10))
            
    print("\nReview Token Overlap Matches:")
    if 'match_method' in bus_timetables.columns:
        token_overlap_review = bus_timetables[bus_timetables['match_method'].str.contains("Token Overlap", na=False)][final_display_cols]

else:
    print("\nCannot perform mapping. Prerequisites not met (check DataFrames and required columns).")


# Replace empty strings in 'stop_id_mapped' with pd.NA (or np.nan)
bus_timetables['stop_id_mapped'] = bus_timetables['stop_id_mapped'].replace('', pd.NA)

# Drop rows where 'stop_id_mapped' is NA
bus_timetables = bus_timetables.dropna(subset=['stop_id_mapped'])


In [None]:
print('galway_places_summary_df1')
display(galway_places_summary_df1.head(2))
print('galway_buildings_summary_df1')
display(galway_buildings_summary_df1.head(2))
print('bus_stops_gdf')
display(bus_stops_gdf.head(2))
print('bus_timetables')
display(bus_timetables.head(2))
print('bus_routes_gdf')
display(bus_routes_gdf.head(2))

### 8. Network Graph Structure : multi-modal public transportation graph for Galway

Network structure with nodes (places, buildings, bus stops) and edges (walking connections, bus routes)

add places, buildings and bus stops Nodes 

add Access/Egress Edges - walking connections i.e., place-TO-nearby-bus-stop / nearby-bus-stop-TO-building

add Directed Transit Edges - bus route between consecutive stops

**User Input:** MAX_ACCESS_DISTANCE_METERS = 800

In [None]:
# --- Helper Function for Haversine Distance ---
def haversine(lat1, lon1, lat2, lon2):
    R = 6371
    try:
        lat1_rad, lon1_rad, lat2_rad, lon2_rad = map(radians, [float(lat1), float(lon1), float(lat2), float(lon2)])
    except (ValueError, TypeError):
        return float('inf')
    dlon = lon2_rad - lon1_rad
    dlat = lat2_rad - lat1_rad
    a = sin(dlat / 2)**2 + cos(lat1_rad) * cos(lat2_rad) * sin(dlon / 2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c * 1000


# --- 1. Initialize Graph ---
G = nx.DiGraph()
print("Graph initialized.")

# --- Step 1a: Add Place Nodes ---
print("\nAdding place nodes (general POIs)...")
for index, row in galway_places_summary_df1.iterrows():
    place_node_id = row['place_name']
    G.add_node(place_node_id, type='place', name=row['place_name'], latitude=row['latitude'], longitude=row['longitude'])
print(f"Nodes after general places: {G.number_of_nodes()}")

# --- Step 1b: Add Building Nodes---
print("\nAdding building nodes...")
for index, row in galway_buildings_summary_df1.iterrows():
    building_node_id = row['building_name']
    G.add_node(building_node_id, type='building', name=row['building_name'], osm_id=row.get('osm_id'),
               building_type=row.get('building_type'), latitude=row['latitude'], longitude=row['longitude'])
print(f"Nodes after adding buildings: {G.number_of_nodes()}")

# --- Step 1c: Add Unique Bus Stop Nodes ---
print("\nAdding unique bus stop nodes...")
added_stop_ids = set()
for index, row in bus_stops_gdf.iterrows():
    stop_id = row['stop_id']
    if stop_id not in added_stop_ids:
        G.add_node(stop_id, type='bus_stop', name=row['stop_name'], latitude=row['stop_lat'], longitude=row['stop_lon'],
                   direction=row.get('direction'), original_route_id=row.get('route_id'), geometry=row.get('geometry'),
                   norm_explicit=row.get('stop_name_norm_explicit'), norm_expanded=row.get('stop_name_norm_expanded'))
        added_stop_ids.add(stop_id)
print(f"Total nodes after bus stops: {G.number_of_nodes()}")

# --- Step 2: Add Access/Egress Edges
print("\nAdding access/egress edges...")
MAX_ACCESS_DISTANCE_METERS = 800

access_edge_count = 0
place_nodes_data = {node_id: data for node_id, data in G.nodes(data=True) if data.get('type') == 'place'}
building_nodes_data = {node_id: data for node_id, data in G.nodes(data=True) if data.get('type') == 'building'}
bus_stop_nodes_data = {node_id: data for node_id, data in G.nodes(data=True) if data.get('type') == 'bus_stop'}
for place_node_id, place_data in place_nodes_data.items():
    place_lat = place_data.get('latitude'); place_lon = place_data.get('longitude')
    if place_lat is None or place_lon is None: continue
    for stop_node_id, stop_data in bus_stop_nodes_data.items():
        stop_lat = stop_data.get('latitude'); stop_lon = stop_data.get('longitude')
        if stop_lat is None or stop_lon is None: continue
        walking_distance_m = haversine(place_lat, place_lon, stop_lat, stop_lon)
        if walking_distance_m <= MAX_ACCESS_DISTANCE_METERS:
            edge_attrs = {'type':'access_egress', 'mode':'walk', 'distance_m': walking_distance_m}
            G.add_edge(place_node_id, stop_node_id, **edge_attrs); G.add_edge(stop_node_id, place_node_id, **edge_attrs)
            access_edge_count += 2;

for building_node_id, building_data in building_nodes_data.items():
    building_lat = building_data.get('latitude'); building_lon = building_data.get('longitude')
    if building_lat is None or building_lon is None: continue
    for stop_node_id, stop_data in bus_stop_nodes_data.items():
        stop_lat = stop_data.get('latitude'); stop_lon = stop_data.get('longitude')
        if stop_lat is None or stop_lon is None: continue
        walking_distance_m = haversine(building_lat, building_lon, stop_lat, stop_lon)
        if walking_distance_m <= MAX_ACCESS_DISTANCE_METERS:
            edge_attrs = {'type':'access_egress', 'mode':'walk', 'distance_m': walking_distance_m}
            G.add_edge(building_node_id, stop_node_id, **edge_attrs); G.add_edge(stop_node_id, building_node_id, **edge_attrs)
            access_edge_count += 2;
print(f"Added {access_edge_count} access/egress edges in total.")

# --- Step 3: Add Directed Transit Edges ---
print("\nAdding directed transit edges...")
transit_edge_count = 0
valid_graph_stop_node_ids = {node_id for node_id, data in G.nodes(data=True) if data.get('type') == 'bus_stop'}
print(f"Debug: Found {len(valid_graph_stop_node_ids)} valid bus_stop nodes in the graph for transit edges.")
#
print(f"Debug: bus_timetables has {len(bus_timetables)} rows for transit edge creation.")

for route_id_timetable, group in bus_timetables.groupby('route_id'): # Using bus_timetables
    route_stops = group.sort_values(by='stop_order_on_route')
    for i in range(len(route_stops) - 1):
        from_stop_id_mapped = route_stops.iloc[i]['stop_id_mapped']
        to_stop_id_mapped = route_stops.iloc[i+1]['stop_id_mapped']
        from_node_exists = from_stop_id_mapped in valid_graph_stop_node_ids
        to_node_exists = to_stop_id_mapped in valid_graph_stop_node_ids

        if from_node_exists and to_node_exists:
            from_stop_node_data = G.nodes[from_stop_id_mapped]
            to_stop_node_data = G.nodes[to_stop_id_mapped]
            from_lat, from_lon = from_stop_node_data.get('latitude'), from_stop_node_data.get('longitude')
            to_lat, to_lon = to_stop_node_data.get('latitude'), to_stop_node_data.get('longitude')

            if None not in [from_lat, from_lon, to_lat, to_lon]:
                segment_distance_m = haversine(from_lat, from_lon, to_lat, to_lon)
                edge_attrs = {'type':'transit', 'route_id':route_id_timetable, 'hop_count':1, 'distance_m':segment_distance_m}
                G.add_edge(from_stop_id_mapped, to_stop_id_mapped, **edge_attrs)
                transit_edge_count += 1
print(f"Added {transit_edge_count} directed transit edges.")

# --- Final Graph Summary ---
print("\n--- Graph Construction Complete ---")
print(f"Total nodes in graph: {G.number_of_nodes()}")
print(f"Total edges in graph: {G.number_of_edges()}")
node_types_list = [data.get('type', 'Unknown') for node, data in G.nodes(data=True)]
print("\nNode type counts:\n", pd.Series(node_types_list).value_counts())
edge_summary = []
for u, v, data in G.edges(data=True):
    edge_type = data.get('type', 'Unknown')
    edge_summary.append(f"{edge_type}_{data.get('mode', '')}" if edge_type == 'access_egress' else edge_type)
print("\nEdge type counts:\n", pd.Series(edge_summary).value_counts())

# --- check edges with data ---
print("\n--- Explicit check of G.edges(data=True) ---")
all_edges_with_data = list(G.edges(data=True))
if not all_edges_with_data: print("G.edges(data=True) is EMPTY.")
else:
    print(f"Found {len(all_edges_with_data)} edges with data. Sample:")
    for i, edge_tuple in enumerate(all_edges_with_data): 
        print(edge_tuple)
        if i >= 2: 
            break 

### 9. Define Origin ("Place"), Destination ("Building") location and get their nearby bus stops

**User input:**
place_of_interest_rahoon = "Rahoon"; 
place_of_interest_portershed = "Portershed a Dó" 

In [4]:
MAX_ACCESS_DISTANCE_METERS = 800

if 'G' in locals() and G.number_of_nodes() > 0:
    rahoon_node_id = "Rahoon"
    portershed_node_id = "Portershed a Dó" # the building_name used as node ID

    print(f"--- Checking for exact node ID: '{rahoon_node_id}' ---")
    if G.has_node(rahoon_node_id):
        print(f"Node '{rahoon_node_id}' FOUND.")
        print(f"Attributes: {G.nodes[rahoon_node_id]}")
    else:
        print(f"Node '{rahoon_node_id}' NOT FOUND by exact ID.")

    print(f"\n--- Checking for exact node ID: '{portershed_node_id}' ---")
    if G.has_node(portershed_node_id):
        print(f"Node '{portershed_node_id}' FOUND.")
        print(f"Attributes: {G.nodes[portershed_node_id]}")
    else:
        print(f"Node '{portershed_node_id}' NOT FOUND by exact ID.")
else:
    print("Graph G is not defined or is empty.")



# --- Step 3.1: Identify Relevant Stops (Nearby Stops) ---

# Define the node IDs 
place_of_interest_rahoon = rahoon_node_id
place_of_interest_portershed = portershed_node_id

# Define the proximity threshold (user input)
PROXIMITY_THRESHOLD_METERS = MAX_ACCESS_DISTANCE_METERS 

print(f"Using proximity threshold: {PROXIMITY_THRESHOLD_METERS} meters.")

def get_nearby_stops(graph, poi_node_id, max_distance):
    """
    Finds bus stops connected to a POI node via 'access_egress' edges
    within a specified maximum distance, and returns their IDs and distances.
    """
    nearby_stops_info = [] # list to store dictionaries

    if not graph.has_node(poi_node_id):
        print(f"Warning: POI node '{poi_node_id}' not found in the graph.")
        return nearby_stops_info # Return empty list

    # We are interested in edges FROM the POI TO a bus stop for "access"
    # The graph stores bi-directional access/egress, so out_edges from POI is sufficient
    # to find connected bus stops.
    for u, v, data in graph.out_edges(poi_node_id, data=True):
        edge_type = data.get('type')
        edge_distance = data.get('distance_m', float('inf')) #  infinity if no distance

        # Check if the edge is an access/egress edge
        if edge_type == 'access_egress':
            # Check if the connected node 'v' is a bus stop
            if graph.has_node(v) and graph.nodes[v].get('type') == 'bus_stop':
                # Check if the distance is within the threshold
                if edge_distance <= max_distance:
                    # Add a dictionary with stop_id and distance
                    nearby_stops_info.append({'stop_id': v, 'distance_m': edge_distance}) 
            
    return nearby_stops_info

place_nearby_stops = get_nearby_stops(G, place_of_interest_rahoon, PROXIMITY_THRESHOLD_METERS)
building_nearby_stops = get_nearby_stops(G, place_of_interest_portershed, PROXIMITY_THRESHOLD_METERS)
place_nearby_stop_ids = [info['stop_id'] for info in place_nearby_stops]
building_nearby_stop_ids = [info['stop_id'] for info in building_nearby_stops]
print(f"Proximity Threshold: {PROXIMITY_THRESHOLD_METERS} meters.")
print(f"place_nearby_stops : {place_nearby_stops}")
print(f"building_nearby_stops: {building_nearby_stops}")
print(f"place_nearby_stop_ids: {place_nearby_stop_ids}")
print(f"building_nearby_stop_ids: {building_nearby_stop_ids}")

--- Checking for exact node ID: 'Rahoon' ---
Node 'Rahoon' FOUND.
Attributes: {'type': 'place', 'name': 'Rahoon', 'latitude': 53.27716555, 'longitude': -9.089465645095348}

--- Checking for exact node ID: 'Portershed a Dó' ---
Node 'Portershed a Dó' FOUND.
Attributes: {'type': 'building', 'name': 'Portershed a Dó', 'osm_id': '175583929', 'building_type': 'building', 'latitude': 53.27365480436892, 'longitude': -9.053651919263224}
Using proximity threshold: 800 meters.
Proximity Threshold: 800 meters.
place_nearby_stops : [{'stop_id': '84605257301', 'distance_m': 729.9969032122906}, {'stop_id': '8460B5255001', 'distance_m': 601.2206968680148}, {'stop_id': '8460B5255201', 'distance_m': 537.305770665661}, {'stop_id': '8460B5255401', 'distance_m': 796.4924585739333}, {'stop_id': '8460B5256901', 'distance_m': 726.4626934218753}, {'stop_id': '8460B5257001', 'distance_m': 767.3727018826424}, {'stop_id': '8460B5257101', 'distance_m': 691.3026353985596}, {'stop_id': '8460B5257201', 'distance_m':

### 10. Direct Transit Connection Analysis between Rahoon and Portershed

In [5]:
try:
    from IPython.display import display
except ImportError:
    display = print # Fallback to simple print if not in IPython

def direct_transit_conn_between_places(G, bus_timetables, origin_poi_name, 
                                       origin_nearby_stops_info, destination_poi_name, 
                                       destination_nearby_stops_info):
    """
    Analyzes direct public transit connections between two sets of nearby stops 
    for given points of interests (POIs).

    Args:
        G (nx.DiGraph): The NetworkX graph containing transit network data. 
                        Edges should have 'type' ('transit'), 'route_id', and 'distance_m'.
        bus_timetables (pd.DataFrame): DataFrame with bus timetable information, including
                                       'route_id', 'stop_id_mapped', and 'stop_order_on_route'.
        origin_poi_name (str): Name of the origin POI (e.g., "Rahoon").
        origin_nearby_stops_info (list): List of dictionaries for stops near the origin POI.
                                         Each dict: {'stop_id': str, 'distance_m': float}
        destination_poi_name (str): Name of the destination POI (e.g., "Portershed").
        destination_nearby_stops_info (list): List of dictionaries for stops near the destination POI.
                                              Each dict: {'stop_id': str, 'distance_m': float}

    Returns:
        tuple: A tuple containing two DataFrames:
               - connections_df (pd.DataFrame): DataFrame of direct transit connections found.
               - no_connection_df (pd.DataFrame): DataFrame of routes serving origin stops 
                                                  but not connecting to destination stops in sequence.
               Returns (None, None) if critical input errors occur.
    """

    # --- Initialization and Input Preparation ---
    if not origin_nearby_stops_info or not destination_nearby_stops_info:
        print(f"Warning: Input list 'origin_nearby_stops_info' or 'destination_nearby_stops_info' is empty.")

    origin_target_stop_ids = [info['stop_id'] for info in origin_nearby_stops_info]
    destination_target_stop_ids = [info['stop_id'] for info in destination_nearby_stops_info]

    print(f"Origin ({origin_poi_name}) Target Stops: {origin_target_stop_ids}")
    print(f"Destination ({destination_poi_name}) Target Stops: {destination_target_stop_ids}")

    direct_transit_connections = []
    origin_routes_no_destination_connection = []

    # --- Main Analysis Logic ---
    required_cols = ['route_id', 'stop_id_mapped', 'stop_order_on_route']
    if not isinstance(bus_timetables, pd.DataFrame) or not all(col in bus_timetables.columns for col in required_cols):
        print(f"Error: 'bus_timetables' DataFrame is not valid or is missing required columns: {required_cols}")
        return None, None 
    if not hasattr(G, 'edges'):
        print("Error: NetworkX graph 'G' is not valid.")
        return None, None
    if not origin_target_stop_ids or not destination_target_stop_ids:
        print(f"Error: Either Origin ({origin_poi_name}) or Destination ({destination_poi_name}) target stop lists are empty. Cannot proceed.")
        # Return empty DataFrames as per function definition
        return pd.DataFrame(columns=['origin_poi', 'destination_poi', 'route_id', 'origin_stop_id', 
                                     'destination_stop_id', 'origin_stop_order', 'destination_stop_order', 
                                     'hops', 'transit_distance_m']), \
               pd.DataFrame(columns=['origin_stop_id', 'route_id', 'message'])


    print(f"\nAnalyzing routes from {origin_poi_name} area stops towards {destination_poi_name} area stops...")

    for r_stop_id in origin_target_stop_ids:
        routes_serving_r_stop_df = bus_timetables[bus_timetables['stop_id_mapped'] == r_stop_id]
        
        if routes_serving_r_stop_df.empty:
            continue

        unique_routes_for_this_r_stop = routes_serving_r_stop_df['route_id'].unique()

        for route_id_val in unique_routes_for_this_r_stop:
            route_sequence_df = bus_timetables[bus_timetables['route_id'] == route_id_val].sort_values(by='stop_order_on_route')
            
            if route_sequence_df.empty: 
                continue 

            stop_to_order_map = pd.Series(route_sequence_df['stop_order_on_route'].values, index=route_sequence_df['stop_id_mapped']).to_dict()
            
            if r_stop_id not in stop_to_order_map:
                continue
            r_stop_order = stop_to_order_map[r_stop_id]
            
            found_connection_on_this_route_for_r_stop = False

            for p_stop_id in destination_target_stop_ids:
                if p_stop_id in stop_to_order_map:
                    p_stop_order = stop_to_order_map[p_stop_id]
                    
                    if r_stop_order < p_stop_order:
                        found_connection_on_this_route_for_r_stop = True
                        hops = p_stop_order - r_stop_order
                        current_distance_m = 0.0
                        path_found_in_graph = True
                        
                        path_segment_df = route_sequence_df[
                            (route_sequence_df['stop_order_on_route'] >= r_stop_order) &
                            (route_sequence_df['stop_order_on_route'] <= p_stop_order)
                        ]
                        actual_stops_in_path_sequence = path_segment_df['stop_id_mapped'].tolist()
                        
                        if len(actual_stops_in_path_sequence) < 2:
                            if r_stop_id == p_stop_id:
                                current_distance_m = 0.0
                            else: 
                                path_found_in_graph = False
                        else:
                            for i in range(len(actual_stops_in_path_sequence) - 1):
                                from_s = actual_stops_in_path_sequence[i]
                                to_s = actual_stops_in_path_sequence[i+1]
                                
                                if G.has_edge(from_s, to_s):
                                    edge_data = G.get_edge_data(from_s, to_s)
                                    if edge_data.get('type') == 'transit' and edge_data.get('route_id') == route_id_val:
                                        current_distance_m += edge_data.get('distance_m', 0.0)
                                    else:
                                        path_found_in_graph = False; break
                                else:
                                    path_found_in_graph = False; break
                        
                        if not path_found_in_graph:
                            current_distance_m = None 
                            
                        connection_details = {
                            'origin_poi': origin_poi_name, 
                            'destination_poi': destination_poi_name,
                            'route_id': route_id_val,
                            'origin_stop_id': r_stop_id,
                            'destination_stop_id': p_stop_id,
                            'origin_stop_order': r_stop_order,
                            'destination_stop_order': p_stop_order,
                            'hops': hops,
                            'transit_distance_m': current_distance_m 
                        }
                        direct_transit_connections.append(connection_details)

            if not found_connection_on_this_route_for_r_stop and r_stop_id in stop_to_order_map :
                 origin_routes_no_destination_connection.append({
                     'origin_stop_id': r_stop_id,
                     'route_id': route_id_val,
                     'message': f"Route {route_id_val} serves {origin_poi_name} area stop {r_stop_id} but does not connect to any target {destination_poi_name} area stops in sequence."
                 })

    # --- Prepare Output DataFrames ---
    connections_df = pd.DataFrame()
    if direct_transit_connections:
        connections_df = pd.DataFrame(direct_transit_connections)
        connections_df = connections_df.sort_values(by=['route_id', 'origin_stop_order', 'hops'])

    no_connection_df = pd.DataFrame()
    if origin_routes_no_destination_connection:
        no_connection_df = pd.DataFrame(origin_routes_no_destination_connection).drop_duplicates()
        
    return connections_df, no_connection_df



place_to_building_connections_df, place_to_building_no_connection_df = direct_transit_conn_between_places(G, bus_timetables, place_of_interest_rahoon, place_nearby_stops, 
                                   place_of_interest_portershed, building_nearby_stops)
display(place_to_building_connections_df)


#####################################################################################
# --- Calculate Walking Distance from Origin POI (Rahoon) to its nearby bus stops ---
unique_origin_stops = place_to_building_connections_df['origin_stop_id'].unique()
origin_walking_distances_map = {}

for stop_id in unique_origin_stops:
    walking_distance = np.nan
    if G.has_node(stop_id):
        # Check for a direct access_egress edge FROM the Rahoon POI node TO the origin bus stop
        if G.has_edge(rahoon_node_id, stop_id):
            edge_data = G.get_edge_data(rahoon_node_id, stop_id)
            if edge_data.get('type') == 'access_egress':
                walking_distance = edge_data.get('distance_m', np.nan)
    else:
        print(f"Warning: Origin stop ID '{stop_id}' from place_to_building_connections_df not found in graph G.")
    origin_walking_distances_map[stop_id] = walking_distance

place_to_building_connections_df['walking_distance_from_origin_poi_m'] = place_to_building_connections_df['origin_stop_id'].map(origin_walking_distances_map)

# --- Calculate Walking Distance from Building to its nearby bus stops
unique_dest_stops = place_to_building_connections_df['destination_stop_id'].unique()
dest_walking_distances_map = {}  # To store {dest_stop_id: walking_distance_m}

for stop_id in unique_dest_stops:
    walking_distance = np.nan # Default to NaN if no direct walking edge found
    if G.has_node(stop_id): # 
        # Check for a direct access_egress edge from the bus stop TO the Portershed building node
        if G.has_edge(stop_id, portershed_node_id):
            edge_data = G.get_edge_data(stop_id, portershed_node_id)
            if edge_data.get('type') == 'access_egress': 
                walking_distance = edge_data.get('distance_m', np.nan)
    else:
        print(f"Warning: Destination stop ID '{stop_id}' from connections_df not found in graph G.")
        
    dest_walking_distances_map[stop_id] = walking_distance
place_to_building_connections_df['walking_distance_to_dest_poi_m'] = place_to_building_connections_df['destination_stop_id'].map(dest_walking_distances_map)

# create a total travel cost (transit_distance + walking_distance)
if 'transit_distance_m' in place_to_building_connections_df.columns:
    # Convert to numeric
    place_to_building_connections_df['numeric_origin_walk_dist'] = pd.to_numeric(place_to_building_connections_df['walking_distance_from_origin_poi_m'], errors='coerce')
    place_to_building_connections_df['numeric_transit_dist'] = pd.to_numeric(place_to_building_connections_df['transit_distance_m'], errors='coerce')
    place_to_building_connections_df['numeric_dest_walk_dist'] = pd.to_numeric(place_to_building_connections_df['walking_distance_to_dest_poi_m'], errors='coerce')

    
    # Calculate total distance only if all three components are available
    place_to_building_connections_df['total_journey_distance_m'] = place_to_building_connections_df[
        ['numeric_origin_walk_dist', 'numeric_transit_dist', 'numeric_dest_walk_dist']
    ].sum(axis=1, min_count=3) # min_count=3 ensures all parts are present 
        
    print("\n--- place_to_building_connections_df with updated total journey distance (Origin Walk + Transit + Destination Walk) ---")
    place_to_building_connections_df = place_to_building_connections_df.drop(columns=['numeric_origin_walk_dist', 'numeric_transit_dist', 'numeric_dest_walk_dist'])
    display(place_to_building_connections_df.sort_values(by='total_journey_distance_m'))

Origin (Rahoon) Target Stops: ['84605257301', '8460B5255001', '8460B5255201', '8460B5255401', '8460B5256901', '8460B5257001', '8460B5257101', '8460B5257201', '8460B5259601', '8460B5259701', '8460B5259801', '8460B5259901']
Destination (Portershed a Dó) Target Stops: ['8460B5220101', '8460B5220201', '8460B522331', '8460B5243601', '8460B5222901', '8460B522311', '8460B523201', '8460B5232101', '8460B5245001', '8460B5225401', '8460B5225501', '8460B5225601', '8460B5225901', '8460B5226101', '8460B5226201', '8460B5226301', '8460B5231801', '8460B5225701', '8460B5230101', '8460B5230201', '8460B5230301', '8460B5230401', '8460B5230501', '8460B5231901', '8460B5243701', '8460B5241201', '8460B5237101', '8460B5254101', '8460B525641']

Analyzing routes from Rahoon area stops towards Portershed a Dó area stops...


Unnamed: 0,origin_poi,destination_poi,route_id,origin_stop_id,destination_stop_id,origin_stop_order,destination_stop_order,hops,transit_distance_m
6,Rahoon,Portershed a Dó,405,8460B5256901,8460B5225601,0,9,9,5847.477722
9,Rahoon,Portershed a Dó,405,8460B5256901,8460B5230201,0,11,11,13479.613339
5,Rahoon,Portershed a Dó,405,8460B5256901,8460B522331,0,12,12,13859.20758
7,Rahoon,Portershed a Dó,405,8460B5256901,8460B5226201,0,13,13,14304.693633
8,Rahoon,Portershed a Dó,405,8460B5256901,8460B5226301,0,16,16,14470.900167
11,Rahoon,Portershed a Dó,405,8460B5257001,8460B5225601,1,9,8,5347.047644
14,Rahoon,Portershed a Dó,405,8460B5257001,8460B5230201,1,11,10,12979.18326
10,Rahoon,Portershed a Dó,405,8460B5257001,8460B522331,1,12,11,13358.777501
12,Rahoon,Portershed a Dó,405,8460B5257001,8460B5226201,1,13,12,13804.263555
13,Rahoon,Portershed a Dó,405,8460B5257001,8460B5226301,1,16,15,13970.470088



--- place_to_building_connections_df with updated total journey distance (Origin Walk + Transit + Destination Walk) ---


Unnamed: 0,origin_poi,destination_poi,route_id,origin_stop_id,destination_stop_id,origin_stop_order,destination_stop_order,hops,transit_distance_m,walking_distance_from_origin_poi_m,walking_distance_to_dest_poi_m,total_journey_distance_m
1,Rahoon,Portershed a Dó,405,84605257301,8460B5225601,7,9,2,1185.446039,729.996903,656.220341,2571.663283
11,Rahoon,Portershed a Dó,405,8460B5257001,8460B5225601,1,9,8,5347.047644,767.372702,656.220341,6770.640687
6,Rahoon,Portershed a Dó,405,8460B5256901,8460B5225601,0,9,9,5847.477722,726.462693,656.220341,7230.160757
4,Rahoon,Portershed a Dó,405,84605257301,8460B5230201,7,11,4,8817.581655,729.996903,184.293523,9731.872082
0,Rahoon,Portershed a Dó,405,84605257301,8460B522331,7,12,5,9197.175896,729.996903,319.517942,10246.690741
2,Rahoon,Portershed a Dó,405,84605257301,8460B5226201,7,13,6,9642.66195,729.996903,621.966129,10994.624981
3,Rahoon,Portershed a Dó,405,84605257301,8460B5226301,7,16,9,9808.868483,729.996903,786.207521,11325.072908
14,Rahoon,Portershed a Dó,405,8460B5257001,8460B5230201,1,11,10,12979.18326,767.372702,184.293523,13930.849485
9,Rahoon,Portershed a Dó,405,8460B5256901,8460B5230201,0,11,11,13479.613339,726.462693,184.293523,14390.369556
10,Rahoon,Portershed a Dó,405,8460B5257001,8460B522331,1,12,11,13358.777501,767.372702,319.517942,14445.668145


### 11. Public Transport Accessibility Score

In [6]:
import pandas as pd
import numpy as np

def calculate_exp_decay_accessibility_score(connections_df,
                                            distance_column='total_journey_distance_m',
                                            beta=0.0001, 
                                            weights={'d1': 0.50, 'd2': 0.30, 'd3': 0.20}):
    """
    Calculates an accessibility score (0-100, higher is better) based on exponential
    decay of the top three unique shortest journey distances, with given weights.

    Args:
        connections_df (pd.DataFrame): DataFrame containing journey data.
        distance_column (str): Name of the column with total journey distances.
        beta (float): The decay parameter for the exponential function.
                      Adjust based on distance units and desired sensitivity.
                      A common way to set beta is beta = ln(2) / d_half,
                      where d_half is the distance at which accessibility is halved.
                      (ln(2) is approx 0.693)
        weights (dict): Dictionary of weights for d1, d2, d3.

    Returns:
        float: The calculated accessibility score (0-100), or 0.0 if no valid distances.
    """
    if distance_column not in connections_df.columns:
        print(f"Error: Distance column '{distance_column}' not found in DataFrame.")
        return 0.0  

    valid_distances_df = connections_df.dropna(subset=[distance_column]).copy()
    if valid_distances_df.empty:
        print("No valid journey distances available to calculate score.")
        return 0.0

    valid_distances_df = valid_distances_df.sort_values(by=distance_column)
    unique_shortest_distances = valid_distances_df[distance_column].unique()
    
    if len(unique_shortest_distances) == 0:
        return 0.0

    distances_to_score = {'d1': np.nan, 'd2': np.nan, 'd3': np.nan}
    if len(unique_shortest_distances) >= 1: distances_to_score['d1'] = unique_shortest_distances[0]
    if len(unique_shortest_distances) >= 2: distances_to_score['d2'] = unique_shortest_distances[1]
    if len(unique_shortest_distances) >= 3: distances_to_score['d3'] = unique_shortest_distances[2]

    print(f"Using beta (decay parameter): {beta}")
   

    score_components = {}
    for key, d_val in distances_to_score.items():
        if not np.isnan(d_val) and d_val >= 0: # Ensure distance is non-negative
            component_score = 100 * np.exp(-beta * d_val)
            score_components[key] = component_score
        else:
            score_components[key] = 0.0 # No contribution or invalid distance


    final_accessibility_score = (weights.get('d1', 0) * score_components.get('d1', 0.0)) + \
                                (weights.get('d2', 0) * score_components.get('d2', 0.0)) + \
                                (weights.get('d3', 0) * score_components.get('d3', 0.0))
    
    return final_accessibility_score


# Calculate accessibility score
accessibility_score = round(calculate_exp_decay_accessibility_score(place_to_building_connections_df,
                                            distance_column='total_journey_distance_m',
                                            beta=0.0001, 
                                            weights={'d1': 0.50, 'd2': 0.30, 'd3': 0.20}),2)

# Get unique origin and destination from the connections dataframe
origin = place_to_building_connections_df['origin_poi'].iloc[0]
destination = place_to_building_connections_df['destination_poi'].iloc[0]


if accessibility_score >= 80:
    color = '\033[38;5;22m'   # Dark green for high accessibility
elif accessibility_score >= 60:
    color = '\033[38;5;27m'   # Blue for moderate accessibility
elif accessibility_score >= 30:
    color = '\033[38;5;214m'   # Orange for medium-low accessibility
else:
    color = '\033[38;5;196m'   # Red for low accessibility

print(f"{color}\033[1m\nAccessibility Score between {origin} and {destination} is: {accessibility_score}\033[0m")


Using beta (decay parameter): 0.0001
[38;5;27m[1m
Accessibility Score between Rahoon and Portershed a Dó is: 63.61[0m


### Save the artifacts 

In [None]:
# import pickle

# # Define file paths for all relevant artifacts
# places_summary_path = os.path.join(artifact_dir, "galway_places_summary_df1.csv")
# buildings_summary_path = os.path.join(artifact_dir, "galway_buildings_summary_df1.csv")
# bus_stops_path = os.path.join(artifact_dir, "bus_stops_gdf.csv")
# bus_timetables_path = os.path.join(artifact_dir, "bus_timetables.csv")
# bus_routes_path = os.path.join(artifact_dir, "bus_routes_gdf.csv")
# graph_pickle_path = os.path.join(artifact_dir, "galway_transport_graph.gpickle")
# place_to_building_connections_path = os.path.join(artifact_dir, "place_to_building_connections.csv")

# # Save the loaded dataframes and graph back to artifact_dir for reproducibility

# if os.path.isdir(artifact_dir):
#     # Save galway_places_summary_df1
#     if galway_places_summary_df1 is not None:
#         galway_places_summary_df1.to_csv(places_summary_path)
#     # Save galway_buildings_summary_df1
#     if galway_buildings_summary_df1 is not None:
#         galway_buildings_summary_df1.to_csv(buildings_summary_path)
#     # Save bus_stops_gdf
#     if bus_stops_gdf is not None:
#         bus_stops_gdf.to_csv(bus_stops_path)
#     # Save bus_timetables
#     if bus_timetables is not None:
#         bus_timetables.to_csv(bus_timetables_path)
#     # Save bus_routes_gdf
#     if bus_routes_gdf is not None:
#         bus_routes_gdf.to_csv(bus_routes_path)
#     # Save the graph G as a pickle file
#     if 'G' in locals() and G is not None:
#         with open(graph_pickle_path, "wb") as f:
#             pickle.dump(G, f, pickle.HIGHEST_PROTOCOL)
#     print("Artifacts saved successfully.")

#     if place_to_building_connections_df is not None:
#         place_to_building_connections_df.to_csv(place_to_building_connections_path)

### Load the artifacts 

In [2]:
# Define file paths for all relevant artifacts
places_summary_path = os.path.join(artifact_dir, "galway_places_summary_df1.csv")
buildings_summary_path = os.path.join(artifact_dir, "galway_buildings_summary_df1.csv")
bus_stops_path = os.path.join(artifact_dir, "bus_stops_gdf.csv")
bus_timetables_path = os.path.join(artifact_dir, "bus_timetables.csv")
bus_routes_path = os.path.join(artifact_dir, "bus_routes_gdf.csv")
graph_pickle_path = os.path.join(artifact_dir, "galway_transport_graph.gpickle")
place_to_building_connections_path = os.path.join(artifact_dir, "place_to_building_connections.csv")

# Load the dataframes and graph from artifact_dir if available
if os.path.isdir(artifact_dir):

    # Read places summary, preserving index if present
    if os.path.exists(places_summary_path):
        galway_places_summary_df1 = pd.read_csv(places_summary_path, index_col=0)
    else:
        galway_places_summary_df1 = None

    # Read buildings summary, preserving index (e.g., B1, B2, ...)
    if os.path.exists(buildings_summary_path):
        galway_buildings_summary_df1 = pd.read_csv(buildings_summary_path, index_col=0)
    else:
        galway_buildings_summary_df1 = None

    # Read bus stops, preserving index if present
    if os.path.exists(bus_stops_path):
        bus_stops_gdf = pd.read_csv(bus_stops_path, index_col=0)
    else:
        bus_stops_gdf = None

    # Read bus timetables, preserving index if present
    if os.path.exists(bus_timetables_path):
        bus_timetables = pd.read_csv(bus_timetables_path, index_col=0)
    else:
        bus_timetables = None

    # Read bus routes, preserving index (e.g., BR1, BR2, ...)
    if os.path.exists(bus_routes_path):
        bus_routes_gdf = pd.read_csv(bus_routes_path, index_col=0)
    else:
        bus_routes_gdf = None

    # Load the graph G from pickle if available
    if os.path.exists(graph_pickle_path):
        with open(graph_pickle_path, "rb") as f:
            G = pickle.load(f)
    else:
        G = None
    print("Artifacts loaded successfully.")
else:
    all_timetables_df = None
    galway_places_summary_df1 = None
    galway_buildings_summary_df1 = None
    bus_stops_gdf = None
    bus_timetables = None
    bus_routes_gdf = None
    G = None
    print("No artifact directory found. Dataframes and graph not loaded.")

if os.path.isdir(artifact_dir):
    if os.path.exists(place_to_building_connections_path):
        place_to_building_connections_df = pd.read_csv(place_to_building_connections_path, index_col=0)
    else:
        place_to_building_connections_df = None
else:
    place_to_building_connections_df = None

Artifacts loaded successfully.


In [3]:
print(f"Graph G loaded: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")
print(f"DataFrame place_to_building_connections_df loaded: {place_to_building_connections_df.shape}")
display(place_to_building_connections_df.head())

Graph G loaded: 888 nodes, 25733 edges
DataFrame place_to_building_connections_df loaded: (15, 12)


Unnamed: 0,origin_poi,destination_poi,route_id,origin_stop_id,destination_stop_id,origin_stop_order,destination_stop_order,hops,transit_distance_m,walking_distance_from_origin_poi_m,walking_distance_to_dest_poi_m,total_journey_distance_m
6,Rahoon,Portershed a Dó,405,8460B5256901,8460B5225601,0,9,9,5847.477722,726.462693,656.220341,7230.160757
9,Rahoon,Portershed a Dó,405,8460B5256901,8460B5230201,0,11,11,13479.613339,726.462693,184.293523,14390.369556
5,Rahoon,Portershed a Dó,405,8460B5256901,8460B522331,0,12,12,13859.20758,726.462693,319.517942,14905.188215
7,Rahoon,Portershed a Dó,405,8460B5256901,8460B5226201,0,13,13,14304.693633,726.462693,621.966129,15653.122456
8,Rahoon,Portershed a Dó,405,8460B5256901,8460B5226301,0,16,16,14470.900167,726.462693,786.207521,15983.570382


In [None]:
### Langchain
# !pip uninstall numpy

# ! pip install langchain openai pandas tabulate
# ! pip install langchain-ollama

# !pip install numpy==1.21.6

# connections_df= place_to_building_connections_df

# journey_df_filtered = connections_df[(connections_df['origin_stop_id'].astype(str) == '8460B5256901') & (connections_df['destination_stop_id'].astype(str) == '8460B5225601')].copy()

# journey_df_filtered

# connections_df= place_to_building_connections_df

# journey_df_filtered = connections_df[(connections_df['origin_stop_id'].astype(str) == '8460B5256901') & (connections_df['destination_stop_id'].astype(str) == '8460B5225601')].copy()

# journey_df_filtered

# LangChain

### LangChain Configuration 

In [7]:
## Model configuration 1
llm = ChatOllama(model="llama3") 
# print("ChatOllama initialized successfully with the new package.")
from langchain.tools import tool
from langchain import hub
from langchain.agents import AgentExecutor, create_react_agent 
from pydantic import BaseModel, Field
# !pip install -U langchain-openai

### Custom Functions 

In [8]:
# def get_journey_accessibility_info_v2(
#     origin_poi_name: str, 
#     destination_poi_name: str,
#     connections_df: pd.DataFrame # Pass the specific DataFrame as an argument
# ) -> str:
#     """
#     Provides accessibility information for a journey between a specified origin POI 
#     and destination POI using the provided connections_df. 
#     It dynamically calculates the accessibility score using 
#     'calculate_exp_decay_accessibility_score' and details the top journey option(s).
#     """
    
#     if connections_df is None or connections_df.empty:
#         return "Error: Input connections_df is not provided or is empty."

#     # Filter the DataFrame for the specific journey

#     # confirm the data is for the requested pair.
#     journey_df_filtered = connections_df[
#         (connections_df['origin_poi'].astype(str).str.lower() == origin_poi_name.lower()) &
#         (connections_df['destination_poi'].astype(str).str.lower() == destination_poi_name.lower())
#     ].copy() # Use .copy() to avoid SettingWithCopyWarning if you modify it later

#     if journey_df_filtered.empty:
#         return (f"No journey data found in the provided connections_df for "
#                 f"{origin_poi_name} to {destination_poi_name}.")

#     # Calculate the accessibility score using the function
#     # Make sure 'calculate_exp_decay_accessibility_score' function is defined and accessible
#     try:
#         # Pass the filtered DataFrame specific to this O-D pair for score calculation
#         accessibility_score = calculate_exp_decay_accessibility_score(journey_df_filtered) 
#     except Exception as e:
#         return f"Error calculating accessibility score: {e}. Make sure 'calculate_exp_decay_accessibility_score' is correctly defined and accessible."

#     # Sort by total journey distance to find the best options
#     top_options_df = journey_df_filtered.sort_values(by='total_journey_distance_m').reset_index(drop=True)

#     if top_options_df.empty: # Should not happen if journey_df_filtered was not empty, but good check
#         return (f"Could not determine top options for {origin_poi_name} to {destination_poi_name} after sorting.")

#     shortest_journey = top_options_df.iloc[0]

#     # Construct the output string
#     output_str = f"Accessibility Information for {origin_poi_name} to {destination_poi_name}:\n"
#     output_str += f"- Calculated Accessibility Score: {accessibility_score:.2f}\n"
#     output_str += f"- This score is based on an exponential decay of the shortest unique journey distances.\n"
    
#     output_str += f"\nDetails of the overall shortest journey option found:\n"
#     # Using .get with a default for robustness if a column is unexpectedly missing
#     output_str += f"  - Route ID(s) involved: {shortest_journey.get('route_id', 'N/A')}\n"
#     output_str += f"  - Origin Bus Stop ID: {shortest_journey.get('origin_stop_id', 'N/A')}\n"
#     output_str += f"  - Destination Bus Stop ID: {shortest_journey.get('destination_stop_id', 'N/A')}\n"
    
#     # Ensure float formatting only if the value is not 'N/A' (or handle potential non-numeric if 'N/A' is not from .get())
#     walk_origin_dist = shortest_journey.get('walking_distance_from_origin_poi_m', 'N/A')
#     transit_dist = shortest_journey.get('transit_distance_m', 'N/A')
#     walk_dest_dist = shortest_journey.get('walking_distance_to_dest_poi_m', 'N/A')
#     total_dist = shortest_journey.get('total_journey_distance_m', 'N/A')

#     output_str += f"  - Walking distance from {origin_poi_name}: {walk_origin_dist if isinstance(walk_origin_dist, str) else f'{walk_origin_dist:.2f}'} meters\n"
#     output_str += f"  - Transit distance: {transit_dist if isinstance(transit_dist, str) else f'{transit_dist:.2f}'} meters\n"
#     output_str += f"  - Walking distance to {destination_poi_name}: {walk_dest_dist if isinstance(walk_dest_dist, str) else f'{walk_dest_dist:.2f}'} meters\n"
#     output_str += f"  - Total Journey Distance: {total_dist if isinstance(total_dist, str) else f'{total_dist:.2f}'} meters\n"
    
#     unique_shortest_distances = top_options_df['total_journey_distance_m'].dropna().unique()
#     if len(unique_shortest_distances) > 0:
#         output_str += f"\nThe accessibility score considers the following shortest unique total journey distances (meters) from the data for this O-D pair:\n"
#         for i, dist_val in enumerate(sorted(unique_shortest_distances)[:3]): # Show top 3 unique sorted
#              output_str += f"  - d{i+1}: {dist_val:.2f}\n"

#     return output_str



def get_journey_accessibility_info_v2(
    origin_stop_id: str, 
    destination_stop_id: str,
    connections_df: pd.DataFrame 
) -> str:
    """
    Provides accessibility information for a journey between a specified origin_stop_id 
    and destination_stop_id using the provided connections_df.
    It dynamically calculates the accessibility score and details the top journey option(s).
    Retrieves POI names from the matched row for user-friendly output.
    """
    if connections_df is None or connections_df.empty:
        return "Error: Input connections_df is not provided or is empty."

    # Clean the input stop IDs (assuming they are strings)
    clean_origin_stop_id = str(origin_stop_id).strip()
    clean_destination_stop_id = str(destination_stop_id).strip()

    print(f"[get_journey_accessibility_info_v2 DEBUG] Filtering DataFrame for origin_stop_id='{clean_origin_stop_id}', destination_stop_id='{clean_destination_stop_id}'")
    
    # Filter by stop_id. Assuming stop_id columns are strings or can be reliably cast to strings for comparison.
    # If they are numbers, astype(str) might not be needed, or direct comparison could be used.
    origin_id = '8460B5256901'
    destination_ids_list = ['8460B5230201', '8460B5225601', '8460B522331']

    journey_df_filtered = connections_df[
        (connections_df['origin_stop_id'].astype(str) == origin_id) &
        (connections_df['destination_stop_id'].astype(str).isin(destination_ids_list))
    ].copy()

    print(f"[get_journey_accessibility_info_v2 DEBUG] Rows found after filtering by stop_id: {len(journey_df_filtered)}")

    if journey_df_filtered.empty:
        return (f"No journey data found in the provided connections_df for "
                f"origin_stop_id '{origin_stop_id}' to destination_stop_id '{destination_stop_id}'.")

    # Since we filtered by stop_id, we should retrieve the POI names from the found data
    # Assuming there's one primary match or we take the first one.
    # These will be used for the output message.
    # Add error handling in case these columns don't exist or row is empty after all
    try:
        # Get POI names from the *first row* of the filtered data
        # These are used for constructing the human-readable output message.
        actual_origin_poi_name = journey_df_filtered.iloc[0]['origin_poi']
        actual_destination_poi_name = journey_df_filtered.iloc[0]['destination_poi']
    except (IndexError, KeyError) as e:
        print(f"[get_journey_accessibility_info_v2 DEBUG] Could not retrieve POI names from filtered data: {e}")
        actual_origin_poi_name = f"Origin for stop {origin_stop_id}" # Fallback
        actual_destination_poi_name = f"Destination for stop {destination_stop_id}" # Fallback


    try:
        accessibility_score = calculate_exp_decay_accessibility_score(journey_df_filtered) 
    except Exception as e:
        return f"Error calculating accessibility score: {e}. Make sure 'calculate_exp_decay_accessibility_score' is correctly defined. Data for score calc had {len(journey_df_filtered)} rows."

    top_options_df = journey_df_filtered.sort_values(by='total_journey_distance_m').reset_index(drop=True)

    if top_options_df.empty: 
        return (f"Could not determine top options for stop_id {origin_stop_id} to {destination_stop_id} after sorting.")

    shortest_journey = top_options_df.iloc[0]

    # Use the retrieved POI names for the output
    output_str = f"Accessibility Information for journey from '{actual_origin_poi_name}' (Stop ID: {origin_stop_id}) to '{actual_destination_poi_name}' (Stop ID: {destination_stop_id}):\n"
    output_str += f"- Calculated Accessibility Score: {accessibility_score:.2f}\n"
    output_str += f"- This score is based on an exponential decay of the shortest unique journey distances.\n"
    
    output_str += f"\nDetails of the overall shortest journey option found:\n"
    output_str += f"  - Route ID(s) involved: {shortest_journey.get('route_id', 'N/A')}\n"
    # The origin/destination stop_ids are now the primary query keys, so we definitely have them
    output_str += f"  - Origin Bus Stop ID: {clean_origin_stop_id}\n"
    output_str += f"  - Destination Bus Stop ID: {clean_destination_stop_id}\n"
    
    walk_origin_dist = shortest_journey.get('walking_distance_from_origin_poi_m', 'N/A')
    transit_dist = shortest_journey.get('transit_distance_m', 'N/A')
    walk_dest_dist = shortest_journey.get('walking_distance_to_dest_poi_m', 'N/A')
    total_dist = shortest_journey.get('total_journey_distance_m', 'N/A')

    def format_dist(val):
        if isinstance(val, (int, float)) and not pd.isna(val):
            return f"{val:.2f}"
        return 'N/A'

    output_str += f"  - Walking distance from {actual_origin_poi_name}: {format_dist(walk_origin_dist)} meters\n"
    output_str += f"  - Transit distance: {format_dist(transit_dist)} meters\n"
    output_str += f"  - Walking distance to {actual_destination_poi_name}: {format_dist(walk_dest_dist)} meters\n"
    output_str += f"  - Total Journey Distance: {format_dist(total_dist)} meters\n"
    
    unique_shortest_distances = top_options_df['total_journey_distance_m'].dropna().unique()
    if len(unique_shortest_distances) > 0:
        output_str += f"\nThe accessibility score considers the following shortest unique total journey distances (meters) from the data for this O-D pair:\n"
        for i, dist_val in enumerate(sorted(unique_shortest_distances)[:3]): 
             output_str += f"  - d{i+1}: {dist_val:.2f}\n"

    return output_str

# def get_nearby_bus_stops_from_graph(G, poi_node_id: str, max_distance_meters: int = MAX_ACCESS_DISTANCE_METERS) -> str:
#     """
#     Finds bus stops connected to a POI node in the graph 'G' via 'access_egress' 
#     edges within a specified maximum walking distance.
#     The POI node ID must exist in the graph G.
#     """
#     # Access the globally loaded graph G.
#     # Make sure `G` is loaded in your script/notebook.
#     if 'G' not in globals() or not isinstance(G, nx.DiGraph):
#         return "Error: Graph G is not loaded or is not a valid NetworkX DiGraph."

#     if not G.has_node(poi_node_id):
#         return f"Error: POI node '{poi_node_id}' not found in the graph G."

#     nearby_stops_info = []
#     # Your notebook's logic for finding nearby stops (from Section 9)
#     # Iterate over outgoing edges from the POI node
#     if poi_node_id in G: # Check if node exists before querying edges
#         for u, v, data in G.out_edges(poi_node_id, data=True):
#             edge_type = data.get('type')
#             edge_distance = data.get('distance_m', float('inf'))

#             if edge_type == 'access_egress': # Check it's a walking/cycling edge
#                 # Check if the connected node 'v' is a bus stop
#                 if v in G and G.nodes[v].get('type') == 'bus_stop':
#                     if edge_distance <= max_distance_meters:
#                         stop_name = G.nodes[v].get('name', 'N/A') # Get bus stop name if available
#                         nearby_stops_info.append({
#                             'stop_id': v, 
#                             'stop_name': stop_name,
#                             'distance_m': edge_distance
#                         })
    
#     if not nearby_stops_info:
#         return f"No bus stops found within {max_distance_meters}m of '{poi_node_id}' in the graph."

#     output_str = f"Nearby bus stops for '{poi_node_id}' (within {max_distance_meters}m):\n"
#     for stop_info in sorted(nearby_stops_info, key=lambda x: x['distance_m']): # Sort by distance
#         output_str += (f"  - Stop ID: {stop_info['stop_id']}, Name: {stop_info['stop_name']}, "
#                        f"Distance: {stop_info['distance_m']:.2f} meters\n")
    
#     return output_str


In [9]:
origin_stop_id= '8460B5256901', 
destination_stop_id= ['8460B5230201', '8460B5225601', '8460B522331']

get_journey_accessibility_info_v2(origin_stop_id, destination_stop_id, place_to_building_connections_df)
# get_journey_accessibility_info_v2(rahoon_node_id, portershed_node_id, place_to_building_connections_df)
# display(get_nearby_bus_stops_from_graph(G, rahoon_node_id))
# rahoon_node_id, portershed_node_id
# place_to_building_connections_df


[get_journey_accessibility_info_v2 DEBUG] Filtering DataFrame for origin_stop_id='('8460B5256901',)', destination_stop_id='['8460B5230201', '8460B5225601', '8460B522331']'
[get_journey_accessibility_info_v2 DEBUG] Rows found after filtering by stop_id: 3
Using beta (decay parameter): 0.0001


"Accessibility Information for journey from 'Rahoon' (Stop ID: ('8460B5256901',)) to 'Portershed a Dó' (Stop ID: ['8460B5230201', '8460B5225601', '8460B522331']):\n- Calculated Accessibility Score: 35.88\n- This score is based on an exponential decay of the shortest unique journey distances.\n\nDetails of the overall shortest journey option found:\n  - Route ID(s) involved: 405\n  - Origin Bus Stop ID: ('8460B5256901',)\n  - Destination Bus Stop ID: ['8460B5230201', '8460B5225601', '8460B522331']\n  - Walking distance from Rahoon: 726.46 meters\n  - Transit distance: 5847.48 meters\n  - Walking distance to Portershed a Dó: 656.22 meters\n  - Total Journey Distance: 7230.16 meters\n\nThe accessibility score considers the following shortest unique total journey distances (meters) from the data for this O-D pair:\n  - d1: 7230.16\n  - d2: 14390.37\n  - d3: 14905.19\n"

### LangChain tool

In [10]:
# class JourneyDetailsInput(BaseModel):
#     origin_poi_name: str = Field(description="The name of the Place, for example, 'Rahoon'")
#     destination_poi_name: str = Field(description="The name of the building, for example, 'Portershed a Dó'")


# @tool(args_schema=JourneyDetailsInput)
# def get_accessibility_and_journey_details(
#     origin_poi_name: str, 
#     destination_poi_name: str,
# ) -> str:
#     """
#     Provides detailed accessibility information for a public transport journey 
#     between a specified origin Point of Interest (POI) and a destination POI.
#     It returns the dynamically calculated accessibility score and details of 
#     the top contributing journey options based on the pre-loaded 
#     'place_to_building_connections_df' DataFrame. 
#     The origin_poi_name and destination_poi_name must match entries in the DataFrame 
#     (e.g., 'Rahoon', 'Portershed a Dó').
#     """
#     # Ensure place_to_building_connections_df is accessible
#     if 'place_to_building_connections_df' not in globals() or place_to_building_connections_df.empty:
#         return "Error: Main journey data (place_to_building_connections_df) is not loaded or is empty."
    
#     # Call your actual function - it now takes connections_df from the global scope
#     return get_journey_accessibility_info_v2(origin_poi_name, destination_poi_name, place_to_building_connections_df) 
  
# @tool
# def get_accessibility_and_journey_details(origin_and_destination_query: str) -> str:
#     """
#     Provides detailed accessibility information for a public transport journey 
#     between an origin and a destination. 
#     The input 'origin_and_destination_query' should be a string clearly stating 
#     the origin and destination, for example: 'journey from Rahoon to Portershed a Dó' 
#     or simply 'Rahoon to Portershed a Dó'. 
#     The tool will attempt to parse out the origin and destination names.
#     """
#     cleaned_query = origin_and_destination_query.strip()
#     origin_poi_name = None
#     destination_poi_name = None

#     match_from_to = re.search(r"(?:from\s+)?(.*?)\s+to\s+(.*)", cleaned_query, re.IGNORECASE)
#     if match_from_to:
#         origin_poi_name = match_from_to.group(1).strip()
#         destination_poi_name = match_from_to.group(2).strip()
#     else:
#         return (f"Error: Could not reliably parse origin and destination from: '{cleaned_query}'. "
#                 f"Please phrase input like 'Origin to Destination'.")

#     if not origin_poi_name or not destination_poi_name:
#         return (f"Error: Failed to extract both origin ('{origin_poi_name}') and destination ('{destination_poi_name}') "
#                 f"from input: '{cleaned_query}'.")

#     if 'place_to_building_connections_df' not in globals() or place_to_building_connections_df.empty:
#         return "Error: Main journey data (place_to_building_connections_df) is not loaded or is empty."
    
#     return get_journey_accessibility_info_v2(
#         origin_poi_name,
#         destination_poi_name,
#         place_to_building_connections_df
#     )


# @tool
# def get_accessibility_and_journey_details(origin_and_destination_query: str) -> str:
#     """
#     Provides detailed accessibility information for a public transport journey 
#     between an origin and a destination. 
#     The input 'origin_and_destination_query' should be a string clearly stating 
#     the origin and destination, for example: 'journey from Rahoon to Portershed a Dó' 
#     or simply 'Rahoon to Portershed a Dó'. 
#     The tool will attempt to parse out the origin and destination names.
#     """
#     print(f"\n[TOOL DEBUG] get_accessibility_and_journey_details received raw input: '{origin_and_destination_query}'") 
#     cleaned_query = origin_and_destination_query.strip()
#     print(f"[TOOL DEBUG] Cleaned query: '{cleaned_query}'")
    
#     origin_poi_name = None
#     destination_poi_name = None

#     # Regex to capture origin and destination
#     # Tries to match "from [ORIGIN] to [DESTINATION]" or "[ORIGIN] to [DESTINATION]"
#     match_from_to = re.search(r"(?:from\s+)?(.*?)\s+to\s+(.*)", cleaned_query, re.IGNORECASE)
    
#     if match_from_to:
#         origin_poi_name = match_from_to.group(1).strip()
#         destination_poi_name = match_from_to.group(2).strip()
#         print(f"[TOOL DEBUG] Parsed by regex: origin='{origin_poi_name}', destination='{destination_poi_name}'")
#     else:
#         # Fallback parsing attempt if " to " isn't present.
#         # This is a simple fallback and might need to be more robust based on LLM output patterns.
#         parts = cleaned_query.split(',') # Example: "Rahoon, Portershed a Do"
#         if len(parts) == 2:
#             origin_poi_name = parts[0].strip()
#             destination_poi_name = parts[1].strip()
#             print(f"[TOOL DEBUG] Parsed by comma split: origin='{origin_poi_name}', destination='{destination_poi_name}'")
#         else:
#             print(f"[TOOL DEBUG] Parsing failed for: '{cleaned_query}' using regex and comma split.")
#             return (f"Error: Could not reliably parse origin and destination from the input: '{cleaned_query}'. "
#                     f"Please ensure the input clearly separates the origin and destination, ideally using 'from [Origin] to [Destination]' or '[Origin] to [Destination]'.")

#     if not origin_poi_name or not destination_poi_name:
#         return (f"Error: Failed to extract both valid origin ('{origin_poi_name}') and destination ('{destination_poi_name}') "
#                 f"from input: '{cleaned_query}'.")

#     # Check for the DataFrame in globals
#     if 'place_to_building_connections_df' not in globals() or globals()['place_to_building_connections_df'].empty:
#         print("[TOOL DEBUG] Error: place_to_building_connections_df not found or empty in global scope.")
#         return "Error: Main journey data (place_to_building_connections_df) is not loaded or is empty."
    
#     # Access the global DataFrame
#     current_connections_df = globals()['place_to_building_connections_df']
    
#     print(f"[TOOL DEBUG] Calling get_journey_accessibility_info_v2 with: origin='{origin_poi_name}', dest='{destination_poi_name}'")
#     result = get_journey_accessibility_info_v2(
#         origin_poi_name, 
#         destination_poi_name, 
#         current_connections_df # Pass the actual DataFrame
#     )
#     print(f"[TOOL DEBUG] Result from get_journey_accessibility_info_v2 (first 100 chars): {result[:100]}...")
#     return result


@tool
def get_accessibility_and_journey_details(stop_id_query: str) -> str:
    """
    Provides detailed accessibility information for a public transport journey 
    given an origin bus stop ID and a destination bus stop ID.
    The input 'stop_id_query' should be a string clearly stating these IDs, 
    for example: 'origin_stop_id: XXXXX, destination_stop_id: YYYYY' 
    or 'journey from stop XXXXX to stop YYYYY'.
    The tool will attempt to parse out the origin and destination stop IDs.
    """
    print(f"\n[TOOL DEBUG] get_accessibility_and_journey_details received raw stop_id_query: '{stop_id_query}'")
    cleaned_query = stop_id_query.strip()
    
    origin_stop_id = None
    destination_stop_id = None

    # Try to parse "origin_stop_id: XXXXX, destination_stop_id: YYYYY" format
    match_kv = re.search(r"origin_stop_id:\s*([\w\d]+)\s*,\s*destination_stop_id:\s*([\w\d]+)", cleaned_query, re.IGNORECASE)
    if match_kv:
        origin_stop_id = match_kv.group(1).strip()
        destination_stop_id = match_kv.group(2).strip()
    else:
        # Try to parse "from stop XXXXX to stop YYYYY" or "stop XXXXX to stop YYYYY"
        match_from_to = re.search(r"(?:from\s+stop\s+|stop\s+)([\w\d]+)\s+to\s+(?:stop\s+)?([\w\d]+)", cleaned_query, re.IGNORECASE)
        if match_from_to:
            origin_stop_id = match_from_to.group(1).strip()
            destination_stop_id = match_from_to.group(2).strip()
        else: # A more general attempt if specific keywords are missing
            parts = re.findall(r'([\w\d]+)', cleaned_query) # Find all alphanumeric sequences
            if len(parts) >= 2: # Take the first two found as potential IDs if other parsing fails
                 # This is a very liberal parsing, assumes IDs are among the first alphanumeric words
                potential_ids = [p for p in parts if any(char.isdigit() for char in p) and len(p) > 4] # Basic check for ID-like strings
                if len(potential_ids) >=2:
                    origin_stop_id = potential_ids[0]
                    destination_stop_id = potential_ids[1]
                else:
                    print(f"[TOOL DEBUG] Could not find two distinct ID-like parts in '{cleaned_query}'")


    print(f"[TOOL DEBUG] Parsed stop IDs: origin_stop_id='{origin_stop_id}', destination_stop_id='{destination_stop_id}'")

    if not origin_stop_id or not destination_stop_id:
        return (f"Error: Could not reliably parse both origin_stop_id and destination_stop_id from: '{cleaned_query}'. "
                f"Please use formats like 'origin_stop_id: X, destination_stop_id: Y' or 'from stop X to stop Y'.")

    if 'place_to_building_connections_df' not in globals() or globals()['place_to_building_connections_df'].empty:
        print("[TOOL DEBUG] Error: place_to_building_connections_df not found or empty.")
        return "Error: Main journey data (place_to_building_connections_df) is not loaded or is empty."
    
    current_connections_df = globals()['place_to_building_connections_df']
    
    print(f"[TOOL DEBUG] Calling get_journey_accessibility_info_v2 with: origin_stop_id='{origin_stop_id}', dest_stop_id='{destination_stop_id}'")
    result = get_journey_accessibility_info_v2(
        origin_stop_id,
        destination_stop_id,
        current_connections_df
    )
    print(f"[TOOL DEBUG] Result from get_journey_accessibility_info_v2 (first 100 chars): {result[:100]}...")
    return result


 
# @tool
# def find_nearby_bus_stops(poi_node_id: str) -> str:
#     """
#     Finds bus stops connected to a Point of Interest (POI) node in the transport graph 'G' 
#     via 'access_egress' (walking) edges, within a default maximum distance of 
#     800 meters. The poi_node_id must be an exact match to a node ID in the graph G
#     (e.g., 'Rahoon' for a place, or a specific building name like 'Portershed a Dó').
#     Returns a list of nearby stop IDs, their names, and distances.
#     """
#     # Ensure G is accessible
#     if 'G' not in globals() or not isinstance(G, nx.DiGraph):
#         return "Error: Transport graph G is not loaded or is not a valid NetworkX DiGraph."
        
#     # Call your actual function
#     return get_nearby_bus_stops_from_graph(G, poi_node_id, MAX_ACCESS_DISTANCE_METERS)

# create a list of these tools for the agent
# tools = [find_nearby_bus_stops]
tools = [get_accessibility_and_journey_details]
# tools = [get_accessibility_and_journey_details, find_nearby_bus_stops]
print(f"Defined {len(tools)} tools for the LangChain agent.")


# Pull a standard ReAct prompt template
# This prompt provides the LLM with instructions on how to reason and use tools.
prompt_template = hub.pull("hwchase17/react") # Harrison Chase Reasoning and Acting agent framework(ReAct)
prompt_template
agent = create_react_agent(llm, tools, prompt_template)

#LangChain Agent
# The AgentExecutor runs the agent, calls tools, and gets responses
agent_executor = AgentExecutor(
    agent=agent, 
    tools=tools, 
    verbose=True, # Set to True to see the agent's thought process and actions
    handle_parsing_errors=True, # Helps with robustness if LLM output is not perfectly formatted
    max_iterations=5 # Prevents runaway agents if it gets stuck in a loop, adjust as needed
)
print("LangChain ReAct Agent Executor created successfully and ready to use!")


# --Query 1 --

response = agent_executor.invoke({
    "input": "What is the accessibility score and journey details for origin_stop_id: 8460B5256901, destination_stop_id: 8460B5225601? Would you like to give any recommendations based on your knowledge?"
})
print(response["output"])


Defined 1 tools for the LangChain agent.




LangChain ReAct Agent Executor created successfully and ready to use!


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mLet's get started!

Thought: I need to use the `get_accessibility_and_journey_details` tool to retrieve the accessibility information for the given bus stop IDs.

Action: get_accessibility_and_journey_details
Action Input: origin_stop_id: 8460B5256901, destination_stop_id: 8460B5225601
[0m
[TOOL DEBUG] get_accessibility_and_journey_details received raw stop_id_query: 'origin_stop_id: 8460B5256901, destination_stop_id: 8460B5225601
'
[TOOL DEBUG] Parsed stop IDs: origin_stop_id='8460B5256901', destination_stop_id='8460B5225601'
[TOOL DEBUG] Calling get_journey_accessibility_info_v2 with: origin_stop_id='8460B5256901', dest_stop_id='8460B5225601'
[get_journey_accessibility_info_v2 DEBUG] Filtering DataFrame for origin_stop_id='8460B5256901', destination_stop_id='8460B5225601'
[get_journey_accessibility_info_v2 DEBUG] Rows found after filtering by stop_id: 3


### LangChain Agent

In [None]:
# Pull a standard ReAct prompt template
# This prompt provides the LLM with instructions on how to reason and use tools.
prompt_template = hub.pull("hwchase17/react") # Harrison Chase Reasoning and Acting agent framework(ReAct)
prompt_template
agent = create_react_agent(llm, tools, prompt_template)

# The AgentExecutor runs the agent, calls tools, and gets responses
agent_executor = AgentExecutor(
    agent=agent, 
    tools=tools, 
    verbose=True, # Set to True to see the agent's thought process and actions
    handle_parsing_errors=True, # Helps with robustness if LLM output is not perfectly formatted
    max_iterations=5 # Prevents runaway agents if it gets stuck in a loop, adjust as needed
)
print("LangChain ReAct Agent Executor created successfully and ready to use!")


# --Query 1 --

response = agent_executor.invoke({
    "input": "What is the accessibility score and journey details for origin_stop_id: 8460B5256901, destination_stop_id: 8460B5225601? Give two recommendations based on your knowledge?"
})
print(response["output"])


# print("\n--- Query 1: Accessibility Details ---")
# response1 = agent_executor.invoke({
#     "input": "What is the accessibility score and the shortest journey details for a trip from Rahoon to Portershed a Dó?"
# })
# print("\nAgent's Final Answer:")
# print(response1["output"])


In [None]:

# # --Query 2 --
# print("\n--- Query 2: Nearby Stops for Rahoon ---")
# response2 = agent_executor.invoke({
#     "input": "Can you find the bus stops near Rahoon?"
# })
# print("\nAgent's Final Answer:")
# print(response2["output"])

### 12. Rahoon-Portershed Public Transport Accessibility Map for Galway - (2)

In [30]:
# # --- Find the Shortest Journey ---
# shortest_journey_path_details = None # To store details of the shortest path

# if 'place_to_building_connections_df' in locals() and \
#    not place_to_building_connections_df.empty and \
#    'total_journey_distance_m' in place_to_building_connections_df.columns:

#     # Drop rows where total journey distance is NaN
#     valid_journeys_df = place_to_building_connections_df.dropna(subset=['total_journey_distance_m']).copy()
    
#     if not valid_journeys_df.empty:
#         valid_journeys_df = valid_journeys_df.sort_values(by='total_journey_distance_m')
#         shortest_journey_row = valid_journeys_df.iloc[0].copy() # Get the top row (shortest)

#         # --- Store the necessary details for plotting ---
#         shortest_journey_path_details = {
#             'origin_poi_node_id': rahoon_node_id, # variable for Rahoon POI node ID
#             'origin_bus_stop_id': shortest_journey_row['origin_stop_id'],
#             'route_id': shortest_journey_row['route_id'],
#             'origin_bus_stop_order': shortest_journey_row['origin_stop_order'],
#             'destination_bus_stop_order': shortest_journey_row['destination_stop_order'],
#             'destination_bus_stop_id': shortest_journey_row['destination_stop_id'],
#             'destination_poi_node_id': portershed_node_id, # var for Portershed POI
#             'total_distance': shortest_journey_row['total_journey_distance_m']
#         }
#         print("\n--- Shortest Journey Details for Plotting ---")
#         print(f"Origin POI: {shortest_journey_path_details['origin_poi_node_id']}")
#         print(f"Origin Bus Stop: {shortest_journey_path_details['origin_bus_stop_id']}")
#         print(f"Route ID: {shortest_journey_path_details['route_id']}")
#         print(f"Destination Bus Stop: {shortest_journey_path_details['destination_bus_stop_id']}")
#         print(f"Destination POI: {shortest_journey_path_details['destination_poi_node_id']}")
#         print(f"Total Distance: {shortest_journey_path_details['total_distance']:.2f}m")

#         # Get the sequence of transit stops for this shortest path
#         route_seq_df_shortest = bus_timetables[
#             bus_timetables['route_id'] == shortest_journey_path_details['route_id']
#         ].sort_values(by='stop_order_on_route')
        
#         path_segment_df_shortest = route_seq_df_shortest[
#             (route_seq_df_shortest['stop_order_on_route'] >= shortest_journey_path_details['origin_bus_stop_order']) &
#             (route_seq_df_shortest['stop_order_on_route'] <= shortest_journey_path_details['destination_bus_stop_order'])
#         ]
#         shortest_journey_path_details['transit_stop_sequence_ids'] = path_segment_df_shortest['stop_id_mapped'].tolist()
#         print(f"Transit Stop Sequence: {shortest_journey_path_details['transit_stop_sequence_ids']}")
#     else:
#         print("No valid journeys with calculated total distances found to select the shortest.")
# else:
#     print("place_to_building_connections_df not found, empty, or 'total_journey_distance_m' column missing.")



# ###################################################################################################

# # Configure osmnx settings and logging
# ox.config(log_console=True, use_cache=False)
# logging.basicConfig(level=logging.INFO)
# warnings.filterwarnings("ignore")

# # --- Configuration ---
# place_name = "Galway, Ireland"
# shapefile_base_dir = '/Users/njindal/Documents/aic2025/data/ireland-and-northern-ireland-latest-free.shp' 

# shapefile_layers = {
#     'roads': 'gis_osm_roads_free_1.shp',
#     'water_poly': 'gis_osm_water_a_free_1.shp',
#     'railways': 'gis_osm_railways_free_1.shp',
#     'waterways': 'gis_osm_waterways_free_1.shp',
#     'landuse': 'gis_osm_landuse_a_free_1.shp',
#     'buildings': 'gis_osm_buildings_a_free_1.shp',
#     'places_poly': 'gis_osm_places_a_free_1.shp'
# }

# print(f"\n--- Processing Data for: {place_name} ---")
# print(f"Using Shapefile directory: {shapefile_base_dir}")

# try:
#     # --- *** GET GALWAY BOUNDARY *** ---
#     print("\nFetching boundary for Galway...")
#     boundary_gdf = ox.geocode_to_gdf(place_name).to_crs("EPSG:4326")
#     if boundary_gdf.empty:
#         raise ValueError(f"Could not geocode '{place_name}'.")
#     print(f"Boundary fetched. CRS set to: {boundary_gdf.crs}")



#     # --- *** LOAD IRELAND SHAPEFILES & CLIP TO GALWAY BOUNDARY *** ---
#     print("\nLoading and clipping Ireland-wide layers to Galway boundary...")
#     galway_gdfs = {}
#     for layer_name, shp_filename in shapefile_layers.items():
#         shp_path = os.path.join(shapefile_base_dir, shp_filename)
#         print(f"--- Processing layer: {layer_name} ---")
#         if not os.path.exists(shp_path):
#             print(f"*** WARNING: Shapefile not found: {shp_path} - Skipping layer '{layer_name}' ***")
#             continue
#         try:
#             ireland_layer_gdf = gpd.read_file(shp_path)
#             if ireland_layer_gdf.crs != boundary_gdf.crs:
#                 ireland_layer_gdf = ireland_layer_gdf.to_crs(boundary_gdf.crs)
#             clipped_gdf = gpd.clip(ireland_layer_gdf, boundary_gdf, keep_geom_type=True)
#             if not clipped_gdf.empty:
#                 galway_gdfs[layer_name] = clipped_gdf
#             else:
#                 print(f"Note: No features found for layer '{layer_name}'.")
#         except Exception as e:
#             print(f"*** ERROR processing layer '{layer_name}': {e} ***")



#     # --- *** PREPARE BUS STOP GEODATAFRAME FROM GSTOPS_DF_V1 *** --- 
#     print("\nPreparing Galway Bus Stop data from gstops_df_v1...")
#     bus_stops_gdf = None
#     if 'gstops_df_v1' in locals() and isinstance(gstops_df_v1, pd.DataFrame) and not gstops_df_v1.empty:
#         # Ensure 'stop_lat' and 'stop_lon' columns exist
#         if 'stop_lat' in gstops_df_v1.columns and 'stop_lon' in gstops_df_v1.columns:
#             try:
#                 # Drop rows with invalid (NaN) coordinates before creating GeoDataFrame
#                 temp_stops_df = gstops_df_v1.dropna(subset=['stop_lat', 'stop_lon']).copy()
                
#                 if not temp_stops_df.empty:
#                     bus_stops_gdf = gpd.GeoDataFrame(
#                         temp_stops_df,
#                         geometry=gpd.points_from_xy(temp_stops_df['stop_lon'], temp_stops_df['stop_lat']),
#                         crs="EPSG:4326"  
#                     )
#                     print(f"Created GeoDataFrame 'bus_stops_gdf' with {len(bus_stops_gdf)} stops from gstops_df_v1.")
#                     # Reproject if CRS doesn't match the boundary CRS
#                     if bus_stops_gdf.crs != boundary_gdf.crs:
#                         print(f"Reprojecting bus stops GDF to {boundary_gdf.crs}...");
#                         bus_stops_gdf = bus_stops_gdf.to_crs(boundary_gdf.crs)
#                         print("Reprojection complete.")
#                 else:
#                     print("Warning: No valid coordinates found in gstops_df_v1 after cleaning.")
#             except Exception as e:
#                 print(f"*** ERROR converting gstops_df_v1 data: {e} ***")
#                 bus_stops_gdf = None
#         else:
#             print("Warning: 'stop_lat' or 'stop_lon' columns not found in gstops_df_v1.")
#     else:
#         print("Warning: 'gstops_df_v1' DataFrame not found or is empty. Please load it first.")


#     # --- *** PREPARE BUS ROUTES GEODATAFRAME FROM gvariations_df_v1 *** --- 
#     bus_routes_gdf = None # Initialize

#     if 'gvariations_df_v1' in locals() and isinstance(gvariations_df_v1, pd.DataFrame) and not gvariations_df_v1.empty and \
#     'bus_stops_gdf' in locals() and isinstance(bus_stops_gdf, gpd.GeoDataFrame) and not bus_stops_gdf.empty:

#         print("\nEnriching gvariations_df_v1 with first/last stop Point geometries...")
        
#         bus_routes_gdf = gvariations_df_v1.copy()
        
#         if 'stop_id' in bus_stops_gdf.columns and 'geometry' in bus_stops_gdf.columns:
            
#             # --- Handle duplicate stop_ids in bus_stops_gdf to get a unique map ---
#             # A single physical stop_id has one location, regardless of how many route directions use it.
#             # We keep the first occurrence of each stop_id to get its unique geometry.
#             bus_stops_gdf_unique_locations = bus_stops_gdf.drop_duplicates(subset=['stop_id'], keep='first')
            
#             # Create the mapping series from this de-duplicated DataFrame
#             stop_id_to_point_geometry = bus_stops_gdf_unique_locations.set_index('stop_id')['geometry']
            
#         else:
#             print("Error: 'stop_id' or 'geometry' column not found in bus_stops_gdf. Cannot map stop Point geometries.")
#             stop_id_to_point_geometry = pd.Series(dtype='object') 

#         # Map first stop Point geometry
#         bus_routes_gdf['first_stop_point'] = bus_routes_gdf['first_stop_id'].map(stop_id_to_point_geometry)
        
#         # Map last stop Point geometry
#         bus_routes_gdf['last_stop_point'] = bus_routes_gdf['last_stop_id'].map(stop_id_to_point_geometry)
        
#         num_first_stops_mapped = bus_routes_gdf['first_stop_point'].notna().sum()
#         num_last_stops_mapped = bus_routes_gdf['last_stop_point'].notna().sum()
        
#         print(f"Successfully mapped Point geometry for {num_first_stops_mapped} first stops.")
#         print(f"Successfully mapped Point geometry for {num_last_stops_mapped} last stops.")

#         # Check if any mappings failed (resulting in NaNs)
#         if bus_routes_gdf['first_stop_point'].isnull().any() or bus_routes_gdf['last_stop_point'].isnull().any():
#             print("Warning: Some first/last stop points could not be mapped (resulting in NaNs).")
#             # You could print these rows for inspection:
#             # print(bus_routes_gdf[bus_routes_gdf['first_stop_point'].isnull() | bus_routes_gdf['last_stop_point'].isnull()])


#         print("\n--- bus_routes_gdf (with Point geometries) ---")
#         # Display relevant columns to check the mapping
#         display_cols = ['first_stop_id', 'first_stop_point', 'last_stop_id', 'last_stop_point']
#         # Add other columns from gvariations_df_v1 if they provide context
#         if 'route_id' in bus_routes_gdf.columns: display_cols.insert(0, 'route_id')
#         if 'direction_id' in bus_routes_gdf.columns: display_cols.insert(1, 'direction_id')

#         print(bus_routes_gdf[display_cols].head())
#         print(f"Shape of bus_routes_gdf: {bus_routes_gdf.shape}")

#     else:
#         print("\nPrerequisite DataFrames ('gvariations_df_v1' or 'bus_stops_gdf') not available or empty. Cannot create bus_routes_gdf.")


#       # --- *** CREATE PLACE SUMMARY DATAFRAME *** ---
#     print("\nCreating DataFrame for Galway Place Names and Coordinates...")
#     galway_places_summary_df = None # Initialize
#     if 'places_poly' in galway_gdfs and not galway_gdfs['places_poly'].empty:
#         places_data = []
#         # Check if the 'name' column exists
#         if 'name' not in galway_gdfs['places_poly'].columns:
#             print("Warning: 'name' column not found in places_poly layer. Cannot extract place names.")
#         else:
#             # Iterate through valid polygons with names
#             for idx, row in galway_gdfs['places_poly'][galway_gdfs['places_poly']['name'].notna() & galway_gdfs['places_poly'].geometry.is_valid].iterrows():
#                 place_name_val = row['name']; geometry = row.geometry; rep_point = None
#                 # Get representative point (or centroid as fallback)
#                 if hasattr(geometry, 'representative_point'):
#                     try: rep_point = geometry.representative_point()
#                     except Exception: rep_point = geometry.centroid # Fallback if representative_point fails
#                 else: rep_point = geometry.centroid # Fallback if method doesn't exist
#                 # Append if point is valid
#                 if rep_point and rep_point.is_valid:
#                     places_data.append({'place_name': place_name_val,'latitude': rep_point.y,'longitude': rep_point.x})
#             # Create DataFrame if data was extracted
#             if places_data:
#                 galway_places_summary_df = pd.DataFrame(places_data)
#                 print(f"Created DataFrame 'galway_places_summary_df' with {len(galway_places_summary_df)} places.")
#                 print(galway_places_summary_df.head())
#             else: print("No valid places with names found to create summary DataFrame.")
#     else: print("Clipped 'places_poly' GeoDataFrame not found or is empty.")

#     galway_places_summary_df1 = None # Initialize

#     if 'galway_places_summary_df' in locals() and isinstance(galway_places_summary_df, pd.DataFrame) and not galway_places_summary_df.empty:
#         galway_places_summary_df1 = galway_places_summary_df.copy()
#         if 'place_name' in galway_places_summary_df1.columns:
#             galway_places_summary_df1 = galway_places_summary_df1.sort_values('place_name').reset_index(drop=True)
#         else:
#             print("Warning: 'place_name' column not found for sorting. Index will be based on current order.")

#         # Create custom indices starting with 'P'
#         place_indices = [f'P{i+1}' for i in range(len(galway_places_summary_df1))]
#         galway_places_summary_df1.index = place_indices

#         print("\nCreated DataFrame 'galway_places_summary_df1' with custom 'P' indices:")
#         print(f"Number of places: {len(galway_places_summary_df1)}")
#         print("\nFirst few rows of 'galway_places_summary_df1':")
#         print(galway_places_summary_df1.head())
#     else:
#         print("Cannot create 'galway_places_summary_df1' as 'galway_places_summary_df' is not available or is empty.")
#     # --- *** END PLACES SECTION *** ---



# # --- *** CHECK RAHOON PLACE ID FOR PLOTTING *** ---
#     rahoon_place_id = None # To store the 'P' index if Rahoon is found
#     if 'galway_places_summary_df1' in locals() and isinstance(galway_places_summary_df1, pd.DataFrame) and not galway_places_summary_df1.empty:
#         if 'place_name' in galway_places_summary_df1.columns:
#             # Search for 'Rahoon' in the 'place_name' column 
#             rahoon_search_results = galway_places_summary_df1[galway_places_summary_df1['place_name'].str.contains('Rahoon', case=False, na=False)]

#             if not rahoon_search_results.empty:
#                 print(f"\n--- Found 'Rahoon' in galway_places_summary_df1 ---")
#                 rahoon_place_data = rahoon_search_results.iloc[0]
#                 rahoon_place_id = rahoon_place_data.name 
#                 print(f"Place Name: {rahoon_place_data['place_name']}")
#                 print(f"Index (ID): {rahoon_place_id}")
#                 print(f"Latitude: {rahoon_place_data['latitude']}")
#                 print(f"Longitude: {rahoon_place_data['longitude']}")
#             else:
#                 print("\nPlace name containing 'Rahoon' not found in galway_places_summary_df1.")
#         else:
#             print("\n'place_name' column not found in galway_places_summary_df1.")
#     else:
#         print("\nDataFrame 'galway_places_summary_df1' not available for searching 'Rahoon'.")


# # --- *** CREATE BUILDINGS SUMMARY DATAFRAME *** ---
#     print("\nCreating DataFrame for Galway Buildings with Type and Coordinates...")
#     galway_buildings_summary_df = None # Initialize
#     if 'buildings' in galway_gdfs and not galway_gdfs['buildings'].empty:
#         buildings_data = []

#         # Check what columns are available in the buildings layer
#         print(f"Available columns in buildings layer: {galway_gdfs['buildings'].columns.tolist()}")

#         # Extract building info - name, osm_id, and type (typically in fclass or type column)
#         for idx, row in galway_gdfs['buildings'][galway_gdfs['buildings'].geometry.is_valid].iterrows():
#             osm_id = row.get('osm_id', None)
#             name = row.get('name', None)
#             building_type = None
#             for type_col in ['fclass', 'type', 'building']:
#                 if type_col in row and row[type_col] is not None:
#                     building_type = row[type_col]; break
#             try:
#                 centroid = row.geometry.centroid
#                 if centroid and centroid.is_valid:
#                     buildings_data.append({
#                         'building_name': name, 'osm_id': osm_id, 'building_type': building_type,
#                         'latitude': centroid.y, 'longitude': centroid.x
#                     })
#             except Exception as e: print(f"Error calculating centroid for building {osm_id}: {e}")

#         if buildings_data:
#             galway_buildings_summary_df = pd.DataFrame(buildings_data)
#             print(f"Created DataFrame 'galway_buildings_summary_df' with {len(galway_buildings_summary_df)} buildings.")
#             print(galway_buildings_summary_df.head())
#         else: print("No valid building data found to create summary DataFrame.")
#     else: print("Clipped 'buildings' GeoDataFrame not found or is empty.")

#     # --- *** REFINE BUILDING SUMMARY DATAFRAME *** ---
#     galway_buildings_summary_df1 = None # Initialize
#     if galway_buildings_summary_df is not None:
#         galway_buildings_summary_df1 = galway_buildings_summary_df[galway_buildings_summary_df['building_name'].notnull()].copy()
#         galway_buildings_summary_df1 = galway_buildings_summary_df1.sort_values('building_name')
#         building_indices = [f'B{i+1}' for i in range(len(galway_buildings_summary_df1))]
#         galway_buildings_summary_df1.index = building_indices
#         print("\nCreated filtered DataFrame 'galway_buildings_summary_df1' with named buildings:")
#         print(f"Number of named buildings: {len(galway_buildings_summary_df1)}")
#         print("\nFirst few rows of filtered DataFrame:")
#         print(galway_buildings_summary_df1.head())
#     else: print("Cannot create filtered DataFrame as galway_buildings_summary_df is None")

#     # --- *** END BUILDINGS SECTION *** ---



#     # -- PRODUCTION CODE --
#     # we should have place_nearby_stop_ids and building_nearby_stop_ids to run this code
#     # Filter bus_stops_gdf for these specific stops
#     # Ensure bus_stops_gdf is available and has a 'stop_id' column
#     bus_stops_near_rahoon_gdf = None
#     bus_stops_near_portershed_gdf = None

#     if 'bus_stops_gdf' in locals() and bus_stops_gdf is not None and not bus_stops_gdf.empty:
#         if place_nearby_stop_ids:
#             bus_stops_near_rahoon_gdf = bus_stops_gdf[bus_stops_gdf['stop_id'].isin(place_nearby_stop_ids)]
#         if building_nearby_stop_ids:
#             bus_stops_near_portershed_gdf = bus_stops_gdf[bus_stops_gdf['stop_id'].isin(building_nearby_stop_ids)]
#     else:
#         print("Warning: bus_stops_gdf not available for filtering nearby stops.")
#     # --- END PRODUCTION CODE ---



#     # --- *** PLOTTING CLIPPED GALWAY DATA *** ---
#     print("\nPlotting clipped Galway map layers...")
#     fig, ax = plt.subplots(figsize=(18, 18), facecolor='white', dpi=250)

#     # Define base colors
#     color_water = '#a8dff5'; color_land = '#f2f4f6'; color_parks = '#cceac4'
#     color_buildings_osm = '#d8cabc' # Renamed to avoid conflict
#     color_roads = '#aaaaaa'; color_rail = '#a0a0a0';color_place_text = '#36454F'  # Charcoal for place labels
    
#     # Define bus stop color
#     color_bus_stops_blue = '#1E90FF' # Dodger blue for all bus stops


#     # -- PRODUCTION CODE --
#     # Define NEW colors for nearby stops
#     color_nearby_rahoon_stops = '#32CD32'  # Lime Green
#     color_nearby_portershed_stops = '#FFD700' # Gold (or choose another distinct color like a different shade of green)
#     nearby_stop_marker_size = 35 # Slightly larger than general, smaller than POIs
#     # --- END PRODUCTION CODE ---

#     # Set background
#     ax.set_facecolor(color_land)

#     # Define approximate z-orders
#     zorder_landuse=1; zorder_water_poly=2; zorder_parks=3; zorder_buildings_layer=4 # General buildings layer
#     zorder_waterways=5; zorder_railways=6; zorder_roads=7;
#     zorder_bus_stops_plot = 8    # Z-order for general bus stops
#     zorder_nearby_stops_plot = zorder_bus_stops_plot + 0.1  # production code
#     zorder_place_text = 9        # Z-order for general place name labels

#     # Z-orders for the specific B422 building highlight - Portershed
#     zorder_building_b422_point = 10  
#     zorder_building_b422_text = 11  

#     # Z-orders for the specific 'Rahoon' place highlight
#     zorder_rahoon_place_point = 10 
#     zorder_rahoon_place_text = 11  


#     zorder_boundary = 12   # Boundary should be having highest zorder to frame everything
    

#     # Plot base layers
#     if 'landuse' in galway_gdfs: galway_gdfs['landuse'].plot(ax=ax, column='fclass', categorical=True, cmap='Pastel2', alpha=0.4, zorder=zorder_landuse)
#     if 'water_poly' in galway_gdfs: galway_gdfs['water_poly'].plot(ax=ax, color=color_water, edgecolor='none', zorder=zorder_water_poly)
#     if 'landuse' in galway_gdfs and 'fclass' in galway_gdfs['landuse'].columns:
#         parks_gdf = galway_gdfs['landuse'][galway_gdfs['landuse']['fclass'] == 'park']
#         if not parks_gdf.empty: parks_gdf.plot(ax=ax, color=color_parks, edgecolor='none', zorder=zorder_parks)
#     if 'buildings' in galway_gdfs: galway_gdfs['buildings'].plot(ax=ax, facecolor=color_buildings_osm, alpha=0.7, lw=0.5, edgecolor=color_buildings_osm, zorder=zorder_buildings_layer)
#     if 'waterways' in galway_gdfs: galway_gdfs['waterways'].plot(ax=ax, color=color_water, linewidth=1.0, zorder=zorder_waterways)
#     if 'railways' in galway_gdfs:
#         galway_gdfs['railways'].plot(ax=ax, color='#ffffff', linewidth=2.0, linestyle='-', zorder=zorder_railways)
#         galway_gdfs['railways'].plot(ax=ax, color=color_rail, linewidth=1.0, linestyle='-', zorder=zorder_railways + 0.1)
#     if 'roads' in galway_gdfs: galway_gdfs['roads'].plot(ax=ax, color=color_roads, linewidth=0.8, zorder=zorder_roads)

#     # --- Plot ALL Bus Stops from gstops_df_v1 as BLUE DOTS ---
#     if bus_stops_gdf is not None and not bus_stops_gdf.empty:
#         bus_stops_gdf.plot(
#             ax=ax,
#             color=color_bus_stops_blue, # Use the defined blue color
#             marker='o',
#             markersize=15,             
#             edgecolor='black',        
#             linewidth=0.5,
#             alpha=0.9,
#             zorder=zorder_bus_stops_plot, # Ensure they are on top of most layers
#             label='Bus Stops (All)'
#         )
#         print(f"Plotted {len(bus_stops_gdf)} bus stops from gstops_df_v1 as blue dots.")
#     else:
#         print("No bus stops from gstops_df_v1 to plot.")

#     # -- PRODUCTION CODE --
#     # --- *** NEW: Plot Bus Stops Near Rahoon *** ---
#     if bus_stops_near_rahoon_gdf is not None and not bus_stops_near_rahoon_gdf.empty:
#         bus_stops_near_rahoon_gdf.plot(
#             ax=ax,
#             color=color_nearby_rahoon_stops,
#             marker='o',
#             markersize=nearby_stop_marker_size, # Use new smaller size
#             edgecolor='black',
#             linewidth=0.7,
#             alpha=0.9,
#             zorder=zorder_nearby_stops_plot, # Higher z-order
#             label='Stops near Rahoon'
#         )
#         print(f"Plotted {len(bus_stops_near_rahoon_gdf)} bus stops near Rahoon.")

#             # --- *** NEW: Plot Bus Stops Near Portershed *** ---
#     if bus_stops_near_portershed_gdf is not None and not bus_stops_near_portershed_gdf.empty:
#         bus_stops_near_portershed_gdf.plot(
#             ax=ax,
#             color=color_nearby_portershed_stops,
#             marker='o',
#             markersize=nearby_stop_marker_size, # Use new smaller size
#             edgecolor='black',
#             linewidth=0.7,
#             alpha=0.9,
#             zorder=zorder_nearby_stops_plot, # Higher z-order
#             label='Stops near Portershed'
#         )
#         print(f"Plotted {len(bus_stops_near_portershed_gdf)} bus stops near Portershed.")
#     # --- END PRODUCTION CODE ---

    

#     # --- Plot Place Names (No Circles) ---
#     if galway_places_summary_df is not None and not galway_places_summary_df.empty:
#         print(f"Plotting {len(galway_places_summary_df)} place names...")
#         plotted_place_names_map = set()
#         for idx, row in galway_places_summary_df.iterrows():
#             label = row['place_name']; point_x = row['longitude']; point_y = row['latitude']
#             if label not in plotted_place_names_map:
#                 ax.text(point_x, point_y + 0.0002, label, fontsize=8, color=color_place_text,
#                         ha='center', va='bottom', zorder=zorder_place_text, fontweight='normal',
#                         path_effects=[matplotlib.patheffects.withStroke(linewidth=1, foreground='w')])
#                 plotted_place_names_map.add(label)
#         print("Place names plotted.")

#     # --- *** PLOT B422 BUILDING - PORTERSHED *** ---
#     if 'galway_buildings_summary_df1' in locals() and galway_buildings_summary_df1 is not None and not galway_buildings_summary_df1.empty:
#         building_point_color = '#FF5733' # Orange
#         building_text_color = '#000000'  # Black
#         plotted_b422 = False
#         # Ensure B422 exists in your dataframe's index
#         if 'B422' in galway_buildings_summary_df1.index:
#             row = galway_buildings_summary_df1.loc['B422']
#             point_x = row['longitude']
#             point_y = row['latitude']
#             building_name = row['building_name']
            
#             # Plot orange circle for B422
#             plt.scatter(point_x, point_y, s=60, color=building_point_color, edgecolor='black', # Increased size (s=60)
#                         linewidth=1, alpha=0.9, zorder=zorder_building_b422_point, label=f'Building: {building_name}')
            
#             # Plot name label for B422
#             ax.text(point_x, point_y + 0.0003, building_name, fontsize=7, color=building_text_color, 
#                     ha='center', va='bottom', zorder=zorder_building_b422_text, fontweight='bold',
#                     path_effects=[matplotlib.patheffects.withStroke(linewidth=1, foreground='white')])
#             plotted_b422 = True
#             print(f"Plotted orange circle and name label for building B422 ('{building_name}').")
#         else:
#             print("Building B422 not found in the DataFrame 'galway_buildings_summary_df1'.")
#     else:
#         print("DataFrame 'galway_buildings_summary_df1' not available for plotting B422.")
#     # --- *** END OF B422 PLOTTING CODE *** ---   



#     # --- *** PLOT SPECIFIC PLACE 'RAHOON' *** ---
#     if 'rahoon_place_id' in locals() and rahoon_place_id is not None and \
#        'galway_places_summary_df1' in locals() and galway_places_summary_df1 is not None and \
#        not galway_places_summary_df1.empty:

#         if rahoon_place_id in galway_places_summary_df1.index:
#             place_row = galway_places_summary_df1.loc[rahoon_place_id]
#             point_x = place_row['longitude']
#             point_y = place_row['latitude']
#             place_name_label = place_row['place_name'] 

#             place_point_color = '#9400D3' # Dark Violet 
#             place_text_color = '#000000'   # Black

#             # Plot distinct circle for 'Rahoon'
#             plt.scatter(point_x, point_y, s=70, color=place_point_color, edgecolor='black', 
#                         linewidth=1, alpha=0.9, zorder=zorder_rahoon_place_point, label=f'Place: {place_name_label}')

#             # Plot name label for 'Rahoon'
#             ax.text(point_x, point_y + 0.00035, place_name_label, fontsize=7.5, color=place_text_color,
#                     ha='center', va='bottom', zorder=zorder_rahoon_place_text, fontweight='bold',
#                     path_effects=[matplotlib.patheffects.withStroke(linewidth=1, foreground='white')])
#             print(f"Plotted distinct circle and name label for place: '{place_name_label}' (ID: {rahoon_place_id}).")
#         else:
#             print(f"Place with ID '{rahoon_place_id}' (expected to be Rahoon) not found in galway_places_summary_df1.index for plotting.")
#     else:
#         print("Rahoon was not identified or 'galway_places_summary_df1' is not available for plotting specific place.")
#     # --- *** END OF 'RAHOON' PLOTTING CODE *** ---


#     # Plot boundary outline for context last
#     boundary_gdf.plot(ax=ax, facecolor='none', edgecolor='black', linewidth=0.5, linestyle='--', zorder=zorder_boundary)

#     # --- Set Map Bounds ---
#     if 'roads' in galway_gdfs and not galway_gdfs['roads'].empty:
#         minx, miny, maxx, maxy = galway_gdfs['roads'].total_bounds
#     else:
#         minx, miny, maxx, maxy = boundary_gdf.total_bounds
#     margin_factor = 0.02
#     margin_x = (maxx - minx) * margin_factor
#     margin_y = (maxy - miny) * margin_factor
#     ax.set_xlim(minx - margin_x, maxx + margin_x)
#     ax.set_ylim(miny - margin_y, maxy + margin_y)
#     ax.set_aspect('equal', adjustable='box')

#     # (Inside your main plotting cell [12], within the `try` block, in the
# #  "PLOT THE SHORTEST JOURNEY PATH" section)

# # --- *** PLOT THE SHORTEST JOURNEY PATH *** ---
#     if shortest_journey_path_details:
#         print("\nPlotting the shortest journey path...")
        
#         # Define styles for the path
#         walk_color = 'dimgrey' # Darker grey 
#         walk_linestyle = '--'
#         walk_linewidth = 1.8
        
  
#         transit_color = '#E60000' 
#         transit_linestyle = '-'
#         transit_linewidth = 3.0
#         transit_alpha = 0.85 # transparency

#         path_zorder = zorder_boundary + 1 

#         # Helper to get coordinates 
#         def get_node_coords(node_id, graph, places_gdf, buildings_gdf, stops_gdf):
    
#             # return (longitude, latitude) for any given node ID
#             if graph.has_node(node_id):
#                 node_data = graph.nodes[node_id]
#                 if 'x' in node_data and 'y' in node_data:
#                     return node_data['x'], node_data['y']
                
#                 # Fallback logic using 'type' and 'id' if present in G.nodes[node_id]
#                 node_type = node_data.get('type')
#                 original_id = node_data.get('id', node_id) # 

#                 if node_type == 'place' and places_gdf is not None:
#                     # places_gdf index is the ID we need (e.g. 'P1', 'P2')
#                     if original_id in places_gdf.index:
#                         geom = places_gdf.loc[original_id].geometry
#                         return geom.x, geom.y
#                 elif node_type == 'building' and buildings_gdf is not None:
#                     # buildings_gdf index is the ID (e.g. 'B1', 'B22')
#                     if original_id in buildings_gdf.index:
#                         geom = buildings_gdf.loc[original_id].geometry
#                         return geom.x, geom.y
#                 elif node_type == 'bus_stop' and stops_gdf is not None and 'stop_id' in stops_gdf.columns:
#                     # 'original_id' value to match in 'stop_id' column
#                     stop_row = stops_gdf[stops_gdf['stop_id'] == original_id]
#                     if not stop_row.empty:
#                         geom = stop_row.iloc[0].geometry
#                         return geom.x, geom.y
#             return None, None


#         try:
#             # 1. Origin POI to Origin Bus Stop (Walk)
#             o_poi_x, o_poi_y = get_node_coords(shortest_journey_path_details['origin_poi_node_id'], G, galway_places_summary_df1, galway_buildings_summary_df1, bus_stops_gdf)
#             o_bs_x, o_bs_y = get_node_coords(shortest_journey_path_details['origin_bus_stop_id'], G, galway_places_summary_df1, galway_buildings_summary_df1, bus_stops_gdf)
#             walk_path_label_set = False
#             if o_poi_x and o_bs_x:
#                 ax.plot([o_poi_x, o_bs_x], [o_poi_y, o_bs_y], color=walk_color, linestyle=walk_linestyle, 
#                         linewidth=walk_linewidth, zorder=path_zorder, label='Shortest Path (Walk)', alpha=transit_alpha)
#                 walk_path_label_set = True
        

#             # 2. Transit Segment (Bus)
#             transit_nodes_sequence = shortest_journey_path_details['transit_stop_sequence_ids']
#             if len(transit_nodes_sequence) >= 2:
#                 transit_path_label_set = False
#                 for i in range(len(transit_nodes_sequence) - 1):
#                     from_node_id = transit_nodes_sequence[i]
#                     to_node_id = transit_nodes_sequence[i+1]
#                     from_x, from_y = get_node_coords(from_node_id, G, galway_places_summary_df1, galway_buildings_summary_df1, bus_stops_gdf)
#                     to_x, to_y = get_node_coords(to_node_id, G, galway_places_summary_df1, galway_buildings_summary_df1, bus_stops_gdf)
                    
#                     if from_x and to_x:
#                         current_label = 'Shortest Path (Transit)' if not transit_path_label_set else None
#                         line, = ax.plot([from_x, to_x], [from_y, to_y], color=transit_color, linestyle=transit_linestyle, 
#                                     linewidth=transit_linewidth, zorder=path_zorder, label=current_label, alpha=transit_alpha)
#                         # Add path effect
#                         line.set_path_effects([path_effects.Stroke(linewidth=transit_linewidth + 1.5, foreground='white', alpha=0.6),
#                                             path_effects.Normal()])
#                         if not transit_path_label_set: transit_path_label_set = True
                

#             # 3. Destination Bus Stop to Destination POI (Walk)
#             d_bs_x, d_bs_y = get_node_coords(shortest_journey_path_details['destination_bus_stop_id'], G, galway_places_summary_df1, galway_buildings_summary_df1, bus_stops_gdf)
#             d_poi_x, d_poi_y = get_node_coords(shortest_journey_path_details['destination_poi_node_id'], G, galway_places_summary_df1, galway_buildings_summary_df1, bus_stops_gdf)
#             if d_bs_x and d_poi_x:
#                 current_walk_label = 'Shortest Path (Walk)' if not walk_path_label_set else None
#                 ax.plot([d_bs_x, d_poi_x], [d_bs_y, d_poi_y], color=walk_color, linestyle=walk_linestyle, 
#                         linewidth=walk_linewidth, zorder=path_zorder, label=current_walk_label, alpha=transit_alpha)
                
                
#             # Update legend to ensure new labels are included and no duplicates
#             handles, labels = ax.get_legend_handles_labels()
#             by_label = dict(zip(labels, handles)) 
#             ax.legend(by_label.values(), by_label.keys(), loc='upper right', fontsize='small')
#             print("Shortest journey path plotted (if details were available).")

#         except Exception as e_plot:
#             print(f"Error during shortest path plotting: {e_plot}")
#             # import traceback # Already imported at top level of notebook usually
#             traceback.print_exc()
#     else:
#         print("\nNo shortest journey path details available to plot.")


#     # Final plot adjustments
#     ax.set_title(f"Galway Map with Bus Stops (from gstops_df_v1)", color='black', fontsize=16)
#     plt.legend(loc='upper right') # add a legend
#     plt.axis('off')
#     plt.tight_layout()
#     plt.show()
 

# except FileNotFoundError as e:
#     print(f"\n--- File Error ---\n{e}\nPlease ensure file paths are correct.")
# except ImportError as e:
#     print(f"\n--- Import Error Occurred ---\nError: {e}\nPlease ensure required libraries are installed.")
# except ValueError as e:
#     print(f"\n--- Value Error ---\n{e}")
# except Exception as e:
#     print(f"\n--- An Unexpected Error Occurred ---\nError: {e}")
#     import traceback
#     traceback.print_exc()
