# Points Generation and Origin-Destination Pair Creation

This notebook generates random points within city districts and creates origin-destination (OD) pairs based on population and area products.

## Overview

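The notebook performs the following steps:
1. Generate random points within city district boundaries
2. Calculate district combinations and their products (population and area)
3. Scale the products to determine the number of OD pairs
4. Assign each random point to its district with a spatial join
5. Create population-based OD pairs by sampling random points from each district combination
6. Create area-based OD pairs by sampling random points from each district combination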

In [2]:
"""
Import required libraries for geospatial data processing and analysis.
"""
import geopandas as gpd
import random
from shapely.geometry import Point
import pandas as pd
from itertools import combinations_with_replacement

# Import configuration settings
import config

## Step 1: Generate Random Points Within City Districts

Load the city district boundaries and generate random points within each district. These points will serve as potential origins and destinations for transit route analysis.

In [4]:
def generate_random_points_within(city_gdf, num_points):
    """
    Generate random points within city district boundaries.

    Candidates are drawn uniformly from the bounding box of ``city_gdf`` and
    kept only when at least one district geometry contains them (rejection
    sampling). The loop runs until ``num_points`` candidates have been
    accepted, so it only terminates if the geometries actually cover part of
    their bounding box.

    Args:
        city_gdf: GeoDataFrame containing city district boundaries
        num_points: Number of random points to generate

    Returns:
        GeoDataFrame containing random points within the city boundaries,
        using the same CRS as ``city_gdf``

    Raises:
        ValueError: If points are requested but ``city_gdf`` has no usable
            bounding box (no rows, or only empty/missing geometries)
    """
    min_x, min_y, max_x, max_y = city_gdf.total_bounds

    # NaN bounds fail every comparison, so this single check also covers a
    # GeoDataFrame without any usable geometry. Without it the rejection
    # loop below would spin forever because no candidate is ever accepted.
    has_valid_bounds = bool(min_x <= max_x and min_y <= max_y)
    if num_points > 0 and not has_valid_bounds:
        raise ValueError(
            "Cannot generate random points: city_gdf has no valid bounds "
            "(it is empty or contains only empty geometries)"
        )

    points = []
    while len(points) < num_points:
        candidate = Point(
            random.uniform(min_x, max_x),
            random.uniform(min_y, max_y),
        )
        # Keep the candidate only if it lies inside at least one district
        if city_gdf.contains(candidate).any():
            points.append(candidate)

    return gpd.GeoDataFrame(geometry=points, crs=city_gdf.crs)


# Read the district polygons from the path given in the configuration
city_districts = gpd.read_file(str(config.CITY_GEOJSON_FILE))

# Sample points inside the districts and reproject them to WGS84 (EPSG:4326)
# so that the coordinates are proper longitude/latitude values
city_random_points = generate_random_points_within(
    city_districts, config.NUM_RANDOM_POINTS
).to_crs(epsg=config.WGS84_EPSG)

# Ensure the output directory exists, then write the points as GeoJSON
config.ensure_directory(config.RANDOM_POINTS_GEOJSON)
city_random_points.to_file(
    str(config.RANDOM_POINTS_GEOJSON), driver="GeoJSON", encoding='utf-8'
)

# Expose the coordinates as plain columns (y is latitude, x is longitude)
city_random_points["latitude"] = city_random_points.geometry.y
city_random_points["longitude"] = city_random_points.geometry.x

# Write only the coordinate columns to CSV
config.ensure_directory(config.RANDOM_POINTS_CSV)
city_random_points.loc[:, ["latitude", "longitude"]].to_csv(
    str(config.RANDOM_POINTS_CSV), index=False
)

## Step 2: Calculate District Combinations

Generate every ordered pair of districts, including each district paired with itself and both directions of every distinct pair (A→B and B→A), and calculate the product of their populations and areas. These products will be used to determine the number of origin-destination pairs to generate for each district combination.

In [5]:
def calculate_district_combinations(city_gdf, value_column, value_name):
    """
    Calculate all possible district combinations and their product values.

    Every district is paired with itself once, and every pair of different
    districts appears in both directions (A-B and B-A) with the same product.

    Args:
        city_gdf: GeoDataFrame containing city districts with values; must
            provide a "layer" column holding the district names
        value_column: Column name containing the values to multiply (e.g., 'population', 'area')
        value_name: Name for the product column (e.g., 'Population Product', 'Area Product')

    Returns:
        DataFrame with the columns "District 1", "District 2" and
        ``value_name``. The columns are present even when ``city_gdf`` has no
        rows, so downstream column lookups keep working on empty input.
    """
    columns = ["District 1", "District 2", value_name]

    # Extract district names and their respective values
    district_values = list(zip(city_gdf["layer"], city_gdf[value_column]))

    # Walk all unordered combinations (including a district with itself) and
    # emit the mirrored row for pairs of different districts
    results = []
    for (d1, v1), (d2, v2) in combinations_with_replacement(district_values, 2):
        product = v1 * v2
        results.append({"District 1": d1, "District 2": d2, value_name: product})
        # Add reverse pair if districts are different (A-B and B-A are both needed)
        if d1 != d2:
            results.append({"District 1": d2, "District 2": d1, value_name: product})

    # Passing the columns explicitly keeps the schema stable for empty input
    return pd.DataFrame(results, columns=columns)


# Show which attribute columns the district data provides
print("Available columns:", city_districts.columns.tolist())

# Pairwise products of the district populations
df_population_products = calculate_district_combinations(
    city_gdf=city_districts,
    value_column="population",
    value_name="Population Product",
)

# Pairwise products of the district areas
df_area_products = calculate_district_combinations(
    city_gdf=city_districts,
    value_column="area",
    value_name="Area Product",
)

# Write the population products to CSV
config.ensure_directory(config.POPULATION_PRODUCT_COMBINATIONS_CSV)
df_population_products.to_csv(str(config.POPULATION_PRODUCT_COMBINATIONS_CSV), index=False)

# Write the area products to CSV
config.ensure_directory(config.AREA_PRODUCT_COMBINATIONS_CSV)
df_area_products.to_csv(str(config.AREA_PRODUCT_COMBINATIONS_CSV), index=False)


Index(['layer', 'population', 'area', 'density', 'geometry'], dtype='object')


## Step 3: Scale Products to Determine OD Pair Counts

Apply scaling factors to the population and area products to determine the actual number of origin-destination pairs to generate. This scaling is necessary because the raw products are too large to use directly.

In [16]:
# Scale the raw population products and truncate them to integer pair counts
df_population_products['Population Product Modified'] = (
    df_population_products['Population Product']
    .mul(config.POPULATION_SCALING_FACTOR)
    .astype(int)
)

# Scale the raw area products and truncate them to integer pair counts
df_area_products['Area Product Modified'] = (
    df_area_products['Area Product']
    .mul(config.AREA_SCALING_FACTOR)
    .astype(int)
)

# Overwrite the CSV files so they include the scaled columns
df_population_products.to_csv(str(config.POPULATION_PRODUCT_COMBINATIONS_CSV), index=False)
df_area_products.to_csv(str(config.AREA_PRODUCT_COMBINATIONS_CSV), index=False)

## Step 4: Assign Districts to Random Points

Perform a spatial join to assign each random point to its corresponding district. This allows us to sample points from specific districts when creating OD pairs.

In [7]:
# The random points were reprojected to WGS84, while the districts are still
# in the CRS of their source file. A spatial join compares raw coordinates,
# so bring the districts into the points' CRS first if the two differ.
districts_for_join = city_districts
if districts_for_join.crs != city_random_points.crs:
    districts_for_join = districts_for_join.to_crs(city_random_points.crs)

# Perform spatial join to assign districts to random points
points_with_districts = gpd.sjoin(
    city_random_points, 
    districts_for_join, 
    predicate='within'
)

# Save points with district assignments
config.ensure_directory(config.RANDOM_POINTS_WITH_DISTRICTS_GEOJSON)
points_with_districts.to_file(
    str(config.RANDOM_POINTS_WITH_DISTRICTS_GEOJSON), 
    driver="GeoJSON"
)

# Count points per district
points_per_district = points_with_districts.groupby('layer').size().reset_index(
    name='point_count'
)

print("Points per district:")
print(points_per_district)

        layer  point_count
0     Centrum          762
1  Nieuw-West         1923
2       Noord         1728
3        Oost         1426
4        West          892
5   Westpoort          481
6        Zuid         1379
7   Zuuidoost         1409


## Step 5: Generate Origin-Destination Pairs (Population-Based)

Create origin-destination pairs by sampling random points from district combinations based on population products. Each district pair will have a number of OD pairs proportional to the scaled population product.

In [13]:
def generate_od_pairs(product_df, points_gdf, num_pairs_column, output_path):
    """
    Generate origin-destination pairs by sampling random points from district combinations.

    For every row of ``product_df`` the requested number of origins is drawn
    from the points of "District 1" and the same number of destinations from
    the points of "District 2". Origins and destinations are drawn
    independently and with replacement, so a point can be used more than once.
    Combinations where either district has no points are skipped.

    Args:
        product_df: DataFrame with district combinations and number of pairs to generate
        points_gdf: GeoDataFrame with random points and their district assignments
            (district name in the "layer" column)
        num_pairs_column: Column name containing the number of pairs to generate
        output_path: Path where the output CSV file should be saved

    Returns:
        DataFrame containing the generated OD pairs with the columns
        "origin_lat", "origin_lon", "destination_lat" and "destination_lon".
        The columns are present even when no pair was generated.
    """
    od_columns = ["origin_lat", "origin_lon", "destination_lat", "destination_lon"]

    # Extract the coordinates once instead of touching geometry objects for
    # every single pair
    coordinates = pd.DataFrame({
        "layer": points_gdf["layer"].to_numpy(),
        "lat": points_gdf.geometry.y.to_numpy(),
        "lon": points_gdf.geometry.x.to_numpy(),
    })

    # Coordinates of the candidate points, keyed by district name
    district_points = {
        district: group[["lat", "lon"]]
        for district, group in coordinates.groupby("layer")
        if not group.empty
    }

    # Generate OD pairs for each district combination
    sampled_frames = []
    for _, row in product_df.iterrows():
        district_1 = row["District 1"]
        district_2 = row["District 2"]
        num_pairs = int(row[num_pairs_column])

        # Nothing to draw for zero (or negative) counts
        if num_pairs <= 0:
            continue

        # Skip combinations where one of the districts has no points
        if district_1 not in district_points or district_2 not in district_points:
            continue

        # One vectorised draw per side replaces num_pairs single-row draws;
        # replace=True keeps each pick independent of the others
        origins = district_points[district_1].sample(n=num_pairs, replace=True)
        destinations = district_points[district_2].sample(n=num_pairs, replace=True)

        sampled_frames.append(pd.DataFrame({
            "origin_lat": origins["lat"].to_numpy(),
            "origin_lon": origins["lon"].to_numpy(),
            "destination_lat": destinations["lat"].to_numpy(),
            "destination_lon": destinations["lon"].to_numpy(),
        }))

    # Keep the column layout stable even when no pair was generated, so the
    # CSV always carries a header
    if sampled_frames:
        od_pairs_df = pd.concat(sampled_frames, ignore_index=True)
    else:
        od_pairs_df = pd.DataFrame(columns=od_columns)

    # Save the result
    config.ensure_directory(output_path)
    od_pairs_df.to_csv(str(output_path), index=False)

    return od_pairs_df


# Build the population-weighted OD pairs and write them to the configured CSV
od_pairs_population = generate_od_pairs(
    product_df=df_population_products,
    points_gdf=points_with_districts,
    num_pairs_column="Population Product Modified",
    output_path=config.OD_PAIRS_POPULATION_CSV,
)

# Report how many pairs were produced
print(f"Generated {len(od_pairs_population)} OD pairs based on population products")

## Step 6: Generate Origin-Destination Pairs (Area-Based)

Create origin-destination pairs by sampling random points from district combinations based on area products. This provides an alternative analysis based on geographic area rather than population.

In [17]:
# Build the area-weighted OD pairs and write them to the configured CSV
od_pairs_area = generate_od_pairs(
    product_df=df_area_products,
    points_gdf=points_with_districts,
    num_pairs_column="Area Product Modified",
    output_path=config.OD_PAIRS_AREA_CSV,
)

# Report how many pairs were produced
print(f"Generated {len(od_pairs_area)} OD pairs based on area products")