This script calculates statistics for each latitude/longitude bin from the future urban potential prediction CSV files (SSP126, SSP370 and SSP585 scenarios) for use in figure making.
Author: Junjie Yang
Date: 2025-03-01

In [2]:
from pathlib import Path

import geopandas as gpd
import numpy as np
import pandas as pd

1. Load files

In [10]:
# Input locations -- replace the placeholders with the actual paths
shapefile_path = "/path/to/0.05_deg_grid_shapefile"
csv_path_126 = "/path/to/ssp126_prediction_csv_file"
csv_path_370 = "/path/to/ssp370_prediction_csv_file"
csv_path_585 = "/path/to/ssp585_prediction_csv_file"

# Read the future-probability prediction table of each of the 3 scenarios
# (read order: 585, 370, 126)
comparison_df_585, comparison_df_370, comparison_df_126 = (
    pd.read_csv(csv_path)
    for csv_path in (csv_path_585, csv_path_370, csv_path_126)
)

# Read the global 0.05-degree grid shapefile
gdf = gpd.read_file(shapefile_path)

2. Rename columns and merge DataFrames

In [11]:
# Scenario label -> prediction table loaded for that scenario
scenarios = dict(zip(
    ('585', '370', '126'),
    (comparison_df_585, comparison_df_370, comparison_df_126),
))

# Statistic columns that get a per-scenario suffix
base_columns = [
    'current_probability_mean', 'current_probability_std',
    'future_probability_mean', 'future_probability_std',
    'probability_change_mean', 'probability_change_std',
]

# Tag every statistic column with its scenario label (in place), so the
# three tables can sit side by side after merging
for label, table in scenarios.items():
    suffixed = {}
    for col in base_columns:
        suffixed[col] = f"{col}_{label}"
    table.rename(columns=suffixed, inplace=True)

# Give the grid the same key column name as the CSV tables, then attach each
# scenario table; the inner join keeps only ids present on both sides
gdf.rename(columns={'ID': 'id'}, inplace=True)
for table in scenarios.values():
    gdf = gdf.merge(table, on='id', how='inner')

# Discard every column whose name starts with "Unnamed", then renumber rows
is_unnamed = gdf.columns.str.contains('^Unnamed')
gdf = gdf.loc[:, ~is_unnamed].reset_index(drop=True)

3. Bin the data by latitude or longitude in preparation for visualization, and export the binned statistics as Parquet files.

In [None]:
# Scenarios to process (default: ['585', '370', '126'])
ssps = ['585', '370', '126']
# Dimensions to bin by (default: ['longitude', 'latitude'])
dims = ['longitude', 'latitude']
# Bin size in degrees (default: 1°)
bin_size = 1
# Output directory for saving parquet files
out_dir_path = '/path/to/out_directory/'

# extract centroid coordinate (longitude or latitude) from geometry
def get_coordinate_array(gdf, dim):
    """Return one centroid coordinate per geometry as a NumPy array.

    Parameters
    ----------
    gdf : object with an iterable ``geometry`` attribute
        Each geometry must expose ``centroid.x`` and ``centroid.y``.
    dim : str
        'longitude' selects the centroid x value, 'latitude' the centroid y
        value.

    Returns
    -------
    numpy.ndarray
        Coordinates in the iteration order of ``gdf.geometry``.

    Raises
    ------
    ValueError
        If *dim* is neither 'longitude' nor 'latitude'.
    """
    # Reject unknown dimensions up front, before touching any geometry
    if dim not in ('longitude', 'latitude'):
        raise ValueError(f"Invalid dimension: {dim}")

    axis = 'x' if dim == 'longitude' else 'y'
    return np.array([getattr(geom.centroid, axis) for geom in gdf.geometry])

# generate bin edges based on dimension and bin size
def get_bins(dim, bin_size):
    """Return the bin edges (in degrees) spanning the full range of *dim*.

    Parameters
    ----------
    dim : str
        'longitude' (edges start at -180 and cover up to 180) or 'latitude'
        (edges start at -90 and cover up to 90).
    bin_size : int or float
        Bin width in degrees; must be positive.

    Returns
    -------
    numpy.ndarray
        Ascending edges spaced ``bin_size`` apart, starting at the lower
        limit. When ``bin_size`` does not divide the range evenly, the last
        edge may lie beyond the upper limit so the range is still covered.

    Raises
    ------
    ValueError
        If *dim* is neither 'longitude' nor 'latitude', or if *bin_size* is
        not a positive number.
    """
    limits = {'longitude': (-180, 180), 'latitude': (-90, 90)}
    # Fail loudly on an unknown dimension (same error as get_coordinate_array)
    # instead of silently falling back to latitude bins
    if dim not in ('longitude', 'latitude'):
        raise ValueError(f"Invalid dimension: {dim}")
    # `not > 0` also rejects NaN, which a plain `<= 0` check would let through
    if not bin_size > 0:
        raise ValueError(f"bin_size must be positive, got {bin_size}")

    lower, upper = limits[dim]
    # The stop value is exclusive, hence `upper + bin_size` to keep the edge
    # at (or just past) the upper limit
    return np.arange(lower, upper + bin_size, bin_size)

# compute binned statistics (mean, std, median, quantiles) by spatial dimension
def compute_bin_stats(x, y, dim, bin_size):
    """Summarise *y* within fixed-width bins of the coordinate *x*.

    Parameters
    ----------
    x : array-like
        Coordinate values (longitude or latitude, matching *dim*).
    y : array-like
        Values to summarise; same length as *x*.
    dim : str
        Name of the coordinate; used for the bin edges (via ``get_bins``)
        and as the prefix of the output columns.
    bin_size : int or float
        Bin width in degrees, forwarded to ``get_bins``.

    Returns
    -------
    pandas.DataFrame
        One row per bin, in ascending bin order and including bins that
        received no data (their statistics are NaN), with columns
        ``mean``, ``std``, ``median``, ``q25``, ``q75``, ``{dim}_mid``,
        ``{dim}_left`` and ``{dim}_right``. The last three are plain numeric
        columns holding each bin's midpoint and edges.
    """
    bin_col = f'{dim}_bin'
    df = pd.DataFrame({dim: x, 'suitability': y})
    # NOTE: pd.cut builds right-closed intervals by default, so a coordinate
    # exactly equal to the lowest edge is not assigned to any bin.
    df[bin_col] = pd.cut(df[dim], bins=get_bins(dim, bin_size))

    # observed=False is spelled out so that empty bins are always kept,
    # independent of the pandas default for categorical groupers
    bin_stats = df.groupby(bin_col, observed=False)['suitability'].agg(
        mean='mean',
        std='std',
        median='median',
        q25=lambda values: values.quantile(0.25),
        q75=lambda values: values.quantile(0.75),
    )

    # With observed=False the result has exactly one row per interval
    # category, in category order, so the categories line up with the rows.
    intervals = bin_stats.index.categories
    bin_stats[f'{dim}_mid'] = intervals.mid.to_numpy()
    bin_stats[f'{dim}_left'] = intervals.left.to_numpy()
    bin_stats[f'{dim}_right'] = intervals.right.to_numpy()
    return bin_stats.reset_index(drop=True)

# iterate over SSP scenarios and dimensions to compute and export binned stats
# Centroid coordinates depend only on the dimension, not on the scenario,
# so extract them once per dimension instead of once per (scenario, dimension)
coordinates = {dim: get_coordinate_array(gdf, dim) for dim in dims}

# Build output paths with pathlib so that a missing trailing separator in
# out_dir_path cannot fuse the directory name with the file name; create the
# directory up front so the first write does not fail on a missing folder
out_dir = Path(out_dir_path)
out_dir.mkdir(parents=True, exist_ok=True)

for ssp in ssps:
    # values to summarise for this scenario
    y = gdf[f'probability_change_mean_{ssp}'].values
    for dim in dims:
        bin_stats = compute_bin_stats(coordinates[dim], y, dim, bin_size)

        # file name encodes scenario, 3-letter dimension prefix and bin size
        out_file = out_dir / f'bin_stats_{ssp}_{dim[:3]}_bin{bin_size}.parquet'
        bin_stats.to_parquet(out_file)
        print(f'[INFO] Saved {ssp} {dim} stats to: {out_file}')
