In [2]:
import json
from typing import Dict

import geopandas as gpd
import numpy as np
import pandas as pd
import requests
from lonboard import Map, PolygonLayer, ScatterplotLayer
from shapely import from_geojson

In [1]:
BASE_URL = "http://space2stats.ds.io"
FIELDS_ENDPOINT = f"{BASE_URL}/fields"
SUMMARY_ENDPOINT = f"{BASE_URL}/summary"
AGGREGATION_ENDPOINT = f"{BASE_URL}/aggregate"

In [3]:
def fetch_admin_boundaries(iso3: str, adm: str) -> gpd.GeoDataFrame:
    """Fetch administrative boundaries from GeoBoundaries API."""
    url = f"https://www.geoboundaries.org/api/current/gbOpen/{iso3}/{adm}/"
    res = requests.get(url).json()
    return gpd.read_file(res["gjDownloadURL"])


def fetch_summary_data(feature: Dict) -> pd.DataFrame:
    """Fetch summary data for each administrative feature."""
    request_payload = {
        "aoi": feature,
        "spatial_join_method": "touches",
        "fields": ["sum_pop_2020"],
        "geometry": "point",
    }
    response = requests.post(SUMMARY_ENDPOINT, json=request_payload)
    if response.status_code != 200:
        raise Exception(f"Failed to get summary: {response.text}")

    summary_data = response.json()
    if not summary_data:
        print(f"Failed to get summary for {feature['id']}")
        return pd.DataFrame()  # Return an empty DataFrame if no data

    df = pd.DataFrame(summary_data)
    df["adm_id"] = int(feature["id"])
    df["adm_name"] = feature["properties"]["shapeName"]
    df["geometry"] = df["geometry"].apply(lambda geom: from_geojson(geom))
    return df

In [4]:
ISO3 = "KEN"
ADM = "ADM1"
adm_boundaries = fetch_admin_boundaries(ISO3, ADM)
geojson_str = adm_boundaries.to_json()
adm_geojson = json.loads(geojson_str)
adm_features = adm_geojson["features"]

gdfs = []
for i, feature in enumerate(adm_features):
    df = fetch_summary_data(feature)
    if not df.empty:
        gdfs.append(gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326"))
        
# Concatenate all GeoDataFrames into a single GeoDataFrame
gdf = pd.concat(gdfs, ignore_index=True)

# Display the GeoDataFrame structure
gdf.head()

ConnectionError: HTTPConnectionPool(host='space2stats.ds.io', port=80): Max retries exceeded with url: /summary (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000002A7FFCB3B50>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))

In [5]:
# Define custom breaks and corresponding RGBA colors for visualization
breaks = [
    gdf["sum_pop_2020"].min(),
    1,
    1000,
    10000,
    50000,
    100000,
    200000,
    gdf["sum_pop_2020"].max(),
]
colors = np.array(
    [
        [211, 211, 211, 255],  # Light gray for 0
        [255, 255, 0, 255],  # Yellow for 1-1000
        [255, 165, 0, 255],  # Orange for 1000-10000
        [255, 0, 0, 255],  # Red for 10000-50000
        [128, 0, 128, 255],  # Purple for 50000-100000
        [0, 0, 255, 255],  # Blue for 100000-200000
        [0, 0, 139, 255],  # Dark blue for 200000+
    ]
)


def assign_color(value: float) -> list:
    """Assign colors based on population value."""
    for i in range(len(breaks) - 1):
        if breaks[i] <= value < breaks[i + 1]:
            return colors[i].tolist()  # Convert to list
    return colors[-1].tolist()  # In case value exceeds all breaks

In [6]:
# Map sum_pop_2020 values to colors using the custom function
gdf["color"] = gdf["sum_pop_2020"].apply(assign_color)

# Flatten the color list into a 2D array for ScatterplotLayer
color_list = np.array(gdf["color"].tolist(), dtype=np.uint8)

# Create the scatterplot layer with the assigned colors
layer = ScatterplotLayer.from_geopandas(gdf, get_radius=2000, get_fill_color=color_list)

# Create the map with the scatterplot layer
m = Map(layer)
m

Map(layers=[ScatterplotLayer(get_fill_color=<pyarrow.lib.FixedSizeListArray object at 0x11bd4d7e0>
[
  [
    2…

In [7]:
def fetch_aggregated_population(row):
    request_payload = {
        "aoi": {
            "type": "Feature",
            "geometry": row.geometry.__geo_interface__,
            "properties": {},
        },
        "spatial_join_method": "touches",
        "fields": ["sum_pop_2020"],
        "aggregation_type": "sum",
    }

    response = requests.post(AGGREGATION_ENDPOINT, json=request_payload)

    if response.status_code == 200:
        result = response.json()
        return result.get("sum_pop_2020", 0)
    else:
        print(response.content)
        return 0

In [8]:
adm_gdf = gpd.GeoDataFrame.from_features(adm_features)
adm_gdf["sum_pop_2020"] = adm_gdf.apply(fetch_aggregated_population, axis=1)
adm_gdf.head()

Unnamed: 0,geometry,shapeName,shapeISO,shapeID,shapeGroup,shapeType,sum_pop_2020
0,"POLYGON ((36.05061 4.45622, 35.94395 4.54803, ...",Turkana,KE-43,32016919B72266624462344,KEN,ADM1,1374176.0
1,"POLYGON ((36.60089 2.40574, 36.60138 2.4053, 3...",Marsabit,KE-25,32016919B63496705134089,KEN,ADM1,446034.2
2,"POLYGON ((40.99195 2.17919, 40.99245 2.25188, ...",Mandera,KE-24,32016919B2031803566233,KEN,ADM1,2815273.0
3,"POLYGON ((38.96255 2.09739, 38.96272 2.09718, ...",Wajir,KE-46,32016919B89873713911655,KEN,ADM1,1930365.0
4,"POLYGON ((34.94278 2.45547, 34.93892 2.45551, ...",West Pokot,KE-47,32016919B96045830258165,KEN,ADM1,942957.6


In [9]:
breaks = [
    adm_gdf["sum_pop_2020"].min(),
    10_000,
    100_000,
    1_000_000,
    5_000_000,
    10_000_000,
    20_000_000,
    gdf["sum_pop_2020"].max(),
]
colors = np.array(
    [
        [211, 211, 211, 125],  # Light gray with transparency for 0 - 10,000
        [255, 255, 0, 125],  # Yellow with transparency for 10,000 - 100,000
        [255, 165, 0, 125],  # Orange with transparency for 100,000 - 1,000,000
        [255, 0, 0, 125],  # Red with transparency for 1,000,000 - 5,000,000
        [128, 0, 128, 125],  # Purple with transparency for 5,000,000 - 10,000,000
        [0, 0, 255, 125],  # Blue with transparency for 10,000,000 - 20,000,000
        [0, 0, 139, 125],  # Dark blue with transparency for 20,000,000 - max
    ]
)


def assign_color(value: float) -> list:
    """Assign colors based on population value, including transparency."""
    for i in range(len(breaks) - 1):
        if breaks[i] <= value < breaks[i + 1]:
            return colors[i].tolist()  # Convert to list
    return colors[-1].tolist()

In [10]:
# Assign colors to each row in gdf based on sum_pop_2020
adm_gdf["color"] = adm_gdf["sum_pop_2020"].apply(assign_color)

# Convert the list of colors into a 2D numpy array of uint8
color_array = np.array(adm_gdf["color"].tolist(), dtype=np.uint8)

# Create the polygon layer using PolygonLayer, referencing the color_array
layer = PolygonLayer.from_geopandas(
    adm_gdf,
    get_fill_color=color_array,  # Pass the 2D numpy array for the colors
    get_line_color=[0, 0, 0, 255],  # Optional: Black outline
    line_width_min_pixels=1,
)

# Create the map with the polygon layer
m = Map(layer)
m

  warn(


Map(layers=[PolygonLayer(get_fill_color=<pyarrow.lib.FixedSizeListArray object at 0x11bc82aa0>
[
  [
    255,
…