# Calculating travel time to health facilities in Brazil

In support of DECSI(?), we are estimating how many people live within given travel-time thresholds of health facilities of different types.


In [None]:
# The geobr package is required for accessing Brazilian open data: https://ipeagit.github.io/geobr/
# %pip install geobr

In [2]:
import sys
import os
import rasterio
import geobr

import geopandas as gpd
import skimage.graph as graph

sys.path.insert(0, r"C:\WBG\Work\Code\GOSTrocks\src")
import GOSTrocks.rasterMisc as rMisc
from GOSTrocks.misc import tPrint

sys.path.append(r"C:\WBG\Work\Code\GOSTnetsraster\src")
import GOSTnetsraster.market_access as ma

%load_ext autoreload
%autoreload 2

GDAL is not installed - OGR functionality not available


In [16]:
# Notebook configuration: country code, projection, and all input/output paths.
iso3 = "BRA"

# Input parameters
m_crs = 5880  # Need to project data to a metres-based projection

# Define the project folders
base_folder = "C:/WBG/Work/Projects/BRA_health_access/"
results_folder = os.path.join(base_folder, "RESULTS")
data_folder = os.path.join(base_folder, "DATA")
output_folder = os.path.join(base_folder, "OUTPUT")

# exist_ok=True creates each folder only when it is missing, replacing the
# separate os.path.exists() checks and avoiding the check-then-create race.
for folder in (results_folder, data_folder, output_folder):
    os.makedirs(folder, exist_ok=True)

# Define input data
friction_surface = os.path.join(data_folder, "BRA_friction_surface_202306.tif")
population_raster = os.path.join(
    data_folder, "bra_pop_2025_CN_1km_R2025A_UA_v1.tif"
)  # https://data.worldpop.org/GIS/Population/Global_2015_2030/R2025A/2025/BRA/v1/1km_ua/constrained/bra_pop_2025_CN_1km_R2025A_UA_v1.tif
# Population raster after alignment to the friction surface grid
population_standardized = os.path.join(
    data_folder, "bra_pop_2025_CN_1km_R2025A_UA_v1_standardized.tif"
)
health_facilities = os.path.join(data_folder, "BRA_health_facilities_202306.geojson")

In [4]:
# Read in global admin data from DDH and isolate to Brazil
### TODO: Read directly from DDH instead of local file
### https://datalakeesouoprod.blob.core.windows.net/data/ddh/data/ddh-published/0038272/5/DR0095369/World%20Bank%20Official%20Boundaries%20(GeoJSON)/World%20Bank%20Official%20Boundaries%20-%20Admin%200.geojson?_=1761666056434
# NOTE(review): the URL above points at the Admin 0 file while the local file
# read below is the ADM1 layer - confirm which DDH resource should be used.
global_admin = (
    r"C:\WBG\Work\data\ADMIN\NEW_WB_BOUNDS\FOR_PUBLICATION\geojson\WB_GAD_ADM1.geojson"
)
in_gdf = gpd.read_file(global_admin)

# Take an explicit copy of the Brazil rows so that bra_admin is an independent
# frame rather than a slice of in_gdf. Assigning columns to a slice raises
# pandas' SettingWithCopyWarning; presumably this is the source of the warnings
# emitted when bra_admin is passed to the population summary step - confirm.
bra_admin = in_gdf.loc[in_gdf["ISO_A3"] == iso3].copy()
bra_admin

Unnamed: 0,ISO_A3,ISO_A2,WB_A3,WB_REGION,WB_STATUS,NAM_0,NAM_1,ADM1CD_c,GEOM_SRCE,geometry
2406,BRA,BR,BRA,LCR,Member State,Brazil,Name Unknown,BRA016,WB GAD,"POLYGON ((-28.851 -20.48, -28.853 -20.477, -28..."
2407,BRA,BR,BRA,LCR,Member State,Brazil,Name Unknown,BRA014,WB GAD,"POLYGON ((-29.338 -20.488, -29.333 -20.489, -2..."
2408,BRA,BR,BRA,LCR,Member State,Brazil,Alagoas,BRA002,WB GAD,"MULTIPOLYGON (((-38.011 -9.1557, -38 -9.1519, ..."
2409,BRA,BR,BRA,LCR,Member State,Brazil,Bahia,BRA005,WB GAD,"MULTIPOLYGON (((-38.086 -10.711, -38.096 -10.7..."
2410,BRA,BR,BRA,LCR,Member State,Brazil,Espirito Santo,BRA008,WB GAD,"MULTIPOLYGON (((-39.92 -19.692, -39.927 -19.69..."
2411,BRA,BR,BRA,LCR,Member State,Brazil,Paraiba,BRA019,WB GAD,"MULTIPOLYGON (((-37.518 -7.4246, -37.514 -7.42..."
2412,BRA,BR,BRA,LCR,Member State,Brazil,Pernambuco,BRA021,WB GAD,"MULTIPOLYGON (((-35.539 -8.8168, -35.542 -8.82..."
2413,BRA,BR,BRA,LCR,Member State,Brazil,Rio Grande Do Norte,BRA024,WB GAD,"MULTIPOLYGON (((-37.705 -6.1867, -37.709 -6.18..."
2414,BRA,BR,BRA,LCR,Member State,Brazil,Sergipe,BRA030,WB GAD,"MULTIPOLYGON (((-36.922 -10.134, -36.916 -10.1..."
2463,BRA,BR,BRA,LCR,Member State,Brazil,Name Unknown,BRA017,WB GAD,"POLYGON ((-33.826 -3.8568, -33.824 -3.8527, -3..."


In [8]:
# Extract Global Friction Surface for Brazil.
# The clipped raster is cached on disk, so the extraction only runs when the
# output file does not exist yet.
if not os.path.exists(friction_surface):
    tPrint("Extracting Brazil friction surface...")
    global_friction = (
        r"C:\WBG\Work\data\FRICTION\2020_motorized_friction_surface.geotiff"
    )
    # Open the global raster in a context manager so the file handle is
    # released as soon as the clip has been written.
    with rasterio.open(global_friction) as global_friction_raster:
        rMisc.clipRaster(
            global_friction_raster, bra_admin, friction_surface, crop=False
        )

In [9]:
# Download the Brazilian health facilities layer through the geobr package
health_facilities_gdf = geobr.read_health_facilities()

# Keep only the records whose geometry is not empty
has_geometry = ~health_facilities_gdf["geometry"].is_empty
health_facilities_gdf = health_facilities_gdf.loc[has_geometry]

# Write a local GeoJSON copy the first time this cell is run
if not os.path.exists(health_facilities):
    health_facilities_gdf.to_file(health_facilities, driver="GeoJSON")

health_facilities_gdf.head()

Unnamed: 0,code_cnes,code_muni,code_state,abbrev_state,name_state,code_region,name_region,date_update,year_update,co_unidade,...,st_centro_cirurgico,st_centro_obstetrico,st_centro_neonatal,st_atend_hospitalar,st_servico_apoio,st_atend_ambulatorial,co_motivo_desab,co_ambulatorial_sus,empty_geo,geometry
0,19.0,2602902.0,26.0,PE,Pernambuco,2.0,Nordeste,20230330,2023.0,2602900000019,...,0.0,0.0,0.0,0.0,1.0,0.0,,SIM,,POINT (-34.97 -8.2317)
1,27.0,2602902.0,26.0,PE,Pernambuco,2.0,Nordeste,20230330,2023.0,2602900000027,...,1.0,1.0,1.0,1.0,1.0,1.0,,SIM,,POINT (-35.035 -8.287)
2,35.0,2602902.0,26.0,PE,Pernambuco,2.0,Nordeste,20230330,2023.0,2602900000035,...,1.0,1.0,1.0,1.0,1.0,1.0,,SIM,,POINT (-35.035 -8.287)
3,43.0,2602902.0,26.0,PE,Pernambuco,2.0,Nordeste,20230330,2023.0,2602900000043,...,0.0,0.0,0.0,0.0,1.0,0.0,,SIM,,POINT (-35.035 -8.2876)
4,51.0,2602902.0,26.0,PE,Pernambuco,2.0,Nordeste,20230330,2023.0,2602900000051,...,0.0,0.0,0.0,0.0,1.0,0.0,,SIM,,POINT (-35.035 -8.287)


| Column    | English | Count |
| --- | --- | --- |
| st_centro_cirurgico | surgical_center | 7454 |
| st_centro_obstetrico | obstetric_center | 4654 |
| st_centro_neonatal | neonatal_center | 3366 |
| st_atend_hospitalar | hospital_care | 8175 |
| st_servico_apoio | support_service | 215240 |
| st_atend_ambulatorial | outpatient_care | 9052 |

In [19]:
# Service flag columns in the facilities table; each is summed below to
# report how many facilities offer that service.
hospital_types = [
    "st_centro_cirurgico",
    "st_centro_obstetrico",
    "st_centro_neonatal",
    "st_atend_hospitalar",
    "st_servico_apoio",
    "st_atend_ambulatorial",
]

# Report the number of facilities flagged for each service type
facility_counts = {col: health_facilities_gdf[col].sum() for col in hospital_types}
for col, count in facility_counts.items():
    print(f"{col}: {count}")

# Select facilities flagged for hospital care and/or outpatient care
is_hospital = health_facilities_gdf["st_atend_hospitalar"] == 1
is_outpatient = health_facilities_gdf["st_atend_ambulatorial"] == 1
sel_facilities = health_facilities_gdf.loc[is_hospital | is_outpatient].copy()

st_centro_cirurgico: 7454.0
st_centro_obstetrico: 4654.0
st_centro_neonatal: 3366.0
st_atend_hospitalar: 8175.0
st_servico_apoio: 215240.0
st_atend_ambulatorial: 9052.0


# Calculate travel time

We want travel time to two facility types: hospitals and outpatient clinics. Once we have those, we want the following three summaries:

1. Population < 1 hr, 1-3 hours, and > 3 hours
2. Average travel time within admin level 1
3. Average travel time for people living more than 3 hours from a facility

In [None]:
# Calculate travel time to health facilities
tt_folder = os.path.join(results_folder, "TRAVEL_TIME")
os.makedirs(tt_folder, exist_ok=True)

# Build the cost graph from the first band of the friction surface.
# frictionR is kept open because it is passed to calculate_travel_time below.
frictionR = rasterio.open(friction_surface)
frictionD = frictionR.read(1)
# NOTE(review): the x1000 scaling presumably converts minutes-per-metre to
# minutes-per-kilometre - confirm against the friction surface documentation.
frictionD = frictionD * 1000
mcp = graph.MCP_Geometric(frictionD)

# Facility points must be in the same CRS as the friction raster.
if health_facilities_gdf.crs != frictionR.crs:
    health_facilities_gdf = health_facilities_gdf.to_crs(frictionR.crs)
# sel_facilities was copied from health_facilities_gdf in an earlier cell,
# before the reprojection above, so it needs the same alignment before it is
# used for the combined run.
if sel_facilities.crs != frictionR.crs:
    sel_facilities = sel_facilities.to_crs(frictionR.crs)

# Run analysis for each individual hospital type
for hospital_type in hospital_types:
    cur_out_file = os.path.join(tt_folder, f"BRA_travel_time_{hospital_type}.tif")
    # Existing outputs are reused so a re-run only computes missing rasters
    if not os.path.exists(cur_out_file):
        tPrint(f"Calculating travel time to {hospital_type}...")
        cur_facilities_gdf = health_facilities_gdf.loc[
            health_facilities_gdf[hospital_type] == 1
        ]
        tt, traceback = ma.calculate_travel_time(
            frictionR, mcp, cur_facilities_gdf, out_raster=cur_out_file
        )

# Run analysis for hospitals and outpatient clinics combined.
# hospital_types_combined lists the two flag columns that sel_facilities was
# selected on; it is not used by the calculation itself.
hospital_types_combined = ["st_atend_hospitalar", "st_atend_ambulatorial"]
tt_master_file = os.path.join(
    tt_folder, "BRA_travel_time_hospital_outpatient_combined.tif"
)
if not os.path.exists(tt_master_file):
    tPrint("Calculating travel time to hospital and outpatient clinics combined...")
    tt, traceback = ma.calculate_travel_time(
        frictionR, mcp, sel_facilities, out_raster=tt_master_file
    )

12:53:45	Calculating travel time to hospital and outpatient clinics combined...


In [24]:
# Summarize population within travel time thresholds

# Align the population raster to the friction surface; the standardized raster
# is cached on disk and only created when it does not exist yet.
if not os.path.exists(population_standardized):
    tPrint("Standardizing population raster...")
    # Context managers close both input rasters once the standardized file
    # has been written, instead of leaving the handles open.
    with rasterio.open(population_raster) as pop_src, rasterio.open(
        friction_surface
    ) as friction_src:
        rMisc.standardizeInputRasters(
            pop_src,
            friction_src,
            population_standardized,
        )

# These two datasets stay open because they are handed to the summary call.
population_data = rasterio.open(population_standardized)
tt_surface = rasterio.open(tt_master_file)

# Thresholds of 60 and 180 correspond to the 1 hr and 3 hr breaks listed in
# the section introduction.
pop_within_threshold = ma.summarize_travel_time_populations(
    population_data, tt_surface, sel_facilities, mcp, bra_admin, thresholds=[60, 180]
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

In [27]:
# Export the threshold summary as a plain table (geometry column removed)
summary_csv = os.path.join(
    output_folder,
    "BRA_population_within_travel_time_thresholds_hospital_outpatient_combined.csv",
)
summary_table = pop_within_threshold.drop(columns=["geometry"])
summary_table.to_csv(summary_csv, index=False)

In [28]:
# Write the admin boundaries to a GeoPackage in the output folder.
# NOTE(review): the file name refers to population/travel-time results, but the
# frame written is bra_admin rather than pop_within_threshold - confirm that
# bra_admin carries the summary columns at this point.
admin_gpkg = os.path.join(
    output_folder,
    "BRA_admin1_population_travel_time_hospital_outpatient_combined.gpkg",
)
bra_admin.to_file(admin_gpkg, driver="GPKG")