## Imports and Setup

In [None]:
import os
import pandas as pd
import geopandas as gpd
import logging
import joblib
import torch

import sys
sys.path.insert(0, "../src")
import sat_download

sys.path.insert(0, "../utils/")
import post_utils
import config_utils
import data_utils

import matplotlib.pyplot as plt
from PIL import Image

%load_ext autoreload
%autoreload 2

## Load Config Files

In [43]:
# Three-letter country code to post-process, and the `sum_threshold` value
# that is forwarded to post_utils.load_data when predictions are loaded.
iso_code = "GHA"
sum_threshold = 5

# Config paths are resolved against the parent of the current working directory.
cwd = os.path.dirname(os.getcwd())
data_config_file = os.path.join(cwd, "configs/data_configs/data_config_ISO_AF.yaml")
model_config_file = os.path.join(cwd, "configs/cnn_configs/convnext_large_v01.yaml")

# Parse both YAML files once the paths are known.
data_config = config_utils.load_config(data_config_file)
model_config = config_utils.load_config(model_config_file)

## Load Model Predictions

In [46]:
# Load the model predictions for `iso_code`.
preds = post_utils.load_data(
    iso_code,
    data_config,
    model_config,
    sum_threshold,
    source="pred",
)
# Replace every prediction geometry with its centroid so that the distance
# computations further down operate on points.
pred_centroids = preds["geometry"].centroid
preds["geometry"] = pred_centroids
preds.head(3)

INFO:root:Data dimensions: (38558, 5)


Unnamed: 0,prob,PUID,sum,group,geometry
0,0.993279,0,29.0,17488,POINT (-253388.890 662313.242)
1,0.989819,1,23.0,21430,POINT (120511.756 679766.701)
2,0.989806,2,96.0,9607,POINT (-185451.975 746975.782)


## Load Master (Government Data)

In [34]:
# Load the master (government) records for `iso_code`.
master = post_utils.load_data(
    iso_code,
    data_config,
    source="master",
)
master.head(3)

INFO:root:clean
0    3295
2     718
1      82
Name: count, dtype: int64
INFO:root:Data dimensions: (4095, 10)


Unnamed: 0,MUID,source,iso,country,region,subregion,name,giga_id_school,clean,geometry
0,UNICEF-RWA-SCHOOL-00000000,UNICEF,RWA,Rwanda,Africa,Sub-Saharan Africa,GS MUHORORO,cdf4f80c-e3cc-4ebc-8931-8fe76018cc61,0,POINT (29.54797 -2.15939)
1,UNICEF-RWA-SCHOOL-00000001,UNICEF,RWA,Rwanda,Africa,Sub-Saharan Africa,GS MWENDO,a58be9dc-59c4-4cf9-96f5-e5b8fa84ba82,0,POINT (30.21272 -1.73064)
2,UNICEF-RWA-SCHOOL-00000002,UNICEF,RWA,Rwanda,Africa,Sub-Saharan Africa,GS NYAGISOZI,9fe38ae7-7d51-4646-841b-1b6daa31c987,0,POINT (29.64556 -2.29503)


## Load OSM and Overture Data

In [35]:
# Load the combined OSM / Overture records for `iso_code`.
sources = post_utils.load_data(
    iso_code,
    data_config,
    source="osm_overture",
)
sources.head(3)

INFO:root:clean
0    168
2      3
1      2
Name: count, dtype: int64
INFO:root:Data dimensions: (173, 10)


Unnamed: 0,SUID,source,iso,country,region,subregion,name,giga_id_school,clean,geometry
0,OSM-RWA-SCHOOL-00000000,OSM,RWA,Rwanda,Africa,Sub-Saharan Africa,École Belge,,0,POINT (30.05984 -1.95128)
1,OSM-RWA-SCHOOL-00000001,OSM,RWA,Rwanda,Africa,Sub-Saharan Africa,Primary School,,0,POINT (30.06790 -2.11252)
2,OSM-RWA-SCHOOL-00000002,OSM,RWA,Rwanda,Africa,Sub-Saharan Africa,Nelson Mandela Education Center,,0,POINT (30.06449 -2.11022)


## Compute Distance between Master and Predictions

In [36]:
# For every master record, find the nearest model prediction and its distance.
#
# Planar distances are only meaningful when both layers share a projected CRS,
# so a copy of master is reprojected to EPSG:3857 (master itself keeps its CRS).
# NOTE(review): preds is used as-is and is assumed to already be in EPSG:3857 —
# confirm against post_utils.load_data.
master_3857 = data_utils._convert_crs(master.copy(), target_crs="EPSG:3857")

# Guarded so the cell can be re-run: once PUID is the index it is no longer a
# column and an unconditional set_index("PUID") would raise KeyError.
if "PUID" in preds.columns:
    preds = preds.set_index("PUID")

# Dense matrix: one row per master record, one column per PUID.
# The per-row result is aligned by label when the DataFrame is built, so
# sorting each row first (as done previously) had no effect on the outcome.
# NOTE: this materialises len(master) x len(preds) floats in memory.
dist = master_3857.geometry.apply(lambda geom: preds.distance(geom))

master["distance"] = dist.min(axis=1).values
master["PUID"] = list(dist.columns[dist.to_numpy().argmin(axis=1)])
master.head(3)

Unnamed: 0,MUID,source,iso,country,region,subregion,name,giga_id_school,clean,geometry,distance,PUID
0,UNICEF-RWA-SCHOOL-00000000,UNICEF,RWA,Rwanda,Africa,Sub-Saharan Africa,GS MUHORORO,cdf4f80c-e3cc-4ebc-8931-8fe76018cc61,0,POINT (29.54797 -2.15939),66.647871,16
1,UNICEF-RWA-SCHOOL-00000001,UNICEF,RWA,Rwanda,Africa,Sub-Saharan Africa,GS MWENDO,a58be9dc-59c4-4cf9-96f5-e5b8fa84ba82,0,POINT (30.21272 -1.73064),32.615105,611
2,UNICEF-RWA-SCHOOL-00000002,UNICEF,RWA,Rwanda,Africa,Sub-Saharan Africa,GS NYAGISOZI,9fe38ae7-7d51-4646-841b-1b6daa31c987,0,POINT (29.64556 -2.29503),62.185774,296


## Compute Distance between Predictions and Master

In [37]:
# For every prediction, find the nearest master record (restricted to rows
# with clean == 0), its name, and the distance to it.
master_filtered = master[master["clean"] == 0]

# Reproject a copy to EPSG:3857 and key it by MUID so that the columns of the
# distance matrix below are MUIDs.
master_filtered_3857 = data_utils._convert_crs(
    master_filtered.copy(), target_crs="EPSG:3857"
).set_index("MUID")

# Dense matrix: one row per prediction, one column per MUID.
# Rows are aligned by label when the DataFrame is built, so the previous
# per-row sort_values() was wasted work and has been dropped.
dist = preds.geometry.apply(lambda geom: master_filtered_3857.distance(geom))
preds["MUID"] = list(dist.columns[dist.to_numpy().argmin(axis=1)])

# Single label-based lookup instead of re-filtering master_filtered once per
# prediction. drop_duplicates keeps the first row per MUID, which is the same
# name the previous `[...]["name"].values[0]` selected.
muid_to_name = master_filtered.drop_duplicates(subset="MUID").set_index("MUID")["name"]
preds["master_name"] = preds["MUID"].map(muid_to_name)

preds["distance_to_nearest_master"] = dist.min(axis=1).values
preds.head(3)

Unnamed: 0_level_0,prob,sum,group,geometry,MUID,master_name,distance_to_nearest_master
PUID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.995682,43.0,524,POINT (3297424.423 -226161.918),UNICEF-RWA-SCHOOL-00002173,KIGALI,132.393365
1,0.995382,66.0,191,POINT (3261232.506 -243632.031),UNICEF-RWA-SCHOOL-00003960,ESAPAN NGOMA,128.74935
2,0.995111,72.0,5185,POINT (3381831.485 -253884.661),UNICEF-RWA-SCHOOL-00001585,GS JARAMA,105.775741


## Compute Distance between Predictions and OSM/Overture Sources

In [38]:
# For every prediction, find the nearest OSM/Overture record (restricted to
# rows with clean == 0), its name, and the distance to it.
sources_filtered = sources[sources["clean"] == 0]

# Reproject a copy to EPSG:3857 and key it by SUID so that the columns of the
# distance matrix below are SUIDs.
sources_filtered_3857 = data_utils._convert_crs(
    sources_filtered.copy(), target_crs="EPSG:3857"
).set_index("SUID")

# Dense matrix: one row per prediction, one column per SUID.
# Rows are aligned by label when the DataFrame is built, so the previous
# per-row sort_values() was wasted work and has been dropped.
dist = preds.geometry.apply(lambda geom: sources_filtered_3857.distance(geom))
preds["SUID"] = list(dist.columns[dist.to_numpy().argmin(axis=1)])

# Single label-based lookup instead of re-filtering sources_filtered once per
# prediction. drop_duplicates keeps the first row per SUID, which is the same
# name the previous `[...]["name"].values[0]` selected.
suid_to_name = sources_filtered.drop_duplicates(subset="SUID").set_index("SUID")["name"]
preds["source_name"] = preds["SUID"].map(suid_to_name)

preds["distance_to_nearest_osm_overture"] = dist.min(axis=1).values
preds.head(3)

Unnamed: 0_level_0,prob,sum,group,geometry,MUID,master_name,distance_to_nearest_master,SUID,source_name,distance_to_nearest_osm_overture
PUID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.995682,43.0,524,POINT (3297424.423 -226161.918),UNICEF-RWA-SCHOOL-00002173,KIGALI,132.393365,OVERTURE-RWA-SCHOOL-00000007,Harvest Academy Mahoko,14184.569747
1,0.995382,66.0,191,POINT (3261232.506 -243632.031),UNICEF-RWA-SCHOOL-00003960,ESAPAN NGOMA,128.74935,OSM-RWA-SCHOOL-00000053,Mubuga Secondary School,6282.861557
2,0.995111,72.0,5185,POINT (3381831.485 -253884.661),UNICEF-RWA-SCHOOL-00001585,GS JARAMA,105.775741,OSM-RWA-SCHOOL-00000042,Gyunuzi Primary School,20955.651821


## Save Results

In [39]:
# Write the enriched predictions to a GeoJSON file in the working directory.
pred_file = f"{iso_code}_results.geojson"
preds.to_file(filename=pred_file, driver="GeoJSON")

In [40]:
# Write the master records (including any columns added above) to GeoJSON.
master_file = f"{iso_code}_master.geojson"
master.to_file(filename=master_file, driver="GeoJSON")

In [41]:
# Write the OSM / Overture records to GeoJSON.
sources_file = f"{iso_code}_osm_overture.geojson"
sources.to_file(filename=sources_file, driver="GeoJSON")