## Imports and Setup

In [1]:
import os
import pandas as pd
import geopandas as gpd
import logging
import joblib
import torch

import sys
sys.path.insert(0, "../src")
import sat_download

sys.path.insert(0, "../utils/")
import post_utils
import config_utils
import data_utils

import matplotlib.pyplot as plt
from PIL import Image

%load_ext autoreload
%autoreload 2

## Load Config Files

In [26]:
# Run parameters: the country ISO code plus the two values that are passed to
# post_utils.load_data when the model predictions are loaded.
iso_code = "GHA"
sum_threshold = 0
buffer_size = 50

# NOTE: despite its name, `cwd` is the PARENT of the current working directory;
# every config and output path in this notebook is built relative to it.
cwd = os.path.dirname(os.getcwd())

# Data configuration.
data_config_file = os.path.join(
    cwd, "configs/data_configs/data_config_ISO_AF.yaml"
)
data_config = config_utils.load_config(data_config_file)

# Model configuration.
model_config_file = os.path.join(
    cwd, "configs/cnn_configs/satlas-aerial_swinb_mi_01.yaml"
)
model_config = config_utils.load_config(model_config_file)

## Load Model Predictions

In [27]:
# Load the model predictions for `iso_code` using the data and model configs.
preds = post_utils.load_data(
    iso_code,
    data_config,
    model_config,
    sum_threshold,
    buffer_size=buffer_size,
)

# Replace every prediction geometry with its centroid; the distance cells
# further down measure against these centroids.
preds["geometry"] = preds["geometry"].centroid

preds.head(3)

INFO:root:Data dimensions: (358, 9)


Unnamed: 0,prob,UID,sum,group,geometry,ADM1,ADM2,ADM3,PUID
0,0.731058,23470,20.0,148,POINT (-309009.498 775025.869),Ahafo,Asunafo North Municipal,Ghana,Asunafo North Municipal_23470
1,0.731058,41068,4.0,0,POINT (-295260.733 731003.665),Ahafo,Asunafo South,Ghana,Asunafo South_41068
2,0.731058,59589,31.0,150,POINT (-291928.098 755038.669),Ahafo,Asunafo North Municipal,Ghana,Asunafo North Municipal_59589


## Load Government Data

In [4]:
# Load the "master" source for `iso_code` (the government dataset named in the
# section header).
master = post_utils.load_data(
    iso_code,
    data_config,
    source="master",
)

master.head(3)

INFO:root:clean
0    965
1     36
2     30
Name: count, dtype: int64
INFO:root:Data dimensions: (1031, 13)


Unnamed: 0,MUID,source,iso,country,region,subregion,name,giga_id_school,clean,geometry,ADM1,ADM2,ADM3
0,UNICEF-BWA-SCHOOL-00000000,UNICEF,BWA,Botswana,Africa,Sub-Saharan Africa,Gweta,330be1f2-316e-32b8-8602-c556dedc70a3,0,POINT (25.23508 -20.20663),Central,Tutume,Gweta
1,UNICEF-BWA-SCHOOL-00000001,UNICEF,BWA,Botswana,Africa,Sub-Saharan Africa,Matsaakgang,e923393a-1ee5-3b21-b85b-7fc59d36c671,0,POINT (25.31522 -24.99474),Southern,Southern,Kanye
2,UNICEF-BWA-SCHOOL-00000002,UNICEF,BWA,Botswana,Africa,Sub-Saharan Africa,Mogobane,64218c54-b80e-3910-b5ee-aa9b92815d0b,0,POINT (25.69787 -24.98600),South East,South East,Mogobane


## Load OSM and Overture Data

In [5]:
# Load the combined "osm_overture" source for `iso_code`.
sources = post_utils.load_data(
    iso_code,
    data_config,
    source="osm_overture",
)

sources.head(3)

INFO:root:clean
0    409
Name: count, dtype: int64
INFO:root:Data dimensions: (409, 10)


Unnamed: 0,SUID,source,iso,country,region,subregion,name,giga_id_school,clean,geometry
0,OSM-BWA-SCHOOL-00000000,OSM,BWA,Botswana,Africa,Sub-Saharan Africa,,,0,POINT (27.50699 -21.17129)
1,OSM-BWA-SCHOOL-00000001,OSM,BWA,Botswana,Africa,Sub-Saharan Africa,Setlalekgosi Junior Secondary School,,0,POINT (27.50219 -21.17028)
2,OSM-BWA-SCHOOL-00000002,OSM,BWA,Botswana,Africa,Sub-Saharan Africa,Francistown Senior Secondary School,,0,POINT (27.49136 -21.16794)


## Compute Distance from Master and OSM/Overture Records to the Nearest Prediction

In [6]:
# For every master record, find the nearest model prediction and attach that
# prediction's distance, PUID and probability to `master`.

# Drop the columns this cell derives so that re-running it is idempotent;
# otherwise a second run makes the merge below emit `prob_x` / `prob_y`.
master = master.drop(columns=["distance", "PUID", "prob"], errors="ignore")

# Master points are reprojected to EPSG:3857 before distances are measured.
# NOTE(review): `preds` is used as-is and is assumed to already be in
# EPSG:3857 — confirm, otherwise the distances mix coordinate systems.
master_points = data_utils._convert_crs(master.copy(), target_crs="EPSG:3857")
pred_points = preds[["geometry", "PUID"]].set_index("PUID")

# One row per master record, one column per prediction (labelled by PUID).
# No per-row sort is needed: rows are aligned on PUID when the frame is built,
# so sorting each row only added cost.
dist = master_points.geometry.apply(lambda point: pred_points.distance(point))

master["distance"] = dist.min(axis=1).values
master["PUID"] = list(dist.columns[dist.to_numpy().argmin(axis=1)])

# `validate` makes a non-unique PUID in `preds` raise instead of silently
# duplicating master rows.
master = master.merge(
    preds[["PUID", "prob"]],
    on="PUID",
    how="left",
    validate="many_to_one",
)
print(master.shape)
master.head(3)

(1031, 16)


Unnamed: 0,MUID,source,iso,country,region,subregion,name,giga_id_school,clean,geometry,ADM1,ADM2,ADM3,distance,PUID,prob
0,UNICEF-BWA-SCHOOL-00000000,UNICEF,BWA,Botswana,Africa,Sub-Saharan Africa,Gweta,330be1f2-316e-32b8-8602-c556dedc70a3,0,POINT (25.23508 -20.20663),Central,Tutume,Gweta,1686.616818,Tutume_468200,0.95401
1,UNICEF-BWA-SCHOOL-00000001,UNICEF,BWA,Botswana,Africa,Sub-Saharan Africa,Matsaakgang,e923393a-1ee5-3b21-b85b-7fc59d36c671,0,POINT (25.31522 -24.99474),Southern,Southern,Kanye,65.629124,Southern_762398,0.951193
2,UNICEF-BWA-SCHOOL-00000002,UNICEF,BWA,Botswana,Africa,Sub-Saharan Africa,Mogobane,64218c54-b80e-3910-b5ee-aa9b92815d0b,0,POINT (25.69787 -24.98600),South East,South East,Mogobane,412.349946,South East_93458,0.954256


In [7]:
# For every OSM/Overture record, find the nearest model prediction and attach
# that prediction's distance, PUID and probability to `sources`.

# Drop the columns this cell derives so that re-running it is idempotent;
# otherwise a second run makes the merge below emit `prob_x` / `prob_y`.
sources = sources.drop(columns=["distance", "PUID", "prob"], errors="ignore")

# Source points are reprojected to EPSG:3857 before distances are measured.
# NOTE(review): `preds` is used as-is and is assumed to already be in
# EPSG:3857 — confirm, otherwise the distances mix coordinate systems.
source_points = data_utils._convert_crs(sources.copy(), target_crs="EPSG:3857")
pred_points = preds[["geometry", "PUID"]].set_index("PUID")

# One row per source record, one column per prediction (labelled by PUID).
# No per-row sort is needed: rows are aligned on PUID when the frame is built,
# so sorting each row only added cost.
dist = source_points.geometry.apply(lambda point: pred_points.distance(point))

sources["distance"] = dist.min(axis=1).values
sources["PUID"] = list(dist.columns[dist.to_numpy().argmin(axis=1)])

# `validate` makes a non-unique PUID in `preds` raise instead of silently
# duplicating source rows.
sources = sources.merge(
    preds[["PUID", "prob"]],
    on="PUID",
    how="left",
    validate="many_to_one",
)
print(sources.shape)
sources.head(3)

(409, 13)


Unnamed: 0,SUID,source,iso,country,region,subregion,name,giga_id_school,clean,geometry,distance,PUID,prob
0,OSM-BWA-SCHOOL-00000000,OSM,BWA,Botswana,Africa,Sub-Saharan Africa,,,0,POINT (27.50699 -21.17129),548.527441,Francistown_1446,0.954139
1,OSM-BWA-SCHOOL-00000001,OSM,BWA,Botswana,Africa,Sub-Saharan Africa,Setlalekgosi Junior Secondary School,,0,POINT (27.50219 -21.17028),1.165115,Francistown_1446,0.954139
2,OSM-BWA-SCHOOL-00000002,OSM,BWA,Botswana,Africa,Sub-Saharan Africa,Francistown Senior Secondary School,,0,POINT (27.49136 -21.16794),319.329895,Francistown_627,0.954098


## Compute Distance from Predictions to the Nearest Master Record

In [8]:
# For every prediction, find the nearest master record with clean == 0 and
# store its MUID, name and distance on `preds`.
master_filtered = master[master["clean"] == 0]
master_points = data_utils._convert_crs(
    master_filtered.copy(), target_crs="EPSG:3857"
).set_index("MUID")

# One row per prediction, one column per master record (labelled by MUID).
# No per-row sort is needed: rows are aligned on MUID when the frame is built.
dist = preds.geometry.apply(lambda point: master_points.distance(point))

preds["MUID"] = list(dist.columns[dist.to_numpy().argmin(axis=1)])

# Resolve names through a MUID-indexed Series instead of re-filtering the
# master table once per prediction. drop_duplicates keeps the first row per
# MUID, which is the same row the previous `.values[0]` lookup returned.
name_by_muid = (
    master_filtered[["MUID", "name"]]
    .drop_duplicates("MUID")
    .set_index("MUID")["name"]
)
preds["master_name"] = preds["MUID"].map(name_by_muid)

preds["distance_to_nearest_master"] = dist.min(axis=1).values
print(preds.shape)
preds.head(3)

(1628, 12)


Unnamed: 0,prob,UID,sum,group,geometry,ADM1,ADM2,ADM3,PUID,MUID,master_name,distance_to_nearest_master
0,0.955591,26830,25.0,1368,POINT (2884891.394 -2830694.629),Gaborone,Gaborone,Gaborone,Gaborone_26830,UNICEF-BWA-SCHOOL-00000356,Bosele (RPC),231.106489
1,0.955543,21286,60.0,1369,POINT (2881333.994 -2834337.829),Gaborone,Gaborone,Gaborone,Gaborone_21286,UNICEF-BWA-SCHOOL-00000315,Bophirima (RPC),255.619427
2,0.95549,4020623,44.0,852,POINT (3056351.406 -2443132.836),Central,Tutume,Tonota,Tutume_4020623,UNICEF-BWA-SCHOOL-00000195,Rauwe,222.971033


## Compute Distance from Predictions to the Nearest OSM/Overture Record

In [9]:
# For every prediction, find the nearest OSM/Overture record with clean == 0
# and store its SUID, name and distance on `preds`.
sources_filtered = sources[sources["clean"] == 0][["SUID", "name", "geometry"]]
source_points = sources_filtered.to_crs("EPSG:3857").set_index("SUID")

# One row per prediction, one column per source record (labelled by SUID).
# No per-row sort is needed: rows are aligned on SUID when the frame is built.
dist = preds.geometry.apply(lambda point: source_points.distance(point))

preds["SUID"] = list(dist.columns[dist.to_numpy().argmin(axis=1)])

# Resolve names through a SUID-indexed Series instead of re-filtering the
# source table once per prediction. drop_duplicates keeps the first row per
# SUID, which is the same row the previous `.values[0]` lookup returned.
name_by_suid = (
    sources_filtered[["SUID", "name"]]
    .drop_duplicates("SUID")
    .set_index("SUID")["name"]
)
preds["source_name"] = preds["SUID"].map(name_by_suid)

preds["distance_to_nearest_osm_overture"] = dist.min(axis=1).values
print(preds.shape)
preds.head(3)

(1628, 15)


Unnamed: 0,prob,UID,sum,group,geometry,ADM1,ADM2,ADM3,PUID,MUID,master_name,distance_to_nearest_master,SUID,source_name,distance_to_nearest_osm_overture
0,0.955591,26830,25.0,1368,POINT (2884891.394 -2830694.629),Gaborone,Gaborone,Gaborone,Gaborone_26830,UNICEF-BWA-SCHOOL-00000356,Bosele (RPC),231.106489,OVERTURE-BWA-SCHOOL-00000059,Raphael Inspires Academy,406.411709
1,0.955543,21286,60.0,1369,POINT (2881333.994 -2834337.829),Gaborone,Gaborone,Gaborone,Gaborone_21286,UNICEF-BWA-SCHOOL-00000315,Bophirima (RPC),255.619427,OSM-BWA-SCHOOL-00000005,Bophirima Primary School,35.76614
2,0.95549,4020623,44.0,852,POINT (3056351.406 -2443132.836),Central,Tutume,Tonota,Tutume_4020623,UNICEF-BWA-SCHOOL-00000195,Rauwe,222.971033,OVERTURE-BWA-SCHOOL-00000228,Shashe River Senior School,1149.93949


## Save Results

In [25]:
# Write the enriched predictions to GeoJSON.
out_dir = os.path.join(cwd, "output", iso_code, "results", model_config["project"], "cams")
# Create the target folder first: the write fails if the directory is missing.
os.makedirs(out_dir, exist_ok=True)
pred_file = os.path.join(out_dir, f"{iso_code}_{model_config['config_name']}_cams.geojson")
preds.to_file(pred_file, driver="GeoJSON")

In [11]:
# Write the master records (with nearest-prediction columns) to GeoJSON.
out_dir = os.path.join(cwd, "output", iso_code, "results", model_config["project"])
# Create the target folder first: the write fails if the directory is missing.
os.makedirs(out_dir, exist_ok=True)
master_file = os.path.join(out_dir, f"{iso_code}_master.geojson")
master.to_file(master_file, driver="GeoJSON")

In [12]:
# Write the OSM/Overture records (with nearest-prediction columns) to GeoJSON.
out_dir = os.path.join(cwd, "output", iso_code, "results", model_config["project"])
# Create the target folder first: the write fails if the directory is missing.
os.makedirs(out_dir, exist_ok=True)
sources_file = os.path.join(out_dir, f"{iso_code}_osm_overture.geojson")
sources.to_file(sources_file, driver="GeoJSON")