## Imports and Setup

In [1]:
import os
import pandas as pd
import geopandas as gpd
import logging
import joblib
import torch

import sys
sys.path.insert(0, "../src")
import sat_download

sys.path.insert(0, "../utils/")
import post_utils
import config_utils
import data_utils

import matplotlib.pyplot as plt
from PIL import Image

%load_ext autoreload
%autoreload 2

## Load Config Files

In [2]:
# Run parameters. Later cells pass these to post_utils.load_data and use
# iso_code / calibrated when building output paths.
iso_code = "SEN"
sum_threshold = 1
buffer_size = 50
calibrated = True

# Base directory for config and output paths: the parent of the current
# working directory.
cwd = os.path.dirname(os.getcwd())

data_config_file = os.path.join(
    cwd, "configs/data_configs/data_config_ISO_AF.yaml"
)
data_config = config_utils.load_config(data_config_file)

# Alternative model config, currently disabled:
# model_config_file = os.path.join(cwd, "configs/cnn_configs/satlas-aerial_swinb_mi_01.yaml")
model_config_file = os.path.join(
    cwd, "configs/cnn_configs/convnext_large_v01.yaml"
)
model_config = config_utils.load_config(model_config_file)

## Load Model Predictions

In [3]:
# Keyword options for the prediction loader, gathered in one place.
pred_options = dict(
    source="pred",
    cam_model_config=model_config,
    buffer_size=buffer_size,
    calibrated=calibrated,
)
preds = post_utils.load_data(
    iso_code, data_config, model_config, sum_threshold, **pred_options
)

# Replace each prediction geometry with its centroid so that the later
# distance computations are point-to-point.
pred_centroids = preds["geometry"].centroid
preds["geometry"] = pred_centroids
preds.head(3)

INFO:root:Data dimensions: (13818, 9)


Unnamed: 0,prob,UID,sum,group,geometry,ADM1,ADM2,ADM3,PUID
0,0.991148,44707,38.0,0,POINT (-1766669.174 1444139.279),Sedhiou,Sedhiou,Djibabouya,Sedhiou_44707
1,0.990805,131374,28.0,1,POINT (-1686689.864 1574843.369),Kaffrine,Malem Hodar,Sagna,Malem Hodar_131374
2,0.990759,50441,9.0,2,POINT (-1764375.974 1426739.879),Sedhiou,Sedhiou,Djibabouya,Sedhiou_50441


## Load Government Data

In [4]:
# Government ("master") records for the selected country.
master = post_utils.load_data(
    iso_code,
    data_config,
    source="master",
)
master.head(3)

INFO:root:clean
0    9033
2     794
1     238
Name: count, dtype: int64
INFO:root:Data dimensions: (10065, 13)


Unnamed: 0,MUID,source,iso,country,region,subregion,name,giga_id_school,clean,geometry,ADM1,ADM2,ADM3
0,UNICEF-SEN-SCHOOL-00000000,UNICEF,SEN,Senegal,Africa,Sub-Saharan Africa,EE ALIEU SAMB,7614c6c6-9aca-36ff-978b-22bfca59916a,0,POINT (-17.51261 14.74680),Dakar,Dakar,Almadies
1,UNICEF-SEN-SCHOOL-00000001,UNICEF,SEN,Senegal,Africa,Sub-Saharan Africa,EE EL HADJI BAYTIR SAMB,350fb172-bfef-331d-bd55-5c51139b9f48,0,POINT (-17.51196 14.74951),Dakar,Dakar,Almadies
2,UNICEF-SEN-SCHOOL-00000002,UNICEF,SEN,Senegal,Africa,Sub-Saharan Africa,EE CASTORS ASECNA,b1a38b2d-0a27-33bf-8155-2ce84433d07a,0,POINT (-17.48871 14.73014),Dakar,Dakar,Almadies


## Load OSM and Overture Data

In [5]:
# OSM and Overture records for the selected country.
sources = post_utils.load_data(
    iso_code,
    data_config,
    source="osm_overture",
)
sources.head(3)

INFO:root:clean
0    784
Name: count, dtype: int64
INFO:root:Data dimensions: (784, 10)


Unnamed: 0,SUID,source,iso,country,region,subregion,name,giga_id_school,clean,geometry
0,OSM-SEN-SCHOOL-00000000,OSM,SEN,Senegal,Africa,Sub-Saharan Africa,Ecole Nationale de Police,,0,POINT (-17.44890 14.68903)
1,OSM-SEN-SCHOOL-00000001,OSM,SEN,Senegal,Africa,Sub-Saharan Africa,CIFOP,,0,POINT (-16.90105 15.16665)
2,OSM-SEN-SCHOOL-00000002,OSM,SEN,Senegal,Africa,Sub-Saharan Africa,Sainte Bernadette,,0,POINT (-17.46424 14.71035)


## Compute Distance from Master and OSM/Overture Records to the Nearest Prediction

In [6]:
# For every master record, find the nearest model prediction and attach its
# distance, PUID and probability.
#
# Distances are computed in EPSG:3857 units. `preds` is used as loaded --
# presumably already in EPSG:3857 judging by its coordinates; confirm in
# post_utils.load_data.
master_3857 = data_utils._convert_crs(master.copy(), target_crs="EPSG:3857")
preds_by_puid = preds[["geometry", "PUID"]].set_index("PUID")

# Reduce each point's distances to (min, argmin) immediately instead of
# materialising a dense (n_master x n_preds) matrix and sorting every row.
# On exact ties the first prediction in `preds` order wins.
nearest_dist, nearest_puid = [], []
for point in master_3857.geometry:
    dist_to_preds = preds_by_puid.distance(point)
    nearest_dist.append(dist_to_preds.min())
    nearest_puid.append(dist_to_preds.index[dist_to_preds.to_numpy().argmin()])

master["distance"] = nearest_dist
master["PUID"] = nearest_puid

# Drop any `prob` left over from a previous run of this cell so that
# re-running does not produce prob_x / prob_y columns.
master = master.drop(columns=["prob"], errors="ignore").merge(
    preds[["PUID", "prob"]], on="PUID", how="left"
)
print(master.shape)
master.head(3)

(10065, 16)


Unnamed: 0,MUID,source,iso,country,region,subregion,name,giga_id_school,clean,geometry,ADM1,ADM2,ADM3,distance,PUID,prob
0,UNICEF-SEN-SCHOOL-00000000,UNICEF,SEN,Senegal,Africa,Sub-Saharan Africa,EE ALIEU SAMB,7614c6c6-9aca-36ff-978b-22bfca59916a,0,POINT (-17.51261 14.74680),Dakar,Dakar,Almadies,100.944182,Dakar_1389,0.849016
1,UNICEF-SEN-SCHOOL-00000001,UNICEF,SEN,Senegal,Africa,Sub-Saharan Africa,EE EL HADJI BAYTIR SAMB,350fb172-bfef-331d-bd55-5c51139b9f48,0,POINT (-17.51196 14.74951),Dakar,Dakar,Almadies,11.46247,Dakar_1493,0.471868
2,UNICEF-SEN-SCHOOL-00000002,UNICEF,SEN,Senegal,Africa,Sub-Saharan Africa,EE CASTORS ASECNA,b1a38b2d-0a27-33bf-8155-2ce84433d07a,0,POINT (-17.48871 14.73014),Dakar,Dakar,Almadies,27.652982,Dakar_3195,0.970392


In [7]:
# For every OSM/Overture record, find the nearest model prediction and attach
# its distance, PUID and probability.
#
# Distances are computed in EPSG:3857 units. `preds` is used as loaded --
# presumably already in EPSG:3857 judging by its coordinates; confirm in
# post_utils.load_data.
sources_3857 = data_utils._convert_crs(sources.copy(), target_crs="EPSG:3857")
preds_by_puid = preds[["geometry", "PUID"]].set_index("PUID")

# Reduce each point's distances to (min, argmin) immediately instead of
# materialising a dense (n_sources x n_preds) matrix and sorting every row.
# On exact ties the first prediction in `preds` order wins.
nearest_dist, nearest_puid = [], []
for point in sources_3857.geometry:
    dist_to_preds = preds_by_puid.distance(point)
    nearest_dist.append(dist_to_preds.min())
    nearest_puid.append(dist_to_preds.index[dist_to_preds.to_numpy().argmin()])

sources["distance"] = nearest_dist
sources["PUID"] = nearest_puid

# Drop any `prob` left over from a previous run of this cell so that
# re-running does not produce prob_x / prob_y columns.
sources = sources.drop(columns=["prob"], errors="ignore").merge(
    preds[["PUID", "prob"]], on="PUID", how="left"
)
print(sources.shape)
sources.head(3)

(784, 13)


Unnamed: 0,SUID,source,iso,country,region,subregion,name,giga_id_school,clean,geometry,distance,PUID,prob
0,OSM-SEN-SCHOOL-00000000,OSM,SEN,Senegal,Africa,Sub-Saharan Africa,Ecole Nationale de Police,,0,POINT (-17.44890 14.68903),252.033321,Dakar_6094,0.584155
1,OSM-SEN-SCHOOL-00000001,OSM,SEN,Senegal,Africa,Sub-Saharan Africa,CIFOP,,0,POINT (-16.90105 15.16665),276.321685,Tivaoune_65681,0.755528
2,OSM-SEN-SCHOOL-00000002,OSM,SEN,Senegal,Africa,Sub-Saharan Africa,Sainte Bernadette,,0,POINT (-17.46424 14.71035),37.091788,Dakar_5099,0.848785


## Compute Distance between Predictions and Master

In [8]:
# For every prediction, find the nearest master record (restricted to rows
# with clean == 0) and attach its MUID, name and distance.
master_filtered = master[master["clean"] == 0]
master_3857 = data_utils._convert_crs(
    master_filtered.copy(), target_crs="EPSG:3857"
).set_index("MUID")

# Reduce each prediction's distances to (min, argmin) immediately instead of
# materialising a dense (n_preds x n_master) matrix and sorting every row.
# On exact ties the first record in `master_filtered` order wins.
nearest_dist, nearest_muid = [], []
for point in preds.geometry:
    dist_to_master = master_3857.distance(point)
    nearest_dist.append(dist_to_master.min())
    nearest_muid.append(dist_to_master.index[dist_to_master.to_numpy().argmin()])

preds["MUID"] = nearest_muid

# Keyed lookup instead of scanning master_filtered once per prediction.
# drop_duplicates keeps the first row per MUID, matching the original
# `.values[0]` choice if a MUID ever repeats.
name_by_muid = master_filtered.drop_duplicates(subset="MUID").set_index("MUID")["name"]
preds["master_name"] = preds["MUID"].map(name_by_muid)

preds["distance_to_nearest_master"] = nearest_dist
print(preds.shape)
preds.head(3)

(13818, 12)


Unnamed: 0,prob,UID,sum,group,geometry,ADM1,ADM2,ADM3,PUID,MUID,master_name,distance_to_nearest_master
0,0.991148,44707,38.0,0,POINT (-1766669.174 1444139.279),Sedhiou,Sedhiou,Djibabouya,Sedhiou_44707,UNICEF-SEN-SCHOOL-00001558,EE KAMOYA,32.498275
1,0.990805,131374,28.0,1,POINT (-1686689.864 1574843.369),Kaffrine,Malem Hodar,Sagna,Malem Hodar_131374,UNICEF-SEN-SCHOOL-00003603,EE SEANE,29.04088
2,0.990759,50441,9.0,2,POINT (-1764375.974 1426739.879),Sedhiou,Sedhiou,Djibabouya,Sedhiou_50441,UNICEF-SEN-SCHOOL-00001380,EE BANTANGNIMA,24.590923


## Compute Distance between Predictions and OSM/Overture

In [9]:
# For every prediction, find the nearest OSM/Overture record (restricted to
# rows with clean == 0) and attach its SUID, name and distance.
sources_filtered = sources[sources["clean"] == 0][["SUID", "name", "geometry"]]
sources_3857 = sources_filtered.to_crs("EPSG:3857").set_index("SUID")

# Reduce each prediction's distances to (min, argmin) immediately instead of
# materialising a dense (n_preds x n_sources) matrix and sorting every row.
# On exact ties the first record in `sources_filtered` order wins.
nearest_dist, nearest_suid = [], []
for point in preds.geometry:
    dist_to_sources = sources_3857.distance(point)
    nearest_dist.append(dist_to_sources.min())
    nearest_suid.append(dist_to_sources.index[dist_to_sources.to_numpy().argmin()])

preds["SUID"] = nearest_suid

# Keyed lookup instead of scanning sources_filtered once per prediction.
# drop_duplicates keeps the first row per SUID, matching the original
# `.values[0]` choice if a SUID ever repeats.
name_by_suid = sources_filtered.drop_duplicates(subset="SUID").set_index("SUID")["name"]
preds["source_name"] = preds["SUID"].map(name_by_suid)

preds["distance_to_nearest_osm_overture"] = nearest_dist
print(preds.shape)
preds.head(3)

(13818, 15)


Unnamed: 0,prob,UID,sum,group,geometry,ADM1,ADM2,ADM3,PUID,MUID,master_name,distance_to_nearest_master,SUID,source_name,distance_to_nearest_osm_overture
0,0.991148,44707,38.0,0,POINT (-1766669.174 1444139.279),Sedhiou,Sedhiou,Djibabouya,Sedhiou_44707,UNICEF-SEN-SCHOOL-00001558,EE KAMOYA,32.498275,OSM-SEN-SCHOOL-00000457,Ecole Coranique Dahara Moderne,8218.619262
1,0.990805,131374,28.0,1,POINT (-1686689.864 1574843.369),Kaffrine,Malem Hodar,Sagna,Malem Hodar_131374,UNICEF-SEN-SCHOOL-00003603,EE SEANE,29.04088,OSM-SEN-SCHOOL-00000456,École Mbadianene,17844.531081
2,0.990759,50441,9.0,2,POINT (-1764375.974 1426739.879),Sedhiou,Sedhiou,Djibabouya,Sedhiou_50441,UNICEF-SEN-SCHOOL-00001380,EE BANTANGNIMA,24.590923,OSM-SEN-SCHOOL-00000699,Marsassoum Elementary School,19716.485656


## Save Results

In [10]:
# Write the enriched predictions (nearest master / OSM-Overture matches) to
# GeoJSON under the model's "cams" results directory.
postfix = "_calibrated" if calibrated else ""
out_dir = os.path.join(
    cwd,
    "output",
    iso_code,
    "results",
    model_config["project"],
    "cams",
    model_config["config_name"] + postfix,
)
# Create the target directory if needed so the write does not fail when it
# is missing.
os.makedirs(out_dir, exist_ok=True)
pred_file = os.path.join(out_dir, f"{iso_code}_{model_config['config_name']}_cams{postfix}.geojson")
preds.to_file(pred_file, driver="GeoJSON")

In [11]:
# Write the master records (with nearest-prediction distance, PUID and prob)
# to GeoJSON in the project results directory.
out_dir = os.path.join(cwd, "output", iso_code, "results", model_config["project"])
# Create the target directory if needed so the write does not fail when it
# is missing.
os.makedirs(out_dir, exist_ok=True)
master_file = os.path.join(out_dir, f"{iso_code}_master.geojson")
master.to_file(master_file, driver="GeoJSON")

In [12]:
# Write the OSM/Overture records (with nearest-prediction distance, PUID and
# prob) to GeoJSON in the project results directory.
out_dir = os.path.join(cwd, "output", iso_code, "results", model_config["project"])
# Create the target directory if needed so the write does not fail when it
# is missing.
os.makedirs(out_dir, exist_ok=True)
sources_file = os.path.join(out_dir, f"{iso_code}_osm_overture.geojson")
sources.to_file(sources_file, driver="GeoJSON")