## Imports and Setup

In [None]:
import pandas as pd
import geopandas as gpd
import logging
import joblib
import torch

import os

# Directory the notebook runs from. It used to be fixed to one user's home
# directory; it can now be overridden through the GIGA_ROOT environment
# variable and falls back to the original location when that is not set.
# The config paths further down are built from os.getcwd(), so they follow
# whatever directory is selected here.
# NOTE(review): the project-local imports that follow (src, utils) presumably
# resolve against this directory as well — confirm.
PROJECT_ROOT = os.environ.get("GIGA_ROOT", "/home/itingzon.unicef/giga")
os.chdir(PROJECT_ROOT)
from src import sat_download
from utils import post_utils
from utils import calib_utils
from utils import config_utils
from utils import data_utils
from utils import model_utils
from utils import plot_utils

%load_ext autoreload
%autoreload 2

## Load Config Files

In [168]:
# Run parameters that the later cells pass to post_utils.
iso_code = "SEN"
cam_method = "gradcam"
sum_threshold = -1
buffer_size = 25
calibrator = None

# Both YAML config files are located relative to the current working directory.
working_dir = os.getcwd()
data_config_file = os.path.join(working_dir, "configs/data_configs/data_config_ISO_AF.yaml")
model_config_file = os.path.join(working_dir, "configs/best_models.yaml")

# Load the data config first, then the model config.
data_config, model_config = (
    config_utils.load_config(config_path)
    for config_path in (data_config_file, model_config_file)
)

## Load Model Predictions

In [186]:
# Load the predictions for the configured country. All arguments are passed
# positionally, in the same order as before.
preds = post_utils.load_preds(
    iso_code,
    data_config,
    model_config,
    cam_method,
    sum_threshold,
    buffer_size,
    calibrator,
)
# Preview the first two rows.
preds.head(n=2)

Reading files for SEN...: 100%|████████████████████| 45/45 [00:07<00:00,  6.42it/s]                                                  
INFO:root:Data dimensions: (15967, 9)


Unnamed: 0,prob,UID,sum,geometry,group,ADM1,ADM2,ADM3,PUID
0,0.966634,23507,22.0,POINT (-1814593.415 1665931.749),7367,Diourbel,Diourbel,Ndindy,Diourbel_23507
1,0.966054,178498,15.0,POINT (-1811869.892 1464003.347),12109,Ziguinchor,Bignona,Sindian,Bignona_178498


## Load Government Data

In [93]:
# Load the government ("master") records for the configured country,
# then preview the first two rows.
master = post_utils.load_master(
    iso_code,
    data_config,
)
master.head(n=2)

INFO:root:clean
0    9033
2     794
1     238
Name: count, dtype: int64
INFO:root:Data dimensions: (10065, 13)


Unnamed: 0,MUID,source,iso,country,region,subregion,name,giga_id_school,clean,geometry,ADM1,ADM2,ADM3
0,UNICEF-SEN-SCHOOL-00000000,UNICEF,SEN,Senegal,Africa,Sub-Saharan Africa,EE ALIEU SAMB,7614c6c6-9aca-36ff-978b-22bfca59916a,0,POINT (-17.51261 14.74680),Dakar,Dakar,Almadies
1,UNICEF-SEN-SCHOOL-00000001,UNICEF,SEN,Senegal,Africa,Sub-Saharan Africa,EE EL HADJI BAYTIR SAMB,350fb172-bfef-331d-bd55-5c51139b9f48,0,POINT (-17.51196 14.74951),Dakar,Dakar,Almadies


## Load OSM and Overture Data

In [94]:
# Load the OSM / Overture records for the configured country,
# then preview the first two rows.
osm_overture = post_utils.load_osm_overture(
    iso_code,
    data_config,
)
osm_overture.head(n=2)

INFO:root:clean
0    784
Name: count, dtype: int64
INFO:root:Data dimensions: (784, 13)


Unnamed: 0,SUID,source,iso,country,region,subregion,name,giga_id_school,clean,geometry,ADM1,ADM2,ADM3
0,OSM-SEN-SCHOOL-00000000,OSM,SEN,Senegal,Africa,Sub-Saharan Africa,Ecole Nationale de Police,,0,POINT (-17.44890 14.68903),Dakar,Dakar,Plateau
1,OSM-SEN-SCHOOL-00000001,OSM,SEN,Senegal,Africa,Sub-Saharan Africa,CIFOP,,0,POINT (-16.90105 15.16665),Thies,Tivaoune,Meouane


## Save Results

In [7]:
# Persist each loaded table under its own `source` label.
# Only the predictions call also passes the CAM method.
post_utils.save_results(
    iso_code,
    preds,
    model_config,
    cam_method=cam_method,
    source="preds",
)
post_utils.save_results(
    iso_code,
    master,
    model_config,
    source="master",
)
post_utils.save_results(
    iso_code,
    osm_overture,
    model_config,
    source="osm_overture",
)

## Standardize Data

In [193]:
# Standardize each source, telling the helper which unique-id column it uses.
# Note: `preds` and `master` are rebound here, replacing the frames loaded in
# the earlier cells.
preds = post_utils.standardize_data(
    model_config,
    iso_code,
    source="preds",
    uid="UID",
)
master = post_utils.standardize_data(
    model_config,
    iso_code,
    source="master",
    uid="MUID",
)
osm = post_utils.standardize_data(
    model_config,
    iso_code,
    source="osm_overture",
    uid="SUID",
)
# Preview the first three standardized master rows.
master.head(n=3)

Unnamed: 0,giga_id_school,school_id,name,lat,lon,admin1,admin1_id_giga,admin2,admin2_id_giga,education_level,source,uninhabited,is_duplicated,predicted_proba,rurban
0,7614c6c6-9aca-36ff-978b-22bfca59916a,UNICEF-SEN-SCHOOL-00000000,EE ALIEU SAMB,14.746802,-17.512605,Dakar,SEN001,Dakar,SEN001001,,MASTER,False,False,,urban
1,350fb172-bfef-331d-bd55-5c51139b9f48,UNICEF-SEN-SCHOOL-00000001,EE EL HADJI BAYTIR SAMB,14.74951,-17.511958,Dakar,SEN001,Dakar,SEN001001,,MASTER,False,False,,urban
2,b1a38b2d-0a27-33bf-8155-2ce84433d07a,UNICEF-SEN-SCHOOL-00000002,EE CASTORS ASECNA,14.73014,-17.488706,Dakar,SEN001,Dakar,SEN001001,,MASTER,False,False,,urban


In [194]:
# Stack the three standardized tables (predictions, master, OSM/Overture) into
# one frame. Each table keeps its own row index, as before.
data = pd.concat([preds, master, osm])

# Name the output after the configured country instead of hard-coding "SEN",
# so changing iso_code cannot silently overwrite the Senegal file with another
# country's rows. With iso_code = "SEN" the file name is unchanged.
data.to_parquet(f"{iso_code}_validator_meta.parquet")

  data = pd.concat([preds, master, osm])
