## Imports and Setup

In [1]:
import pandas as pd
import geopandas as gpd
import logging
import joblib
import torch

import os

# Move the working directory up one level, presumably so that the `src` and
# `utils` imports and the relative `configs/...` paths resolve against the
# parent directory -- TODO confirm the intended root.
# The guard makes this cell idempotent: without it, every re-run of the cell
# climbs one more directory and silently breaks later relative paths. The
# marker lives in the kernel namespace, so a kernel restart (which also resets
# the working directory) re-enables the chdir.
if "_REPO_ROOT" not in globals():
    os.chdir('../')
    _REPO_ROOT = os.getcwd()
from src import sat_download
from utils import post_utils
from utils import calib_utils
from utils import config_utils
from utils import data_utils
from utils import model_utils
from utils import plot_utils

%load_ext autoreload
%autoreload 2

  return torch._C._cuda_getDeviceCount() > 0


## Load Config Files

In [2]:
# Run parameters used by the cells that follow.
# NOTE(review): the recorded outputs further down show MOZ rows, so they were
# produced with a different `iso_code` than the one set here -- re-run the
# notebook top to bottom before relying on them.
iso_code = "MNG"
cam_method = "hirescam"
sum_threshold = -1
buffer_size = 25
calibrator = None

# Both config files are addressed relative to the current working directory.
data_config_file, model_config_file = (
    os.path.join(os.getcwd(), relative_path)
    for relative_path in (
        "configs/data_configs/data_config_ISO_AF.yaml",
        "configs/best_models.yaml",
    )
)

# Load the data config first, then the model config (same order as before).
data_config = config_utils.load_config(data_config_file)
model_config = config_utils.load_config(model_config_file)

## Load Model Predictions

In [3]:
# Load the model predictions; all arguments are passed positionally, so their
# order must be kept as is.
# NOTE(review): the recorded run of this cell failed with a fiona DriverError
# on a stray `.amlignore` file inside the CAM output directory. Presumably
# `post_utils.load_preds` needs to skip non-vector files -- confirm and fix
# there, then re-run this cell.
preds = post_utils.load_preds(
    iso_code,
    data_config,
    model_config,
    cam_method,
    sum_threshold,
    buffer_size,
    calibrator,
)

# Preview the first two rows.
preds.head(2)

Reading files for MNG...:   0%|                    | 0/4 [00:00<?, ?it/s]ERROR:fiona._env:`/mnt/batch/tasks/shared/LS_root/mounts/clusters/itingzon-compute/code/Users/itingzon/20241011_dell_hpc_env/giga-global-school-mapping/output/MNG/results/GIGAv2/cams/ensemble/vit_b_16/hirescam/.amlignore' not recognized as a supported file format.
Reading files for MNG...:   0%|                    | 0/4 [00:00<?, ?it/s]


DriverError: '/mnt/batch/tasks/shared/LS_root/mounts/clusters/itingzon-compute/code/Users/itingzon/20241011_dell_hpc_env/giga-global-school-mapping/output/MNG/results/GIGAv2/cams/ensemble/vit_b_16/hirescam/.amlignore' not recognized as a supported file format.

## Load Government Data

In [259]:
# Load the master dataset for the configured `iso_code`.
master = post_utils.load_master(
    iso_code,
    data_config,
)

# Preview the first two rows.
master.head(2)

INFO:root:clean
0    9958
1    1771
2     722
Name: count, dtype: int64
INFO:root:Data dimensions: (12451, 13)


Unnamed: 0,MUID,source,iso,country,region,subregion,name,giga_id_school,clean,geometry,ADM1,ADM2,ADM3
0,UNICEF-MOZ-SCHOOL-00000000,UNICEF,MOZ,Mozambique,Africa,Sub-Saharan Africa,,0,0,POINT (35.10153 -17.41236),Tete,Mutarara,Nhamayabue
1,UNICEF-MOZ-SCHOOL-00000001,UNICEF,MOZ,Mozambique,Africa,Sub-Saharan Africa,,1,0,POINT (36.91100 -14.50489),Niassa,Metarica,Nacumua


## Load OSM and Overture Data

In [260]:
# Load the OSM / Overture dataset for the configured `iso_code`.
osm_overture = post_utils.load_osm_overture(
    iso_code,
    data_config,
)

# Preview the first two rows.
osm_overture.head(2)

INFO:root:clean
0    487
Name: count, dtype: int64
INFO:root:Data dimensions: (487, 13)


Unnamed: 0,SUID,source,iso,country,region,subregion,name,giga_id_school,clean,geometry,ADM1,ADM2,ADM3
0,OSM-MOZ-SCHOOL-00000000,OSM,MOZ,Mozambique,Africa,Sub-Saharan Africa,primary school,0,0,POINT (34.75546 -19.64221),Sofala,Dondo,Dondo
1,OSM-MOZ-SCHOOL-00000001,OSM,MOZ,Mozambique,Africa,Sub-Saharan Africa,Maputo International School,1,0,POINT (32.58823 -25.97934),Maputo City,Cidade De Maputo,Cidade De Maputo


## Save Results

In [262]:
# Persist each of the three datasets under its own `source` label.
# Only the predictions call forwards `cam_method`.
post_utils.save_results(
    iso_code,
    preds,
    model_config,
    cam_method=cam_method,
    source="preds",
)
post_utils.save_results(
    iso_code,
    master,
    model_config,
    source="master",
)
post_utils.save_results(
    iso_code,
    osm_overture,
    model_config,
    source="osm_overture",
)

## Standardize Data

In [270]:
# Standardize each source; every call names the `uid` column of its source.
# `preds` and `master` are rebound to the standardized frames, and the
# OSM / Overture result is stored under the shorter name `osm`.
preds = post_utils.standardize_data(
    model_config,
    iso_code,
    cam_method=cam_method,
    source="preds",
    uid="UID",
)
master = post_utils.standardize_data(
    model_config,
    iso_code,
    source="master",
    uid="MUID",
)
osm = post_utils.standardize_data(
    model_config,
    iso_code,
    source="osm_overture",
    uid="SUID",
)

# Preview the first three standardized prediction rows.
preds.head(3)

Unnamed: 0,giga_id_school,school_id,name,lat,lon,admin1,admin1_id_giga,admin2,admin2_id_giga,education_level,source,uninhabited,is_duplicated,predicted_proba,rurban
0,ML-MOZ-SCHOOL-76580000,ML-MOZ-SCHOOL-76580000,Unknown,-19.124442,33.470237,Manica,MOZ006,Cidade de Chimoio,MOZ006001,,ML,,,0.98088,urban
1,ML-MOZ-SCHOOL-24801000,ML-MOZ-SCHOOL-24801000,Unknown,-19.772812,34.876675,Sofala,MOZ007,Cidade da Beira,MOZ007001,,ML,,,0.98079,urban
2,ML-MOZ-SCHOOL-21286000,ML-MOZ-SCHOOL-21286000,Unknown,-19.748836,34.8512,Sofala,MOZ007,Cidade da Beira,MOZ007001,,ML,,,0.979622,urban


In [264]:
# Stack the three standardized frames into one table with a fresh 0..n-1
# index (`ignore_index=True` replaces the separate reset_index(drop=True)).
data = pd.concat([preds, master, osm], ignore_index=True)

# Write the combined table to a parquet file in the current working directory.
data.to_parquet(f'{iso_code}_validator_meta.parquet')

  data = pd.concat([preds, master, osm])
