## Imports and Setup

In [1]:
import os
import pandas as pd
import geopandas as gpd
import logging
import joblib
import torch

import sys
sys.path.insert(0, "../src")
import sat_download

sys.path.insert(0, "../utils/")
import post_utils
import config_utils
import data_utils

import matplotlib.pyplot as plt
from PIL import Image

%load_ext autoreload
%autoreload 2

## Load Config Files

In [6]:
# Run parameters: the country to process and the threshold value that
# accompanies it.
iso_code = "MOZ"
sum_threshold = 5

# The notebook is expected to run from a sub-directory of the project, so the
# project root is the parent of the current working directory.
cwd = os.path.dirname(os.getcwd())

# Locations of the two YAML configuration files, relative to the project root.
data_config_file = os.path.join(cwd, "configs/data_config.yaml")
model_config_file = os.path.join(cwd, "configs/cnn_configs/convnext_small_v01.yaml")

# Load the data configuration first, then the model configuration.
data_config = config_utils.load_config(data_config_file)
model_config = config_utils.load_config(model_config_file)

## Load Reference

In [8]:
# Load the OSM reference layer for the configured country. The file name is
# derived from `iso_code` so this cell follows the config cell instead of
# silently staying on one hard-coded country.
filename = os.path.join(
    cwd,
    data_config["vectors_dir"],
    data_config["pos_class"],
    "osm",
    f"{iso_code}_osm.geojson",
)

# Reproject the whole frame (not just a temporary copy) so the frame's CRS
# metadata and the coordinates stored in its geometry column always agree.
reference = gpd.read_file(filename).to_crs("EPSG:3857")

# Collapse every feature to its centroid, computed in the projected CRS.
reference["geometry"] = reference["geometry"].centroid
reference.head(3)

Unnamed: 0,giga_id_school,school_id,name,lat,lon,education_level,education_level_regional,school_type,school_type_govt,connectivity,...,nearest_LTE_distance,nearest_UMTS_id,nearest_UMTS_distance,nearest_GSM_id,nearest_GSM_distance,pop_within_1km,pop_within_2km,pop_within_3km,pop_within_10km,geometry
0,99f0bd85-d03f-4e16-85a5-ac6f66ff40e2,663190261,primary school,-19.642206,34.755461,Primary,Primary,,,,...,,,,,,,,,,POINT (3868960.199 -2230693.125)
1,935ec5c1-d73a-48e5-ae8f-e652178be77b,757390569,Maputo International School,-25.979337,32.588228,Unknown,Unknown,,,,...,,,,,,,,,,POINT (3627704.969 -2996522.005)
2,f06b1135-0c3d-41ad-96bb-8c42cccb4476,903054787,SOS Children Village,-23.883524,35.402704,Unknown,Unknown,,,,...,,,,,,,,,,POINT (3941011.038 -2739221.460)


## Load Master Dataset

In [15]:
# Load the master table for the configured country via the project helper,
# passing "unicef_clean" as the `name` argument, then preview the first rows.
master = post_utils.load_master(
    iso_code,
    data_config,
    name="unicef_clean",
)
master.head(n=3)

INFO:root:unicef_clean
0    11289
4     1511
2      220
Name: count, dtype: int64
INFO:root:Data dimensions: (13020, 10)


Unnamed: 0,MUID,source,iso,country,region,subregion,name,giga_id_school,unicef_clean,geometry
0,UNICEF-MOZ-SCHOOL-00000000,UNICEF,MOZ,Mozambique,Africa,Sub-Saharan Africa,Escola Primário do 1º Grau 4 de Outubro,0,0,POINT (33.23836 -19.21253)
1,UNICEF-MOZ-SCHOOL-00000001,UNICEF,MOZ,Mozambique,Africa,Sub-Saharan Africa,Escola Primária de Naicuainha,1,0,POINT (35.20014 -13.47617)
2,UNICEF-MOZ-SCHOOL-00000002,UNICEF,MOZ,Mozambique,Africa,Sub-Saharan Africa,Escola Primária de Lipapa,2,4,POINT (35.54036 -13.43500)


## Get Distance between Reference and Master

In [16]:
# Work on a copy of the master table converted to EPSG:3857 — the same CRS the
# reference centroids were computed in — so `master` itself keeps its original
# coordinates. (`_convert_crs` is a project helper; presumably it reprojects
# the frame to `target_crs` — confirm in data_utils.)
temp = data_utils._convert_crs(master.copy(), target_crs="EPSG:3857")

# For each master point keep only the distance to its nearest reference point.
# Taking the minimum inside the callback avoids materialising (and sorting) a
# full master x reference distance matrix just to reduce it afterwards; the
# resulting values are the same as a row-wise minimum over that matrix.
nearest_dist = temp.geometry.apply(lambda point: reference.distance(point).min())

# `.values` drops the index so the assignment is positional, matching the row
# order of `master`.
master["distance"] = nearest_dist.values
master.head(3)

Unnamed: 0,MUID,source,iso,country,region,subregion,name,giga_id_school,unicef_clean,geometry,distance
0,UNICEF-MOZ-SCHOOL-00000000,UNICEF,MOZ,Mozambique,Africa,Sub-Saharan Africa,Escola Primário do 1º Grau 4 de Outubro,0,0,POINT (33.23836 -19.21253),21754.066006
1,UNICEF-MOZ-SCHOOL-00000001,UNICEF,MOZ,Mozambique,Africa,Sub-Saharan Africa,Escola Primária de Naicuainha,1,0,POINT (35.20014 -13.47617),20576.5904
2,UNICEF-MOZ-SCHOOL-00000002,UNICEF,MOZ,Mozambique,Africa,Sub-Saharan Africa,Escola Primária de Lipapa,2,4,POINT (35.54036 -13.43500),35951.235519


## Filter Master Dataset

In [18]:
# Shape of the subset of `master` whose `unicef_clean` value is 0 and whose
# nearest-reference distance is below 250 (in the units of the CRS used for
# the distance computation).
master.loc[
    master["unicef_clean"].eq(0) & master["distance"].lt(250)
].shape

(394, 11)

In [19]:
# Write `result` as GeoJSON to a relative path (resolved against the current
# working directory).
# NOTE(review): `result` is not assigned in any cell visible in this notebook,
# so this cell presumably depends on leftover kernel state and would raise
# NameError after Restart Kernel -> Run All — TODO confirm where it is built.
# NOTE(review): the file name is prefixed "SEN" while `iso_code` is "MOZ" in
# the config cell — verify which country this output belongs to.
result.to_file("SEN_results_centroid.geojson", driver="GeoJSON")

In [56]:
# Write the master table as GeoJSON to a relative path. The file name is built
# from `iso_code` so the output is labelled with the country that was actually
# loaded, rather than a prefix left over from a different country's run.
master.to_file(f"{iso_code}_master.geojson", driver="GeoJSON")