## Imports and Setup

In [1]:
import os
import pandas as pd
import geopandas as gpd
import logging
import joblib
import torch

import sys
sys.path.insert(0, "../src")
import sat_download

sys.path.insert(0, "../utils/")
import data_utils
import config_utils
import pred_utils
import embed_utils

%load_ext autoreload
%autoreload 2

## Load Config Files

In [10]:
# --- Run parameters ---------------------------------------------------------
# Country, administrative level and unit name to predict over, plus the grid
# settings that are handed to pred_utils.generate_pred_tiles later on.
iso_code, adm_level, shapename = "BWA", "ADM2", "Mahalapye"
spacing = buffer_size = 150

# NOTE: despite the name, `cwd` is the *parent* of the current working
# directory; every config and output path in this notebook is built from it.
cwd = os.path.dirname(os.getcwd())

# --- Config file locations --------------------------------------------------
data_config_file = os.path.join(cwd, "configs/data_config.yaml")
sat_config_file = os.path.join(cwd, "configs/sat_configs/sat_config_500x500_60cm.yaml")
sat_creds_file = os.path.join(cwd, "configs/sat_configs/sat_creds.yaml")
model_config_file = os.path.join(cwd, f"configs/model_configs/{iso_code}-dinov2_vitl14-SVC.yaml")

# --- Load configs -----------------------------------------------------------
# The credentials file goes through create_config rather than load_config.
data_config = config_utils.load_config(data_config_file)
sat_config = config_utils.load_config(sat_config_file)
sat_creds = config_utils.create_config(sat_creds_file)
model_config = config_utils.load_config(model_config_file)

## Generate Prediction Tiles

In [11]:
# Build the prediction tiles for the selected administrative unit.
tiles = pred_utils.generate_pred_tiles(
    data_config,
    iso_code,
    spacing,
    buffer_size,
    adm_level,
    shapename,
)
tiles = tiles.reset_index()

# UID mirrors the fresh positional index produced by reset_index().
tiles["UID"] = tiles.index.tolist()

# Persist only the tile geometries; the value returned by _makedir is used
# as the output directory path.
out_dir = data_utils._makedir(os.path.join(cwd, "output", iso_code))
out_file = os.path.join(out_dir, f"{iso_code}_{shapename}.gpkg")
tiles[["geometry"]].to_file(out_file, driver="GPKG")

print(f"Total tiles: {tiles.shape}")
tiles.head(3)

100%|████████████████████| 136/136 [03:45<00:00,  1.66s/it]                                                              


Total tiles: (32263, 12)


Unnamed: 0,index,geometry,shapeName,shapeISO,shapeID,shapeGroup,shapeType,points,index_right,type,properties,UID
0,1496156,"POLYGON ((3052469.260 -2622595.005, 3052469.26...",Mahalapye,,29308430B30543037727108,BWA,ADM2,POINT (3052319.260 -2622745.005),1037,Feature,"{'height': -1.0, 'confidence': -1.0}",0
1,1496157,"POLYGON ((3052469.260 -2622445.005, 3052469.26...",Mahalapye,,29308430B30543037727108,BWA,ADM2,POINT (3052319.260 -2622595.005),1037,Feature,"{'height': -1.0, 'confidence': -1.0}",1
2,1497176,"POLYGON ((3052619.260 -2622595.005, 3052619.26...",Mahalapye,,29308430B30543037727108,BWA,ADM2,POINT (3052469.260 -2622745.005),1037,Feature,"{'height': -1.0, 'confidence': -1.0}",2


## Download Satellite Images

In [12]:
# Directory that receives the downloaded images for this administrative unit.
sat_dir = os.path.join(cwd, "output", iso_code, shapename)

# Work on a copy so `tiles` keeps its own geometry column; the copy's
# geometry is replaced by the "points" column before downloading.
data = tiles.copy()
data["geometry"] = data["points"]

sat_download.download_sat_images(
    sat_creds, sat_config, data=data, out_dir=sat_dir
)

INFO:root:Data dimensions: (32263, 14), CRS: EPSG:3857
100%|████████████████████| 32263/32263 [8:56:05<00:00,  1.00it/s]                                                        


## Load Model

In [13]:
# Fetch the DINOv2 model named by model_config["embed_model"] via torch.hub.
model = torch.hub.load("facebookresearch/dinov2", model_config["embed_model"])
# Tag the module with its config name (presumably read by embed_utils when
# naming the saved embeddings - TODO confirm).
model.name = model_config["embed_model"]

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")

# The model is only used for inference here, so switch it to eval mode after
# moving it to the device. Both calls return the module itself, so the cell
# still displays the model summary.
model.to(device).eval()

Using cache found in /home/itingzon.unicef/.cache/torch/hub/facebookresearch_dinov2_main
INFO:dinov2:using MLP layer as FFN


DinoVisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14))
    (norm): Identity()
  )
  (blocks): ModuleList(
    (0-23): 24 x NestedTensorBlock(
      (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (attn): MemEffAttention(
        (qkv): Linear(in_features=1024, out_features=3072, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=1024, out_features=1024, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): LayerScale()
      (drop_path1): Identity()
      (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=1024, out_features=4096, bias=True)
        (act): GELU(approximate='none')
        (fc2): Linear(in_features=4096, out_features=1024, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
      (ls2): LayerScale()
      (drop_path2): Identity()
    )


## Generate Embeddings

In [14]:
# Prefix passed to the embedding helper as `name`; a later cell reuses it for
# the results file name.
name = f"{iso_code}_{shapename}"

# NOTE: this rebinds `out_dir` to a *relative* path (no `cwd` prefix).
out_dir = os.path.join("output", iso_code)

# Compute embeddings for the images stored under `sat_dir`.
embeddings = embed_utils.get_image_embeddings(
    model_config,
    data,
    model,
    in_dir=sat_dir,
    out_dir=out_dir,
    name=name,
)
embeddings.head(3)

100%|████████████████████| 32263/32263 [11:18:13<00:00,  1.26s/it]                                                       
INFO:root:Saved to /home/itingzon.unicef/giga/output/BWA/BWA_Mahalapye_dinov2_vitl14_embeds.csv


Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
UID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,-1.059438,0.446116,0.020766,1.392747,-0.249886,-1.723373,2.27053,-1.62888,-1.021884,0.576728,...,-1.078665,-0.836093,0.212806,2.260133,0.24332,2.158025,-2.131413,2.603458,-0.51164,-0.500217
1,-2.290388,0.361234,-0.721791,1.683992,-0.079404,-1.542805,2.641819,-1.111953,-0.35199,1.120521,...,-0.568002,-1.913545,1.192743,1.682427,0.953533,2.06618,-1.291087,2.015403,-0.649356,-0.908426
2,-2.168535,1.645123,-0.601397,0.786744,0.165029,-2.241012,2.956747,-1.955167,-1.214654,1.435781,...,-1.044319,-1.749654,0.450145,2.253536,0.956275,2.374974,-1.587729,1.8851,-0.265138,0.014655


## Model Prediction

In [15]:
# Locate the saved classifier: <cwd>/<exp_dir>/<config_name>/<config_name>.pkl
config_name = model_config['config_name']
exp_dir = os.path.join(cwd, model_config["exp_dir"], config_name)
model_file = os.path.join(exp_dir, f"{config_name}.pkl")

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load model files from a trusted source.
# NOTE: this rebinds `model`, replacing the torch embedding model loaded in an
# earlier cell; re-running the embedding cell after this one would receive
# this object instead.
model = joblib.load(model_file)
model

In [16]:
# Predictions come back as a plain array and are attached to `tiles` by
# position, so the embedding rows must be in exactly the same UID order as the
# tiles. Fail loudly instead of silently mislabelling tiles if they are not
# (e.g. embeddings reloaded from a file in a different order).
if not pd.Index(tiles["UID"]).equals(embeddings.index):
    raise ValueError(
        "Embedding rows are not aligned with tiles['UID']; "
        "refusing to assign predictions positionally."
    )

# Classify every tile from its embedding vector.
preds = model.predict(embeddings)
tiles["pred"] = preds

# Keep only the columns needed for the exported prediction layer.
results = gpd.GeoDataFrame(
    tiles[["UID", "geometry", "shapeName", "pred"]], geometry="geometry"
)
results.head(3)



Unnamed: 0,UID,geometry,shapeName,pred
0,0,"POLYGON ((3052469.260 -2622595.005, 3052469.26...",Mahalapye,non_school
1,1,"POLYGON ((3052469.260 -2622445.005, 3052469.26...",Mahalapye,non_school
2,2,"POLYGON ((3052619.260 -2622595.005, 3052619.26...",Mahalapye,non_school


In [17]:
# Write the per-tile predictions to a GeoPackage in the country's output
# directory.
results_dir = os.path.join(cwd, "output", iso_code)
outfile = os.path.join(results_dir, f"{name}_results.gpkg")
results.to_file(outfile, driver="GPKG")