# Get nationwide region embeddings

## Distilled MobCLIP embeddings

In [1]:
import sys
# Make the sibling `pretrained_distilled_model` directory importable.
sys.path.append('../pretrained_distilled_model')

import torch
# NOTE(review): wildcard import; presumably this is what supplies `load`, the only
# non-torch/pandas helper called in the cells below — confirm and import it by name.
from distilled_model import *

Load the distilled model as a surrogate for MobCLIP.

In [2]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Checkpoint of the distilled model that stands in for MobCLIP.
path = '../pretrained_distilled_model/distilled_MobCLIP.pth'
model = load(path, device=device)

Load the center coordinates (longitude, latitude) of the H3 cells.

In [3]:
import pandas as pd

# Pickled table of H3 cells and their coordinates.
# NOTE: unpickling executes code from the file, so only load pickles from a trusted source.
h3_path = 'lv6_h3_china.pkl'
h3 = pd.read_pickle(h3_path)

In [4]:
# Preview the table: one row per H3 cell with its `h3` index, `longitude` and `latitude`.
h3

Unnamed: 0,h3,longitude,latitude
0,8640e3cefffffff,104.057273,30.665787
1,86408859fffffff,108.937699,34.266360
2,864118b2fffffff,113.286798,23.134293
3,864118b27ffffff,113.348234,23.123088
4,864019627ffffff,106.515547,29.568050
...,...,...,...
195569,863c20b97ffffff,84.473605,31.179140
195570,86149c127ffffff,129.652586,47.129015
195571,86259d417ffffff,92.708253,41.567457
195572,8640eaa1fffffff,101.327904,30.969196


In [5]:
# Build the (longitude, latitude) tensor in float64 from the start. The model is fed
# double-precision input, and creating a float32 tensor first would round the
# coordinates before the up-cast, silently discarding precision.
coords = torch.tensor(h3[['longitude', 'latitude']].values, dtype=torch.float64)

model.eval()  # switch to evaluation mode before inference
with torch.no_grad():
    # Tensors produced under no_grad carry no autograd graph, so .detach() is not needed.
    embeddings = model(coords.to(device)).cpu()

In [6]:
# Sanity check: expect one embedding row per H3 cell.
embeddings.shape

torch.Size([195574, 128])

In [7]:
# Pair every H3 index with its embedding, stored as one plain Python list per row.
ebd_rows = embeddings.tolist()
df = pd.DataFrame({'h3': h3['h3'], 'ebd': ebd_rows})

In [8]:
# Preview the resulting table (columns `h3` and `ebd`).
df

Unnamed: 0,h3,ebd
0,8640e3cefffffff,"[1.1620392464487912, -3.7438002390840173, 3.15..."
1,86408859fffffff,"[-0.9566033939110425, -0.9551551677419909, 2.5..."
2,864118b2fffffff,"[1.27092297188211, -2.081754681914115, 2.20666..."
3,864118b27ffffff,"[1.3712669695304402, -2.031194048761742, 2.212..."
4,864019627ffffff,"[1.473237274721895, -3.666506090103306, 2.9035..."
...,...,...
195569,863c20b97ffffff,"[0.01666142594491266, 0.021135190911021007, 0...."
195570,86149c127ffffff,"[0.03265942597180866, -0.01914424230459001, -0..."
195571,86259d417ffffff,"[0.04620417538989387, 0.016791471451539183, 0...."
195572,8640eaa1fffffff,"[0.0348278866815696, 0.12354336209854194, 0.21..."


Save the embeddings for downstream validation.

In [9]:
# Persist the (h3, ebd) table so the downstream evaluation can load it.
out_path = 'distilled_MobCLIP.pkl'
df.to_pickle(out_path)

## SatCLIP embeddings

To obtain pretrained **SatCLIP** embeddings, first clone the repository.

In [None]:
# NOTE(review): presumably written for a fresh Colab-style runtime in which `sample_data`
# and `.config` are the only entries in the working directory; `git clone ... .` needs an
# empty directory — confirm before running this cell anywhere else.
!rm -r sample_data .config # Empty current directory
!git clone https://github.com/microsoft/satclip.git . # Clone SatCLIP repository

Install required packages.

In [None]:
!pip install lightning --quiet
!pip install rasterio --quiet
!pip install torchgeo --quiet

For comparison, we use the SatCLIP variant with a ViT16 vision encoder and $L=40$ Legendre polynomials for the spherical-harmonics computation in the location encoder.

In [None]:
# Download the pretrained SatCLIP checkpoint (ViT16 vision encoder, L=40) into the working directory.
!wget 'https://satclip.z13.web.core.windows.net/satclip/satclip-vit16-l40.ckpt'

In [10]:
import sys
# Make the `satclip/` sub-directory importable; presumably this is where the cloned
# repository keeps `load.py` — TODO confirm against the repository layout.
sys.path.append('./satclip')


import torch
from load import get_satclip

In [13]:
# Imported here so the cell does not rely on `pd` leaking in from a much earlier cell.
import pandas as pd

satclip_path = 'satclip-vit16-l40.ckpt'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


satclip_model = get_satclip(satclip_path, device=device) # Only loads location encoder by default
satclip_model.eval()

h3 = pd.read_pickle('lv6_h3_china.pkl')
# Build the (longitude, latitude) tensor in float64 from the start. The encoder is fed
# double-precision input, and creating a float32 tensor first would round the
# coordinates before the up-cast, silently discarding precision.
coords = torch.tensor(h3[['longitude', 'latitude']].values, dtype=torch.float64)

with torch.no_grad():
    # Tensors produced under no_grad carry no autograd graph, so .detach() is not needed.
    satclip_embeddings = satclip_model(coords.to(device)).cpu()

using pretrained moco vit16


In [14]:
# Pair every H3 index with its SatCLIP embedding, stored as one plain Python list per row.
satclip_rows = satclip_embeddings.tolist()
satclip_df = pd.DataFrame({'h3': h3['h3'], 'ebd': satclip_rows})

In [15]:
# Show the first two rows as a quick sanity check.
satclip_df.head(2)

Unnamed: 0,h3,ebd
0,8640e3cefffffff,"[0.7885097337042274, 0.668696744529086, 3.8145..."
1,86408859fffffff,"[-0.7540319008796809, 1.9027509927487722, -2.1..."


In [None]:
# Persist the SatCLIP (h3, ebd) table so the downstream evaluation can load it.
out_path = 'satclip.pkl'
satclip_df.to_pickle(out_path)

## GeoCLIP embeddings

First install the package.

In [None]:
!pip install geoclip    

In [1]:
from geoclip import LocationEncoder
import pandas as pd
import torch
import torch.nn as nn


# Instantiating LocationEncoder loads the pretrained weights automatically.
geoclip_model = LocationEncoder()
geoclip_model.eval()

h3 = pd.read_pickle('lv6_h3_china.pkl')

# Notice that GeoCLIP requires input to be (lat, lon), so the columns are selected in that order.
latlon = h3[['latitude', 'longitude']].values
coords = torch.tensor(latlon, dtype=torch.float32)

with torch.no_grad():
    geoclip_out = geoclip_model(coords)
geoclip_embeddings = geoclip_out.detach().cpu()

In [2]:
# Inspect the embedding tensor returned by the GeoCLIP location encoder.
geoclip_embeddings

tensor([[-0.0201,  0.0039,  0.0039,  ..., -0.0048, -0.0057, -0.0132],
        [ 0.0048, -0.0058, -0.0061,  ..., -0.0150, -0.0056, -0.0368],
        [-0.0102, -0.0120, -0.0040,  ...,  0.0081,  0.0006, -0.0314],
        ...,
        [ 0.0008, -0.0197,  0.0106,  ..., -0.0259,  0.0051,  0.0076],
        [ 0.0209,  0.0038, -0.0019,  ...,  0.0310, -0.0189,  0.0079],
        [ 0.0234,  0.0084,  0.0113,  ...,  0.0015, -0.0139,  0.0009]])

In [3]:
# Pair every H3 index with its GeoCLIP embedding, stored as one plain Python list per row.
geoclip_rows = geoclip_embeddings.tolist()
geoclip_df = pd.DataFrame({'h3': h3['h3'], 'ebd': geoclip_rows})

In [4]:
# Show the first two rows as a quick sanity check.
geoclip_df.head(2)

Unnamed: 0,h3,ebd
0,8640e3cefffffff,"[-0.02009962499141693, 0.0038699787110090256, ..."
1,86408859fffffff,"[0.004778123460710049, -0.005807604640722275, ..."


In [None]:
# Persist the GeoCLIP (h3, ebd) table so the downstream evaluation can load it.
out_path = 'geoclip.pkl'
geoclip_df.to_pickle(out_path)