# MobCLIP tutorial

### Obtain the pretrained MobCLIP embeddings for any given coordinates

In [1]:
import sys
sys.path.append('pretrained_distilled_model')

import torch
from distilled_model import *


In [2]:
# Location of the distilled MobCLIP checkpoint.
path = 'pretrained_distilled_model/distilled_MobCLIP.pth'

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Placeholder batch standing in for 32 locations, one (lon, lat) pair per row.
c = torch.randn(32, 2)

# The distilled model is used as a surrogate for MobCLIP.
model = load(path, device=device)
model.eval()  # switch the model to evaluation mode before inference
with torch.no_grad():
    # The input is cast to float64 and moved to the target device in one call.
    embeddings = model(c.to(device, dtype=torch.float64))
    # Bring the result back to host memory for inspection.
    embeddings = embeddings.detach().cpu()

In [3]:
# Display the embeddings returned for the placeholder batch.
embeddings

tensor([[-0.2800, -0.1108, -0.2344,  ..., -0.0620, -0.2259,  0.0011],
        [ 0.2111, -0.3647,  0.0584,  ..., -0.4883, -0.0922,  0.2911],
        [-0.0623, -0.1450, -0.3106,  ..., -0.4109, -0.0837,  0.1598],
        ...,
        [ 0.0433,  0.0646, -0.0423,  ...,  0.0755, -0.2586,  0.0385],
        [ 0.1259,  0.0634,  0.0735,  ...,  0.0633, -0.0774,  0.0441],
        [ 0.0742,  0.0022,  0.0201,  ...,  0.0134, -0.0470,  0.0343]],
       dtype=torch.float64)

In [4]:
# Check the output size; the recorded run shows one 128-value row per input location.
embeddings.shape

torch.Size([32, 128])

### Get nationwide region embeddings

In [12]:
# Load the table of H3 cells in China; the preview of this frame shows an `h3`
# cell id together with `longitude` and `latitude` columns.
# (The file name suggests H3 resolution 6 -- TODO confirm.)
# NOTE: unpickling can execute arbitrary code, so only read pickle files from a
# trusted source.
import pandas as pd

h3 = pd.read_pickle('embeddings/lv6_h3_china.pkl')

In [13]:
# Preview the loaded H3 table.
h3

Unnamed: 0,h3,longitude,latitude
0,8640e3cefffffff,104.057273,30.665787
1,86408859fffffff,108.937699,34.266360
2,864118b2fffffff,113.286798,23.134293
3,864118b27ffffff,113.348234,23.123088
4,864019627ffffff,106.515547,29.568050
...,...,...,...
195569,863c20b97ffffff,84.473605,31.179140
195570,86149c127ffffff,129.652586,47.129015
195571,86259d417ffffff,92.708253,41.567457
195572,8640eaa1fffffff,101.327904,30.969196


In [18]:
# Stack the longitude and latitude columns (in that order) into a two-column
# float32 tensor, one row per H3 cell.
# NOTE(review): the values are stored as float32 here and upcast to double right
# before inference; consider building the tensor as float64 to avoid rounding
# the coordinates -- confirm against how the model was trained.
coords = torch.tensor(
    h3.loc[:, ['longitude', 'latitude']].to_numpy(),
    dtype=torch.float32,
)

In [19]:
# Preview the coordinate tensor (longitude in column 0, latitude in column 1).
coords

tensor([[104.0573,  30.6658],
        [108.9377,  34.2664],
        [113.2868,  23.1343],
        ...,
        [ 92.7083,  41.5675],
        [101.3279,  30.9692],
        [117.1660,  48.4400]])

In [20]:
# Embed every H3 cell with the model loaded earlier in the notebook.
model.eval()  # make sure the model is in evaluation mode
with torch.no_grad():
    # Cast to float64 and move to the target device in a single call.
    embeddings = model(coords.to(device, dtype=torch.float64))
    # Copy the result back to host memory; this rebinds `embeddings`.
    embeddings = embeddings.detach().cpu()

In [23]:
# Display the embeddings computed for the H3 cell coordinates.
embeddings

tensor([[ 1.1620e+00, -3.7438e+00,  3.1581e+00,  ..., -5.0073e+00,
          2.0982e+00, -3.7038e+00],
        [-9.5660e-01, -9.5516e-01,  2.5612e+00,  ..., -2.9535e+00,
          3.4739e+00, -1.8794e+00],
        [ 1.2709e+00, -2.0818e+00,  2.2067e+00,  ..., -5.5405e-01,
          3.7166e+00, -3.8148e+00],
        ...,
        [ 4.6204e-02,  1.6791e-02,  5.6124e-02,  ...,  3.3333e-02,
         -1.7934e-02,  1.9150e-03],
        [ 3.4828e-02,  1.2354e-01,  2.1645e-01,  ..., -4.4098e-02,
         -1.5078e-02,  3.0726e-02],
        [ 3.2826e-02, -1.7798e-01, -5.1692e-02,  ...,  1.4076e-01,
         -5.8189e-02, -1.3192e-02]], dtype=torch.float64)

In [None]:
# Pair each H3 cell id with its embedding vector.
# `embeddings` is the CPU tensor produced by the nationwide inference cell; its
# rows follow the row order of `h3`, because `coords` was built from that frame.
# The previously referenced `ebd_array` is not defined anywhere in this notebook.
new_df = pd.DataFrame({
    'h3': h3['h3'],
    'ebd': embeddings.tolist(),  # nested lists, so each row stores one embedding vector
})