In [1]:
import numpy as np
import pandas as pd
import torch
import yaml
from scipy.spatial.distance import directed_hausdorff
from sklearn.neighbors import NearestNeighbors
from torchinfo import summary

from data.dataset import PolygonDataset
from model.nn import build_model
from train.trainer import Trainer

## Datasets

In [2]:
# Load the experiment configuration; it supplies the pickle paths read below
# and is later passed to build_model / Trainer.
with open('cfg/gae.yaml', 'r') as f:
    cfg = yaml.safe_load(f)

# NOTE(review): pd.read_pickle can execute arbitrary code embedded in the file.
# Only point cfg['osm'] / cfg['melb'] / cfg['test'] at trusted pickles.
osm_df = pd.read_pickle(cfg['osm'])
melb_df = pd.read_pickle(cfg['melb'])
glyph_df = pd.read_pickle(cfg['test'])

# Split each frame by its `trans` tag.
# NOTE(review): presumably 'o' = original, 'r' = rotated, 'sk' = skewed,
# 'sc' = scaled -- confirm against the data preparation code.
osm_df_o = osm_df[osm_df.trans == 'o'].reset_index(drop=True)
osm_df_r = osm_df[osm_df.trans == 'r'].reset_index(drop=True)

glyph_df_o = glyph_df[glyph_df.trans == 'o'].reset_index(drop=True)
glyph_df_r = glyph_df[glyph_df.trans == 'r'].reset_index(drop=True)
glyph_df_sk = glyph_df[glyph_df.trans == 'sk'].reset_index(drop=True)
glyph_df_sc = glyph_df[glyph_df.trans == 'sc'].reset_index(drop=True)

# Wrap every frame in a PolygonDataset. `glyph_set` (all transformations
# together) is built here but is not part of the `datasets` dict below.
glyph_set = PolygonDataset(glyph_df)
glyph_set_o = PolygonDataset(glyph_df_o)
glyph_set_r = PolygonDataset(glyph_df_r)
glyph_set_sk = PolygonDataset(glyph_df_sk)
glyph_set_sc = PolygonDataset(glyph_df_sc)

osm_set_o = PolygonDataset(osm_df_o)
osm_set_r = PolygonDataset(osm_df_r)
melb_set = PolygonDataset(melb_df)

# Evaluation sets, keyed by the name used in the reported metrics.
# Insertion order determines the order of the results.
datasets = {
    'glyph_set_o': glyph_set_o,
    'glyph_set_r': glyph_set_r,
    'glyph_set_sk': glyph_set_sk,
    'glyph_set_sc': glyph_set_sc,
    'osm_set_o': osm_set_o,
    'osm_set_r': osm_set_r,
    'melb_set': melb_set,
}

# Encoder backbones iterated over by the evaluation cells (written to cfg['nn']).
backbone = [
    'gcn',
    'gin',
    'edgc',
]

# Name of the checkpoint handed to Trainer.load_ckpt.
ckpt = 'epoch100'

## Baseline

In [3]:

# Baseline evaluation: embed every polygon with the baseline GCN encoder,
# retrieve nearest neighbours in embedding space, and report, per dataset, the
# mean directed Hausdorff distance from each query to its 1st..6th neighbour.
cfg['nn'] = 'gcn'
cfg['path'] = f'save/baseline/{cfg["nn"]}'

model = build_model(cfg=cfg)
trainer = Trainer(cfg=cfg)
# Use the shared checkpoint name ('epoch100') like the other evaluation cells.
model = trainer.load_ckpt(model, ckpt)
model.eval()  # inference only
print(summary(model))

# Send inputs to wherever the loaded model lives instead of hard-coding 'cuda'.
device = next(model.parameters()).device

metrics = {}
for key, dataset in datasets.items():
    zs = []
    with torch.no_grad():  # no autograd graph is needed for evaluation
        for data in dataset:
            # Relies on `.to()` moving the sample in place, as the original
            # call did -- TODO confirm for PolygonDataset items.
            data.to(device)
            z = model.encoder(data.pos, data.edge_index)
            # Average over the first axis (presumably nodes) to get one
            # vector per polygon.
            zs.append(z.mean(0).cpu().numpy())

    zs = np.stack(zs, axis=0)
    # Drop embeddings containing NaN, but remember which dataset item every
    # surviving row came from. Without this mapping the neighbour indices
    # (positions in the filtered matrix) would address the wrong polygons.
    valid_ids = np.flatnonzero(~np.isnan(zs).any(axis=1))
    zs = zs[valid_ids]

    # 7 neighbours: column 0 is used as the query, columns 1..6 as targets.
    # NOTE(review): assumes each point's closest neighbour is itself; ties
    # between duplicate embeddings could break that -- confirm.
    nbrs = NearestNeighbors(n_neighbors=7, algorithm='auto').fit(zs)
    _, idx = nbrs.kneighbors(zs)
    idx = valid_ids[idx]  # filtered-row positions -> dataset indices

    query_id = idx[:, 0]
    target_id = idx[:, 1:]

    # Directed (one-sided) Hausdorff distance from the query polygon's points
    # to each retrieved polygon's points.
    haus_dist = []
    for qid, tids in zip(query_id, target_id):
        query_pos = dataset[qid].pos.cpu().numpy()
        haus_dist.append([
            directed_hausdorff(query_pos, dataset[tid].pos.cpu().numpy())[0]
            for tid in tids
        ])
    # Average over queries: one value per neighbour rank.
    metrics[key] = np.stack(haus_dist, axis=0).mean(axis=0).tolist()

baseline = {cfg['nn']: metrics}
baseline

Layer (type:depth-idx)                   Param #
GAE                                      --
├─GCNBlock: 1-1                          --
│    └─GCNConv: 2-1                      64
│    │    └─SumAggregation: 3-1          --
│    │    └─Linear: 3-2                  128
│    └─GCNConv: 2-2                      64
│    │    └─SumAggregation: 3-3          --
│    │    └─Linear: 3-4                  4,096
│    └─LayerNorm: 2-3                    128
│    └─PReLU: 2-4                        1
├─GCNBlock: 1-2                          --
│    └─GCNConv: 2-5                      64
│    │    └─SumAggregation: 3-5          --
│    │    └─Linear: 3-6                  4,096
│    └─GCNConv: 2-6                      2
│    │    └─SumAggregation: 3-7          --
│    │    └─Linear: 3-8                  128
│    └─LayerNorm: 2-7                    128
│    └─PReLU: 2-8                        1
Total params: 8,900
Trainable params: 8,900
Non-trainable params: 0


{'gcn': {'glyph_set_o': [0.19340112338184645,
   0.22931758305124209,
   0.2457546524209462,
   0.2570902160310217,
   0.2642201329122376,
   0.271130051519615],
  'glyph_set_r': [0.27174103975662467,
   0.30228207983115396,
   0.31845408640338546,
   0.32816154017405236,
   0.3356573274667821,
   0.3432758444392137],
  'glyph_set_sk': [0.28453678169631685,
   0.310046591784396,
   0.3215947212214998,
   0.33077778914290673,
   0.33756855579958295,
   0.344623192449002],
  'glyph_set_sc': [0.22947526073232394,
   0.2570622683552507,
   0.2689646404888241,
   0.2784820437507517,
   0.28706436794436485,
   0.2936647169947797],
  'osm_set_o': [0.2187289262495796,
   0.23896038541758427,
   0.2507575436353715,
   0.26168156338216986,
   0.2658960036690511,
   0.27177802069840584],
  'osm_set_r': [0.32374269701017666,
   0.35345958226566326,
   0.3738194122668253,
   0.3848302114375793,
   0.3940975956203076,
   0.40091064795657166],
  'melb_set': [0.5889472451495906,
   0.598016289651936,


### No Aug

In [4]:
# Evaluation of the checkpoints stored under save/no_aug: for every backbone,
# embed each polygon, retrieve nearest neighbours in embedding space, and
# report the mean directed Hausdorff distance from each query to its
# 1st..6th neighbour, per dataset.
no_aug = {}
for nn in backbone:
    cfg['nn'] = nn
    cfg['path'] = f'save/no_aug/{cfg["nn"]}'

    model = build_model(cfg=cfg)
    trainer = Trainer(cfg=cfg)
    model = trainer.load_ckpt(model, ckpt)
    model.eval()  # inference only
    print(summary(model))

    # Send inputs to wherever the loaded model lives instead of hard-coding 'cuda'.
    device = next(model.parameters()).device

    metrics = {}
    for key, dataset in datasets.items():
        zs = []
        with torch.no_grad():  # no autograd graph is needed for evaluation
            for data in dataset:
                # Relies on `.to()` moving the sample in place, as the
                # original call did -- TODO confirm for PolygonDataset items.
                data.to(device)
                z = model.encoder(data.pos, data.edge_index)
                # Average over the first axis (presumably nodes) to get one
                # vector per polygon.
                zs.append(z.mean(0).cpu().numpy())

        zs = np.stack(zs, axis=0)
        # Drop embeddings containing NaN, but remember which dataset item
        # every surviving row came from. Without this mapping the neighbour
        # indices (positions in the filtered matrix) would address the wrong
        # polygons.
        valid_ids = np.flatnonzero(~np.isnan(zs).any(axis=1))
        zs = zs[valid_ids]

        # 7 neighbours: column 0 is used as the query, columns 1..6 as targets.
        # NOTE(review): assumes each point's closest neighbour is itself; ties
        # between duplicate embeddings could break that -- confirm.
        nbrs = NearestNeighbors(n_neighbors=7, algorithm='auto').fit(zs)
        _, idx = nbrs.kneighbors(zs)
        idx = valid_ids[idx]  # filtered-row positions -> dataset indices

        query_id = idx[:, 0]
        target_id = idx[:, 1:]

        # Directed (one-sided) Hausdorff distance from the query polygon's
        # points to each retrieved polygon's points.
        haus_dist = []
        for qid, tids in zip(query_id, target_id):
            query_pos = dataset[qid].pos.cpu().numpy()
            haus_dist.append([
                directed_hausdorff(query_pos, dataset[tid].pos.cpu().numpy())[0]
                for tid in tids
            ])

        # Average over queries: one value per neighbour rank.
        metrics[key] = np.stack(haus_dist, axis=0).mean(axis=0).tolist()

    no_aug[nn] = metrics
no_aug

Layer (type:depth-idx)                   Param #
GAE                                      --
├─GCNBlock: 1-1                          --
│    └─GCNConv: 2-1                      64
│    │    └─SumAggregation: 3-1          --
│    │    └─Linear: 3-2                  128
│    └─GCNConv: 2-2                      64
│    │    └─SumAggregation: 3-3          --
│    │    └─Linear: 3-4                  4,096
│    └─LayerNorm: 2-3                    128
│    └─PReLU: 2-4                        1
├─GCNBlock: 1-2                          --
│    └─GCNConv: 2-5                      64
│    │    └─SumAggregation: 3-5          --
│    │    └─Linear: 3-6                  4,096
│    └─GCNConv: 2-6                      2
│    │    └─SumAggregation: 3-7          --
│    │    └─Linear: 3-8                  128
│    └─LayerNorm: 2-7                    128
│    └─PReLU: 2-8                        1
Total params: 8,900
Trainable params: 8,900
Non-trainable params: 0
Layer (type:depth-idx)                  

{'gcn': {'glyph_set_o': [0.1836293434797566,
   0.21876723100320064,
   0.23649627942088003,
   0.2458537992172435,
   0.2528194315921117,
   0.26069935945650724],
  'glyph_set_r': [0.2635933798708933,
   0.29513436959562067,
   0.3114172351515137,
   0.32125130103892907,
   0.32988431619917247,
   0.3400803161064262],
  'glyph_set_sk': [0.27255797801877296,
   0.3018232777822065,
   0.3134741619421862,
   0.3210214944612618,
   0.33073491715176256,
   0.33727582578104603],
  'glyph_set_sc': [0.22474642429719932,
   0.2531248882066338,
   0.26598022142189665,
   0.2793006080638856,
   0.288418566730402,
   0.29387815003327733],
  'osm_set_o': [0.20832280266204697,
   0.23277260279311718,
   0.24552864518075185,
   0.25407961939315327,
   0.25999274718818866,
   0.2678507910296102],
  'osm_set_r': [0.3080726987981234,
   0.3443122552413619,
   0.35987834399145174,
   0.3795640052217382,
   0.3873413718309311,
   0.3960710629386443],
  'melb_set': [0.5795492882246671,
   0.58804430233519

### Aug

In [5]:
# Evaluation of the checkpoints stored under save/aug: for every backbone,
# embed each polygon, retrieve nearest neighbours in embedding space, and
# report the mean directed Hausdorff distance from each query to its
# 1st..6th neighbour, per dataset.
aug = {}
for nn in backbone:
    cfg['nn'] = nn
    cfg['path'] = f'save/aug/{cfg["nn"]}'

    model = build_model(cfg=cfg)
    trainer = Trainer(cfg=cfg)
    model = trainer.load_ckpt(model, ckpt)
    model.eval()  # inference only
    print(summary(model))

    # Send inputs to wherever the loaded model lives instead of hard-coding 'cuda'.
    device = next(model.parameters()).device

    metrics = {}
    for key, dataset in datasets.items():
        zs = []
        with torch.no_grad():  # no autograd graph is needed for evaluation
            for data in dataset:
                # Relies on `.to()` moving the sample in place, as the
                # original call did -- TODO confirm for PolygonDataset items.
                data.to(device)
                z = model.encoder(data.pos, data.edge_index)
                # Average over the first axis (presumably nodes) to get one
                # vector per polygon.
                zs.append(z.mean(0).cpu().numpy())

        zs = np.stack(zs, axis=0)
        # Drop embeddings containing NaN, but remember which dataset item
        # every surviving row came from. Without this mapping the neighbour
        # indices (positions in the filtered matrix) would address the wrong
        # polygons.
        valid_ids = np.flatnonzero(~np.isnan(zs).any(axis=1))
        zs = zs[valid_ids]

        # 7 neighbours: column 0 is used as the query, columns 1..6 as targets.
        # NOTE(review): assumes each point's closest neighbour is itself; ties
        # between duplicate embeddings could break that -- confirm.
        nbrs = NearestNeighbors(n_neighbors=7, algorithm='auto').fit(zs)
        _, idx = nbrs.kneighbors(zs)
        idx = valid_ids[idx]  # filtered-row positions -> dataset indices

        query_id = idx[:, 0]
        target_id = idx[:, 1:]

        # Directed (one-sided) Hausdorff distance from the query polygon's
        # points to each retrieved polygon's points.
        haus_dist = []
        for qid, tids in zip(query_id, target_id):
            query_pos = dataset[qid].pos.cpu().numpy()
            haus_dist.append([
                directed_hausdorff(query_pos, dataset[tid].pos.cpu().numpy())[0]
                for tid in tids
            ])

        # Average over queries: one value per neighbour rank.
        metrics[key] = np.stack(haus_dist, axis=0).mean(axis=0).tolist()

    aug[nn] = metrics
aug

Layer (type:depth-idx)                   Param #
GAE                                      --
├─GCNBlock: 1-1                          --
│    └─GCNConv: 2-1                      64
│    │    └─SumAggregation: 3-1          --
│    │    └─Linear: 3-2                  128
│    └─GCNConv: 2-2                      64
│    │    └─SumAggregation: 3-3          --
│    │    └─Linear: 3-4                  4,096
│    └─LayerNorm: 2-3                    128
│    └─PReLU: 2-4                        1
├─GCNBlock: 1-2                          --
│    └─GCNConv: 2-5                      64
│    │    └─SumAggregation: 3-5          --
│    │    └─Linear: 3-6                  4,096
│    └─GCNConv: 2-6                      2
│    │    └─SumAggregation: 3-7          --
│    │    └─Linear: 3-8                  128
│    └─LayerNorm: 2-7                    128
│    └─PReLU: 2-8                        1
Total params: 8,900
Trainable params: 8,900
Non-trainable params: 0
Layer (type:depth-idx)                  

{'gcn': {'glyph_set_o': [0.1742573151834923,
   0.2076035365413938,
   0.22280974487125146,
   0.23295085280731087,
   0.23977797363990203,
   0.24599701915093508],
  'glyph_set_r': [0.24559010579335833,
   0.27061514181150037,
   0.28556386501922515,
   0.29447431606807367,
   0.30450301900828275,
   0.3113597847954272],
  'glyph_set_sk': [0.2568813606901199,
   0.2792795141961066,
   0.2918279324762187,
   0.3005549457320044,
   0.30724101143934107,
   0.3126197794795191],
  'glyph_set_sc': [0.2088724774884869,
   0.2309006433505759,
   0.24357622822277938,
   0.2527860055028641,
   0.26029851899596795,
   0.2623044549550615],
  'osm_set_o': [0.2000889722942713,
   0.22010117658302122,
   0.2311979030939935,
   0.23826449996226393,
   0.24485599629639304,
   0.25117832168473675],
  'osm_set_r': [0.29211343621784647,
   0.32002316880817533,
   0.33840772187352725,
   0.3517080650243395,
   0.3590868804977204,
   0.3691515044401564],
  'melb_set': [0.5757804191570064,
   0.584604824045