In [1]:
import pickle
import os
import numpy as np
import glob
from matplotlib import pyplot as plt

In [2]:
# Selects the hyper-parameter sweep to inspect. The loading cell looks for
# <root>/<experiment>/<prefix><i>/result.txt with i in range(num_configurations).
root = './results/'
experiment = 'REDDITBINARY_tuning_redditbinary_20210219_baseline'
prefix = 'REDDITBINARY-'
num_configurations = 64

In [55]:
# parse aggregated results

def parse_results(stream):
    """Parse one aggregated result file.

    Two kinds of lines are recognised, everything else is ignored:

    * ``Accuracy: <mean> ± <std>`` -- the aggregated score;
    * ``--<key>: <value>`` -- one configuration argument. The key must start
      with exactly two dashes; the value is everything after the first colon,
      so values that themselves contain ``:`` are kept intact.

    Lines that start with ``--`` but carry no key or no colon are skipped.

    Args:
        stream: iterable of text lines (e.g. an open file handle).

    Returns:
        Tuple ``(mean, std, conf)`` where ``mean`` and ``std`` are floats and
        ``conf`` maps argument names (without the leading dashes) to their
        string values.

    Raises:
        ValueError: if the stream has no ``Accuracy`` line, if that line has
            no ``±`` separated pair, or if the numbers cannot be parsed.
    """
    mean = std = None
    conf = dict()
    for raw_line in stream:
        head, colon, tail = raw_line.strip().partition(':')
        head = head.strip()
        if head == 'Accuracy':
            result = tail.strip().split('±')
            if len(result) < 2:
                raise ValueError(
                    "malformed accuracy line, expected '<mean> ± <std>': {!r}".format(raw_line))
            mean = float(result[0].strip())
            std = float(result[1].strip())
        elif colon and head.startswith('--') and len(head) > 2 and head[2] != '-':
            conf[head[2:]] = tail.strip()
    if mean is None:
        # Without this guard the function would fail with an opaque
        # UnboundLocalError on the return statement.
        raise ValueError("no 'Accuracy: <mean> ± <std>' line found in results")
    return (mean, std, conf)

def load_results(folder_template, num_configurations):
    """Parse ``result.txt`` for every configuration of one sweep.

    Args:
        folder_template: path containing one ``{}`` placeholder that is filled
            with the configuration index.
        num_configurations: number of configuration indices to try,
            starting from 0.

    Returns:
        List of ``(mean, std, conf, index)`` tuples, one per configuration
        whose ``result.txt`` exists, in increasing index order.
    """
    results = list()
    for index in range(num_configurations):
        path = os.path.join(folder_template.format(index), 'result.txt')
        try:
            with open(path, 'r') as handle:
                results.append((*parse_results(handle), index))
        except FileNotFoundError:
            # This configuration has no result file; leave it out.
            continue
    return results

folder = os.path.join(root, experiment, prefix+'{}')
res_list = load_results(folder, num_configurations)
print('Found {} result(s).'.format(len(res_list)))

Found 64 result(s).


In [56]:
# rank and inspect

# Best mean accuracy first; ties keep their original (configuration index) order.
ranked = sorted(res_list, key=lambda x: -x[0])
inspect_args = [
    'batch_size',
    'drop_rate',
    'final_readout',
    'lr',
    'lr_scheduler_decay_rate',
    'lr_scheduler_decay_steps']
k = 5

# Slice instead of indexing ranked[i] for i in range(k): when fewer than k
# results were found this prints what is available rather than raising IndexError.
for position, (mean, std, conf, conf_id) in enumerate(ranked[:k], start=1):
    print('-------------------------------------------------------')
    print('{} ({}): {:.4f} ± {:.4f}\n'.format(position, conf_id, mean, std))
    # Only the arguments listed in inspect_args are shown, in file order.
    for key, value in conf.items():
        if key in inspect_args:
            print('\t{}: {}'.format(key, value))
    print()

-------------------------------------------------------
1 (5): 0.8610 ± 0.0234

	batch_size: 32
	drop_rate: 0.0
	final_readout: mean
	lr: 0.01
	lr_scheduler_decay_rate: 0.5
	lr_scheduler_decay_steps: 20

-------------------------------------------------------
2 (12): 0.8580 ± 0.0136

	batch_size: 32
	drop_rate: 0.0
	final_readout: sum
	lr: 0.01
	lr_scheduler_decay_rate: 0.5
	lr_scheduler_decay_steps: 50

-------------------------------------------------------
3 (13): 0.8560 ± 0.0173

	batch_size: 32
	drop_rate: 0.0
	final_readout: sum
	lr: 0.01
	lr_scheduler_decay_rate: 0.5
	lr_scheduler_decay_steps: 20

-------------------------------------------------------
4 (2): 0.8540 ± 0.0218

	batch_size: 32
	drop_rate: 0.0
	final_readout: mean
	lr: 0.001
	lr_scheduler_decay_rate: 0.9
	lr_scheduler_decay_steps: 50

-------------------------------------------------------
5 (7): 0.8540 ± 0.0196

	batch_size: 32
	drop_rate: 0.0
	final_readout: mean
	lr: 0.01
	lr_scheduler_decay_rate: 0.9
	lr_schedu

In [85]:
# Points the same loading/ranking cells at a second sweep.
# NOTE(review): 'reddditbinary' is spelled with three d's, unlike the baseline
# experiment name. The loading cell below still found 36 result files, so this
# presumably matches the folder on disk — confirm before "correcting" it.
root = './results/'
experiment = 'REDDITBINARY_tuning_reddditbinary_20210219'
prefix = 'REDDITBINARY-'
num_configurations = 64

In [88]:
# Re-scan the configuration folders of the currently selected experiment.
folder = os.path.join(root, experiment, prefix+'{}')
res_list = []
for i in range(num_configurations):
    path = os.path.join(folder.format(i), 'result.txt')
    try:
        with open(path, 'r') as handle:
            parsed = parse_results(handle)
    except FileNotFoundError:
        # Configuration i has no result file; it is simply left out.
        continue
    res_list.append((*parsed, i))
print('Found {} result(s).'.format(len(res_list)))

Found 36 result(s).


In [89]:
# rank and inspect

# Best mean accuracy first; ties keep their original (configuration index) order.
ranked = sorted(res_list, key=lambda x: -x[0])
inspect_args = [
    'batch_size',
    'drop_rate',
    'final_readout',
    'lr',
    'lr_scheduler_decay_rate',
    'lr_scheduler_decay_steps']
k = 5

# ranked[:k] copes with sweeps that produced fewer than k results; indexing
# ranked[i] for i in range(k) would raise IndexError in that case.
for position, entry in enumerate(ranked[:k], start=1):
    mean, std, conf, conf_id = entry
    print('-------------------------------------------------------')
    print('{} ({}): {:.4f} ± {:.4f}\n'.format(position, conf_id, mean, std))
    # Only the arguments listed in inspect_args are shown, in file order.
    for key in conf:
        if key in inspect_args:
            print('\t{}: {}'.format(key, conf[key]))
    print()

-------------------------------------------------------
1 (9): 0.8595 ± 0.0255

	batch_size: 32
	drop_rate: 0.0
	final_readout: sum
	lr: 0.001
	lr_scheduler_decay_rate: 0.5
	lr_scheduler_decay_steps: 20

-------------------------------------------------------
2 (27): 0.8555 ± 0.0216

	batch_size: 32
	drop_rate: 0.5
	final_readout: sum
	lr: 0.001
	lr_scheduler_decay_rate: 0.9
	lr_scheduler_decay_steps: 20

-------------------------------------------------------
3 (2): 0.8555 ± 0.0231

	batch_size: 32
	drop_rate: 0.0
	final_readout: mean
	lr: 0.001
	lr_scheduler_decay_rate: 0.9
	lr_scheduler_decay_steps: 50

-------------------------------------------------------
4 (0): 0.8550 ± 0.0275

	batch_size: 32
	drop_rate: 0.0
	final_readout: mean
	lr: 0.001
	lr_scheduler_decay_rate: 0.5
	lr_scheduler_decay_steps: 50

-------------------------------------------------------
5 (25): 0.8535 ± 0.0210

	batch_size: 32
	drop_rate: 0.5
	final_readout: sum
	lr: 0.001
	lr_scheduler_decay_rate: 0.5
	lr_sch

In [69]:
# pickle is already imported in the first cell; this re-import is redundant but harmless.
import pickle
# NOTE(review): pickle.load can execute arbitrary code — only open files from a trusted source.
# The path is relative to the notebook's working directory.
with open('gsn_redditb_splits.pkl', 'rb') as handle:
    gsn_splits = pickle.load(handle)

In [70]:
# Display entry 1 of the loaded splits (shown below as a pair of index arrays).
gsn_splits[1]

(array([   0,    1,    2, ..., 1997, 1998, 1999]),
 array([  17,   27,   47,   62,   65,   97,  122,  140,  172,  179,  199,
         210,  216,  217,  237,  239,  263,  273,  279,  288,  295,  300,
         320,  323,  336,  342,  344,  346,  362,  371,  372,  374,  382,
         396,  401,  416,  417,  419,  427,  431,  437,  441,  444,  472,
         496,  516,  524,  525,  531,  537,  551,  554,  555,  597,  602,
         604,  615,  617,  621,  631,  638,  643,  662,  667,  673,  711,
         713,  721,  733,  735,  765,  770,  777,  798,  819,  821,  823,
         830,  833,  837,  841,  848,  849,  859,  863,  865,  869,  880,
         889,  895,  897,  901,  907,  921,  953,  954,  959,  966,  970,
         980,  987, 1014, 1015, 1036, 1038, 1039, 1041, 1045, 1057, 1063,
        1064, 1075, 1090, 1098, 1126, 1134, 1137, 1168, 1172, 1185, 1200,
        1213, 1220, 1230, 1245, 1255, 1268, 1270, 1287, 1288, 1294, 1301,
        1316, 1336, 1342, 1352, 1364, 1429, 1452, 1453, 1469,

In [71]:
# pickle is already imported in the first cell; this re-import is redundant but harmless.
import pickle
# NOTE(review): pickle.load can execute arbitrary code — only open files from a trusted source.
# Presumably the '_0' suffix identifies a run/seed — confirm with whoever wrote the file.
with open('mpsn_redditb_splits_0.pkl', 'rb') as handle:
    mpsn_splits = pickle.load(handle)

In [72]:
# Display entry 1 of the splits loaded from 'mpsn_redditb_splits_0.pkl' for visual comparison.
mpsn_splits[1]

(array([   0,    1,    2, ..., 1997, 1998, 1999]),
 array([  17,   27,   47,   62,   65,   97,  122,  140,  172,  179,  199,
         210,  216,  217,  237,  239,  263,  273,  279,  288,  295,  300,
         320,  323,  336,  342,  344,  346,  362,  371,  372,  374,  382,
         396,  401,  416,  417,  419,  427,  431,  437,  441,  444,  472,
         496,  516,  524,  525,  531,  537,  551,  554,  555,  597,  602,
         604,  615,  617,  621,  631,  638,  643,  662,  667,  673,  711,
         713,  721,  733,  735,  765,  770,  777,  798,  819,  821,  823,
         830,  833,  837,  841,  848,  849,  859,  863,  865,  869,  880,
         889,  895,  897,  901,  907,  921,  953,  954,  959,  966,  970,
         980,  987, 1014, 1015, 1036, 1038, 1039, 1041, 1045, 1057, 1063,
        1064, 1075, 1090, 1098, 1126, 1134, 1137, 1168, 1172, 1185, 1200,
        1213, 1220, 1230, 1245, 1255, 1268, 1270, 1287, 1288, 1294, 1301,
        1316, 1336, 1342, 1352, 1364, 1429, 1452, 1453, 1469,

In [75]:
# pickle is already imported in the first cell; this re-import is redundant but harmless.
import pickle
# NOTE(review): pickle.load can execute arbitrary code — only open files from a trusted source.
# This rebinds mpsn_splits, replacing the object loaded from 'mpsn_redditb_splits_0.pkl'.
with open('mpsn_redditb_splits_9.pkl', 'rb') as handle:
    mpsn_splits = pickle.load(handle)

In [76]:
# Display entry 1 of whichever splits object mpsn_splits currently refers to
# (the '_9' file if the cell directly above was the last one run).
mpsn_splits[1]

(array([   0,    1,    2, ..., 1997, 1998, 1999]),
 array([  17,   27,   47,   62,   65,   97,  122,  140,  172,  179,  199,
         210,  216,  217,  237,  239,  263,  273,  279,  288,  295,  300,
         320,  323,  336,  342,  344,  346,  362,  371,  372,  374,  382,
         396,  401,  416,  417,  419,  427,  431,  437,  441,  444,  472,
         496,  516,  524,  525,  531,  537,  551,  554,  555,  597,  602,
         604,  615,  617,  621,  631,  638,  643,  662,  667,  673,  711,
         713,  721,  733,  735,  765,  770,  777,  798,  819,  821,  823,
         830,  833,  837,  841,  848,  849,  859,  863,  865,  869,  880,
         889,  895,  897,  901,  907,  921,  953,  954,  959,  966,  970,
         980,  987, 1014, 1015, 1036, 1038, 1039, 1041, 1045, 1057, 1063,
        1064, 1075, 1090, 1098, 1126, 1134, 1137, 1168, 1172, 1185, 1200,
        1213, 1220, 1230, 1245, 1255, 1268, 1270, 1287, 1288, 1294, 1301,
        1316, 1336, 1342, 1352, 1364, 1429, 1452, 1453, 1469,

In [3]:
import torch
# Load a processed REDDITBINARY file from a graph-substructure-networks checkout.
# NOTE(review): the relative path assumes that checkout sits three directories
# above the working directory — confirm the layout before re-running.
gsn_data = torch.load('./../../../git/graph-substructure-networks/datasets/social/REDDITBINARY/processed/local/cycle_graph_3.pt')

In [4]:
# Peek at the first four columns of edge_index for the first element of gsn_data[0].
gsn_data[0][0].edge_index[:,:4]

tensor([[  0, 167, 167,   1],
        [167,  47, 171,  57]])

In [5]:
# NOTE(review): pickle.load can execute arbitrary code — only open files from a trusted source.
# graph_list is rebound later by the load_tu_graph_dataset cell, so cells that
# read it depend on which of the two ran last.
with open('./../datasets/REDDITBINARY/raw/REDDITBINARY_graph_list_degree_as_tag_False.pkl', 'rb') as handle:
    graph_list = pickle.load(handle)

In [6]:
import sys
# NOTE(review): machine-specific absolute path. Presumably required so that
# unpickling can import the classes of the stored objects — confirm, and make
# the location configurable.
sys.path.append('/home/ubuntu/git/scn')
# NOTE(review): pickle.load can execute arbitrary code — only open files from a trusted source.
with open('./../datasets/REDDITBINARY/complex_dim2_mean/REDDITBINARY_complex_list.pkl', 'rb') as handle:
    complex_list = pickle.load(handle)

In [7]:
# Consistency check between graph_list and complex_list: entry by entry, the
# `y` tensors must be equal and each graph's `x` must equal `chains[0].x` of
# the matching complex.
# zip() stops at the shorter input, so a length mismatch would otherwise pass
# unnoticed. (Assumes both objects support len() — they are loaded from
# '*_list.pkl' files.)
assert len(graph_list) == len(complex_list), \
    'length mismatch: {} graphs vs {} complexes'.format(len(graph_list), len(complex_list))
for e, (graph_a, graph_b) in enumerate(zip(graph_list, complex_list)):
    print('\r', e, end='      ')
    # Messages name the failing check and index; a bare assert only reports "AssertionError".
    assert torch.equal(graph_a.y, graph_b.y), 'y differs at index {}'.format(e)
    assert torch.equal(graph_a.x, graph_b.chains[0].x), 'x differs from chains[0].x at index {}'.format(e)

 1999      

In [8]:
# Consistency check between the GSN data and graph_list, entry by entry.
# graph_a / graph_b stay bound to the offending pair when an assert fails, so
# they can be inspected in follow-up cells.
for e, (graph_a, graph_b) in enumerate(zip(gsn_data[0], graph_list)):
    print('\r', e, end='      ')
    # Messages name the failing check and index; a bare assert only reports "AssertionError".
    assert torch.equal(graph_a.edge_index, graph_b.edge_index), \
        'edge_index differs at index {}'.format(e)
    assert torch.equal(graph_a.y, graph_b.y), \
        'y differs at index {}'.format(e)
    assert torch.equal(graph_a.degrees, torch.argmax(graph_b.x, 1).float()), \
        'degrees differ from argmax(x, 1) at index {}'.format(e)

 0      

AssertionError: 

In [None]:
from torch_geometric.utils import contains_self_loops, degree, contains_isolated_nodes, is_undirected

In [176]:
# graph_b is whatever an earlier cell left in the kernel (hidden state) —
# presumably the pair on which the comparison loop stopped.
contains_self_loops(graph_b.edge_index)

False

In [164]:
# Same leftover graph_b as in the neighbouring cells; checks for isolated nodes.
contains_isolated_nodes(graph_b.edge_index)

False

In [166]:
# Same leftover graph_b as in the neighbouring cells; checks whether edge_index is undirected.
is_undirected(graph_b.edge_index)

True

In [173]:
# Degrees recomputed from the first row of edge_index, for comparison with
# graph_a.degrees and with argmax(graph_b.x, 1) shown in the cells below.
degree(graph_b.edge_index[0])

tensor([  1.,   1., 341.,   1.,   1.,   1.,   8.,   1.,   1.,   1.,   2.,   3.,
          1.,  12.,   1.,   3.,   3.,   1.,   1.,   2.,   2.,   1.,   1.,   1.,
          1.,   6.,   3.,   4.,   4.,   1.,   1.,   1.,   1.,   1.,   3.,   3.,
          1.,   1.,   1.,   4.,   1.,   1.,   1.,   1.,   1.,   1.,   2.,   1.,
          1.,   1.,   5.,   1.,   1.,   7.,  19.,   1.,   1.,   1.,   1.,   2.,
          2.,   2.,   1.,   1.,   4.,   2.,   1.,   5.,   1.,   1.,   1.,   1.,
          4.,   1.,   1.,   1.,  14.,   1.,   1.,   1.,   1.,   1.,   1.,   5.,
          3.,   1.,   1.,   1.,   3.,   2.,   1.,   1.,   2.,   1.,   1.,   4.,
          1.,   1.,   1.,   1.,   2.,   1.,   1.,   1.,   6.,   1.,   8.,   3.,
          1.,   1.,   2.,   3.,   2.,   1.,   2.,   1.,   1.,   1.,   1.,   1.,
          2.,   5.,   1.,   1.,   2.,   1.,   1.,   1.,   1.,   1.,   4.,   1.,
          1.,   1.,   1.,   3.,   2.,   1.,   1.,   1.,   1.,   2.,   1.,   1.,
          1.,   3.,   2.,   1.,   2.,   

In [177]:
# Shape of graph_b.x (the tensor whose row-wise argmax is compared against the degrees).
graph_b.x.shape

torch.Size([717, 566])

In [155]:
# Transposed NumPy copy of edge_index: each row of edge_list is one column of edge_index.
edge_list = graph_b.edge_index.numpy().T

In [156]:
# Count the rows whose first entry is 2, i.e. a manual degree count for node 2,
# the entry where the degree and argmax outputs disagree (341 vs 310).
(edge_list[:,0] == 2).sum()

341

In [153]:
# Row-wise argmax of graph_b.x — the quantity the failing assertion compared with graph_a.degrees.
torch.argmax(graph_b.x, 1)

tensor([  1,   1, 310,   1,   1,   1,   8,   1,   1,   1,   2,   3,   1,  12,
          1,   3,   3,   1,   1,   2,   2,   1,   1,   1,   1,   6,   3,   4,
          4,   1,   1,   1,   1,   1,   3,   3,   1,   1,   1,   4,   1,   1,
          1,   1,   1,   1,   2,   1,   1,   1,   5,   1,   1,   7,  19,   1,
          1,   1,   1,   2,   2,   2,   1,   1,   4,   2,   1,   5,   1,   1,
          1,   1,   4,   1,   1,   1,  14,   1,   1,   1,   1,   1,   1,   5,
          3,   1,   1,   1,   3,   2,   1,   1,   2,   1,   1,   4,   1,   1,
          1,   1,   2,   1,   1,   1,   6,   1,   8,   3,   1,   1,   2,   3,
          2,   1,   2,   1,   1,   1,   1,   1,   2,   5,   1,   1,   2,   1,
          1,   1,   1,   1,   4,   1,   1,   1,   1,   3,   2,   1,   1,   1,
          1,   2,   1,   1,   1,   3,   2,   1,   2,   1,   5,   2,   2,   1,
          1,   1,   2,   2,   4,   2,   1,   2,   3,   1,   7,   2,   5,   1,
          1,   1,   2,   1,   1,   1,   1,   4,   4,   7,   1,  

In [151]:
# The `degrees` attribute stored on the GSN-side object, for comparison with the two outputs above.
graph_a.degrees

tensor([  1.,   1., 341.,   1.,   1.,   1.,   8.,   1.,   1.,   1.,   2.,   3.,
          1.,  12.,   1.,   3.,   3.,   1.,   1.,   2.,   2.,   1.,   1.,   1.,
          1.,   6.,   3.,   4.,   4.,   1.,   1.,   1.,   1.,   1.,   3.,   3.,
          1.,   1.,   1.,   4.,   1.,   1.,   1.,   1.,   1.,   1.,   2.,   1.,
          1.,   1.,   5.,   1.,   1.,   7.,  19.,   1.,   1.,   1.,   1.,   2.,
          2.,   2.,   1.,   1.,   4.,   2.,   1.,   5.,   1.,   1.,   1.,   1.,
          4.,   1.,   1.,   1.,  14.,   1.,   1.,   1.,   1.,   1.,   1.,   5.,
          3.,   1.,   1.,   1.,   3.,   2.,   1.,   1.,   2.,   1.,   1.,   4.,
          1.,   1.,   1.,   1.,   2.,   1.,   1.,   1.,   6.,   1.,   8.,   3.,
          1.,   1.,   2.,   3.,   2.,   1.,   2.,   1.,   1.,   1.,   1.,   1.,
          2.,   5.,   1.,   1.,   2.,   1.,   1.,   1.,   1.,   1.,   4.,   1.,
          1.,   1.,   1.,   3.,   2.,   1.,   1.,   1.,   1.,   2.,   1.,   1.,
          1.,   3.,   2.,   1.,   2.,   

In [9]:
from data.datasets.tu import load_tu_graph_dataset

In [10]:
# Rebuild the dataset through the project loader with degree_as_tag=True.
# This rebinds graph_list, replacing the object unpickled from the
# '..._degree_as_tag_False.pkl' file in an earlier cell.
graph_list, train_ids, val_ids, test_ids = load_tu_graph_dataset('REDDITBINARY', root='./../datasets/', degree_as_tag=True, fold=0, seed=0)

loading data
# classes: 2
# maximum node tag: 566
# data: 2000
Converting graph data into PyG format...


In [11]:
# Rebind graph_b to the first entry of the current graph_list and show the
# row-wise argmax of its x tensor.
# NOTE(review): this cell occurs twice (execution counts 11 and 191) with
# different outputs, so the result depends on which graph_list is in the kernel.
graph_b = graph_list[0]
print(torch.argmax(graph_b.x, 1))

tensor([  1,   1,   1,   1,   2,   2,   2,   2,   1,   1,   1,   1,   1,   1,
          1,   1,   1,   1,   1,   1,   1,   1,   2,   1,   1,   2,   6,   1,
          3,   3,   1,   1,   1,   1,   1,   2,   1,   1,   2,   1,   2,   6,
         13,   1,  11,   3,   1,   3,   1,   1,   1,   1,   1,   1,   1,   1,
          1, 128,   1,   1,   1,   1,   2,   2,   1,   1,   1,   1,   1,   1,
          1,   1,   1,   1,   1,   2,   1,   1,   2,   1,   1,   1,   1,   1,
          1,   1,   1,   1,   3,   1,   1,   1,   2,   1,   1,   1,   1,   1,
          2,   1,   1,   6,   3,   3,   1,   1,   1,   1,   1,   1,   1,   1,
          1,   1,   4,   2,   1,   1,   1,   3,   1,   1,   2,   4,   2,  13,
          1,   1,   1,   1,   2,   1,   5,   3,   1,   1,   1,   1,   1,   1,
          1,   1,   1,  12,   1,   1,   1,   1,   1,   1,   2,   1,   1,   1,
          1,   1,   1,   5,   1,   1,   1,   1,   1,   1,   1,   2,   3,   3,
          2,   1,   3,   5,   1,   1,   3,   1,   1,   1,   1,  

In [191]:
# Same code as the cell with execution count 11, but the saved output differs.
# NOTE(review): presumably produced with a different graph_list in the kernel
# — re-run from a fresh kernel to confirm which output is current.
graph_b = graph_list[0]
print(torch.argmax(graph_b.x, 1))

tensor([  1,   3,   1, 128,   1,   1,   2,   2,   2,   2,   1,  13,   2,   3,
          2,   5,  12,   1,   1,   1,   2,   1,   1,   1,   1,   1,   1,   1,
          6,   1,   1,   5,   1,   5,   1,   2,   1,   2,   1,   1,   2,  13,
          1,   1,   1,   1,   3,   2,   2,   3,   1,   1,   1,   1,  11,   2,
          2,   1,   1,   2,   1,   1,   4,   2,   1,   2,   6,   6,   3,   2,
          3,   1,   2,   3,   1,   3,   3,   1,   1,   1,   1,   1,   1,   3,
          1,   4,   2,   1,   1,   3,   1,   1,   1,   1,   1,   1,   1,   1,
          2,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
          1,   1,   1,   2,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
          1,   1,   1,   2,   1,   1,   1,   2,   1,   1,   2,   1,   1,   1,
          1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
          1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
          1,   1,   1,   1,   1,   1,   1,   2,   1,   3,   1,  

In [193]:
# Rebind graph_a to the first element of gsn_data[0] and print its stored `degrees`.
graph_a = gsn_data[0][0]
print(graph_a.degrees)

tensor([  1.,   1.,   1.,   1.,   2.,   2.,   2.,   2.,   1.,   1.,   1.,   1.,
          1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   2.,   1.,
          1.,   2.,   6.,   1.,   3.,   3.,   1.,   1.,   1.,   1.,   1.,   2.,
          1.,   1.,   2.,   1.,   2.,   6.,  13.,   1.,  11.,   3.,   1.,   3.,
          1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1., 128.,   1.,   1.,
          1.,   1.,   2.,   2.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,
          1.,   1.,   1.,   2.,   1.,   1.,   2.,   1.,   1.,   1.,   1.,   1.,
          1.,   1.,   1.,   1.,   3.,   1.,   1.,   1.,   2.,   1.,   1.,   1.,
          1.,   1.,   2.,   1.,   1.,   6.,   3.,   3.,   1.,   1.,   1.,   1.,
          1.,   1.,   1.,   1.,   1.,   1.,   4.,   2.,   1.,   1.,   1.,   3.,
          1.,   1.,   2.,   4.,   2.,  13.,   1.,   1.,   1.,   1.,   2.,   1.,
          5.,   3.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,  12.,
          1.,   1.,   1.,   1.,   1.,   

In [205]:
# Gather every integer degree value that occurs in any graph of graph_list.
degrees = set()
for graph in graph_list:
    per_node = degree(graph.edge_index[0]).numpy().astype(int)
    degrees.update(per_node.tolist())

In [207]:
# Ascending list of the observed degrees; a value's position in this list is its index.
translator = sorted(degrees)

In [209]:
# Per-node degrees of graph_b, computed from the first row of its edge_index.
d = degree(graph_b.edge_index[0])

In [227]:
# Map each degree in d to its position in translator.
# translator.index() is a linear search per node; acceptable for a single graph.
# NOTE(review): relies on NumPy/torch interop (np.zeros_like on a tensor and
# list.index with tensor elements) — keep as is unless verified on the installed versions.
translated = np.zeros_like(d)
for i, deg in enumerate(d):
    translated[i] = float(translator.index(deg))
translated

array([  1.,   1.,   1.,   1.,   2.,   2.,   2.,   2.,   1.,   1.,   1.,
         1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,
         2.,   1.,   1.,   2.,   6.,   1.,   3.,   3.,   1.,   1.,   1.,
         1.,   1.,   2.,   1.,   1.,   2.,   1.,   2.,   6.,  13.,   1.,
        11.,   3.,   1.,   3.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,
         1.,   1., 128.,   1.,   1.,   1.,   1.,   2.,   2.,   1.,   1.,
         1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   2.,   1.,
         1.,   2.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,
         3.,   1.,   1.,   1.,   2.,   1.,   1.,   1.,   1.,   1.,   2.,
         1.,   1.,   6.,   3.,   3.,   1.,   1.,   1.,   1.,   1.,   1.,
         1.,   1.,   1.,   1.,   4.,   2.,   1.,   1.,   1.,   3.,   1.,
         1.,   2.,   4.,   2.,  13.,   1.,   1.,   1.,   1.,   2.,   1.,
         5.,   3.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,
        12.,   1.,   1.,   1.,   1.,   1.,   1.,   

In [228]:
# Element-wise difference between the translated indices and the raw degrees;
# an all-zero result means index and degree coincide for every node of this graph.
torch.FloatTensor(translated) - d

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])

In [225]:
# Display the sorted degree list. Gaps in it (e.g. 177 and 179 are missing in
# the output) are where list position and degree value stop being equal.
translator

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 178,
 180,
 181,
 182,
 183,
 184,
 185,
 186,


In [12]:
# NOTE(review): pickle.load can execute arbitrary code — only open files from a trusted source.
# The path is relative to the notebook's working directory.
with open('tag2index.pkl', 'rb') as handle:
    tag2index = pickle.load(handle)

In [13]:
# Display the full mapping (long output; len(tag2index) gives the size).
tag2index

{0: 0,
 1: 1,
 2: 2,
 3: 3,
 4: 4,
 5: 5,
 6: 6,
 7: 7,
 8: 8,
 9: 9,
 10: 10,
 11: 11,
 12: 12,
 13: 13,
 14: 14,
 15: 15,
 16: 16,
 17: 17,
 18: 18,
 19: 19,
 20: 20,
 21: 21,
 22: 22,
 23: 23,
 24: 24,
 25: 25,
 26: 26,
 27: 27,
 28: 28,
 29: 29,
 30: 30,
 31: 31,
 32: 32,
 33: 33,
 34: 34,
 35: 35,
 36: 36,
 37: 37,
 38: 38,
 39: 39,
 40: 40,
 41: 41,
 42: 42,
 43: 43,
 44: 44,
 45: 45,
 46: 46,
 47: 47,
 48: 48,
 49: 49,
 50: 50,
 51: 51,
 52: 52,
 53: 53,
 54: 54,
 55: 55,
 56: 56,
 57: 57,
 58: 58,
 59: 59,
 60: 60,
 61: 61,
 62: 62,
 63: 63,
 64: 64,
 65: 65,
 66: 66,
 67: 67,
 68: 68,
 69: 69,
 70: 70,
 71: 71,
 72: 72,
 73: 73,
 74: 74,
 75: 75,
 76: 76,
 77: 77,
 78: 78,
 79: 79,
 80: 80,
 81: 81,
 82: 82,
 83: 83,
 84: 84,
 85: 85,
 86: 86,
 87: 87,
 88: 88,
 89: 89,
 90: 90,
 91: 91,
 92: 92,
 93: 93,
 94: 94,
 95: 95,
 96: 96,
 97: 97,
 98: 98,
 99: 99,
 100: 100,
 101: 101,
 102: 102,
 103: 103,
 104: 104,
 105: 105,
 106: 106,
 107: 107,
 108: 108,
 109: 109,
 110: 110,

In [15]:
# NOTE(review): pickle.load can execute arbitrary code — only open files from a trusted source.
# encoder is compared key by key with tag2index in the next cell.
with open('./../encoder.pkl', 'rb') as handle:
    encoder = pickle.load(handle)

In [19]:
# Print the sorted keys of tag2index and encoder side by side, each with its
# mapped value, for a visual comparison of the two mappings.
separator = '----------------------'
for tag_key, encoder_key in zip(sorted(tag2index), sorted(encoder)):
    print(tag_key, tag2index[tag_key])
    print(encoder_key, encoder[encoder_key])
    print(separator)

0 0
0 0
----------------------
1 1
1 1
----------------------
2 2
2 2
----------------------
3 3
3 3
----------------------
4 4
4 4
----------------------
5 5
5 5
----------------------
6 6
6 6
----------------------
7 7
7 7
----------------------
8 8
8 8
----------------------
9 9
9 9
----------------------
10 10
10 10
----------------------
11 11
11 11
----------------------
12 12
12 12
----------------------
13 13
13 13
----------------------
14 14
14 14
----------------------
15 15
15 15
----------------------
16 16
16 16
----------------------
17 17
17 17
----------------------
18 18
18 18
----------------------
19 19
19 19
----------------------
20 20
20 20
----------------------
21 21
21 21
----------------------
22 22
22 22
----------------------
23 23
23 23
----------------------
24 24
24 24
----------------------
25 25
25 25
----------------------
26 26
26 26
----------------------
27 27
27 27
----------------------
28 28
28 28
----------------------
29 29
29 29
-------------

In [18]:
# Number of entries in tag2index; matches the second dimension of graph_b.x (566) shown earlier.
len(tag2index)

566