### Using pre-trained graph embeddings

This notebook downloads and evaluates a model that was trained on the $50\,000$ most frequent tokens from English Wikipedia.
You can also use it to score your own model on standard word-similarity benchmarks.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import torch
import torch.nn as nn
from tqdm.auto import tqdm
import sys
sys.path.insert(0, '..')
import lib

In [2]:
# Download the pre-trained model and matching corpora:
!wget -q -nc https://www.dropbox.com/s/l69y3n54vfiiyyp/graphglove_wiki50k_dist_20d.model.pth?dl=1 -O graphglove_wiki50k_dist_20d.model.pth
model_path = 'graphglove_wiki50k_dist_20d.model.pth'
model = torch.load(model_path)
model.graph_embedding.report_model_size()

{'self': ParallelGraphEmbedding(50001 vertices, 2789514 edges total, 495388 edges kept, 0.82241 sparsity rate, default distance = 0.0),
 'threshold': 0.5,
 'bits_per_float': 32,
 'bits_per_int': 32,
 'num_edges': 495387,
 'num_slices': 50002,
 'num_vertices': 50003,
 'trainable_default': 1,
 'num_parameters': 1040778,
 'params_per_vertex': 20.81431114133152,
 'size_bits': 33304896}

In [3]:
# Run lib's similarity evaluation on the loaded model and show the scores it returns.
# NOTE(review): lowercase=True presumably lowercases the benchmark words before lookup;
# confirm against lib.task.nlp.evaluate_similarity.
similarity_scores = lib.task.nlp.evaluate_similarity(model, lowercase=True)
similarity_scores



{'WS353_infer_nan': 0.5548952299583522,
 'WS353_omit_nan': 0.5608986055874999,
 'SCWS_infer_nan': 0.5169381049048455,
 'SCWS_omit_nan': 0.5603445273660402,
 'RW_infer_nan': 0.07321699459535888,
 'RW_omit_nan': 0.38668954013162754,
 'SimLex_infer_nan': 0.3000619770025792,
 'SimLex_omit_nan': 0.31156426341082105,
 'SimVerb_infer_nan': 0.0897690156071369,
 'SimVerb_omit_nan': 0.11991086417467317}