## HTorch hyperbolic embedding for the WordNet Mammals

In [22]:
import timeit
import torch, HTorch
import torch.nn.functional as F
from tqdm import tqdm
import numpy as np
import logging
from hype.graph import load_edge_list, eval_reconstruction
from HTorch.layers import HEmbedding
from HTorch.optimizers import RiemannianSGD, RiemannianAdam
import sys, os, random
import json
import torch.multiprocessing as mp
from hype.graph_dataset import BatchedDataset

In [33]:
from gensim import utils, matutils  # utility fnc for pickling, common scipy operations etc
from six import iteritems, itervalues, string_types
from six.moves import xrange
from numpy import dot, zeros, dtype, float32 as REAL,\
    double, array, vstack, fromstring, sqrt, newaxis,\
    ndarray, sum as np_sum, prod, ascontiguousarray,\
    argmax
import logging
logger = logging.getLogger(__name__)


class Vocab(object):
    """
    A single vocabulary item, used internally for collecting per-word frequency/sampling info,
    and for constructing binary trees (incl. both word leaves and inner nodes).
    """

    def __init__(self, **kwargs):
        self.count = 0
        self.__dict__.update(kwargs)

    def __lt__(self, other):  # used for sorting in a priority queue
        return self.count < other.count

    def __str__(self):
        vals = ['%s:%r' % (key, self.__dict__[key]) for key in sorted(self.__dict__) if not key.startswith('_')]
        return "%s(%s)" % (self.__class__.__name__, ', '.join(vals))


class KeyedVectorsBase(utils.SaveLoad):
    """
    Base class to contain vectors and vocab for any set of vectors which are each associated with a key.
    """

    def __init__(self):
        self.syn0 = []
        self.vocab = {}
        self.index2word = []
        self.vector_size = None

    def save_word2vec_format(self, fname, fvocab=None, binary=False, total_vec=None):
        """
        Store the input-hidden weight matrix in the same format used by the original
        C word2vec-tool, for compatibility.
         `fname` is the file used to save the vectors in
         `fvocab` is an optional file used to save the vocabulary
         `binary` is an optional boolean indicating whether the data is to be saved
         in binary word2vec format (default: False)
         `total_vec` is an optional parameter to explicitly specify total no. of vectors
         (in case word vectors are appended with document vectors afterwards)
        """
        if total_vec is None:
            total_vec = len(self.vocab)
        vector_size = self.syn0.shape[1]
        if fvocab is not None:
            logger.info("storing vocabulary in %s", fvocab)
            with utils.smart_open(fvocab, 'wb') as vout:
                for word, vocab in sorted(iteritems(self.vocab), key=lambda item: -item[1].count):
                    vout.write(utils.to_utf8("%s %s\n" % (word, vocab.count)))
        logger.info("storing %sx%s projection weights into %s", total_vec, vector_size, fname)
        assert (len(self.vocab), vector_size) == self.syn0.shape
        with utils.smart_open(fname, 'wb') as fout:
            fout.write(utils.to_utf8("%s %s\n" % (total_vec, vector_size)))
            # store in sorted order: most frequent words at the top
            for word, vocab in sorted(iteritems(self.vocab), key=lambda item: -item[1].count):
                row = self.syn0[vocab.index]
                if binary:
                    fout.write(utils.to_utf8(word) + b" " + row.tostring())
                else:
                    fout.write(utils.to_utf8("%s %s\n" % (word, ' '.join("%f" % val for val in row))))

    @classmethod
    def load_word2vec_format(cls, fname, fvocab=None, binary=False, encoding='utf8', unicode_errors='strict',
                             limit=None, datatype=REAL):
        """
        Load the input-hidden weight matrix from the original C word2vec-tool format.
        Note that the information stored in the file is incomplete (the binary tree is missing),
        so while you can query for word similarity etc., you cannot continue training
        with a model loaded this way.
        `binary` is a boolean indicating whether the data is in binary word2vec format.
        `norm_only` is a boolean indicating whether to only store normalised word2vec vectors in memory.
        Word counts are read from `fvocab` filename, if set (this is the file generated
        by `-save-vocab` flag of the original C tool).
        If you trained the C model using non-utf8 encoding for words, specify that
        encoding in `encoding`.
        `unicode_errors`, default 'strict', is a string suitable to be passed as the `errors`
        argument to the unicode() (Python 2.x) or str() (Python 3.x) function. If your source
        file may include word tokens truncated in the middle of a multibyte unicode character
        (as is common from the original word2vec.c tool), 'ignore' or 'replace' may help.
        `limit` sets a maximum number of word-vectors to read from the file. The default,
        None, means read all.
        `datatype` (experimental) can coerce dimensions to a non-default float type (such
        as np.float16) to save memory. (Such types may result in much slower bulk operations
        or incompatibility with optimized routines.)
        """
        counts = None
        if fvocab is not None:
            logger.info("loading word counts from %s", fvocab)
            counts = {}
            with utils.smart_open(fvocab) as fin:
                for line in fin:
                    word, count = utils.to_unicode(line).strip().split()
                    counts[word] = int(count)

        logger.info("loading projection weights from %s", fname)
        with utils.smart_open(fname) as fin:
            header = utils.to_unicode(fin.readline(), encoding=encoding)
            vocab_size, vector_size = (int(x) for x in header.split())  # throws for invalid file format
            if limit:
                vocab_size = min(vocab_size, limit)
            result = cls()
            result.vector_size = vector_size
            result.syn0 = zeros((vocab_size, vector_size), dtype=datatype)

            def add_word(word, weights):
                word_id = len(result.vocab)
                if word in result.vocab:
                    logger.warning("duplicate word '%s' in %s, ignoring all but first", word, fname)
                    return
                if counts is None:
                    # most common scenario: no vocab file given. just make up some bogus counts, in descending order
                    result.vocab[word] = Vocab(index=word_id, count=vocab_size - word_id)
                elif word in counts:
                    # use count from the vocab file
                    result.vocab[word] = Vocab(index=word_id, count=counts[word])
                else:
                    # vocab file given, but word is missing -- set count to None (TODO: or raise?)
                    logger.warning("vocabulary file is incomplete: '%s' is missing", word)
                    result.vocab[word] = Vocab(index=word_id, count=None)
                result.syn0[word_id] = weights
                result.index2word.append(word)

            if binary:
                binary_len = dtype(REAL).itemsize * vector_size
                for _ in xrange(vocab_size):
                    # mixed text and binary: read text first, then binary
                    word = []
                    while True:
                        ch = fin.read(1)
                        if ch == b' ':
                            break
                        if ch == b'':
                            raise EOFError("unexpected end of input; is count incorrect or file otherwise damaged?")
                        if ch != b'\n':  # ignore newlines in front of words (some binary files have)
                            word.append(ch)
                    word = utils.to_unicode(b''.join(word), encoding=encoding, errors=unicode_errors)
                    weights = fromstring(fin.read(binary_len), dtype=REAL)
                    add_word(word, weights)
            else:
                for line_no in xrange(vocab_size):
                    line = fin.readline()
                    if line == b'':
                        raise EOFError("unexpected end of input; is count incorrect or file otherwise damaged?")
                    parts = utils.to_unicode(line.rstrip(), encoding=encoding, errors=unicode_errors).split(" ")
                    if len(parts) != vector_size + 1:
                        raise ValueError("invalid vector on line %s (is this really the text format?)" % line_no)
                    word, weights = parts[0], [REAL(x) for x in parts[1:]]
                    add_word(word, weights)
        if result.syn0.shape[0] != len(result.vocab):
            logger.info(
                "duplicate words detected, shrinking matrix size from %i to %i",
                result.syn0.shape[0], len(result.vocab)
            )
            result.syn0 = ascontiguousarray(result.syn0[: len(result.vocab)])
        assert (len(result.vocab), vector_size) == result.syn0.shape

        logger.info("loaded %s matrix from %s", result.syn0.shape, fname)
        return result

    def similarity(self, w1, w2):
        """
        Compute similarity between vectors of two input words.
        To be implemented by child class.
        """
        raise NotImplementedError

    def distance(self, w1, w2):
        """
        Compute distance between vectors of two input words.
        To be implemented by child class.
        """
        raise NotImplementedError

    def distances(self, word_or_vector, other_words=()):
        """
        Compute distances from given word or vector to all words in `other_words`.
        If `other_words` is empty, return distance between `word_or_vectors` and all words in vocab.
        To be implemented by child class.
        """
        raise NotImplementedError

    def word_vec(self, word):
        """
        Accept a single word as input.
        Returns the word's representations in vector space, as a 1D numpy array.
        Example::
          >>> trained_model.word_vec('office')
          array([ -1.40128313e-02, ...])
        """
        if word in self.vocab:
            result = self.syn0[self.vocab[word].index]
            result.setflags(write=False)
            return result
        else:
            raise KeyError("word '%s' not in vocabulary" % word)

    def __getitem__(self, words):
        """
        Accept a single word or a list of words as input.
        If a single word: returns the word's representations in vector space, as
        a 1D numpy array.
        Multiple words: return the words' representations in vector space, as a
        2d numpy array: #words x #vector_size. Matrix rows are in the same order
        as in input.
        Example::
          >>> trained_model['office']
          array([ -1.40128313e-02, ...])
          >>> trained_model[['office', 'products']]
          array([ -1.40128313e-02, ...]
                [ -1.70425311e-03, ...]
                 ...)
        """
        if isinstance(words, string_types):
            # allow calls like trained_model['office'], as a shorthand for trained_model[['office']]
            return self.word_vec(words)

        return vstack([self.word_vec(word) for word in words])

    def __contains__(self, word):
        return word in self.vocab

    def most_similar_to_given(self, w1, word_list):
        """Return the word from word_list most similar to w1.
        Args:
            w1 (str): a word
            word_list (list): list of words containing a word most similar to w1
        Returns:
            the word in word_list with the highest similarity to w1
        Raises:
            KeyError: If w1 or any word in word_list is not in the vocabulary
        Example::
          >>> trained_model.most_similar_to_given('music', ['water', 'sound', 'backpack', 'mouse'])
          'sound'
          >>> trained_model.most_similar_to_given('snake', ['food', 'pencil', 'animal', 'phone'])
          'animal'
        """
        return word_list[argmax([self.similarity(w1, word) for word in word_list])]

    def words_closer_than(self, w1, w2):
        """
        Returns all words that are closer to `w1` than `w2` is to `w1`.
        Parameters
        ----------
        w1 : str
            Input word.
        w2 : str
            Input word.
        Returns
        -------
        list (str)
            List of words that are closer to `w1` than `w2` is to `w1`.
        Examples
        --------
        >>> model.words_closer_than('carnivore.n.01', 'mammal.n.01')
        ['dog.n.01', 'canine.n.02']
        """
        all_distances = self.distances(w1)
        w1_index = self.vocab[w1].index
        w2_index = self.vocab[w2].index
        closer_node_indices = np.where(all_distances < all_distances[w2_index])[0]
        return [self.index2word[index] for index in closer_node_indices if index != w1_index]

    def rank(self, w1, w2):
        """
        Rank of the distance of `w2` from `w1`, in relation to distances of all words from `w1`.
        Parameters
        ----------
        w1 : str
            Input word.
        w2 : str
            Input word.
        Returns
        -------
        int
            Rank of `w2` from `w1` in relation to all other nodes.
        Examples
        --------
        >>> model.rank('mammal.n.01', 'carnivore.n.01')
        3
        """
        return len(self.words_closer_than(w1, w2)) + 1

In [45]:
class DAGEmbeddingKeyedVectors(KeyedVectorsBase):
    """Class to contain vectors and vocab for the
     :class:`~gensim.models.poincare.DAGEmbeddingKeyedVectorsModel` training class.
        Used to perform operations on the vectors such as vector lookup, distance etc.
    """
    def __init__(self):
        super(DAGEmbeddingKeyedVectors, self).__init__()
        self.syn0 = []

    def vector_distance(self, vector_1, vector_2):
        """
        Return poincare distance between two input vectors. Convenience method over `vector_distance_batch`.
        Parameters
        ----------
        vector_1 : numpy.array
            input vector
        vector_2 : numpy.array
            input vector
        Returns
        -------
        numpy.float
            Distance between `vector_1` and `vector_2`.
        """
        return DAGEmbeddingKeyedVectors.vector_distance_batch(vector_1, vector_2[np.newaxis, :])[0]


    def distance(self, w1, w2):
        """
        Return distance between vectors for nodes `w1` and `w2`.
        Parameters
        ----------
        w1 : str or int
            Key for first node.
        w2 : str or int
            Key for second node.
        Returns
        -------
        float
            distance between the vectors for nodes `w1` and `w2`.
        Examples
        --------
        >>> model.distance('mammal.n.01', 'carnivore.n.01')
        2.13
        Notes
        -----
        Raises KeyError if either of `w1` and `w2` is absent from vocab.
        """
        vector_1 = self.word_vec(w1)
        vector_2 = self.word_vec(w2)
        return self.vector_distance(vector_1, vector_2)


    def most_similar(self, node_or_vector, topn=10, restrict_vocab=None):
        """
        Find the top-N most similar nodes to the given node or vector, sorted in increasing order of distance.
        Parameters
        ----------
        node_or_vector : str/int or numpy.array
            node key or vector for which similar nodes are to be found.
        topn : int or None, optional
            number of similar nodes to return, if `None`, returns all.
        restrict_vocab : int or None, optional
            Optional integer which limits the range of vectors which are searched for most-similar values.
            For example, restrict_vocab=10000 would only check the first 10000 node vectors in the vocabulary order.
            This may be meaningful if vocabulary is sorted by descending frequency.
        Returns
        --------
        list of tuples (str, float)
            List of tuples containing (node, distance) pairs in increasing order of distance.
        Examples
        --------
        >>> vectors.most_similar('lion.n.01')
        [('lion_cub.n.01', 0.4484), ('lionet.n.01', 0.6552), ...]
        """
        if not restrict_vocab:
            all_distances = self.distances(node_or_vector)
        else:
            nodes_to_use = self.index2word[:restrict_vocab]
            all_distances = self.distances(node_or_vector, nodes_to_use)

        if isinstance(node_or_vector, string_types + (int,)):
            node_index = self.vocab[node_or_vector].index
        else:
            node_index = None
        if not topn:
            closest_indices = matutils.argsort(all_distances)
        else:
            closest_indices = matutils.argsort(all_distances, topn=1 + topn)
        result = [
            (self.index2word[index], float(all_distances[index]))
            for index in closest_indices if (not node_index or index != node_index)  # ignore the input node
        ]
        if topn:
            result = result[:topn]
        return result


    def vector_distance_batch(self, vector_1, vectors_all):
        """
        Return distances between one vector and a set of other vectors.
        Parameters
        ----------
        vector_1 : numpy.array
            vector from which distances are to be computed.
            expected shape (dim,)
        vectors_all : numpy.array
            for each row in vectors_all, distance from vector_1 is computed.
            expected shape (num_vectors, dim)
        Returns
        -------
        numpy.array
            Contains distance between vector_1 and each row in vectors_all.
            shape (num_vectors,)
        """
        raise NotImplementedError


    def distances_from_indices(self, node_index, other_indices=()):
        assert node_index < len(self.syn0)
        input_vector = self.syn0[node_index]
        if not other_indices:
            other_vectors = self.syn0
        else:
            other_vectors = self.syn0[other_indices]
        return self.vector_distance_batch(input_vector, other_vectors)


    def is_a_scores_vector_batch(self, alpha, parent_vectors, other_vectors, rel_reversed):
        raise NotImplementedError


    def is_a_scores_from_indices(self, alpha, parent_indices, other_indices, rel_reversed):
        parent_vectors = self.syn0[parent_indices]
        other_vectors = self.syn0[other_indices]
        return self.is_a_scores_vector_batch(alpha, parent_vectors, other_vectors, rel_reversed)


    def distances(self, node_or_vector, other_nodes=()):
        """
        Compute distances from given node or vector to all nodes in `other_nodes`.
        If `other_nodes` is empty, return distance between `node_or_vector` and all nodes in vocab.
        Parameters
        ----------
        node_or_vector : str/int or numpy.array
            Node key or vector from which distances are to be computed.
        other_nodes : iterable of str/int or None
            For each node in `other_nodes` distance from `node_or_vector` is computed.
            If None or empty, distance of `node_or_vector` from all nodes in vocab is computed (including itself).
        Returns
        -------
        numpy.array
            Array containing distances to all nodes in `other_nodes` from input `node_or_vector`,
            in the same order as `other_nodes`.
        Examples
        --------
        >>> model.distances('mammal.n.01', ['carnivore.n.01', 'dog.n.01'])
        np.array([2.1199, 2.0710]
        >>> model.distances('mammal.n.01')
        np.array([0.43753847, 3.67973852, ..., 6.66172886])
        Notes
        -----
        Raises KeyError if either `node_or_vector` or any node in `other_nodes` is absent from vocab.
        """
        if isinstance(node_or_vector, string_types):
            input_vector = self.word_vec(node_or_vector)
        else:
            input_vector = node_or_vector
        if other_nodes == None:
            other_vectors = self.syn0
        else:
            other_indices = [self.vocab[node].index for node in other_nodes]
            other_vectors = self.syn0[other_indices]
        return self.vector_distance_batch(input_vector, other_vectors)

In [46]:
# model defined using HTorch
class EnergyFunction(torch.nn.Module):
    def __init__(self, size, dim, sparse=False, manifold='PoincareBall', curvature=-1.0, **kwargs):
        super().__init__()
        # initialize layer, weights are automatically initialized around origin
        self.lt = HEmbedding(size, dim, sparse=sparse, manifold=manifold, curvature=curvature) 
        self.nobjects = size
        self.kv = DAGEmbeddingKeyedVectors

    def forward(self, inputs):
        e = self.lt(inputs)
        with torch.no_grad():
            e.proj_()
        o = e.narrow(1, 1, e.size(1) - 1)
        s = e.narrow(1, 0, 1).expand_as(o)
        return o.Hdist(s).squeeze(-1)
    
    def loss(self, inp, target, **kwargs):
        return F.cross_entropy(inp.neg(), target)

In [47]:
# set meta-parameters, float precision etc.
os.environ["NUMEXPR_MAX_THREADS"] = '8'

def seed_everything(seed=1234):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

# note d16, d32 may produce infs and NaNs due to imprecision
d16 = torch.float16; d32 = torch.float32; d64 = torch.float64
cpu = torch.device("cpu"); gpu = torch.device(type='cuda', index=0)
device = cpu
opt_dtype = d64

if opt_dtype == d16:
    dtype = "d16"
    torch.set_default_tensor_type('torch.HalfTensor')
elif opt_dtype == d32:
    dtype = "d32"
    torch.set_default_tensor_type('torch.FloatTensor')
else:
    dtype = "d64"
    torch.set_default_tensor_type('torch.DoubleTensor')
    
torch.manual_seed(42)
np.random.seed(42)

### Hyperparameters for PyTorch Poincare Halfspace model: (now use opt_epochs = 20)

In [48]:
## parameters; these are global in the notebook!
opt_maxnorm = 500000; opt_debug = False;
opt_dim = 2; #for Lorentz, add 1 to dim relative to Poincare/HalfSpace
opt_negs = 50;  opt_eval_each = 20;
opt_sparse = True; opt_ndproc = 1;  opt_burnin = 20;
opt_dampening = 0.75; opt_neg_multiplier = 1.0; 
opt_burnin_multiplier = 0.01; 
###########################################################
opt_epochs = 10; opt_batchsize = 32; 
opt_lr = 1.0;  opt_dscale = 1.0
#opt_manifold = "PoincareBall"
# opt_manifold = "Lorentz"
opt_manifold = "HalfSpace"
opt_curvature = -1.0 
opt_task = 'mammals'
#######################################
FILE_NAME = "_".join([opt_task, 'lr', str(opt_lr), 'batch', str(opt_batchsize),
                      str(opt_epochs), "torch", dtype, str(opt_dscale)])

### Initializing logging and data loading

In [49]:
log_level = logging.DEBUG if opt_debug else logging.INFO
log = logging.getLogger('Embedding')
logging.basicConfig(level=log_level, format='%(message)s', stream=sys.stdout)
log.info('Using edge list dataloader')
idx, objects, weights = load_edge_list("wordnet/mammal_closure.csv", False) 
#idx, objects, weights = load_edge_list("/home/jl3789/Hyperbolic_Library/applications/poincare_embedding/wordnet/mammal_closure.csv", False) 

Using edge list dataloader


### Initializing model

In [50]:
def init_model(manifold, curvature, dim, idx, objects, weights, sparse=True):
    model_name = '%s_dim%d'
    mname = model_name % (manifold, dim)
    data = BatchedDataset(idx, objects, weights, opt_negs, opt_batchsize,
        opt_ndproc, opt_burnin > 0, opt_dampening)
    model = EnergyFunction(len(data.objects), opt_dim, sparse=sparse, manifold=manifold, curvature=curvature)
    data.objects = objects
    return model, data, mname

def adj_matrix(data):
    adj = {}
    for inputs, _ in data:
        for row in inputs:
            x = row[0].item()
            y = row[1].item()
            if x in adj:
                adj[x].add(y)
            else:
                adj[x] = {y}
    return adj

# Animation

In [51]:
from __future__ import division

import logging

from collections import Counter
import numpy as np
import chart_studio.plotly as py
import plotly.graph_objects as go

def space_title(s):
    for cut in [100, 200, 300, 400]:
        if len(s) > cut:
            x = s.find(';', cut) + 1
            if x > 0:
                s = s[:x] + '<br>' + s[x:]
    return s

def create_animation(figure_title):
    figure = {'data': [],
              'layout': {'xaxis': {'range': [-1, 1.3], 'autorange': False, 'zeroline' :False, 'showgrid' :False},
                         'yaxis': {'range': [-1, 1.3], 'autorange': False, 'zeroline' :False, 'showgrid' :False},
                         'title': space_title(figure_title),
                         'width': 1200,
                         'height': 1200,
                         'showlegend': False,
                         'hovermode': 'closest',
                         'updatemenus': [{
                             'type': 'buttons',
                             'buttons': [
                                 {'label': 'Play',
                                  'method': 'animate',
                                  'args': [None]},
                                 {
                                     'args': [[None], {'frame': {'duration': 0, 'redraw': False},
                                                       'mode': 'immediate',
                                                       'transition': {'duration': 0}}],
                                     'label': 'Pause',
                                     'method': 'animate'
                                 }
                             ]}]
                         },
              'frames': []}
    return figure


def poincare_2d_visualization(
        model,
        animation,
        epoch,
        eval_result,
        avg_loss,
        avg_pos_loss,
        avg_neg_loss,
        tree,
        figure_title,
        num_nodes=50,
        show_node_labels=()):
    """Create a 2-d plot of the nodes and edges of a 2-d poincare embedding.
    Parameters
    ----------
    model : :class:`~hyperbolic.dag_emb_model.DAGEmbeddingModel`
        The model to visualize, model size must be 2.
    tree : list
        Set of tuples containing the direct edges present in the original dataset.
    figure_title : str
        Title of the plotted figure.
    num_nodes : int or None
        Number of nodes for which edges are to be plotted.
        If `None`, all edges are plotted.
        Helpful to limit this in case the data is too large to avoid a messy plot.
    show_node_labels : iterable
        Iterable of nodes for which to show labels by default.
    Returns
    -------
    :class:`plotly.graph_objs.Figure`
        Plotly figure that contains plot.
    """
    vectors = model.kv.syn0
    if vectors.shape[1] != 2:
        raise ValueError('Can only plot 2-D vectors')

    node_labels = model.kv.index2word
    nodes_x = list(vectors[:, 0])
    nodes_y = list(vectors[:, 1])
    nodes = dict(
        x=nodes_x, y=nodes_y,
        mode='markers',
        marker=dict(color='rgb(30, 100, 200)'),
        text=node_labels,
        textposition='bottom'
    )

    nodes_x, nodes_y, node_labels = [], [], []
    for node in show_node_labels:
        if node in model.kv:
            vector = model.kv[node]
            nodes_x.append(vector[0])
            nodes_y.append(vector[1])
            node_labels.append(node)

    nodes_with_labels = dict(
        x=nodes_x, y=nodes_y,
        mode='markers+text',
        marker=dict(color='rgb(200, 100, 200)'),
        text=node_labels,
        textfont=dict(
            family='sans serif',
            size=18,
            color='#ff7f0e' # orange
        ),
        textposition='bottom'
    )

    node_out_degrees = Counter(hypernym_pair[1] for hypernym_pair in tree)
    if num_nodes is None:
        chosen_nodes = list(node_out_degrees.keys())
    else:
        chosen_nodes = list(sorted(node_out_degrees.keys(), key=lambda k: -node_out_degrees[k]))[:num_nodes]

    edges_x = []
    edges_y = []
    for u, v in tree:
        if not(u in chosen_nodes or v in chosen_nodes):
            continue
        vector_u = model.kv[u]
        vector_v = model.kv[v]
        edges_x += [vector_u[0], vector_v[0], None]
        edges_y += [vector_u[1], vector_v[1], None]
    edges = dict(
        x=edges_x, y=edges_y, mode="line", hoverinfo=False,
        line=dict(color='rgb(50,50,50)', width=1))

    layout = go.Layout(
        title=figure_title, showlegend=False, hovermode='closest', width=1500, height=1500,
        xaxis={'range': [-1, 1.3], 'autorange': False},
        yaxis={'range': [-1, 1.3], 'autorange': False},
        updatemenus= [{'type': 'buttons',
                         'buttons': [
                             {'label': 'Play',
                              'method': 'animate',
                              'args': [None]
                              },
                              {
                                 'args': [[None], {'frame': {'duration': 0, 'redraw': False},
                                                   'mode': 'immediate',
                                                   'transition': {'duration': 0}}],
                                 'label': 'Pause',
                                 'method': 'animate'
                              }
                         ]}]
    )


    epoch_sticker = dict(
        x=[0.5], y = [1.2], mode='text', text=['Epoch : ' + str(epoch)],
        textfont=dict(
            family='sans serif',
            size=20,
            color='rgb(200,0,0)'
        ),
    )

    result_str = str(eval_result) + '<br>'
    result_str += 'loss = %.2f; pos loss = %.2f; neg loss = %.2f' % (avg_loss, avg_pos_loss, avg_neg_loss)

    eval_result_sticker = dict(
        x=[0.5], y = [1.1],
        mode='text',
        text=[result_str],
        textfont=dict(
            family='sans serif',
            size=20,
            color='rgb(0,0,200)'
        ),
    )

    # Add a new frame into the animation
    frame = {'data': [], 'name': str(epoch)}
    frame['data'].append(edges)
    frame['data'].append(nodes_with_labels)
    frame['data'].append(eval_result_sticker)
    frame['data'].append(epoch_sticker)
    animation['frames'].append(frame)

    if epoch == 0:
        animation['data'].append(edges)
        animation['data'].append(nodes_with_labels)
        animation['data'].append(eval_result_sticker)
        animation['data'].append(epoch_sticker)

    return go.Figure(data=[edges, nodes, nodes_with_labels, eval_result_sticker, epoch_sticker], layout=layout)

In [52]:
# it takes a list of relations and returns a list of relations
# the relations are the same, but the order is different
# the order is different because the relations are sorted by the number of ancestors
# the relations with the most ancestors are first
# the relations with the least ancestors are last
# the relations with the most ancestors are the most general
# the relations with the least ancestors are the most specific
def recover_tree_from_transitive_closure(relations):
    all_nodes_set = set()
    for rel in relations:
        all_nodes_set.add(rel[0])
        all_nodes_set.add(rel[1])

    ancestors = {}
    for node in all_nodes_set:
        ancestors[node] = []
    for rel in relations:
        if rel[0] != rel[1]:
            ancestors[rel[1]].append(rel[0])

    new_relations = []
    for node in all_nodes_set:
        num_ancestors = len(ancestors[node])
        for ancestor in ancestors[node]:
            if len(ancestors[ancestor]) == num_ancestors - 1:
                new_relations.append((ancestor, node))

    return new_relations

data_file_path = 'data/mammals.csv'

def read_tree_data():
    # Load the tree data:
    transitive_relations = []
    tree_relations = recover_tree_from_transitive_closure(transitive_relations)

    # Plot the embeddings
    show_node_labels=[]
    if opt_task == 'mammals':
        show_node_labels=['dog.n.01', 'canine.n.02', 'carnivore.n.01', 'placental.n.01', # 'mammal.n.01',
                          'rodent.n.01', 'clumber.n.01', 'ungulate.n.01', 'primate.n.02',
                          'even-toed_ungulate.n.01', 'odd-toed_ungulate.n.01']

    # All direct children of root
    transitive_relations_without_root = []
    tree_relations_without_root = []
    root_label = 'mammal.n.01'
    for rel in tree_relations:
        if rel[0] != root_label:
            tree_relations_without_root.append(rel)

    for rel in transitive_relations:
        if rel[0] != root_label:
            transitive_relations_without_root.append(rel)

    return transitive_relations_without_root, tree_relations_without_root, show_node_labels


transitive_relations, tree_relations, show_node_labels = read_tree_data()

### Training

In [53]:
def data_loader_lr(data, epoch, progress = False):
    data.burnin = False 
    lr = opt_lr
    if epoch < opt_burnin:
        data.burnin = True
        lr = opt_lr * train._lr_multiplier
    loader_iter = tqdm(data) if progress else data
    return loader_iter, lr

In [54]:
def train(device, model, data, optimizer, progress=False):
    epoch_loss = torch.Tensor(len(data))
    LOSS = np.zeros(opt_epochs)
    for epoch in range(opt_epochs):
        epoch_loss.fill_(0)
        t_start = timeit.default_timer()
        # handling burnin, get loader_iter and learning rate
        loader_iter, lr = data_loader_lr(data, epoch, progress=progress)
        for i_batch, (inputs, targets) in enumerate(loader_iter):
            elapsed = timeit.default_timer() - t_start
            inputs = inputs.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()
            preds = model(inputs) * opt_dscale
            loss = model.loss(preds, targets, size_average=True)
            loss.backward()
            optimizer.step(lr=lr)
            epoch_loss[i_batch] = loss.cpu().item()
            with torch.no_grad():
                loss = model.loss(preds.to(d64), targets, size_average=True)
                epoch_loss[i_batch] = loss.cpu().item()

            animation = create_animation("Visualization")
            
            show_node_labels = True
            figure_name = "Visualization"
            figure = poincare_2d_visualization(
                model,
                animation=animation,
                epoch=epoch,
                eval_result='',
                avg_loss=0,
                avg_pos_loss=0,
                avg_neg_loss=0,
                tree=list(tree_relations),
                show_node_labels=show_node_labels,
                figure_title=figure_name,
                num_nodes=None)    
            figure.show()

        LOSS[epoch] = torch.mean(epoch_loss).to(d64).item()
        # since only one thread is used:
        log.info('json_stats: {' f'"epoch": {epoch}, '
                 f'"elapsed": {elapsed}, ' f'"loss": {LOSS[epoch]}, ' '}')
                 
    return LOSS


# Training embedding

In [55]:
# setup model
seed_everything(1)
model, data, model_name = init_model(opt_manifold, opt_curvature, opt_dim, idx, objects, weights, sparse=opt_sparse)
data.neg_multiplier = opt_neg_multiplier
train._lr_multiplier = opt_burnin_multiplier
model = model.to(device)
print('the total dimension', model.lt.weight.data.size(-1))
print(">>>>>> # Tensor# | dtype is:", model.lt.weight.dtype, "| device is:", model.lt.weight.device)
# setup optimizer, both works, though a small lr should be used for RiemannianAdam (which is not tuned yet)
#optimizer = RiemannianAdam(model.parameters(), lr=opt_lr)
optimizer = RiemannianSGD(model.parameters(), lr=opt_lr)
# get adjacency matrix
adj = adj_matrix(data)
# begin training
start_time = timeit.default_timer()
loss = train(device, model, data, optimizer, progress=False)
train_time = timeit.default_timer() - start_time
print("Total training time is:", train_time)

the total dimension 2
>>>>>> # Tensor# | dtype is: torch.float64 | device is: cpu


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  data = BatchedDataset(idx, objects, weights, opt_negs, opt_batchsize,


AttributeError: type object 'DAGEmbeddingKeyedVectors' has no attribute 'syn0'

# Evaluate embedding

In [10]:
class RES():
    """for logging results"""
    def __init__(self, loss, eval_res, weight):
        self.loss = torch.tensor(loss, dtype=torch.float64, 
                                 device=cpu)
        self.eval_res = torch.tensor(eval_res, dtype=torch.float64, 
                                     device=cpu)
        self.weight = weight

In [11]:
model_weight = model.lt.weight.clone()
# eval meanrank, maprank in the original model
meanrank, maprank = eval_reconstruction(adj, model_weight, workers=opt_ndproc)
if opt_manifold != "PoincareBall":
    # change to PoincareBall to derive the sqnorms metric
    model_weight = model_weight.to_other_manifold("PoincareBall")
sqnorms = torch.sqrt(torch.sum(torch.pow(model_weight, 2), dim=-1))
sqnorm_min = sqnorms.min().item()
sqnorm_avg = sqnorms.mean().item()
sqnorm_max = sqnorms.max().item()
eval_res = [meanrank, maprank, sqnorm_min, sqnorm_avg, sqnorm_max, train_time]
RESULTS = RES(loss, eval_res, model_weight)
# torch.save(RESULTS, "./results_weights/"+FILE_NAME+"_seed1"+ ".pt")
log.info(
    'json_stats final test: \n{'
    f'"sqnorm_min": {round(sqnorm_min,6)}, '
    f'"sqnorm_avg": {round(sqnorm_avg,6)}, '
    f'"sqnorm_max": {round(sqnorm_max,6)}, \n'
    f'"mean_rank": {round(meanrank,6)}, '
    f'"map": {round(maprank,6)}, '
    '}'
)

json_stats final test: 
{"sqnorm_min": 0.000141, "sqnorm_avg": 0.005579, "sqnorm_max": 0.040499, 
"mean_rank": 503.558257, "map": 0.019105, }
