In [1]:
import networkx as nx
from tqdm import tqdm
import random
from leafer import Leafer
import numpy as np
import pickle
import os
import glob


# One shared seed for both Python's and NumPy's global RNGs, so that any
# random choices made further down are reproducible from a fresh kernel.
seed = 42
for _reseed in (random.seed, np.random.seed):
    _reseed(seed)

In [3]:
# Read the tab-separated edge list into a directed graph.
# NOTE: the path string is a placeholder — replace it with your own file.
G = nx.read_edgelist(
    'path to your graph edgelist',
    create_using=nx.DiGraph,
    delimiter='\t',
)

In [5]:
# Break every directed cycle in G (mutates G in place).
# nx.find_cycle raises nx.NetworkXNoCycle once no cycle is left; that is the
# only exception treated as "done". Anything else (a real error, Ctrl-C) now
# propagates instead of being silently swallowed by a bare `except:`.
while True:
    try:
        cycle = nx.find_cycle(G)
    except nx.NetworkXNoCycle:
        break
    print(cycle)  # log the cycle that is about to be broken
    G.remove_edge(*cycle[0])  # dropping the cycle's first edge opens it

[('restrain.v.01', 'inhibit.v.04'), ('inhibit.v.04', 'restrain.v.01')]


In [8]:
l = Leafer(G)

# Keyword arguments for the split, gathered in one place so each setting sits
# next to its explanation.
split_options = dict(
    # level of the topological sort to start from
    generation_depth=0,
    # probability of a sample going to test
    p=0.05,
    # probability of dividing the leaves in half instead of putting them all
    # into train or all into test
    p_divide_leafs=0.5,
    # minimum rate of nodes not yet seen in train that is required before they
    # may be divided into halves going to train and test respectively.
    # Example: if 6 out of 10 were already in train, all 10 go to train;
    # if only 5 out of 10 were in train, the remaining half may go to test.
    min_to_test_rate=0.5,
    # probabilities of taking each kind of case, in this order:
    weights=[
        0.0,  # single leaves
        0.0,  # only leaves
        0.0,  # internal nodes
        0.0,  # synset mixing
        0.0,  # insertion
        1.0,  # hypernym prediction
    ],
)

train, test = l.split_train_test(**split_options)

predict_hypernym 2736 2736
predict_hypernym 143 143


In [9]:
# Number of samples in each split, displayed as (train, test).
tuple(map(len, (train, test)))

(2736, 143)

In [10]:
def count_cases(samples):
    """Count how many samples carry each "case" label.

    Args:
        samples: iterable of mappings, each providing a "case" key.

    Returns:
        A plain dict mapping every case label to its number of occurrences,
        with keys in order of first appearance. Empty input gives ``{}``.
    """
    counts = {}
    for sample in samples:
        case = sample["case"]
        counts[case] = counts.get(case, 0) + 1
    return counts


# Same tally for both splits — one helper instead of two copy-pasted loops.
train_count = count_cases(train)
test_count = count_cases(test)

In [11]:
# Display the per-case sample counts of the train and test splits side by side.
train_count, test_count

({'predict_hypernym': 2736}, {'predict_hypernym': 143})

In [16]:
name_train = "train path . pickle"
name_test = "test path .pickle"

# Serialize each split to its own pickle file: train first, then test.
for out_path, split in ((name_train, train), (name_test, test)):
    with open(out_path, "wb") as handle:
        pickle.dump(split, handle)