In [2]:
cd ..

/home/smaug/ownCloud/praca/reasonable-embeddings/src


In [3]:
from pathlib import Path
base_dir = Path('../local/out/elpp/')

base_dir.mkdir(parents=True, exist_ok=True)

In [4]:
import lzma
import dill

with lzma.open(base_dir / 'reasoners.dill.xz', 'rb') as f:
    reasoners = dill.load(f)

In [38]:
from src.reasoner import ReasonerHead
from src.reasoner import EmbeddingLayer

with lzma.open(base_dir / 'exp1.dill.xz', 'rb') as f:
    artifacts = dill.load(f)

emb_size = 10
hidden_size = 16

for key, components in artifacts.items():
    neural_reasoner = ReasonerHead(emb_size=emb_size, hidden_size=hidden_size)
    neural_reasoner.load_state_dict(components['reasoner'])
    components['reasoner'] = neural_reasoner
    encoders = [EmbeddingLayer(emb_size=emb_size, n_concepts=reasoner.n_concepts, n_roles=reasoner.n_roles) for reasoner in
                reasoners]
    for sd, e in zip(artifacts['encoders'], encoders):
        e.load_state_dict(sd)
    artifacts['encoders'] = encoders

In [52]:
with lzma.open(base_dir / 'test_reasoners.dill.xz', 'rb') as f:
    test_reasoners = dill.load(f)

100%|██████████| 300/300 [00:17<00:00, 17.19it/s]  
100%|██████████| 300/300 [00:06<00:00, 47.08it/s]  
100%|██████████| 300/300 [00:40<00:00,  7.48it/s]  
 76%|███████▋  | 229/300 [00:00<00:00, 835.56it/s] 
100%|██████████| 300/300 [00:12<00:00, 24.22it/s]  
 78%|███████▊  | 234/300 [00:00<00:00, 649.61it/s] 
 71%|███████   | 213/300 [00:00<00:00, 614.61it/s] 
 71%|███████   | 213/300 [00:00<00:00, 852.56it/s] 
100%|██████████| 300/300 [00:34<00:00,  8.73it/s]  
 80%|████████  | 240/300 [00:00<00:00, 799.27it/s] 
 61%|██████▏   | 184/300 [00:00<00:00, 1024.46it/s]
100%|██████████| 300/300 [00:57<00:00,  5.23it/s]  
100%|██████████| 300/300 [00:21<00:00, 13.70it/s]  
 80%|████████  | 241/300 [00:00<00:00, 725.77it/s] 
 67%|██████▋   | 202/300 [00:00<00:00, 807.05it/s] 
 59%|█████▉    | 177/300 [00:00<00:00, 1122.24it/s]
100%|██████████| 300/300 [00:24<00:00, 12.12it/s]  
100%|██████████| 300/300 [00:33<00:00,  9.07it/s]  
 67%|██████▋   | 202/300 [00:00<00:00, 594.95it/s] 
 78%|███████

In [58]:
best_neural_reasoner = artifacts[max(artifacts.keys())]['reasoner']

In [60]:
import numpy as np
from src.reasoner import ReasonerHead, EmbeddingLayer, train
from src.utils import timestr, paramcount
import torch as T
from src.elpp.gen import split_dataset

seed = 2022
ts = timestr()

emb_size = 10
hidden_size = 16
epoch_count = 15
test_epoch_count = 10
batch_size = 32

test_artifacts = {}

for complexity_threshold in range(2, 21):

    print("Complexity threshold", complexity_threshold)

    training, validation, test = split_dataset(test_reasoners, np.random.default_rng(seed=0xbeef), complexity_threshold=complexity_threshold)

    T.manual_seed(seed)
    reasoner = best_neural_reasoner
    encoders = [EmbeddingLayer(emb_size=emb_size, n_concepts=reasoner.n_concepts, n_roles=reasoner.n_roles) for reasoner in
                test_reasoners]

    print(f'created {len(encoders)} encoders with {paramcount(encoders[0])} parameters each')

    train_logger = train(training, validation, reasoner, encoders, epoch_count=epoch_count, batch_size=batch_size, freeze_reasoner=True)

    test_artifacts[complexity_threshold] = {
        'encoders': encoders,
        'training': training,
        'validation': validation,
        'test': test
    }


Complexity threshold 2
created 20 encoders with 1440 parameters each
train epoch 00/15 | batch 320/319 | loss 0.7227 | val loss 0.7290 | acc 0.6946 | f1 0.6698 | prec 0.7291 | recall 0.6195 | roc auc 0.7678 | pr auc 0.8086 | elapsed 1.47s
train epoch 01/15 | batch 320/319 | loss 0.7115 | val loss 0.7021 | acc 0.7037 | f1 0.6792 | prec 0.7404 | recall 0.6274 | roc auc 0.7765 | pr auc 0.8148 | elapsed 2.69s
train epoch 02/15 | batch 320/319 | loss 0.6615 | val loss 0.6762 | acc 0.7104 | f1 0.6856 | prec 0.7500 | recall 0.6313 | roc auc 0.7847 | pr auc 0.8211 | elapsed 2.73s
train epoch 03/15 | batch 320/319 | loss 0.6134 | val loss 0.6537 | acc 0.7144 | f1 0.6891 | prec 0.7561 | recall 0.6329 | roc auc 0.7922 | pr auc 0.8269 | elapsed 2.70s
train epoch 04/15 | batch 320/319 | loss 0.5696 | val loss 0.6342 | acc 0.7223 | f1 0.6969 | prec 0.7671 | recall 0.6384 | roc auc 0.7990 | pr auc 0.8323 | elapsed 2.79s
train epoch 05/15 | batch 320/319 | loss 0.5299 | val loss 0.6169 | acc 0.7298 | 

In [None]:
tmp = {key: {'encoders': [e.state_dict() for e in value['encoders']], 'training': value['training'], 'validation': value['validation'], 'test': value['test']} for key, value in test_artifacts.items()}

with lzma.open(base_dir / 'exp2.dill.xz', 'wb') as f:
    dill.dump(tmp, f)

In [None]:
from tqdm import tqdm
from src.reasoner import eval_batch

import pandas as pd

rows = []

for complexity_threshold, components in tqdm(artifacts.items()):
    with T.no_grad():
        idx_te, X_te, y_te = components['test']
        _, _, Y_te_good = eval_batch(best_neural_reasoner, components['encoders'], X_te, y_te, idx_te)
    for i in range(len(idx_te)):
        idx = idx_te[i]
        axiom = X_te[i]
        expected = y_te[i]
        predicted = Y_te_good[i]
        complexity = len(reasoners[idx].decode_shortest_proof(axiom[1], axiom[2]))
        rows.append([complexity_threshold, idx, complexity, axiom, expected, int(predicted >= .5), predicted])

In [91]:
df = pd.DataFrame(rows, columns=["Complexity threshold", "KB", "Complexity", "Axiom", "Expected", "Predicted", "Raw predicted"])
df.to_feather(base_dir / 'exp2.feather')
df


100%|██████████| 19/19 [00:23<00:00,  1.25s/it]


Unnamed: 0,Complexity threshold,KB,Complexity,Axiom,Expected,Predicted
0,2,0,9,"(0, 4, 53)",1,0
1,2,0,3,"(0, 7, 53)",1,0
2,2,0,8,"(0, 9, 0)",1,0
3,2,0,10,"(0, 9, 46)",1,0
4,2,0,7,"(0, 9, 53)",1,0
...,...,...,...,...,...,...
311003,20,17,22,"(0, 96, 66)",1,1
311004,20,17,23,"(0, 98, 5)",1,1
311005,20,17,21,"(0, 98, 10)",1,1
311006,20,17,22,"(0, 98, 48)",1,1
