In [1]:
#import pandas as pd
import torch
from pykeen.pipeline import pipeline
import numpy as np
from pykeen.datasets import WN18RR, FB15k237
from pykeen.nn.init import PretrainedInitializer
from functions import *

# Use the GPU when one is available; otherwise fall back to the CPU so that
# torch.load(map_location=...) below does not fail on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the pre-generated word2vec embeddings for WN18RR and make sure they are
# stored contiguously in memory.
# NOTE: torch.load unpickles the file contents; only load files from a trusted source.
# File names suggest 300-dim entity vectors and PCA-reduced 150-dim relation
# vectors -- TODO confirm against the embedding-generation notebook.
entity_embedd = torch.load(
    '03_nlm_embeddings/word2vec_wn18rr/05_word2vec_wn18rr_300dim_ent_sorted.pt',
    map_location=torch.device(device),
).contiguous()
relation_embedd = torch.load(
    '03_nlm_embeddings/word2vec_wn18rr/06_word2vec_wn18rr_pcadim150_rel_sorted.pt',
    map_location=torch.device(device),
).contiguous()

#### Prepare embeddings to fit RotatE input format 
- For each data point, take the first half of the embedding dimensions as the real part and the second half as the imaginary part of the complex tensor.
- For the relations, apply the `init_phases` function, which rotates the embeddings.

###### Word2vec Embeddings

In [2]:
# Give every relation value a trailing axis of size 2: slot 0 holds the
# word2vec value, slot 1 is filled with NaN.
# NOTE(review): the NaN slot is presumably a placeholder that init_phases
# overwrites later -- confirm in functions.py.
rotate_rel = torch.stack(
    [relation_embedd, torch.full_like(relation_embedd, np.nan)],
    dim=2,
)

In [3]:
# Sanity check of the relation tensor layout; the recorded run shows
# torch.Size([11, 150, 2]).
rotate_rel.shape

torch.Size([11, 150, 2])

In [4]:
# Split every entity vector into two halves along the feature axis and place
# them on a new trailing axis: [..., 0] is the first half (real part) and
# [..., 1] is the second half (imaginary part).
# The previous stack(dim=1) + reshape(N, 150, 2) merely reinterpreted memory,
# pairing adjacent dimensions (2i, 2i+1) instead of the two halves, and it
# hard-coded the half-dimension 150.
rotate_ent = torch.stack(torch.chunk(entity_embedd, 2, dim=1), dim=-1)

###### BERT Embeddings

In [2]:
# Load the raw BERT relation embeddings for FB15k-237.
bert_rel = torch.load(
    '03_nlm_embeddings/bert_fb15k237/4lastlayers/00_bert_4lastlayers_fb15k237_rel.pt',
    map_location=torch.device(device),
)
# Keep the last two entries along the first axis separately: the last one is
# used as the real part, the second-to-last as the imaginary part.
# NOTE(review): presumably these are the last two BERT layers -- confirm the
# layout of the saved file.
bert_rel_im, bert_rel_re = bert_rel[-2], bert_rel[-1]

In [3]:
# Combine the two selected BERT entries into one tensor with rotate_bert_init
# (presumably provided by `from functions import *`).
# NOTE(review): this rebinds rotate_rel, replacing the word2vec tensor built
# earlier in the notebook, while the RotatE cell below is configured for
# wn18rr with embedding_dim=150 -- confirm which embedding section should run.
rotate_rel = rotate_bert_init(bert_rel_re, bert_rel_im)
# The recorded run shows torch.Size([237, 768, 2]).
rotate_rel.shape

torch.Size([237, 768, 2])

In [4]:
# Load the BERT entity embedding files from the directory and concatenate them
# (both helpers presumably come from `from functions import *`).
# NOTE(review): the listing printed by this cell also contains the relation
# file '00_bert_4lastlayers_fb15k237_rel.pt' -- confirm load_ent_embeddings
# skips it rather than loading it as entity data.
bert_ent = load_ent_embeddings('03_nlm_embeddings/bert_fb15k237/4lastlayers', device)
bert_ent_concat = concat_ent_embeddings(bert_ent)

['00_bert_4lastlayers_fb15k237_rel.pt', '01_bert_4lastlayers_fb15k237_ent.pt', '02_bert_4lastlayers_fb15k237_ent.pt', '03_bert_4lastlayers_fb15k237_ent.pt', '04_bert_4lastlayers_fb15k237_ent.pt', '05_bert_4lastlayers_fb15k237_ent.pt', '06_bert_4lastlayers_fb15k237_ent.pt']


In [5]:
# Select the last two entries along the first axis of the concatenated entity
# embeddings: the last one as the real part, the second-to-last as the
# imaginary part (same convention as for the relations).
bert_ent_im, bert_ent_re = bert_ent_concat[-2], bert_ent_concat[-1]

In [6]:
# Combine the two selected BERT entries into one entity tensor with
# rotate_bert_init (presumably provided by `from functions import *`).
# NOTE(review): this rebinds rotate_ent, replacing the word2vec tensor built
# earlier in the notebook -- confirm which embedding section should run before
# the RotatE cell.
rotate_ent = rotate_bert_init(bert_ent_re, bert_ent_im)
# The recorded run shows torch.Size([14951, 768, 2]).
rotate_ent.shape

torch.Size([14951, 768, 2])

#### RotatE Model

In [5]:
# Convert the relation tensor with init_phases before it is used as the
# pretrained relation initializer.
# NOTE(review): this overwrites rotate_rel with its own transformed value, so
# re-running the cell applies init_phases a second time -- rebuild rotate_rel
# first if the cell has to be executed again.
rotate_rel = init_phases(rotate_rel)
# The recorded run shows torch.Size([11, 150, 2]).
rotate_rel.shape

torch.Size([11, 150, 2])

In [6]:
# Train RotatE on WN18RR, initialising entity and relation representations from
# the pretrained tensors prepared above, and log metrics to Weights & Biases.
#
# NOTE(review): checkpoint_name says "1000epochs" while num_epochs is 1500; the
# recorded run resumed this checkpoint after epoch 1461, so the name is kept.
# NOTE(review): training_loop is 'lcwa'; negative_sampler_kwargs is presumably
# ignored in that mode -- confirm, or switch to 'slcwa' if 256 negatives per
# positive were intended.
result = pipeline(
    dataset="wn18rr",
    dataset_kwargs={"create_inverse_triples": False},
    model="rotate",
    model_kwargs={
        "embedding_dim": 150,
        "entity_initializer": PretrainedInitializer(tensor=rotate_ent),
        "relation_initializer": PretrainedInitializer(tensor=rotate_rel),
    },
    stopper="early",
    stopper_kwargs={"frequency": 50, "patience": 3, "relative_delta": 0.002},
    result_tracker='wandb',
    result_tracker_kwargs={"project": 'rotatE'},
    optimizer='adam',
    optimizer_kwargs={"lr": 0.00005, "weight_decay": 0.0},
    loss='NSSALoss',
    loss_kwargs={"margin": 9, "adversarial_temperature": 1.0},
    training_loop='lcwa',
    training_kwargs={
        "num_epochs": 1500,
        "checkpoint_name": 'rotate_wn18rr_word2vec300dim_paperP_1000epochs.pt',
        "checkpoint_directory": '01_models/rotatE/wn18rr/checkpoints',
        "checkpoint_frequency": 30,
        "batch_size": 8,
    },
    evaluator="rankbased",
    evaluator_kwargs={"filtered": True},
    negative_sampler_kwargs={"num_negs_per_pos": 256},
)

# Persist the trained model, metrics and training triples of this run.
result.save_to_directory("01_models/rotatE/wn18rr_word2vec300_paperP_1500epochs.pt")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mvjolacl[0m ([33mnlm_kgc[0m). Use [1m`wandb login --relogin`[0m to force relogin


INFO:pykeen.training.training_loop:=> loading checkpoint '01_models/rotatE/wn18rr/checkpoints/rotate_wn18rr_word2vec300dim_paperP_1000epochs.pt'
INFO:pykeen.training.training_loop:=> loaded checkpoint '01_models/rotatE/wn18rr/checkpoints/rotate_wn18rr_word2vec300dim_paperP_1000epochs.pt' stopped after having finished epoch 1461
INFO:pykeen.stoppers.stopper:=> loading stopper summary dict from training loop checkpoint in '01_models/rotatE/wn18rr/checkpoints/rotate_wn18rr_word2vec300dim_paperP_1000epochs.pt'
INFO:pykeen.stoppers.stopper:=> loaded stopper summary dictionary from checkpoint in '01_models/rotatE/wn18rr/checkpoints/rotate_wn18rr_word2vec300dim_paperP_1000epochs.pt'


Training epochs on cuda:0:  97%|#########7| 1461/1500 [00:00<?, ?epoch/s]

Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1462.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1463.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1464.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1465.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1466.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1467.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1468.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1469.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1470.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1471.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1472.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1473.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1474.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1475.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1476.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1477.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1478.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1479.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1480.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1481.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1482.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1483.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1484.
INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1484.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1485.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1486.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1487.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1488.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1489.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1490.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1491.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1492.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1493.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1494.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1495.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1496.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1497.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1498.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1499.


Training batches on cuda:0:   0%|          | 0/7819 [00:00<?, ?batch/s]

INFO:pykeen.evaluation.evaluator:Starting batch_size search for evaluation now...
INFO:pykeen.evaluation.evaluator:Concluded batch_size search with batch_size=512.
INFO:pykeen.evaluation.evaluator:Evaluation took 1.53s seconds
INFO:pykeen.stoppers.early_stopping:New best result at epoch 1500: 0.5552407932011332. Saved model weights to /home/kit/aifb/ho8030/.data/pykeen/checkpoints/best-model-weights-e672b70c-3e64-43f3-b4ff-ddc198e1930f.pt
INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1500.
INFO:pykeen.training.training_loop:=> Saved checkpoint after having finished epoch 1500.


Evaluating on cuda:0:   0%|          | 0.00/2.92k [00:00<?, ?triple/s]

INFO:pykeen.evaluation.evaluator:Evaluation took 1.85s seconds


0,1
loss,█▆▅▅▆▆▆▇▅▆▆▆▆▅▄▆▄▅▄▅▅▄▄▄▅▄▃▁▄▃▄▃▂▄▂▂▂▂▃
testing.both.optimistic.adjusted_arithmetic_mean_rank,▁
testing.both.optimistic.adjusted_arithmetic_mean_rank_index,▁
testing.both.optimistic.adjusted_geometric_mean_rank_index,▁
testing.both.optimistic.adjusted_hits_at_k,▁
testing.both.optimistic.adjusted_inverse_harmonic_mean_rank,▁
testing.both.optimistic.arithmetic_mean_rank,▁
testing.both.optimistic.count,▁
testing.both.optimistic.geometric_mean_rank,▁
testing.both.optimistic.harmonic_mean_rank,▁

0,1
loss,0.05488
testing.both.optimistic.adjusted_arithmetic_mean_rank,0.15968
testing.both.optimistic.adjusted_arithmetic_mean_rank_index,0.84036
testing.both.optimistic.adjusted_geometric_mean_rank_index,0.99858
testing.both.optimistic.adjusted_hits_at_k,0.5635
testing.both.optimistic.adjusted_inverse_harmonic_mean_rank,0.47221
testing.both.optimistic.arithmetic_mean_rank,3237.09918
testing.both.optimistic.count,5848.0
testing.both.optimistic.geometric_mean_rank,22.20955
testing.both.optimistic.harmonic_mean_rank,2.11706


INFO:pykeen.triples.triples_factory:Stored TriplesFactory(num_entities=40559, num_relations=11, create_inverse_triples=False, num_triples=86835, path="/pfs/data5/home/kit/aifb/ho8030/.data/pykeen/datasets/wn18rr/train.txt") to file:///pfs/data5/home/kit/aifb/ho8030/01_models/rotatE/wn18rr_word2vec300_paperP_1500epochs.pt/training_triples
INFO:pykeen.pipeline.api:Saved to directory: file:///pfs/data5/home/kit/aifb/ho8030/01_models/rotatE/wn18rr_word2vec300_paperP_1500epochs.pt


##### Load trained model to continue training 