# Analysis helper overview
* I wrote two helper scripts to help run the time-leveraged pipeline: eval_helper and analysis_helper
* This notebook focuses on how to use analysis_helper
* analysis_helper extracts the hits@k and mrr results generated by eval_helper
* Three main steps:
    * get the model parameters and set up the model
    * create a helper object using the model
    * read the hits@k and MRR statistics from the helper object's attributes

In [1]:
import os
import sys

import optuna
import polars as pl
import pykeen
import pykeen.constants
import pykeen.datasets.timeresolvedkg as trkg
import pykeen.nn.compositions as compositions
import pykeen.nn.modules as modules
import torch
from pykeen.predict import predict_triples

# ../tools must be on sys.path before analysis_helper can be imported.
sys.path.append("../tools")
from analysis_helper import AnalysisHelper as ah

## Build model with associated kwargs

### Get optuna parameters

In [2]:
# Read the Optuna storage URL from the environment so that database credentials
# are never committed together with the notebook. Expected form:
#   postgresql+psycopg2://<user>:<password>@localhost:5432/optuna_semmed
storage_url = os.environ.get("OPTUNA_STORAGE_URL")
if not storage_url:
    raise RuntimeError(
        "OPTUNA_STORAGE_URL is not set; export it before running this notebook, e.g. "
        "postgresql+psycopg2://<user>:<password>@localhost:5432/optuna_semmed"
    )

storage = optuna.storages.RDBStorage(url=storage_url)

# List every study kept in this storage so the study name used later can be checked.
optuna.get_all_study_names(storage)

['transe_hpo',
 'rotate_hpo',
 'cgcn-sub_transe_hpo',
 'rotate_inversetriples_hpo',
 'rotate_neg_hpo',
 'transe_neg_hpo',
 'rotate_neg_evalwl_hpo']

In [3]:
# Open the CompGCN (subtraction composition) + TransE HPO study by name and keep
# only the hyper-parameters of its best trial.
cgcn_study = optuna.load_study(storage=storage, study_name="cgcn-sub_transe_hpo")
best_params = cgcn_study.best_params

best_params

{'model.embedding_dim': 296,
 'loss.margin': 15,
 'loss.adversarial_temperature': 0.9243338272557406,
 'negative_sampler.num_negs_per_pos': 28,
 'training.batch_size': 182}

### Get model parameters

In [4]:
# Settings describing the trained CompGCN model: constructor keyword arguments,
# the checkpoint file name and the random seed.
model_kwargs = dict(
    model="compGCN",
    model_kwargs=dict(
        encoder_kwargs=dict(
            num_layers=1,  # original note: "from 2" (presumably lowered from 2 layers)
            layer_kwargs=dict(composition=compositions.SubtractionCompositionModule),
        ),
        interaction=modules.TransEInteraction,
        interaction_kwargs=dict(p=2),
        # Embedding size is taken from the best Optuna trial.
        embedding_dim=best_params["model.embedding_dim"],
    ),
    checkpoint_name="cGCN-sub-TransE_neg_ttv_tvswap2_1994.pt",
    random_seed=1759051689,
)

### Create model

In [5]:
# Instantiate CompGCN on the training triples of the 1994 dataset
# (loaded with the train/test/valid split enabled via split_ttv).
my_model = pykeen.models.CompGCN(
    triples_factory=ah.load_pykeen_dataset(
        build_dataset_kwargs={"split_ttv": True}, year="1994"
    ).training,
    # Reuse the seed stored in model_kwargs instead of repeating the literal,
    # so the two values cannot drift apart.
    random_seed=model_kwargs["random_seed"],
    **model_kwargs["model_kwargs"],
)

relative path: /home/rogertu/.data/pykeen/datasets/timeresolvedkg/data/time_networks-6_metanode/1994
training path: /home/rogertu/.data/pykeen/datasets/timeresolvedkg/data/time_networks-6_metanode/1994/train_ttv_notime.txt
testing path: /home/rogertu/.data/pykeen/datasets/timeresolvedkg/data/time_networks-6_metanode/1994/test_ttv_notime.txt
validation path: /home/rogertu/.data/pykeen/datasets/timeresolvedkg/data/time_networks-6_metanode/1994/valid_ttv_notime.txt
Checking if all files are unpacked: True.
Loading dataset from /home/rogertu/.data/pykeen/datasets/timeresolvedkg/data/time_networks-6_metanode/1994.


## Create analysis_helper object

In [6]:
# Analysis helper for the 1994 dataset, built around the CompGCN model and the
# checkpoint file named in model_kwargs.
cgcn_1994 = ah(
    # model and the checkpoint file to use with it
    pykeen_model=my_model,
    chkpt_file=model_kwargs["checkpoint_name"],
    # dataset selection: same year and split settings used to build the model
    year="1994",
    build_dataset_kwargs={"split_ttv": True},
    # presumably runs the helper on the GPU; confirm in analysis_helper
    cuda=True,
)

relative path: /home/rogertu/.data/pykeen/datasets/timeresolvedkg/data/time_networks-6_metanode/1994
training path: /home/rogertu/.data/pykeen/datasets/timeresolvedkg/data/time_networks-6_metanode/1994/train_ttv_notime.txt
testing path: /home/rogertu/.data/pykeen/datasets/timeresolvedkg/data/time_networks-6_metanode/1994/test_ttv_notime.txt
validation path: /home/rogertu/.data/pykeen/datasets/timeresolvedkg/data/time_networks-6_metanode/1994/valid_ttv_notime.txt
Checking if all files are unpacked: True.
Loading dataset from /home/rogertu/.data/pykeen/datasets/timeresolvedkg/data/time_networks-6_metanode/1994.


relative path: /home/rogertu/.data/pykeen/datasets/timeresolvedkg/data/time_networks-6_metanode/1994
training path: /home/rogertu/.data/pykeen/datasets/timeresolvedkg/data/time_networks-6_metanode/1994/train_ttv_notime.txt
testing path: /home/rogertu/.data/pykeen/datasets/timeresolvedkg/data/time_networks-6_metanode/1994/test_ttv_notime.txt
validation path: /home/rogertu/.data/pykeen/datasets/timeresolvedkg/data/time_networks-6_metanode/1994/valid_ttv_notime.txt
Checking if all files are unpacked: True.
Loading dataset from /home/rogertu/.data/pykeen/datasets/timeresolvedkg/data/time_networks-6_metanode/1994.
relative path: /home/rogertu/.data/pykeen/datasets/timeresolvedkg/data/time_networks-6_metanode/1994
training path: /home/rogertu/.data/pykeen/datasets/timeresolvedkg/data/time_networks-6_metanode/1994/train_ttv_notime.txt
testing path: /home/rogertu/.data/pykeen/datasets/timeresolvedkg/data/time_networks-6_metanode/1994/test_ttv_notime.txt
validation path: /home/rogertu/.data/pyk

### Get results dataframes

In [25]:
# Per-query results table; answer_filt_rank holds a list of ranks for each query.
cgcn_1994.df

query_label,query,answer_label,in_testing,in_validation,answer_filt_rank
str,str,list[str],list[i32],list[i32],list[i64]
"""C0026838""","""head""","[""C0026838"", ""D002220"", … ""C1413151""]","[0, 0, … 0]","[0, 0, … 0]",[2880]
"""C0151467""","""head""","[""D003348"", ""C0001655"", … ""C1822740""]","[0, 0, … 0]","[0, 0, … 0]","[380, 475, 918]"
"""C0009766""","""head""","[""D016593"", ""D016589"", … ""C1424742""]","[0, 0, … 0]","[0, 0, … 0]","[410, 1101, … 3962]"
"""C0032285""","""head""","[""D000890"", ""D010100"", … ""C1413151""]","[0, 0, … 0]","[0, 0, … 0]","[34, 2038, 3773]"
"""C0010543""","""head""","[""D009278"", ""C0010543"", … ""C1413152""]","[0, 0, … 0]","[0, 0, … 0]",[57]
…,…,…,…,…,…
"""C015238""","""tail""","[""C0393735"", ""C0018681"", … ""C089015""]","[0, 0, … 0]","[0, 0, … 0]","[71, 87]"
"""C007734""","""tail""","[""C0848309"", ""D012221"", … ""C089015""]","[0, 0, … 0]","[0, 0, … 0]",[16]
"""C004644""","""tail""","[""C004644"", ""C0003950"", … ""C089015""]","[0, 0, … 0]","[0, 0, … 0]",[4]
"""D000077300""","""tail""","[""D000077300"", ""C0080203"", … ""C089015""]","[0, 0, … 0]","[0, 0, … 0]",[2416]


In [26]:
# Results with one answer per row: answer_filt_rank is a single integer here, and
# the table also carries the known_trues, answers and year_diff columns.
cgcn_1994.answer_df

query_label,query,answer_label,in_validation,answer_filt_rank,known_trues,answers,year_diff
str,str,list[str],list[i32],i64,i64,str,i64
"""C0026838""","""head""","[""C0026838"", ""D002220"", … ""C1413151""]","[0, 0, … 0]",2880,2879,"""D008234""",-17
"""C0151467""","""head""","[""D003348"", ""C0001655"", … ""C1822740""]","[0, 0, … 0]",380,379,"""D008774""",-44
"""C0151467""","""head""","[""D003348"", ""C0001655"", … ""C1822740""]","[0, 0, … 0]",475,475,"""D000527""",-33
"""C0151467""","""head""","[""D003348"", ""C0001655"", … ""C1822740""]","[0, 0, … 0]",918,919,"""D011034""",-43
"""C0009766""","""head""","[""D016593"", ""D016589"", … ""C1424742""]","[0, 0, … 0]",410,409,"""D005283""",-38
…,…,…,…,…,…,…,…
"""C015238""","""tail""","[""C0393735"", ""C0018681"", … ""C089015""]","[0, 0, … 0]",87,87,"""D010612""",-42
"""C007734""","""tail""","[""C0848309"", ""D012221"", … ""C089015""]","[0, 0, … 0]",16,15,"""C0027424""",-16
"""C004644""","""tail""","[""C004644"", ""C0003950"", … ""C089015""]","[0, 0, … 0]",4,3,"""D017229""",-40
"""D000077300""","""tail""","[""D000077300"", ""C0080203"", … ""C089015""]","[0, 0, … 0]",2416,2415,"""C0009806""",-11


## Get hits/mrr results

In [27]:
# MRR statistic exposed by the helper for this model/year.
cgcn_1994.mrr

0.020623051602007583

In [28]:
# hits@k statistic with k = 10.
cgcn_1994.hits_10

0.040697674418604654

In [29]:
# hits@k statistic with k = 100.
cgcn_1994.hits_100

0.21677740863787376