# Sample hyperparameter optimization run for TransE on MIND

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import os
import json
import argparse
import numpy as np
import matplotlib.pyplot as plt

import optuna
from optuna.trial import TrialState

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from sklearn.metrics import average_precision_score

from torch.utils.data import DataLoader

import sys

sys.path.append("../kge/codes/")
from dataloader import TestDataset, TrainDataset, BidirectionalOneShotIterator
from run import parse_args
import model

  from .autonotebook import tqdm as notebook_tqdm


## Import the dataloader and copy the relevant helper functions over from `run.py`

In [2]:
# L119 in run.py
def read_triple(file_path, entity2id, relation2id):
    """
    Read tab-separated (head, relation, tail) triples and map them into ids.

    Args:
        file_path: path of a text file holding one
            ``head<TAB>relation<TAB>tail`` triple per line.
        entity2id: mapping from entity name to id.
        relation2id: mapping from relation name to id.

    Returns:
        A list of ``(head_id, relation_id, tail_id)`` tuples in file order.
        Blank lines are ignored, so an empty file yields an empty list.

    Raises:
        ValueError: if a non-blank line does not split into exactly three
            tab-separated fields.
        KeyError: if a head/tail is missing from ``entity2id`` or a relation
            is missing from ``relation2id``.
    """
    triples = []
    with open(file_path) as fin:
        for line in fin:
            line = line.strip()
            if not line:
                # A blank line (typically a trailing newline at end of file)
                # carries no triple; skip it instead of failing the unpack.
                continue
            h, r, t = line.split("\t")
            triples.append((entity2id[h], relation2id[r], entity2id[t]))
    return triples


def save_model(model, optimizer, save_variable_list, args):
    """
    Persist a training snapshot under ``args.save_path``.

    Written in this order:
      * ``config.json``  - the attributes of ``args`` as JSON
      * ``checkpoint``   - the entries of ``save_variable_list`` plus the
        model and optimizer state dicts (via ``torch.save``)
      * ``entity_embedding`` / ``relation_embedding`` - the two embedding
        tensors of ``model`` as NumPy arrays (via ``np.save``)
    """
    # Run configuration.
    config = vars(args)
    out_dir = args.save_path
    with open(os.path.join(out_dir, "config.json"), "w") as config_file:
        json.dump(config, config_file)

    # Training state: caller-supplied variables first, then the state dicts.
    checkpoint = dict(save_variable_list)
    checkpoint["model_state_dict"] = model.state_dict()
    checkpoint["optimizer_state_dict"] = optimizer.state_dict()
    torch.save(checkpoint, os.path.join(out_dir, "checkpoint"))

    # Embedding matrices, detached and copied to host memory before export.
    for name in ("entity_embedding", "relation_embedding"):
        weights = getattr(model, name).detach().cpu().numpy()
        np.save(os.path.join(out_dir, name), weights)


def set_logger(args):
    """
    Send INFO-level log records to a log file and to the console.

    The file is placed in ``args.save_path`` (falling back to
    ``args.init_checkpoint`` when ``save_path`` is empty) and is called
    ``train.log`` when ``args.do_train`` is truthy, ``test.log`` otherwise.
    It is opened in "w" mode, so an existing log is overwritten.
    """
    record_format = "%(asctime)s %(levelname)-8s %(message)s"

    log_name = "train.log" if args.do_train else "test.log"
    log_dir = args.save_path or args.init_checkpoint

    # File output through the root logger.
    logging.basicConfig(
        filename=os.path.join(log_dir, log_name),
        filemode="w",
        level=logging.INFO,
        format=record_format,
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    # Console output: an extra stream handler on the root logger.
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    console.setFormatter(logging.Formatter(record_format))
    logging.getLogger("").addHandler(console)


def log_metrics(mode, step, metrics):
    """
    Log every entry of ``metrics`` at INFO level, one record per metric.

    Each record reads ``"<mode> <metric name> at step <step>: <value>"`` with
    the value formatted as a float.
    """
    for name, value in metrics.items():
        message = "%s %s at step %d: %f" % (mode, name, step, value)
        logging.info(message)

## Set relevant arguments

In [3]:
# Container for the run settings. Every setting is assigned by hand in the
# following cells, so start from an empty namespace rather than attaching
# attributes to the imported `parse_args` function object itself.
args = argparse.Namespace()

In [4]:
# --- Model and locations -----------------------------------------------------
args.model = "TransE"
args.data_path = "/home/rogertu/projects/KnowledgeGraphEmbedding00/data/MIND_CtD/"
args.save_path = os.path.join(
    "/home/rogertu/projects/KnowledgeGraphEmbedding00/models/", args.model
)
args.double_entity_embedding = False  # False for TransE
args.double_relation_embedding = False  # False for TransE

# --- Hardware ----------------------------------------------------------------
args.cuda = True
args.cpu_num = 10

# --- Loss / sampling ---------------------------------------------------------
args.uni_weight = True
args.negative_adversarial_sampling = True
# gamma controls the embedding range. Per the original note,
# (gamma + 2) / hidden_dim should range from +/-1 to 0.1667.
args.gamma = 48
# Regularization controls the loss: off by default, on for DistMult and ComplEx.
args.regularization = 0.0

# --- Checkpointing, validation and logging cadence (in training steps) -------
args.save_checkpoint_steps = 10000
args.do_valid = True
args.valid_steps = 100000
args.log_steps = 100
args.test_log_steps = 10000

In [5]:
# Create the output directory (with any missing parents). `exist_ok=True`
# keeps the cell safe to re-run and removes the check-then-create race of a
# separate `os.path.exists` test.
if args.save_path:
    os.makedirs(args.save_path, exist_ok=True)

In [6]:
# Read in data
# entities.dict / relations.dict hold one "<id><TAB><name>" pair per line;
# both are turned into name -> integer id lookups.
with open(os.path.join(args.data_path, "entities.dict")) as fin:
    entity2id = {
        entity: int(eid)
        for eid, entity in (line.strip().split("\t") for line in fin)
    }

with open(os.path.join(args.data_path, "relations.dict")) as fin:
    relation2id = {
        relation: int(rid)
        for rid, relation in (line.strip().split("\t") for line in fin)
    }

# Vocabulary sizes, kept both as globals and on `args`.
nentity = len(entity2id)
nrelation = len(relation2id)
args.nentity = nentity
args.nrelation = nrelation

# Load the three splits as lists of (head_id, relation_id, tail_id) tuples.
train_triples, valid_triples, test_triples = (
    read_triple(os.path.join(args.data_path, split_file), entity2id, relation2id)
    for split_file in ("train.txt", "valid.txt", "test.txt")
)

# Every known triple across the three splits, passed to `test_step` later on.
all_true_triples = train_triples + valid_triples + test_triples

In [7]:
# Settings held constant across all trials.
args.optimizer = "adam"
args.adversarial_temperature = 1.0  # default. Modulates negative effect on loss.
args.warm_up_steps = None
args.test_batch_size = 4
args.countries = None

# Only training and validation are run in this notebook.
args.do_test = False
args.do_predict = False

## Define what constitutes a trial for each hyperparameter optimization step

In [8]:
def define_model(trial):
    """Optuna objective: train one KGE model and return its validation MRR.

    The searched hyperparameters are sampled from ``trial`` and stored on the
    module-level ``args``. A ``model.KGEModel`` is then trained for
    ``args.max_steps`` steps, evaluated on ``valid_triples``, saved with
    ``save_model`` and its validation ``"MRR"`` is returned.

    Args:
        trial: the Optuna trial used to sample ``batch_size``,
            ``negative_sample_size``, ``hidden_dimension_size``,
            ``learning_rate`` and ``max_steps``.

    Returns:
        The ``"MRR"`` entry of the metrics returned by ``test_step`` on the
        validation triples.

    Raises:
        ValueError: if ``args.optimizer`` is not "adam", "sgd" or "adagrad".

    Notes:
        * ``args`` is shared module state and is overwritten by every trial.
        * Every trial saves to the same ``args.save_path``, so the files on
          disk always belong to the most recently saved trial.
        * The objective needs a validation score, so the final evaluation is
          run regardless of ``args.do_valid``.
        * TODO: the learning rate is constant for the whole run; the warm-up
          based schedule hinted at by ``args.warm_up_steps`` is not applied.
    """
    # Searched hyperparameters. The parameter names are recorded in the study,
    # so they are kept exactly as they were.
    args.batch_size = trial.suggest_int("batch_size", 64, 256, step=4)
    args.negative_sample_size = trial.suggest_int(
        "negative_sample_size", 64, 128, step=4
    )
    args.hidden_dim = trial.suggest_int("hidden_dimension_size", 100, 300, step=25)
    args.learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    args.max_steps = trial.suggest_int("max_steps", 50000, 100000, step=10000)

    # Fail early on an unsupported optimizer instead of hitting an unbound
    # `optimizer` name once training starts.
    optimizer_classes = {
        "adam": torch.optim.Adam,
        "sgd": torch.optim.SGD,
        "adagrad": torch.optim.Adagrad,
    }
    if args.optimizer not in optimizer_classes:
        raise ValueError(
            "Unsupported optimizer %r; expected one of %s"
            % (args.optimizer, sorted(optimizer_classes))
        )

    # model
    kge_model = model.KGEModel(
        model_name=args.model,
        nentity=args.nentity,
        nrelation=args.nrelation,
        hidden_dim=args.hidden_dim,
        gamma=args.gamma,
        double_entity_embedding=args.double_entity_embedding,
        double_relation_embedding=args.double_relation_embedding,
    )
    # Honour the device flag so the model's device follows the same `args.cuda`
    # setting that is handed to train_step / test_step.
    if args.cuda:
        kge_model.cuda()

    # Two loaders over the same triples, one for each negative-sampling mode
    # ("head-batch" and "tail-batch").
    num_workers = max(1, args.cpu_num // 2)
    train_dataloader_head = DataLoader(
        TrainDataset(
            train_triples, nentity, nrelation, args.negative_sample_size, "head-batch"
        ),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=num_workers,
        collate_fn=TrainDataset.collate_fn,
    )
    train_dataloader_tail = DataLoader(
        TrainDataset(
            train_triples, nentity, nrelation, args.negative_sample_size, "tail-batch"
        ),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=num_workers,
        collate_fn=TrainDataset.collate_fn,
    )
    # every other sample will be head or tail
    train_iterator = BidirectionalOneShotIterator(
        train_dataloader_head, train_dataloader_tail
    )

    # Optimizer over the trainable parameters only.
    current_learning_rate = args.learning_rate
    optimizer = optimizer_classes[args.optimizer](
        filter(lambda p: p.requires_grad, kge_model.parameters()),
        lr=current_learning_rate,
    )

    training_logs = []
    for step in range(0, args.max_steps):
        log = kge_model.train_step(kge_model, optimizer, train_iterator, args)
        training_logs.append(log)

        # Periodic checkpoint (this also fires at step 0).
        if step % args.save_checkpoint_steps == 0:
            save_model(
                kge_model,
                optimizer,
                {"step": step, "current_learning_rate": current_learning_rate},
                args,
            )

        # Log the mean of each training metric since the previous report.
        if step % args.log_steps == 0:
            averaged = {
                name: sum(entry[name] for entry in training_logs) / len(training_logs)
                for name in training_logs[0]
            }
            log_metrics("Training average", step, averaged)
            training_logs = []

    # Evaluate once, after the final training step. Using a dedicated variable
    # guarantees the returned score can never be a training-average dict.
    last_step = args.max_steps - 1
    logging.info("Evaluating on Valid Dataset...")
    valid_metrics = kge_model.test_step(
        kge_model, valid_triples, all_true_triples, args
    )
    log_metrics("Valid", last_step, valid_metrics)

    save_model(
        kge_model,
        optimizer,
        {"step": last_step, "current_learning_rate": current_learning_rate},
        args,
    )

    mrr = valid_metrics["MRR"]

    # Drop this trial's references to the model and optimizer state before
    # emptying the cache, so their memory can actually be released.
    del kge_model, optimizer
    torch.cuda.empty_cache()  # clear memory buildup from multiple models
    return mrr

## Setup your optuna study

```python
# other server url
url="postgresql+psycopg2://root:su07dev@su07:5432/optuna_test"
```

If Optuna raises an error saying that the storage needs to be upgraded, run the command below:
```bash
optuna storage upgrade --storage $STORAGE_URL
```

In [9]:
# Optuna RDB storage. Set the STORAGE_URL environment variable to point at
# another server; the local database below is used when it is not set.
# NOTE(review): the fallback URL embeds a username and password. Prefer
# supplying the URL through the environment and dropping the literal.
storage = optuna.storages.RDBStorage(
    url=os.environ.get(
        "STORAGE_URL",
        "postgresql+psycopg2://rogertu:admin@localhost/optuna_test",
    ),
)

In [10]:
# Open the study in the RDB storage. With load_if_exists=True an existing
# study of the same name is re-used rather than raising, so re-running this
# cell resumes it. The objective returns MRR, hence direction="maximize".
study = optuna.create_study(
    storage=storage,
    study_name="TransE_MIND_CtD",
    load_if_exists=True,
    direction="maximize",
)

[32m[I 2022-10-22 21:09:18,542][0m A new study created in RDB with name: TransE_MIND_CtD[0m


In [None]:
# Run 100 trials; every trial trains and evaluates a full model.
# gc_after_trial=True runs garbage collection after each trial so objects left
# over from one trial are collected before the next one starts.
study.optimize(define_model, n_trials=100, gc_after_trial=True)

[32m[I 2022-10-22 22:06:40,277][0m Trial 0 finished with value: 0.009607338832682872 and parameters: {'batch_size': 168, 'negative_sample_size': 100, 'hidden_dimension_size': 250, 'learning_rate': 0.004145239582232327, 'max_steps': 90000}. Best is trial 0 with value: 0.009607338832682872.[0m
[32m[I 2022-10-22 22:52:53,669][0m Trial 1 finished with value: 0.0022293989672690267 and parameters: {'batch_size': 208, 'negative_sample_size': 116, 'hidden_dimension_size': 125, 'learning_rate': 0.00017446498144999613, 'max_steps': 100000}. Best is trial 0 with value: 0.009607338832682872.[0m
[32m[I 2022-10-22 23:19:34,093][0m Trial 2 finished with value: 0.008654065978562327 and parameters: {'batch_size': 228, 'negative_sample_size': 124, 'hidden_dimension_size': 100, 'learning_rate': 0.001806108189080781, 'max_steps': 60000}. Best is trial 0 with value: 0.009607338832682872.[0m
[32m[I 2022-10-22 23:51:06,040][0m Trial 3 finished with value: 0.011050940642967171 and parameters: {'bat

Sample Best Trial
```Python
FrozenTrial(number=71, values=[0.027441743017920704], datetime_start=datetime.datetime(2022, 10, 25, 19, 34, 46, 44677), datetime_complete=datetime.datetime(2022, 10, 25, 20, 47, 59, 858900), params={'batch_size': 256, 'negative_sample_size': 96, 'hidden_dimension_size': 275, 'learning_rate': 0.0014758180913550004, 'max_steps': 90000}, distributions={'batch_size': IntDistribution(high=256, log=False, low=64, step=4), 'negative_sample_size': IntDistribution(high=128, log=False, low=64, step=4), 'hidden_dimension_size': IntDistribution(high=300, log=False, low=100, step=25), 'learning_rate': FloatDistribution(high=0.01, log=True, low=0.0001, step=None), 'max_steps': IntDistribution(high=100000, log=False, low=50000, step=10000)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=1135, state=TrialState.COMPLETE, value=None)
```

In [12]:
# Show the best trial recorded in the study so far (value and sampled parameters).
study.best_trial

FrozenTrial(number=71, values=[0.027441743017920704], datetime_start=datetime.datetime(2022, 10, 25, 19, 34, 46, 44677), datetime_complete=datetime.datetime(2022, 10, 25, 20, 47, 59, 858900), params={'batch_size': 256, 'negative_sample_size': 96, 'hidden_dimension_size': 275, 'learning_rate': 0.0014758180913550004, 'max_steps': 90000}, distributions={'batch_size': IntDistribution(high=256, log=False, low=64, step=4), 'negative_sample_size': IntDistribution(high=128, log=False, low=64, step=4), 'hidden_dimension_size': IntDistribution(high=300, log=False, low=100, step=25), 'learning_rate': FloatDistribution(high=0.01, log=True, low=0.0001, step=None), 'max_steps': IntDistribution(high=100000, log=False, low=50000, step=10000)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=1135, state=TrialState.COMPLETE, value=None)

Sample parameter importance output
```Python
OrderedDict([('learning_rate', 0.616334598857384),
             ('max_steps', 0.12996281990864547),
             ('hidden_dimension_size', 0.12604799029348462),
             ('batch_size', 0.06826465838004579),
             ('negative_sample_size', 0.05938993256044008)])
```

In [13]:
# Show how much each searched hyperparameter contributes to the objective value.
optuna.importance.get_param_importances(study)

OrderedDict([('learning_rate', 0.616334598857384),
             ('max_steps', 0.12996281990864547),
             ('hidden_dimension_size', 0.12604799029348462),
             ('batch_size', 0.06826465838004579),
             ('negative_sample_size', 0.05938993256044008)])

In [14]:
# Release the GPU memory that PyTorch is caching but no longer using, once the
# study has finished.
torch.cuda.empty_cache()