In [32]:
import torch
import random
import numpy as np
import os
from utils.data import build_data
from utils.data import load_data_setting
from trainer.trainer import Trainer
from models.setpred4RE import SetPred4RE
import wandb



In [33]:
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random generators with one value.

    Also exports the seed (as a string) through the ``PYTHONHASHSEED``
    environment variable, turns cuDNN benchmarking off and switches the
    cuDNN ``deterministic`` flag on.

    Args:
        seed: Value handed unchanged to every seeding function below.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Every generator receives the same seed; the calls are independent.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

class make_args:
    """Attribute container holding the settings used by this notebook.

    Instances are passed to ``load_data_setting``, ``SetPred4RE`` and
    ``Trainer``, which read the settings as plain attributes.

    Iterating over an instance yields the *names* of its settings.

    Args:
        **overrides: Optional ``name=value`` pairs replacing the defaults
            below, e.g. ``make_args(batch_size=2, use_gpu=True)``. Calling
            ``make_args()`` with no arguments gives exactly the defaults.

    Raises:
        TypeError: If an override names a setting that does not exist
            (guards against silent typos such as ``batchsize=...``).
    """

    def __init__(self, **overrides):
        # NOTE: machine-specific absolute path; on another machine pass
        # generated_data_directory=... instead of editing this class.
        self.generated_data_directory = "/home/tian/Projects/RelationExtraction/data/NYT/generated_data/"
        self.generated_param_directory = "data/NYT/Hungarian-model-spanbert-regressive/"
        self.dataset_name = "NYT"
        self.model_name = "HungarianModel"
        # Alternative encoder checkpoint: "bert-base-cased"
        self.bert_directory = "SpanBERT/spanbert-base-cased"
        self.train_file = "data/NYT/exact_data/train.json"
        self.valid_file = "data/NYT/exact_data/valid.json"
        self.test_file = "data/NYT/exact_data/test.json"
        self.num_generated_triples = 15
        self.num_decoder_layers = 3
        self.na_rel_coef = 0.25
        self.matcher = "avg"
        self.rel_loss_weight = 1.0
        self.head_ent_loss_weight = 2.0
        self.tail_ent_loss_weight = 2.0
        self.fix_bert_embeddings = True
        self.batch_size = 8
        self.max_epoch = 100
        self.gradient_accumulation_steps = 1
        self.decoder_lr = 2e-5
        self.encoder_lr = 1e-5
        self.lr_decay = 0.01
        self.weight_decay = 1e-5
        self.max_grad_norm = 2.5
        self.optimizer = "AdamW"

        # Evaluation arguments
        self.n_best_size = 100
        self.max_span_length = 12

        # Misc arguments
        self.refresh = False
        self.use_gpu = False
        self.visible_gpu = 1
        self.random_seed = 1

        # new attribute
        self.use_ILP = False
        self.use_dotproduct = False
        self.use_regressive_decoder = True

        # Validate every override before applying any, so a typo never
        # leaves a half-configured object behind.
        known = vars(self)
        unknown = sorted(name for name in overrides if name not in known)
        if unknown:
            raise TypeError(
                "make_args() got unknown setting(s): " + ", ".join(unknown)
            )
        for name, value in overrides.items():
            setattr(self, name, value)

    def __iter__(self):
        """Yield the name of each non-callable attribute that does not start
        with a double underscore, in the order returned by ``dir()``."""
        for attr in dir(self):
            if not callable(getattr(self, attr)) and not attr.startswith("__"):
                yield attr

a = make_args()

# Apply the configured seed before building anything: the loader output
# for this cell reports that some encoder weights are newly initialized,
# so without seeding the model differs from run to run.
set_seed(a.random_seed)

data = load_data_setting(a)

# The model is sized by the number of entries in the relation alphabet.
model = SetPred4RE(a, data.relational_alphabet.size())

Data setting is loaded from file:  /home/tian/Projects/RelationExtraction/data/NYT/generated_data/NYT_ILPmodel_data.pickle
DATA SUMMARY START:
     Relation Alphabet Size: 24
     Train  Instance Number: 56196
     Valid  Instance Number: 5000
     Test   Instance Number: 5000
DATA SUMMARY END.


Some weights of BertModel were not initialized from the model checkpoint at SpanBERT/spanbert-base-cased and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
# Peek at the first two validation instances; in the output below each one
# prints as [index, list of token ids, dict of relation/entity index lists].
data.valid_loader[0:2]

[[0,
  [101,
   1130,
   9347,
   117,
   1456,
   11301,
   17879,
   117,
   1485,
   1103,
   15272,
   3070,
   117,
   28117,
   8661,
   16999,
   170,
   7135,
   1736,
   117,
   1105,
   3328,
   2125,
   170,
   11898,
   16929,
   1107,
   4402,
   1633,
   119,
   102],
  {'relation': [0, 13],
   'head_start_index': [2, 27],
   'head_end_index': [2, 28],
   'tail_start_index': [27, 2],
   'tail_end_index': [28, 2]}],
 [1,
  [101,
   1130,
   1117,
   27355,
   1105,
   8035,
   118,
   13767,
   1207,
   1520,
   117,
   112,
   112,
   1109,
   1249,
   3202,
   21543,
   1116,
   112,
   8375,
   131,
   1738,
   1107,
   5008,
   117,
   112,
   112,
   1103,
   1203,
   20998,
   2432,
   1667,
   14667,
   1200,
   16816,
   1366,
   1115,
   1103,
   2383,
   1104,
   1103,
   6096,
   3469,
   1106,
   1301,
   1106,
   1594,
   1222,
   5008,
   1105,
   1157,
   5672,
   9712,
   14602,
   16271,
   8130,
   1104,
   1103,
   5846,
   1127,
   1241,
   3073,
   176

In [4]:
from utils.functions import formulate_gold

In [6]:
model.eval()
prediction, gold = {}, {}

with torch.no_grad():
    # Smoke-test sizes: only the first `eval_num` validation instances are
    # evaluated, `batch_size` at a time.
    batch_size = 2
    eval_num = 2
    # Ceiling division so a partial final batch is still visited.
    total_batch = (eval_num + batch_size - 1) // batch_size
    for batch_id in range(total_batch):
        # Derive the slice from the loop index so that raising eval_num
        # walks forward instead of re-reading the same instances.
        start = batch_id * batch_size
        end = min(start + batch_size, eval_num)
        eval_instance = data.valid_loader[start:end]
        if not eval_instance:
            continue
        input_ids, attention_mask, target, info = model.batchify(eval_instance)
        gold.update(formulate_gold(target, info))
        gen_triples = model.gen_triples(input_ids, attention_mask, info)
        prediction.update(gen_triples)

# input_ids, attention_mask, target, info and gen_triples keep the values of
# the last processed batch; the inspection cells further down read them.

In [23]:
# Batch metadata returned by model.batchify in the evaluation cell
# (the output shows 'seq_len' and 'sent_idx' lists).
info

{'seq_len': [31, 126], 'sent_idx': [0, 1]}

In [30]:
# Call the module itself rather than .forward() so any registered hooks run,
# and disable autograd since this is inspection only.
with torch.no_grad():
    outputs = model(input_ids, attention_mask)
# First row of the head-start logits for the first batch entry
# (presumably one score per token position -- confirm against the model).
outputs['head_start_logits'][0][0]

tensor([ 2.0421e-02,  2.0421e-02,  2.0421e-02,  2.0421e-02,  2.0421e-02,
         2.0421e-02,  2.0421e-02,  2.0421e-02,  2.0421e-02,  2.0421e-02,
         2.0421e-02,  2.0421e-02,  2.0421e-02,  2.0421e-02,  2.0421e-02,
         2.0421e-02,  2.0421e-02,  2.0421e-02,  2.0421e-02,  2.0421e-02,
         2.0421e-02,  2.0421e-02,  2.0421e-02,  2.0421e-02,  2.0421e-02,
         2.0421e-02,  2.0421e-02,  2.0421e-02,  2.0421e-02,  2.0421e-02,
         2.0421e-02, -1.0000e+04, -1.0000e+04, -1.0000e+04, -1.0000e+04,
        -1.0000e+04, -1.0000e+04, -1.0000e+04, -1.0000e+04, -1.0000e+04,
        -1.0000e+04, -1.0000e+04, -1.0000e+04, -1.0000e+04, -1.0000e+04,
        -1.0000e+04, -1.0000e+04, -1.0000e+04, -1.0000e+04, -1.0000e+04,
        -1.0000e+04, -1.0000e+04, -1.0000e+04, -1.0000e+04, -1.0000e+04,
        -1.0000e+04, -1.0000e+04, -1.0000e+04, -1.0000e+04, -1.0000e+04,
        -1.0000e+04, -1.0000e+04, -1.0000e+04, -1.0000e+04, -1.0000e+04,
        -1.0000e+04, -1.0000e+04, -1.0000e+04, -1.0

In [11]:
# Gold labels for the last batch, as returned by model.batchify.
target

[{'relation': tensor([ 0, 13]),
  'head_start_index': tensor([ 2, 27]),
  'head_end_index': tensor([ 2, 28]),
  'tail_start_index': tensor([27,  2]),
  'tail_end_index': tensor([28,  2])},
 {'relation': tensor([0]),
  'head_start_index': tensor([122]),
  'head_end_index': tensor([123]),
  'tail_start_index': tensor([48]),
  'tail_end_index': tensor([48])}]

In [18]:
# Inspect a single generated triple: element 3 of the entry stored under 0.
gen_triples[0][3]

Pred_Triple(pred_rel=18, rel_prob=0.08826401084661484, head_start_index=1, head_end_index=1, head_start_prob=0.032258063554763794, head_end_prob=0.032258063554763794, tail_start_index=1, tail_end_index=1, tail_start_prob=0.032258063554763794, tail_end_prob=0.032258063554763794)

In [7]:
# Everything accumulated from model.gen_triples in the evaluation cell.
prediction

{0: [Pred_Triple(pred_rel=1, rel_prob=0.12718185782432556, head_start_index=1, head_end_index=1, head_start_prob=0.032258063554763794, head_end_prob=0.032258063554763794, tail_start_index=1, tail_end_index=1, tail_start_prob=0.032258063554763794, tail_end_prob=0.032258063554763794),
  Pred_Triple(pred_rel=16, rel_prob=0.08242230117321014, head_start_index=1, head_end_index=1, head_start_prob=0.032258063554763794, head_end_prob=0.032258063554763794, tail_start_index=1, tail_end_index=1, tail_start_prob=0.032258063554763794, tail_end_prob=0.032258063554763794),
  Pred_Triple(pred_rel=11, rel_prob=0.11693752557039261, head_start_index=1, head_end_index=1, head_start_prob=0.032258063554763794, head_end_prob=0.032258063554763794, tail_start_index=1, tail_end_index=1, tail_start_prob=0.032258063554763794, tail_end_prob=0.032258063554763794),
  Pred_Triple(pred_rel=18, rel_prob=0.08826401084661484, head_start_index=1, head_end_index=1, head_start_prob=0.032258063554763794, head_end_prob=0.032

In [34]:
# Build the trainer from the model, the loaded data and the settings object.
trainer = Trainer(model, data, a)

In [36]:
# List the trainer's non-callable attributes, skipping names that start
# with a double underscore.
data_attr_names = [
    name
    for name in dir(trainer)
    if not callable(getattr(trainer, name)) and not name.startswith("__")
]
for name in data_attr_names:
    print(name)

T_destination
_backward_hooks
_backward_pre_hooks
_buffers
_forward_hooks
_forward_hooks_with_kwargs
_forward_pre_hooks
_forward_pre_hooks_with_kwargs
_is_full_backward_hook
_load_state_dict_post_hooks
_load_state_dict_pre_hooks
_modules
_non_persistent_buffers_set
_parameters
_state_dict_hooks
_state_dict_pre_hooks
_version
args
call_super_init
data
dump_patches
optimizer
training


In [39]:
# Read the batch size back through the trainer's `args` attribute.
trainer.args.batch_size

8