In [1]:
import torch
import random
import numpy as np
from utils.data import build_data

In [2]:
def set_seed(seed):
    """Seed all random number generators used here and make cuDNN deterministic.

    Args:
        seed: Integer seed handed unchanged to Python's ``random``, NumPy,
            and PyTorch (CPU generator, current CUDA device, all CUDA devices).

    Note:
        ``PYTHONHASHSEED`` is intentionally left alone, as in the original
        (where that line was commented out).
    """
    # Order: Python -> NumPy -> torch CPU -> torch current GPU -> torch all GPUs.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Disable cuDNN autotuning and request deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

In [3]:
class make_args:
    """Attribute bag holding the paths and hyperparameters used by this notebook.

    An instance is created once (as ``a``) and handed to the data builders,
    the model constructor and the trainer. Iterating over an instance yields
    the names of its settings.
    """

    def __init__(self):
        """Populate every setting as an instance attribute, in declaration order."""
        settings = {
            # Data locations and encoder selection
            "generated_data_directory": "data/NYT/generated_data/",
            "dataset_name": "NYT",
            "model_name": "RoBERTa",
            "bert_directory": "roberta-base",  # previously tried: "bert-base-cased"
            "train_file": "data/NYT/exact_data/train.json",
            "valid_file": "data/NYT/exact_data/valid.json",
            "test_file": "data/NYT/exact_data/test.json",
            # Model and loss settings
            "num_generated_triples": 15,
            "num_decoder_layers": 3,
            "na_rel_coef": 0.25,
            "matcher": "avg",
            "rel_loss_weight": 1.0,
            "head_ent_loss_weight": 2.0,
            "tail_ent_loss_weight": 2.0,
            # Optimisation settings
            "fix_bert_embeddings": True,
            "batch_size": 8,
            "max_epoch": 100,
            "gradient_accumulation_steps": 1,
            "decoder_lr": 2e-5,
            "encoder_lr": 1e-5,
            "lr_decay": 0.01,
            "weight_decay": 1e-5,
            "max_grad_norm": 2.5,
            "optimizer": "AdamW",
            # Evaluation arguments
            "n_best_size": 100,
            "max_span_length": 12,
            # Misc arguments
            "refresh": False,
            "use_gpu": True,
            "visible_gpu": 1,
            "random_seed": 1,
        }
        for key, value in settings.items():
            setattr(self, key, value)

    def __iter__(self):
        """Yield the name of each non-callable, non-dunder attribute.

        Names come out in ``dir()`` order (alphabetical), not declaration order.
        """
        for name in dir(self):
            # Skip methods and Python's double-underscore attributes.
            if callable(getattr(self, name)) or name.startswith("__"):
                continue
            yield name
        
a = make_args()
# Apply the configured seed right away: set_seed() is defined above but was never
# called, so model initialisation and training would otherwise run unseeded and
# a.random_seed would have no effect.
set_seed(a.random_seed)

In [4]:
# Build the dataset bundle from the configuration in `a`.
# Per the recorded output of this cell, the call loads the roberta-base tokenizer,
# prints a data summary, and saves the result as a pickle under
# a.generated_data_directory.
data = build_data(a)

loaded tokenizer from roberta-base
DATA SUMMARY START:
     Relation Alphabet Size: 24
     Train  Instance Number: 56196
     Valid  Instance Number: 5000
     Test   Instance Number: 5000
DATA SUMMARY END.
Data setting is saved to file:  data/NYT/generated_data/NYT_RoBERTa_data.pickle


In [43]:
# NOTE(review): mid-notebook import; utils.data is already imported from in the first cell.
from utils.data import load_data_setting

# Rebinds `data` to the data setting loaded from disk for configuration `a`.
# NOTE(review): the recorded output names NYT_ILPmodel_data.pickle, which does not
# match the current a.model_name ("RoBERTa") — the output looks stale from an
# earlier configuration; confirm before relying on it.
data = load_data_setting(a)

Data setting is loaded from file:  data/NYT/generated_data/NYT_ILPmodel_data.pickle
DATA SUMMARY START:
     Relation Alphabet Size: 24
     Train  Instance Number: 56196
     Valid  Instance Number: 5000
     Test   Instance Number: 5000
DATA SUMMARY END.


In [44]:
# Sanity check on the loaded data: size of the training loader.
# The recorded value (56196) equals the train instance count in the data summary,
# so train_loader appears to be indexed per instance rather than per batch — TODO confirm.
len(data.train_loader)

56196

In [45]:
from trainer.trainer import Trainer
from models.setpred4RE import SetPred4RE

# Build the model from the configuration and the size of the relation alphabet.
# NOTE(review): the recorded output mentions the bert-base-cased checkpoint although
# a.bert_directory is "roberta-base" — the output is likely stale; confirm on a fresh run.
model = SetPred4RE(a, data.relational_alphabet.size())

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


the rel_weight:  tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.2500])


In [None]:
import wandb

# Start a new wandb run to track this notebook.
# The settings held by `a` are attached as the run config so each tracked run
# records the hyperparameters it was produced with; iterating `a` yields the
# setting names. (`random` is already imported in the first cell and is not
# needed here, so the duplicate import was dropped.)
wandb.init(
    project="SPN4RE",
    name="SPN4RE-NYT-ILP",
    config={key: getattr(a, key) for key in a},
)

In [48]:
# Wire the model, the data bundle and the configuration into a Trainer.
trainer = Trainer(model, data, a)



In [50]:
# Evaluate on the validation loader with autograd disabled, so no gradients are tracked.
# NOTE(review): the execution counts show this cell (50) was last run after the
# training cell (49) even though it appears first; on Restart & Run All it would
# evaluate the model before training — confirm the intended order.
# NOTE(review): the recorded output is dominated by per-batch shape debug prints,
# presumably coming from the model code; consider silencing them.
with torch.no_grad():
    trainer.eval_model(data.valid_loader)

the shape of query_embed.weight:  torch.Size([15, 768])
the shape of hidden_states:  torch.Size([8, 15, 768])
the shape of query_embed.weight:  torch.Size([15, 768])
the shape of hidden_states:  torch.Size([8, 15, 768])
the shape of query_embed.weight:  torch.Size([15, 768])
the shape of hidden_states:  torch.Size([8, 15, 768])
the shape of query_embed.weight:  torch.Size([15, 768])
the shape of hidden_states:  torch.Size([8, 15, 768])
the shape of query_embed.weight:  torch.Size([15, 768])
the shape of hidden_states:  torch.Size([8, 15, 768])
the shape of query_embed.weight:  torch.Size([15, 768])
the shape of hidden_states:  torch.Size([8, 15, 768])
the shape of query_embed.weight:  torch.Size([15, 768])
the shape of hidden_states:  torch.Size([8, 15, 768])
the shape of query_embed.weight:  torch.Size([15, 768])
the shape of hidden_states:  torch.Size([8, 15, 768])
the shape of query_embed.weight:  torch.Size([15, 768])
the shape of hidden_states:  torch.Size([8, 15, 768])
the shape 

In [49]:
# Launch training through the Trainer built above.
# Presumably governed by the settings in `a` (e.g. max_epoch, batch_size) — confirm in trainer.trainer.
trainer.train_model()