In [1]:
## Load The Data
import json
import os
#raw = json.load(open('./data/annotations_consistent.json'))

raw = json.load(open('./data/FinEntity.json'))
# raw = data.get("examples")

# for example in raw:
#     for annotation in example['annotations']:
#         #We expect the key of label to be label but the data has tag
#         annotation['label'] = annotation['tag']

In [2]:
## Preparing Sequence Labeling Data for Transformers
from sequence_aligner.labelset import LabelSet
from sequence_aligner.dataset import TrainingDatasetCRF
from sequence_aligner.containers import TraingingBatch
from transformers import BertTokenizerFast

tokenizer = BertTokenizerFast.from_pretrained('yiyanghkust/finbert-pretrain')
label_set = LabelSet(labels=["Neutral", "Positive", "Negative"])  # label in this dataset
print(label_set.ids_to_label)
print(len(label_set.ids_to_label.values()))
dataset = TrainingDatasetCRF(data=raw, tokenizer=tokenizer, label_set=label_set,tokens_per_batch = 128)
print(len(dataset))

{0: 'O', 1: 'B-Neutral', 2: 'I-Neutral', 3: 'L-Neutral', 4: 'U-Neutral', 5: 'B-Positive', 6: 'I-Positive', 7: 'L-Positive', 8: 'U-Positive', 9: 'B-Negative', 10: 'I-Negative', 11: 'L-Negative', 12: 'U-Negative'}
13
983


In [3]:
## Prepare train data and valid data
from torch.utils.data import DataLoader, random_split
import config
train_size = int(config.dev_split_size * len(dataset))
validate_size = len(dataset) - train_size
train_dataset, validate_dataset = random_split(dataset, [train_size, validate_size])

train_loader = DataLoader(train_dataset, batch_size=16, collate_fn=TraingingBatch, shuffle=True, )
val_loader = DataLoader(validate_dataset, batch_size=16, collate_fn=TraingingBatch, shuffle=True, )

print(dataset[1].input_ids)
print(dataset[1].labels)
print(dataset[1].attention_masks)
print(tokenizer.decode(dataset[1].input_ids))
print(dataset.label_set.ids_to_label)

[19, 6, 483, 1342, 585, 14522, 17, 15979, 223, 1146, 739, 293, 11, 13420, 10, 557, 988, 8, 2513, 4462, 10374, 9390, 17119, 5108, 17, 67, 129, 24, 1146, 739, 293, 358, 3174, 746, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

In [7]:
## Bert+Crf
import warnings
from model.bert_crf import BertCrfForNer
from transformers import get_linear_schedule_with_warmup
from torch import cuda
import config
from util.train import train_epoch, valid_epoch
from  torch.optim import AdamW

warnings.filterwarnings('ignore')
# 'bert-base-cased' 'yiyanghkust/finbert-pretrain'
model = BertCrfForNer.from_pretrained('yiyanghkust/finbert-pretrain', num_labels=len(label_set.ids_to_label.values()))

device = 'cuda:0' if cuda.is_available() else 'cpu'
model.to(device)

len_dataset = len(train_dataset)
t_total = len(train_dataset)
# Prepare optimizer and schedule (linear warmup and decay)
no_decay = ["bias", "LayerNorm.weight"]
bert_param_optimizer = list(model.bert.named_parameters())
crf_param_optimizer = list(model.crf.named_parameters())
optimizer_grouped_parameters = [
        {'params': [p for n, p in bert_param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay': config.weight_decay, 'lr': config.lr_crf},
        {'params': [p for n, p in bert_param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0,
         'lr': config.lr_crf},

        {'params': [p for n, p in crf_param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay': config.weight_decay, 'lr': config.crf_learning_rate},
        {'params': [p for n, p in crf_param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0,
         'lr': config.crf_learning_rate},

    ]
optimizer = AdamW(optimizer_grouped_parameters, lr=config.lr, eps=1e-6)
warmup_steps = int(t_total * config.warm_up_ratio)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps,
                                                num_training_steps=t_total)

EPOCHS = config.epoch_num # training epoch
for e in range(EPOCHS):
    print("=======START TRAIN EPOCHS %d=======" %(e+1))
    train_loss = train_epoch(e, model, train_loader, optimizer, scheduler,device)
    valid_epoch(e, model, val_loader,device,label_set)



Some weights of the model checkpoint at yiyanghkust/finbert-pretrain were not used when initializing BertCrfForNer: ['cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertCrfForNer from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertCrfForNer from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertCrfForNer were not initialized from the model checkpoint at yiyanghkust/finbert-pretrain and are newly initializ

Epoch: 1, train Loss:33.2203
              precision    recall  f1-score   support

    Negative       0.59      0.15      0.24       106
     Neutral       0.49      0.49      0.49       222
    Positive       0.60      0.07      0.13        84

   micro avg       0.50      0.32      0.39       412
   macro avg       0.56      0.24      0.29       412
weighted avg       0.54      0.32      0.35       412

Epoch: 2, train Loss:5.8562
              precision    recall  f1-score   support

    Negative       0.48      0.09      0.16       106
     Neutral       0.68      0.82      0.74       222
    Positive       0.43      0.51      0.46        84

   micro avg       0.60      0.57      0.59       412
   macro avg       0.53      0.48      0.46       412
weighted avg       0.58      0.57      0.54       412

Epoch: 3, train Loss:2.7158
              precision    recall  f1-score   support

    Negative       0.83      0.14      0.24       106
     Neutral       0.77      0.86      0.81 

In [8]:
# save model
import pickle

# with open('model_bert_crf', 'wb') as f:
#     pickle.dump(model, f)