In [1]:
## Load The Data
import json

raw = json.load(open('./data/FinEntity.json'))


# data = json.load(open('./data/urop_annotation_4000_annotations.json'))
# raw = data.get("examples")
# for example in raw:
#     for annotation in example['annotations']:
#         #We expect the key of label to be label but the data has tag
#         annotation['label'] = annotation['tag']

In [2]:
## Preparing Sequence Labeling Data for Transformers
from sequence_aligner.labelset import LabelSet
from sequence_aligner.dataset import TrainingDataset
from sequence_aligner.containers import TraingingBatch
from transformers import BertTokenizerFast
#bert-base-cased yiyanghkust/finbert-pretrain
tokenizer = BertTokenizerFast.from_pretrained('bert-base-cased')
label_set = LabelSet(labels=["Neutral", "Positive", "Negative"])  # label in this dataset
print(label_set.ids_to_label)
print(len(label_set.ids_to_label.values()))
dataset = TrainingDataset(data=raw, tokenizer=tokenizer, label_set=label_set,tokens_per_batch = 128)
print(len(dataset))

{0: 'O', 1: 'B-Neutral', 2: 'I-Neutral', 3: 'L-Neutral', 4: 'U-Neutral', 5: 'B-Positive', 6: 'I-Positive', 7: 'L-Positive', 8: 'U-Positive', 9: 'B-Negative', 10: 'I-Negative', 11: 'L-Negative', 12: 'U-Negative'}
13
987


In [3]:
## Prepare train data and valid data
from torch.utils.data import DataLoader, random_split

train_size = int(0.8 * len(dataset))
validate_size = len(dataset) - train_size
train_dataset, validate_dataset = random_split(dataset, [train_size, validate_size])

train_loader = DataLoader(train_dataset, batch_size=16, collate_fn=TraingingBatch, shuffle=True, )
val_loader = DataLoader(validate_dataset, batch_size=16, collate_fn=TraingingBatch, shuffle=True, )

print(dataset[1].input_ids)
print(dataset[1].labels)
print(dataset[1].attention_masks)
print(tokenizer.decode(dataset[1].input_ids))
print(dataset.label_set.ids_to_label)

[1212, 1103, 3112, 1334, 117, 24824, 1110, 11158, 1158, 127, 110, 1170, 170, 11147, 1107, 21081, 3791, 1105, 17019, 11166, 27158, 20284, 8552, 1110, 1145, 1146, 1118, 127, 110, 1170, 5022, 3222, 11471, 119, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 

In [4]:
## Bert 
import warnings
from model.bert_crf import BertSoftmaxForNer
from transformers import get_linear_schedule_with_warmup,BertConfig,BertForTokenClassification
from torch import cuda
import config
import numpy as np
from util.train import train_epoch, valid_epoch_not_crf
from  torch.optim import AdamW
import torch 
from util.process import ids_to_labels,Metrics,Metrics_e
warnings.filterwarnings('ignore')
model = BertSoftmaxForNer.from_pretrained('bert-base-cased', num_labels=len(label_set.ids_to_label.values()))

device = 'cuda:0' if cuda.is_available() else 'cpu'
model.to(device)

len_dataset = len(train_dataset)
t_total = len(train_dataset)
# Prepare optimizer and schedule (linear warmup and decay)
no_decay = ["bias", "LayerNorm.weight"]
optimizer_grouped_parameters = [
        {"params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
        "weight_decay": config.weight_decay,},
        {"params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], "weight_decay": 0.0},
    ]
optimizer = AdamW(optimizer_grouped_parameters, lr=config.lr_crf, eps=1e-6)
warmup_steps = int(t_total * config.warm_up_ratio)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps,
                                                num_training_steps=t_total)

EPOCHS = config.epoch_num  
for e in range(EPOCHS):
    print("=======START TRAIN EPOCHS %d=======" %(e+1))
    train_loss = train_epoch(e, model, train_loader, optimizer, scheduler,device)

    valid_epoch_not_crf(e, model, val_loader,device,label_set)


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertSoftmaxForNer: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertSoftmaxForNer from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertSoftmaxForNer from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertSoftmaxForNer were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['clas

Epoch: 1, train Loss:0.6588
              precision    recall  f1-score   support

    Negative       0.00      0.00      0.00       105
     Neutral       0.43      0.08      0.14       222
    Positive       0.00      0.00      0.00       105

   micro avg       0.43      0.04      0.08       432
   macro avg       0.14      0.03      0.05       432
weighted avg       0.22      0.04      0.07       432

Epoch: 2, train Loss:0.1878
              precision    recall  f1-score   support

    Negative       0.62      0.05      0.09       105
     Neutral       0.71      0.59      0.65       222
    Positive       0.48      0.50      0.49       105

   micro avg       0.63      0.44      0.52       432
   macro avg       0.61      0.38      0.41       432
weighted avg       0.64      0.44      0.47       432

Epoch: 3, train Loss:0.0944
              precision    recall  f1-score   support

    Negative       0.75      0.65      0.69       105
     Neutral       0.74      0.69      0.72  