In [1]:
## tool
import pickle
import tqdm
import numpy as np
import pandas as pd
import os
import time
import matplotlib.pyplot as plt
import seaborn as sns
from seqeval.metrics import f1_score, accuracy_score

## Bert
import transformers
from transformers import BertForTokenClassification, AdamW, get_linear_schedule_with_warmup

## module
from module.ner_preprocess import Bert_dataset
from module.ner_trainer import NRE_Trainer

## torch
import torch
from torch.utils.data import DataLoader, random_split

In [2]:
# Show which GPU the notebook will run on.
# torch.cuda.get_device_name(0) raises when no CUDA device is present, which
# would abort a fresh "Restart & Run All" on a CPU-only machine, so the query
# is guarded and a plain string is reported instead.
device_name = (
    torch.cuda.get_device_name(0)
    if torch.cuda.is_available()
    else 'cpu (CUDA not available)'
)
device_name

'GeForce RTX 2080 Ti'

## Preprocess

In [3]:
# Load the pickled object stored at module/pkl/bert_input.pkl (resolved against
# the current working directory) and bind it to `ner_dataset`.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
pkl_path = f'{os.getcwd()}/module/pkl/bert_input.pkl'
with open(pkl_path, 'rb') as pkl_file:
    ner_dataset = pickle.load(pkl_file)

In [4]:
## Train / validation / test split (80% / 10% / 10%)
# The split is driven by a seeded generator so that it is identical after every
# kernel restart. Without a fixed seed, a checkpoint restored from an earlier
# session would be evaluated on a freshly drawn validation/test set that can
# contain samples it was trained on.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)

# Sizes: 80% for training, the remainder halved into validation and test;
# the test split absorbs any rounding leftovers.
n_total = len(ner_dataset)
n_train = int(n_total * 0.8)
n_valid = int((n_total - n_train) * 0.5)
n_test  = n_total - n_train - n_valid

train_dataset, valid_dataset, test_dataset = random_split(
    ner_dataset,
    [n_train, n_valid, n_test],
    generator=split_generator,
)

## Hyperparameter
BATCH_SIZE = 64

In [5]:
def make_loader(subset, shuffle):
    """Wrap one dataset split in a DataLoader that collates with Bert_dataset.

    Args:
        subset: the split to iterate over. It is also forwarded as the first
            argument of ``Bert_dataset.collate_fn`` (presumably standing in
            for ``self`` -- confirm against module.ner_preprocess).
        shuffle: whether the samples are reshuffled on every pass.

    Returns:
        A ``DataLoader`` over ``subset`` configured with
        ``batch_size=BATCH_SIZE``.
    """
    return DataLoader(
        dataset=subset,
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
        collate_fn=lambda batch: Bert_dataset.collate_fn(subset, batch),
    )


# Only the training data is reshuffled each epoch. The validation and test
# loaders keep a fixed order so that evaluation batches are the same from run
# to run.
train_loader = make_loader(train_dataset, shuffle=True)
valid_loader = make_loader(valid_dataset, shuffle=False)
test_loader = make_loader(test_dataset, shuffle=False)

In [6]:
# Pretrained Chinese BERT with a token-classification head.
# The overrides request three output labels and switch off the optional
# attention / hidden-state outputs.
bert_overrides = {
    "num_labels": 3,
    "output_attentions": False,
    "output_hidden_states": False,
}
model = BertForTokenClassification.from_pretrained("bert-base-chinese", **bert_overrides)

Some weights of the model checkpoint at bert-base-chinese were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-c

In [7]:
# Build the project's trainer from the model and the training / validation
# loaders. The test loader is not passed in here.
# NOTE(review): the "device:cuda" line shown below is presumably printed by the
# constructor -- confirm in module.ner_trainer.
trainer = NRE_Trainer(model, train_loader, valid_loader)

device:cuda


In [8]:
# Restore previously saved weights into the trainer's model.
# map_location remaps the stored tensors onto the trainer's device, so a
# checkpoint written on a GPU still loads on a CPU-only machine.
# NOTE(review): the training call later in this notebook passes
# save_paths='model-best-test.pth', which is a different file name from the one
# loaded here -- confirm which checkpoint is intended.
checkpoint_path = 'params/best-model-test.pth'
state_dict = torch.load(checkpoint_path, map_location=trainer.device)
trainer.model.load_state_dict(state_dict)

<All keys matched successfully>

In [9]:
# Evaluate the restored model before any further training and report the result.
# NOTE(review): test=False presumably selects the validation loader (the message
# below labels the loss as "validation") -- confirm in NRE_Trainer.evaluation.
acc, total_loss = trainer.evaluation(test=False)
print(f"device: {trainer.device} classification acc: {acc: .4f} validation loss: {total_loss:.4f}")

In [10]:
# Options forwarded to the trainer's training loop. Their exact semantics live
# in NRE_Trainer.training_process; the notes below are assumptions to confirm.
training_options = dict(
    early_stopping=True,
    n_iter_no_change=5,              # presumably the early-stopping patience
    max_epoch=10,
    save_params=True,
    verbose=True,
    learning_rate=1e-5,
    save_paths='model-best-test.pth',  # presumably where the best weights are written
)
trainer.training_process(**training_options)

In [11]:
%matplotlib inline

# Seaborn theme: dark grid with fonts scaled up by 1.5x, on a wide canvas.
sns.set(style='darkgrid', font_scale=1.5)
plt.rcParams["figure.figsize"] = (12, 6)

# Learning curve: the trainer's recorded training and validation losses are
# drawn on one shared axis.
fig, ax = plt.subplots()
ax.plot(trainer.loss_values, 'b-o', label="training loss")
ax.plot(trainer.validation_loss_values, 'r-o', label="validation loss")

# Title, axis labels and legend so the figure stands on its own.
ax.set_title("Learning curve")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.legend()
plt.show()