In [1]:
%load_ext autoreload
%autoreload 2

import os
import torch
import numpy as np
import pandas as pd
from glob import glob
from tqdm import tqdm
import transformers
import torch.nn as nn
from sklearn import preprocessing
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
# from torchsummary import summary
from transformers import AdamW
from transformers import get_linear_schedule_with_warmup
from transformers import BertTokenizer, RobertaModel, RobertaTokenizerFast, RobertaTokenizer
from transformers import AutoTokenizer, AutoModel, PreTrainedTokenizerFast, AutoConfig, AutoModelForTokenClassification

from param import parent_class_mapping
from param import CONFIG


# Tokenizer shared by the dataset and inference cells; use_fast=False asks for the slow (pure Python) implementation.
# NOTE(review): "phobert-base" has no organisation prefix, presumably a local checkpoint directory - confirm.
tokenizer = AutoTokenizer.from_pretrained("phobert-base", use_fast=False)

In [2]:
# !pip install --upgrade --trusted-host files.pythonhosted.org --trusted-host pypi.org --trusted-host pypi.python.org transformers 
# !pip install --upgrade --trusted-host files.pythonhosted.org --trusted-host pypi.org --trusted-host pypi.python.org setuptools_scm 
# !pip install --upgrade --trusted-host files.pythonhosted.org --trusted-host pypi.org --trusted-host pypi.python.org seqeval

In [3]:
class Config:
    """Static hyper-parameters and paths for this notebook.

    NOTE(review): in the visible cells only TRAIN_BATCH_SIZE and
    VAL_BATCH_SIZE are read. The training loop uses a hard-coded epoch
    count, the data is read from 'data/covid_*_word.csv' and the model is
    loaded from 'phobert-base', so the remaining fields look stale - confirm
    before relying on them.
    """
    MAX_LEN = 100
    TRAIN_BATCH_SIZE = 128  # batch size of train_loader
    VAL_BATCH_SIZE = 128  # batch size of val_loader
    TEST_BATCH_SIZE = 64
    EPOCHS = 5
    BASE_MODEL = 'roberta-base'
    TRAIN_PATH = 'data/ner_dataset.csv'
    MODEL_PATH = 'entity_model.pt'


def process_data(df, enc_tag=None):
    """Encode tags and group a token-level frame into per-sentence lists.

    Args:
        df: DataFrame with one row per token and at least the columns
            'sentID', 'Word' and 'Tag'. A 'TAG_enc' column holding the
            integer tag ids is added to it in place.
        enc_tag: optional, already fitted label encoder. When given it is
            reused (``transform``) so this split gets exactly the same
            tag -> id mapping as the split the encoder was fitted on.
            When omitted, a new ``LabelEncoder`` is fitted on ``df['Tag']``,
            which is the original behaviour.

    Returns:
        (sentences, TAG, enc_tag): list of word lists, list of tag-id lists
        (both in the groupby order of 'sentID') and the encoder that was used.
    """
    if enc_tag is None:
        enc_tag = preprocessing.LabelEncoder()
        df['TAG_enc'] = enc_tag.fit_transform(df['Tag'])
    else:
        # Reusing the fitted encoder keeps ids aligned even when this split
        # is missing some of the labels seen during fitting.
        df['TAG_enc'] = enc_tag.transform(df['Tag'])
    sentences = df.groupby('sentID')['Word'].apply(list).values.tolist()
    TAG = df.groupby('sentID')['TAG_enc'].apply(list).values.tolist()
    return sentences, TAG, enc_tag

# Index of the GPU this notebook prefers as the current CUDA device.
device_id = 1
# Only select it when it really exists; otherwise a host with no GPU (or a
# single GPU) would fail here before any other cell can run.
if torch.cuda.is_available() and device_id < torch.cuda.device_count():
    torch.cuda.set_device(device_id)
else:
    print('CUDA device {} is not available; keeping the default device'.format(device_id))

In [4]:

# Token-level splits; the first CSV column is used as the index.
train_df = pd.read_csv('data/covid_train_word.csv', index_col=0)
dev_df = pd.read_csv('data/covid_dev_word.csv', index_col=0)
test_df = pd.read_csv('data/covid_test_word.csv', index_col=0)

# Tags listed here are not predicted: every occurrence is relabelled 'O'.
# Alternative list tried earlier:
# reject_tag = ['B-PATIENT_ID', 'I-PATIENT_ID', 'B-SYMPTOM_AND_DISEASE', 'I-SYMPTOM_AND_DISEASE', 'B-JOB', 'I-JOB']
reject_tag = ['B-JOB', 'I-JOB', 'B-TRANSPORTATION', 'I-TRANSPORTATION']

for split_df in (train_df, dev_df, test_df):
    split_df.loc[split_df['Tag'].isin(reject_tag), 'Tag'] = 'O'

# Computed after the relabelling so rejected tags are not counted as classes.
all_labels = train_df['Tag'].unique().tolist()



# df = df.replace({np.nan: 'nan'})
# df['sentID'].fillna(method='ffill', inplace=True)

In [5]:
# Labels left in the training split after the rejected tags were mapped to 'O'.
print(all_labels)
# Number of distinct training labels; used below to size the classifier head and CLASS_WEIGHT.
CLASS_NB = len(all_labels)
print(CLASS_NB)

['O', 'B-ORGANIZATION', 'I-ORGANIZATION', 'B-SYMPTOM_AND_DISEASE', 'I-SYMPTOM_AND_DISEASE', 'B-LOCATION', 'I-LOCATION', 'B-DATE', 'B-PATIENT_ID', 'B-AGE', 'B-NAME', 'I-DATE', 'B-GENDER', 'I-GENDER', 'I-NAME', 'I-AGE', 'I-PATIENT_ID']
17


In [6]:
# Distinct sentence ids of the training split; the printed count is the number of training sentences.
sent_ids = train_df.sentID.unique().tolist()
print(len(sent_ids))

5027


In [7]:
# Encode every split and group tokens into sentences.
train_sents, train_tag, enc_tag = process_data(train_df)
dev_sents, dev_tag, dev_enc = process_data(dev_df)
test_sents, test_tag, test_enc = process_data(test_df)
print('number of dataset: train {}, dev {}, test {}'.format(len(train_sents), len(dev_sents), len(test_sents)))

# Each split was encoded by its own encoder. The ids are only comparable when
# all encoders saw the same label set, so fail loudly instead of training or
# evaluating on silently shifted tag ids.
for split_name, split_enc in (('dev', dev_enc), ('test', test_enc)):
    if list(split_enc.classes_) != list(enc_tag.classes_):
        raise ValueError(
            '{} split has a different label set than train: {} vs {}'.format(
                split_name, list(split_enc.classes_), list(enc_tag.classes_)))

# The dev split is folded into the training data; the test split is what the
# later cells use for validation.
train_sents = train_sents + dev_sents
train_tag = train_tag + dev_tag


tag_nb = len(enc_tag.classes_)
# label -> id as assigned by the training encoder, and the inverse lookup.
tag_mapping = dict(zip(enc_tag.classes_, enc_tag.transform(enc_tag.classes_)))
tag_mapping_inv = {value:key for key, value in tag_mapping.items()}
print(tag_mapping)

train_df.head()

number of dataset: train 5027, dev 2000, test 3000
{'B-AGE': 0, 'B-DATE': 1, 'B-GENDER': 2, 'B-LOCATION': 3, 'B-NAME': 4, 'B-ORGANIZATION': 5, 'B-PATIENT_ID': 6, 'B-SYMPTOM_AND_DISEASE': 7, 'I-AGE': 8, 'I-DATE': 9, 'I-GENDER': 10, 'I-LOCATION': 11, 'I-NAME': 12, 'I-ORGANIZATION': 13, 'I-PATIENT_ID': 14, 'I-SYMPTOM_AND_DISEASE': 15, 'O': 16}


Unnamed: 0,sentID,Word,Tag,filename,covid_data,TAG_enc
0,0,Đồng,O,train_word.conll,True,16
1,0,thời,O,train_word.conll,True,16
2,0,",",O,train_word.conll,True,16
3,0,bệnh,O,train_word.conll,True,16
4,0,viện,O,train_word.conll,True,16


In [8]:
# Sentence counts after merging dev into train (train+dev, test).
print(len(train_sents), len(test_sents))

7027 3000


In [9]:
# Persist the label order of the training encoder; the raw-sentence inference cell reloads this file.
np.save('tag_encoder_vin.npy', enc_tag.classes_)


In [10]:
from dataset import EntityDataset

# Training data (train + dev sentences) and its shuffled loader.
train_dataset = EntityDataset(train_sents, train_tag, tokenizer, tag_mapping, is_train=True)
train_loader = DataLoader(
    train_dataset,
    batch_size=Config.TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=8,
)

# The validation loader is built from the test split; no separate test loader is created.
val_dataset = EntityDataset(test_sents, test_tag, tokenizer, tag_mapping)
val_loader = DataLoader(
    val_dataset,
    batch_size=Config.VAL_BATCH_SIZE,
    shuffle=False,
    num_workers=8,
)


## Test Dataset
# Sanity check: show the first training sample token by token next to its target tag id.
data = train_dataset[0]
ids = list(data['ids'].cpu().numpy().squeeze())
seq_len = data['seq_len']

decoded = [tokenizer.decode([token_id]) for token_id in ids]

print('ORIGIN SENTENCE: ', data['src_sent'])
print(decoded)
print(data['tar_tag'])

print([[piece, int(label)] for piece, label in zip(decoded, data['tar_tag'])])

ORIGIN SENTENCE:  Đồng thời , bệnh viện tiếp tục thực hiện các biện pháp phòng chống dịch bệnh COVID - 19 theo hướng dẫn của Bộ Y tế .
['<s>', 'Đồng', 'thời', ',', 'bệnh', 'viện', 'tiếp', 'tục', 'thực', 'hiện', 'các', 'biện', 'pháp', 'phòng', 'chống', 'dịch', 'bệnh', 'CO@@', 'VI@@', 'D', '-', '19', 'theo', 'hướng', 'dẫn', 'của', 'Bộ', 'Y', 'tế', '.', '</s>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
tensor([16, 16, 16

In [11]:
# Relative frequency of every encoded tag in the training frame, ordered by tag id.
# Despite the name this is a frequency table, not a loss weight.
tag_counter = train_df['TAG_enc'].value_counts().to_dict()
class_weight = np.array([tag_counter[tag_id] for tag_id in range(CLASS_NB)])
class_weight = class_weight / class_weight.sum()
print(np.around(class_weight, decimals=3))

[0.004 0.015 0.003 0.032 0.002 0.007 0.019 0.009 0.    0.015 0.    0.074
 0.    0.029 0.    0.014 0.777]


In [12]:
from torch.optim import SGD
from transformers.models.roberta.modeling_roberta import *
from model import MyRobertaForTokenClassification


def parse_data(data, device=torch.device('cuda')):
    """Unpack one batch dictionary into a 5-tuple.

    The entries 'ids', 'mask', 'token_type_ids' and 'tar_tag' are moved to
    `device` (in that order); 'seq_len' is passed through untouched as the
    last element.
    """
    tensor_keys = ('ids', 'mask', 'token_type_ids', 'tar_tag')
    unpacked = [data[key].to(device) for key in tensor_keys]
    unpacked.append(data['seq_len'])
    return tuple(unpacked)



In [13]:
from torch import functional as F
from sklearn import metrics
from torch.nn import CrossEntropyLoss


def train_fn(model, train_loader, optimizer, device, scheduler):
    """Run one training epoch and return the mean loss per batch.

    Args:
        model: called as ``model(input_ids=..., attention_mask=...,
            labels=..., token_type_ids=...)``; element 0 of its output is
            used as the loss.
        train_loader: iterable of batch dictionaries accepted by parse_data.
        optimizer: optimizer stepped once per batch.
        device: device the batch tensors are moved to.
        scheduler: accepted for interface compatibility; it is not stepped
            here.

    Returns:
        Sum of the batch losses divided by the number of batches.
    """
    model.train()
    final_loss = 0

    for data in tqdm(train_loader, total=len(train_loader)):
        # Use the caller's device; previously parse_data's default was used
        # and the `device` argument was silently ignored.
        ids, mask, token_type_ids, tag, seq_len = parse_data(data, device)
        optimizer.zero_grad()

        inputs = {
            "input_ids": ids,
            "attention_mask": mask,
            "labels": tag,
            "token_type_ids": token_type_ids
        }
        outputs = model(**inputs)
        # .mean() is a no-op for a scalar loss and reduces a loss vector to a scalar.
        loss = outputs[0].mean()
        loss.backward()
        optimizer.step()
        final_loss += loss.item()

    N = len(train_loader)
    return final_loss/N
    
    
def val_fn(model, val_loader, device):
    """Predict tags for every batch of `val_loader` without gradients.

    Args:
        model: called with input_ids / attention_mask / labels /
            token_type_ids; the first two elements of its output are
            unpacked as (loss, logits).
        val_loader: iterable of batch dictionaries accepted by parse_data.
        device: device the batch tensors are moved to.

    Returns:
        (all_targets, all_outputs): two lists with one entry per sentence,
        each entry being the list of label strings for the first `seq_len`
        positions, looked up through the global ``tag_mapping_inv``.
    """
    model.eval()
    all_targets = []
    all_outputs = []

    with torch.no_grad():
        for data in tqdm(val_loader):
            # Use the caller's device; previously parse_data's default was
            # used and the `device` argument was silently ignored.
            ids, mask, token_type_ids, tags, seq_lens = parse_data(data, device)
            inputs = {
                "input_ids": ids,
                "attention_mask": mask,
                "labels": tags,
                "token_type_ids": token_type_ids
            }
            outputs = model(**inputs)
            _, logists = outputs[:2]  # the loss is not needed for evaluation

            for tag, logist, seq_len in zip(tags, logists, seq_lens):
                # Everything from index seq_len on is cut off before decoding.
                tag = tag.detach().cpu().numpy()[:seq_len]
                label = [tag_mapping_inv[int(t)] for t in tag]

                out = logist.detach().cpu().numpy()[:seq_len]
                out = np.argmax(out, axis=-1)
                pred = [tag_mapping_inv[int(p)] for p in out]
                all_targets.append(label)
                all_outputs.append(pred)

    return all_targets, all_outputs

# CLASS_WEIGHT = torch.exp(2*torch.Tensor(1-class_weight)).cuda()

# Per-class loss weights: every class weighs 1 except 'O', which is down-weighted to 1/20.
# NOTE(review): the training cell builds an unweighted CrossEntropyLoss (the weighted variant is commented out), so these weights do not affect training there.
CLASS_WEIGHT = torch.ones(CLASS_NB)
CLASS_WEIGHT[tag_mapping['O']] = 1/20

print('CLASS_WEIGHT: ', CLASS_WEIGHT.detach().cpu())

CLASS_WEIGHT:  tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.0500])


In [16]:
from seqeval.metrics import classification_report, f1_score
from torch.optim.lr_scheduler import ReduceLROnPlateau
from model import MyRobertaForTokenClassification

# Build the model in three steps whose order matters:
#   1. give it an 11-class head so its parameter shapes match the saved checkpoint,
#   2. load the checkpoint,
#   3. replace the head by a fresh CLASS_NB-class layer for this dataset.
device = torch.device('cuda')
VLSP_CLASS_NB = 11
model = MyRobertaForTokenClassification.from_pretrained(pretrained_model_name_or_path='phobert-base')
model.classifier = torch.nn.Linear(in_features=768, out_features=VLSP_CLASS_NB, bias=True)
model.num_labels = VLSP_CLASS_NB

# NOTE(review): the all-zero weight looks like a shape placeholder so that the loss
# matches what the checkpoint stores - presumably set_loss_function registers the
# loss on the model; confirm in model.py.
loss_function = CrossEntropyLoss(weight=torch.zeros(11))
model.set_loss_function(loss_function)
model.load_state_dict(torch.load('save_model/0.86.pth'))

# New, randomly initialised head for the CLASS_NB labels of this dataset.
model.classifier = torch.nn.Linear(in_features=768, out_features=CLASS_NB, bias=True)
model.num_labels = CLASS_NB

# loss_function = CrossEntropyLoss(weight=CLASS_WEIGHT)
# Training uses the unweighted loss; CLASS_WEIGHT is not applied.
loss_function = CrossEntropyLoss()
model.set_loss_function(loss_function)
model.to(device)
model.train()

# NOTE(review): this batch is only fetched and unpacked, nothing below reads it.
# It also rebinds the globals `data`, `ids`, `tag` and `seq_len`.
data = next(iter(train_loader))
ids, mask, token_type_ids, tag, seq_len = parse_data(data, device)
param_optimizer = list(model.named_parameters())
no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
# NOTE(review): both groups use weight_decay 0.0, so the split into decay / no-decay
# parameters currently has no effect - confirm whether a non-zero decay was intended.
optimizer_parameters = [{
        "params": [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        "weight_decay": 0.0},
    {
        "params": [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        "weight_decay": 0.0,
    },
]


optimizer = AdamW(optimizer_parameters, lr=5e-5, eps=1e-8)
# NOTE(review): the scheduler is handed to train_fn, but no visible cell calls scheduler.step().
scheduler = ReduceLROnPlateau(optimizer, factor=0.3, patience=3)
                      
# all_targets, all_outputs = val_fn(model, val_loader, device)       
# report = classification_report(all_targets, all_outputs)
# print(report)


Init robertaFortokenizer Classification model


Some weights of MyRobertaForTokenClassification were not initialized from the model checkpoint at phobert-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


set loss function to <class 'torch.nn.modules.loss.CrossEntropyLoss'>
set loss function to <class 'torch.nn.modules.loss.CrossEntropyLoss'>


In [17]:
from seqeval.metrics import classification_report, f1_score

# Train for a fixed number of epochs, validate after each one and keep a
# checkpoint every time the validation F1 improves.
# NOTE(review): the epoch count is hard-coded (Config.EPOCHS is not used) and
# the scheduler is never stepped - confirm both are intentional.
os.makedirs('save_model', exist_ok=True)
best_F1 = -np.inf
for epoch in range(40):
    loss = train_fn(model, train_loader, optimizer, device, scheduler)
    all_targets, all_outputs = val_fn(model, val_loader, device)

    F1 = f1_score(all_targets, all_outputs)
    report = classification_report(all_targets, all_outputs)

    print('======================> Epoch {}'.format(epoch))
    print("Train Loss: {}".format(loss))
    print('F1: ', F1)

    improved = F1 > best_F1
    if improved:
        # The checkpoint file is named after the F1 it achieved.
        torch.save(model.state_dict(), 'save_model/{:.4f}.pth'.format(F1))
        best_F1 = F1

    # Show the report on every 5th epoch and on every new best, but only once
    # (previously it was printed twice when both conditions held).
    if improved or epoch % 5 == 0:
        print(report)
        
    

100%|██████████| 55/55 [00:19<00:00,  2.83it/s]
100%|██████████| 24/24 [00:03<00:00,  6.13it/s]
  _warn_prf(average, modifier, msg_start, len(result))


Train Loss: 1.1530818353999746
F1:  0.7428258396190638
                     precision    recall  f1-score   support

                AGE       0.00      0.00      0.00       571
               DATE       0.96      0.99      0.97      1648
             GENDER       0.00      0.00      0.00       473
           LOCATION       0.87      0.91      0.89      4629
               NAME       0.00      0.00      0.00       551
       ORGANIZATION       0.73      0.87      0.79       786
         PATIENT_ID       0.56      0.98      0.72      2359
SYMPTOM_AND_DISEASE       0.00      0.00      0.00      1144

          micro avg       0.76      0.72      0.74     12161
          macro avg       0.39      0.47      0.42     12161
       weighted avg       0.62      0.72      0.66     12161



  0%|          | 0/55 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

                AGE       0.00      0.00      0.00       571
               DATE       0.96      0.99      0.97      1648
             GENDER       0.00      0.00      0.00       473
           LOCATION       0.87      0.91      0.89      4629
               NAME       0.00      0.00      0.00       551
       ORGANIZATION       0.73      0.87      0.79       786
         PATIENT_ID       0.56      0.98      0.72      2359
SYMPTOM_AND_DISEASE       0.00      0.00      0.00      1144

          micro avg       0.76      0.72      0.74     12161
          macro avg       0.39      0.47      0.42     12161
       weighted avg       0.62      0.72      0.66     12161



100%|██████████| 55/55 [00:19<00:00,  2.87it/s]
100%|██████████| 24/24 [00:03<00:00,  6.23it/s]


Train Loss: 0.7174153317104687
F1:  0.8715854363321133


  0%|          | 0/55 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

                AGE       0.86      0.96      0.91       571
               DATE       0.97      0.99      0.98      1648
             GENDER       0.80      0.88      0.84       473
           LOCATION       0.88      0.93      0.90      4629
               NAME       0.81      0.91      0.85       551
       ORGANIZATION       0.74      0.85      0.79       786
         PATIENT_ID       0.96      0.99      0.97      2359
SYMPTOM_AND_DISEASE       0.43      0.82      0.57      1144

          micro avg       0.82      0.93      0.87     12161
          macro avg       0.81      0.91      0.85     12161
       weighted avg       0.85      0.93      0.88     12161



100%|██████████| 55/55 [00:19<00:00,  2.88it/s]
100%|██████████| 24/24 [00:03<00:00,  6.24it/s]


Train Loss: 0.6267769445072521
F1:  0.9101379724055189


  0%|          | 0/55 [00:00<?, ?it/s]

                     precision    recall  f1-score   support

                AGE       0.95      0.95      0.95       571
               DATE       0.98      0.99      0.98      1648
             GENDER       0.79      0.87      0.83       473
           LOCATION       0.90      0.93      0.91      4629
               NAME       0.81      0.91      0.86       551
       ORGANIZATION       0.81      0.89      0.85       786
         PATIENT_ID       0.97      0.99      0.98      2359
SYMPTOM_AND_DISEASE       0.68      0.85      0.76      1144

          micro avg       0.89      0.94      0.91     12161
          macro avg       0.86      0.92      0.89     12161
       weighted avg       0.89      0.94      0.91     12161



100%|██████████| 55/55 [00:19<00:00,  2.86it/s]
100%|██████████| 24/24 [00:03<00:00,  6.22it/s]
  0%|          | 0/55 [00:00<?, ?it/s]

Train Loss: 0.6040089352564378
F1:  0.7751094112022361


100%|██████████| 55/55 [00:19<00:00,  2.85it/s]
100%|██████████| 24/24 [00:03<00:00,  6.29it/s]
  0%|          | 0/55 [00:00<?, ?it/s]

Train Loss: 0.5725936060602015
F1:  0.7220042854788198


100%|██████████| 55/55 [00:18<00:00,  2.91it/s]
100%|██████████| 24/24 [00:03<00:00,  6.21it/s]
  0%|          | 0/55 [00:00<?, ?it/s]

Train Loss: 0.549939469315789
F1:  0.6607049608355092
                     precision    recall  f1-score   support

                AGE       0.93      0.96      0.95       571
               DATE       0.97      0.99      0.98      1648
             GENDER       0.26      0.96      0.41       473
           LOCATION       0.69      0.72      0.70      4629
               NAME       0.69      0.94      0.79       551
       ORGANIZATION       0.71      0.75      0.73       786
         PATIENT_ID       0.94      0.99      0.96      2359
SYMPTOM_AND_DISEASE       0.13      0.63      0.21      1144

          micro avg       0.55      0.83      0.66     12161
          macro avg       0.67      0.87      0.72     12161
       weighted avg       0.72      0.83      0.75     12161



100%|██████████| 55/55 [00:18<00:00,  2.90it/s]
100%|██████████| 24/24 [00:03<00:00,  6.12it/s]
  0%|          | 0/55 [00:00<?, ?it/s]

Train Loss: 0.5219078855081039
F1:  0.7215891696996497


100%|██████████| 55/55 [00:18<00:00,  2.91it/s]
100%|██████████| 24/24 [00:03<00:00,  6.14it/s]
  0%|          | 0/55 [00:00<?, ?it/s]

Train Loss: 0.46780680851502854
F1:  0.6902088772845953


 35%|███▍      | 19/55 [00:12<00:23,  1.55it/s]


KeyboardInterrupt: 

In [None]:
def merge_B_O_tag(label):
    """Return 'O' unchanged; for any other label drop its first two characters (the 'B-' / 'I-' prefix of a BIO tag)."""
    return label if label == 'O' else label[2:]

## Test model

In [None]:
# entity_model
# Run the in-memory model over every batch of val_loader, collect ids / target
# tags / predicted tags per sentence and print the sentences that differ.
entity_model = model
entity_model.eval()
all_ids = []
all_tar_tag = []
all_out_tag = []
cpu = torch.device('cpu')


with torch.no_grad():  # inference only, no autograd graph needed
    for batch in val_loader:
        # Parse the batch of this iteration (previously the stale global
        # `data` was parsed, so the same batch was evaluated over and over).
        id_batch, mask_batch, token_batch, tag_batch, seq_len_batch = parse_data(batch)
        # The model returns an output object; the raw-sentence inference cell
        # reads its scores through `.logits`, so do the same here.
        out_tag_batch = entity_model(id_batch, mask_batch, token_batch).logits

        id_batch = id_batch.cpu().numpy()
        out_tag_batch = torch.argmax(out_tag_batch, 2).cpu().numpy()
        tag_batch = tag_batch.cpu().numpy()
        BATCH_SIZE = len(id_batch)

        for i in range(BATCH_SIZE):
            l = seq_len_batch[i] # sequence length
            ids = id_batch[i][:l]
            tag = tag_batch[i][:l]
            out_tag = out_tag_batch[i][:l]

            all_ids.append(list(ids))
            all_tar_tag.append(list(tag))
            all_out_tag.append(list(out_tag))

            # Element-wise comparison: summing the differences could cancel
            # out (+1 and -1) and hide a wrong prediction.
            if (tag != out_tag).any():
                print("="*100)
                print("Output tag ", enc_tag.inverse_transform(out_tag))
                print("Target tag ", enc_tag.inverse_transform(tag))

## Test model with raw sentence

In [None]:
from sklearn.preprocessing import LabelEncoder

def sentence_to_input(sent, device=torch.device('cuda'), max_len=100):
    """
    Convert a raw sentence into the tensors the model is called with.

    Input: sentence in string format.
    Output: (token_ids, mask, token_type_ids, real_seq_len). The three
    tensors are int64, carry a leading batch dimension of 1 and are placed
    on `device`; token_type_ids is all zeros. real_seq_len is the sum of
    the attention mask.
    """
    encoded = tokenizer.encode_plus(
        sent,
        add_special_tokens=True,
        max_length=max_len,
        padding='max_length',
        # Without truncation a sentence longer than max_len is returned at
        # its full length instead of being cut to max_len.
        truncation=True,
    )
    token_ids = torch.tensor(encoded['input_ids'], dtype=torch.long)
    mask = torch.tensor(encoded['attention_mask'], dtype=torch.long)
    token_type_ids = torch.zeros_like(token_ids)
    real_seq_len = int(mask.sum())

    token_ids = token_ids.unsqueeze(dim=0).to(device)
    mask = mask.unsqueeze(dim=0).to(device)
    token_type_ids = token_type_ids.unsqueeze(dim=0).to(device)

    return token_ids, mask, token_type_ids, real_seq_len

def decode_output(output, offset, seq_len):
    """Keep one output value per run of contiguous tokens.

    `output` is squeezed and, like `offset`, cut to the first `seq_len`
    entries. The value at position 1 is always kept; for the positions
    1 .. seq_len-2 a value is kept whenever the start of its offset differs
    from the end of the previous offset.

    NOTE(review): `offset` is presumably a per-token (start, end) character
    span list - confirm. This function is shadowed by the later
    decode_output(token_ids, tags) definition in this notebook.
    """
    values = output.squeeze()[:seq_len]
    spans = offset[:seq_len]
    kept = [values[1]]
    kept.extend(
        values[pos]
        for pos in range(1, seq_len - 1)
        if spans[pos][0] != spans[pos - 1][1]
    )
    return kept

# Rebuild the model for raw-sentence inference: install an 11-class head so the
# checkpoint loads, load it, then switch to a CLASS_NB-class head.
# NOTE(review): this rebinds `model` and the CLASS_NB head created below is
# randomly initialised - load a fine-tuned state dict from save_model/
# afterwards, or skip this cell to keep the model trained above.
device = torch.device('cuda')
VLSP_CLASS_NB = 11
model = MyRobertaForTokenClassification.from_pretrained(pretrained_model_name_or_path='phobert-base')
model.classifier = torch.nn.Linear(in_features=768, out_features=VLSP_CLASS_NB, bias=True)
model.num_labels = VLSP_CLASS_NB
# NOTE(review): CLASS_WEIGHT has CLASS_NB entries while the head has
# VLSP_CLASS_NB outputs at this point; the training cell uses
# torch.zeros(11) here instead - confirm which one load_state_dict needs.
loss_function = CrossEntropyLoss(weight=CLASS_WEIGHT)

model.set_loss_function(loss_function)
# NOTE(review): the training cell loads 'save_model/0.86.pth'; confirm this path.
model.load_state_dict(torch.load('../Team3-BTL NLP/source-code/save_model/0.86.pth'))

model.classifier = torch.nn.Linear(in_features=768, out_features=CLASS_NB, bias=True)
# Keep num_labels in sync with the head that was just installed (it was left
# at VLSP_CLASS_NB before, unlike in the training cell).
model.num_labels = CLASS_NB
print('CLASS WEIGHT: ', CLASS_WEIGHT.shape, CLASS_WEIGHT)
model.to(device)
# This model is only used for prediction below, so switch to evaluation mode.
model.eval()

In [None]:
sent = 'Tôi tên là Nguyễn Thành Trung, người ở xã Hòa Bình - tỉnh Thái Bình. Tôi đang làm giám đốc tại công ty Dash Tech'
# Rebuild the label order from the file written earlier by this notebook.
# allow_pickle is needed for the saved array; only load files you created.
encoder = LabelEncoder()
encoder.classes_ = np.load('tag_encoder_vin.npy',  allow_pickle=True)

# Derive the mapping from the reloaded classes (not from the in-memory enc_tag)
# so this cell also works without re-running the data-preparation cells.
# A label's id is its position in classes_.
tag_mapping = {label: index for index, label in enumerate(encoder.classes_)}
tag_mapping_inv = {value:key for key, value in tag_mapping.items()}

token_ids, mask, token_type_ids, real_seq_len = sentence_to_input(sent)

inputs = {
    "input_ids": token_ids.to(device),
    "attention_mask": mask.to(device),
    "token_type_ids": token_type_ids.to(device)
}
model.eval()
with torch.no_grad():  # prediction only
    outputs = model(**inputs).logits
out = outputs[0].detach().cpu().numpy()[:real_seq_len]
out = np.argmax(out,axis=-1)
tags =  [tag_mapping_inv[int(p)] for p in out]
# Drop the batch axis first, then cut the padding; slicing before squeezing
# would act on the batch axis and keep every padded position.
token_ids = token_ids.squeeze(0)[:real_seq_len].cpu().tolist()

In [None]:

def decode_output(token_ids, tags):
    """Group tagged tokens into entities.

    Args:
        token_ids: token ids, aligned with `tags`.
        tags: label strings ('O', 'B-<TYPE>' or 'I-<TYPE>').

    Returns:
        ``str`` of a list of ``[decoded text, TYPE]`` pairs, in order of
        appearance. TYPE is taken from the first tag of the entity.

    An entity is closed by the next 'B-' tag, by an 'O' tag, or by the end of
    the sequence. An 'I-' tag with no open entity starts a new one. Tags that
    are neither 'O' nor prefixed with 'B-' / 'I-' are ignored.
    """
    result = []
    cur_sent = []
    cur_tag = []
    for token, tag in zip(token_ids, tags):
        if tag == 'O' or tag.startswith('B-'):
            # Both end the entity collected so far. Closing on 'O' keeps a
            # later 'I-' token from being glued onto an entity that already
            # ended several tokens earlier.
            if cur_sent:
                result.append([tokenizer.decode(cur_sent), cur_tag[0].split('-', 1)[1]])
            cur_sent = []
            cur_tag = []

        if tag.startswith(('B-', 'I-')):
            cur_sent.append(token)
            cur_tag.append(tag)

    if cur_sent:
        result.append([tokenizer.decode(cur_sent), cur_tag[0].split('-', 1)[1]])
    return str(result)

# Entities found in the example sentence, as the string returned by decode_output(token_ids, tags).
result = decode_output(token_ids, tags)
print(result)

In [None]:
# Print every token of the example sentence followed by its predicted tag.
output = ''.join(
    ' {} <{}> '.format(tokenizer.decode([token]), tag)
    for token, tag in zip(token_ids, tags)
)
print(output)