# BERT

Clone the BERT-sklearn repository

In [0]:
# Fetch the bert-sklearn sources (master branch) from GitHub.
!git clone -b master https://github.com/charles9n/bert-sklearn
# Change into the checkout so that `pip install .` installs that package.
%cd bert-sklearn
!pip install .

Import dependencies and load data

In [0]:
import pandas as pd
import numpy as np
from tqdm import tqdm, trange

# Read the token-level corpus and keep only the four columns used below.
data = pd.read_csv(
    'Dataset.csv',
    encoding='latin1',
)[['Sentence #', 'Word', 'POS', 'Tag']]

Sentence Getter

In [0]:
class SentenceGetter(object):
    """Group a token-level frame into sentences of ``(word, pos, tag)`` tuples.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per token, with the columns ``"Sentence #"``, ``"Word"``,
        ``"POS"`` and ``"Tag"``.

    After construction, ``grouped`` holds one list of ``(word, pos, tag)``
    tuples per distinct ``"Sentence #"`` value, and ``sentences`` holds the
    same lists as a plain Python list in the order produced by ``groupby``.
    """

    def __init__(self, data):
        # 1-based counter used by get_next() to build the "Sentence: N" key.
        self.n_sent = 1
        self.data = data
        self.empty = False

        def agg_func(s):
            # Zip the three columns of one sentence group into per-token tuples.
            return list(zip(s["Word"].values.tolist(),
                            s["POS"].values.tolist(),
                            s["Tag"].values.tolist()))

        self.grouped = self.data.groupby("Sentence #").apply(agg_func)
        self.sentences = [s for s in self.grouped]

    def get_next(self):
        """Return the sentence keyed ``"Sentence: <n_sent>"`` and advance the counter.

        Returns
        -------
        list of tuple or None
            The ``(word, pos, tag)`` tuples of the next sentence, or ``None``
            when no group with that key exists (the counter is then left
            unchanged).
        """
        key = "Sentence: {}".format(self.n_sent)
        try:
            s = self.grouped[key]
        except KeyError:
            # Only a missing key means "no more sentences"; any other error
            # is a real problem and is allowed to propagate.
            return None
        self.n_sent += 1
        return s

In [0]:
# Build the sentence grouping once; the cells below read `getter.sentences`.
getter = SentenceGetter(data=data)

In [0]:
# Keep only the word (first field) of every (word, pos, tag) tuple.
sentences = [
    [word for word, _pos, _tag in sent]
    for sent in getter.sentences
]

In [0]:
# Keep only the tag (third field) of every (word, pos, tag) tuple.
labels = [
    [tag for _word, _pos, tag in sent]
    for sent in getter.sentences
]

In [0]:
# Force every token to `str` (non-string cell values would otherwise leak through).
tokenized_texts = [list(map(str, sent)) for sent in sentences]

In [0]:
# Model inputs (token lists) and targets (tag lists), aligned by sentence.
X, y = tokenized_texts, labels

The model

In [0]:
!pip install seqeval

In [0]:
import os
import math
import random
import csv
import sys

import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.metrics import f1_score
from seqeval.metrics import precision_score, recall_score, f1_score, classification_report
from nervaluate import Evaluator

import statistics as stats

from bert_sklearn import BertClassifier
from bert_sklearn import BertRegressor
from bert_sklearn import BertTokenClassifier
from bert_sklearn import load_model

def read_tsv(filename, quotechar=None):
    """Read a tab-separated UTF-8 file and return its rows.

    Parameters
    ----------
    filename : str
        Path of the file to read.
    quotechar : str or None
        Passed through unchanged to ``csv.reader``.

    Returns
    -------
    list of list of str
        One inner list per record, one string per tab-separated field.
    """
    # newline="" hands line-ending handling to the csv module, as its
    # documentation requires for file objects passed to csv.reader.
    with open(filename, "r", encoding='utf-8', newline="") as f:
        return list(csv.reader(f, delimiter="\t", quotechar=quotechar))

def flatten(l):
    """Concatenate the items of every sub-iterable of ``l`` into one flat list."""
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat

def read_CoNLL2003_format(filename, idx=3):
    """Read a file in CoNLL-2003 shared task format.

    Sentences are separated by blank lines; every other line holds
    whitespace-separated fields for one token.

    Parameters
    ----------
    filename : str
        Path of the file to read (opened with the platform default encoding).
    idx : int
        Index of the whitespace-separated field to use as the label.

    Returns
    -------
    pandas.DataFrame
        Columns ``tokens`` and ``labels``; each row holds the token list and
        the label list of one sentence. An empty file yields an empty frame.

    Raises
    ------
    IndexError
        If a token line has fewer than ``idx + 1`` fields.
    """
    # Read inside a context manager so the file handle is always closed.
    with open(filename) as f:
        text = f.read().strip()

    tokens = []
    labels = []
    # Blank lines delimit sentence-like blocks.
    for block in text.split("\n\n"):
        # Drop empty lines (left over from extra blank lines or an empty file).
        rows = [row.split() for row in block.split("\n") if row.strip()]
        if not rows:
            continue
        tokens.append([fields[0] for fields in rows])
        labels.append([fields[idx] for fields in rows])

    # convert to df
    data = {'tokens': tokens, 'labels': labels}
    df = pd.DataFrame(data=data)

    return df

In [0]:
# Distinct tags in a deterministic order: a bare set() of strings iterates in a
# different order on every kernel start (string hash randomization), which would
# change the label order handed to the model from run to run.
# key=str keeps the sort well-defined even if a non-string value (e.g. NaN) is present.
label_list = sorted(set(data.Tag.to_list()), key=str)

In [0]:
%%time
from sklearn.model_selection import StratifiedKFold



# Cross-Validate
skf = StratifiedKFold(5, shuffle=True, random_state=42)
resultslist = []
oos_x = []    
oos_y = []
oos_pred = []

fold = 0
for train, test in skf.split(X,[1 if 'B' in l else 0 for l in labels]):
    fold+=1
    print(f"Fold #{fold}")
    x_train = np.array(X)[train]
    y_train = np.array(y)[train]
    x_test = np.array(X)[test]
    y_test = np.array(y)[test]

    # define model

    # Choose between BERT or SciBERT
    
    model = BertTokenClassifier(bert_model='bert-base-cased',
    # model = BertTokenClassifier(bert_model='scibert-scivocab-cased',                            
                            max_seq_length=178,
                            epochs=3,
                            gradient_accumulation_steps=4,
                            learning_rate=5e-5,
                            train_batch_size=16,
                            eval_batch_size=16,
                            validation_fraction=0., 
                            label_list=label_list,                           
                            ignore_label=['O'])
    print(model)

    # finetune model
    model.fit(x_train, y_train)

    # score model
    f1_test = model.score(x_test, y_test, 'macro')
    print("Test f1: %0.02f"%(f1_test))

    # make predictions
    pred = model.predict(x_test)

    print(classification_report(y_test, pred))
    evaluator = Evaluator(y_test, pred, tags= [''], loader='list')
    results, results_per_tag = evaluator.evaluate()
    resultslist.append(results)
   
    oos_y.append(y_test)
    oos_pred.append(pred)  
    oos_x.append(x_test)  

# Build the oos prediction list 
oos_y = np.concatenate(oos_y)
oos_pred = np.concatenate(oos_pred)
oos_x = np.concatenate(oos_x)

Results

In [0]:
# seqeval report over the pooled out-of-sample predictions of all folds.
oos_report = classification_report(oos_y, oos_pred)
print(oos_report)

In [0]:
# Calculate the standard deviation (SD) of the B and I scores

#np.std(np.array(['Fill in the scores']))

In [0]:
# Standard deviation across folds of the 'partial' precision reported by nervaluate.
# (Swap the keys to get other schemes/metrics stored in the per-fold results.)
partial_precisions = np.array(
    [fold_result['partial']['precision'] for fold_result in resultslist]
)
np.std(partial_precisions)

In [0]:
# Score the pooled out-of-sample predictions with nervaluate.
evaluator = Evaluator(
    oos_y,
    oos_pred,
    tags=[''],
    loader='list',
)
oos_evaluation = evaluator.evaluate()
results, results_per_tag = oos_evaluation

In [0]:
# Display the overall nervaluate results computed in the previous cell.
results