In [1]:
from transformers import AutoModel
from transformers import AutoTokenizer


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import numpy as np
import torch
import pandas as pd
from sklearn.metrics import confusion_matrix as cm
from sklearn.metrics import ConfusionMatrixDisplay as cmd

In [3]:
from datasets import load_dataset

In [4]:
import torch
from torch.utils.data import TensorDataset, DataLoader
from torch.optim import Adam
import time
import pickle

In [5]:
class FullyConnectedModel(torch.nn.Module):
    """Feed-forward classifier with three ReLU hidden layers (128, 64, 64 units).

    Dropout with p=0.3 is applied to the raw inputs and to the third hidden
    layer's pre-activations; dropout with p=0.5 is applied to the second hidden
    layer's pre-activations. The output layer has no activation, so `forward`
    returns unnormalised scores of width `output_size`.

    Args:
        input_size: number of features in each input vector.
        output_size: number of output scores per input vector.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        nn = torch.nn

        # Hidden stack: input_size -> 128 -> 64 -> 64, each followed by a ReLU.
        self.linear1 = nn.Linear(input_size, 128)
        self.activation1 = nn.ReLU()
        self.linear2 = nn.Linear(128, 64)
        self.activation2 = nn.ReLU()
        self.linear3 = nn.Linear(64, 64)
        self.activation3 = nn.ReLU()

        # Two dropout rates: the heavier one is used only on the second layer.
        self.drop = nn.Dropout(0.5)
        self.droplast = nn.Dropout(0.3)

        # Final projection; deliberately left without an activation.
        self.output_layer = nn.Linear(64, output_size)

        # Xavier-uniform initialisation for every weight matrix (biases keep
        # the torch.nn.Linear default initialisation).
        for layer in (self.linear1, self.linear2, self.linear3, self.output_layer):
            nn.init.xavier_uniform_(layer.weight)

    def forward(self, inputs):
        """Map `inputs` to raw output scores, applying dropout when in training mode."""
        hidden = self.droplast(inputs)
        hidden = self.activation1(self.linear1(hidden))

        hidden = self.linear2(hidden)
        hidden = self.activation2(self.drop(hidden))

        hidden = self.linear3(hidden)
        hidden = self.activation3(self.droplast(hidden))

        return self.output_layer(hidden)


In [6]:
# Load the pre-built training / validation datasets.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only load files that were produced by a trusted pipeline.
# The `with` blocks make sure the file handles are closed once loading is done.
with open("./final_train_data.pkl", "rb") as train_file:
    train_data = pickle.load(train_file)
with open("./final_val_data.pkl", "rb") as val_file:
    val_data = pickle.load(val_file)

# Training batches are reshuffled every epoch. The validation loader is only
# used to compute order-independent metrics, so it is not shuffled.
train_loader = DataLoader(train_data, batch_size = 128, shuffle = True)
val_loader = DataLoader(val_data, batch_size = 128, shuffle = False)

In [11]:
# Split every (X, Y) example of both datasets into parallel Python lists:
# train_X / train_Y for the training set and val_X / val_Y for validation.
train_X, train_Y = [], []
val_X, val_Y = [], []

for dataset, features_out, labels_out in (
    (train_data, train_X, train_Y),
    (val_data, val_X, val_Y),
):
    for X, Y in dataset:
        features_out.append(X)
        labels_out.append(Y)

In [12]:
# Turn each list of per-example tensors into a single tensor with a new
# leading dimension (one row per example).
train_X, train_Y = torch.stack(train_X), torch.stack(train_Y)
val_X, val_Y = torch.stack(val_X), torch.stack(val_Y)

In [13]:
# Number of training examples per class, as a float tensor of length 15 on the GPU.
# torch.bincount does the tally in one call instead of one tensor index-update
# per training example; minlength keeps a zero entry for classes that never occur.
# assumes train_Y holds non-negative integer class indices below 15 — TODO confirm
counts = torch.bincount(train_Y.flatten().long(), minlength=15).float().cuda()

In [14]:
# Per-class loss weights: the reciprocal of each class's log-count relative to
# the mean log-count, so classes with more examples receive smaller weights.
# NOTE(review): a class with 0 or 1 examples gives log(count) of -inf or 0 and
# therefore a degenerate weight — confirm every class has at least 2 examples.
log_counts = torch.log(counts)
count_weights = 1 / (log_counts / torch.mean(log_counts))

In [15]:
count_weights

tensor([0.9748, 1.1019, 1.0765, 0.8777, 1.0216, 1.0438, 1.1223, 1.1446, 1.2101,
        1.3750, 0.9523, 1.0806, 0.9798, 1.0695, 0.5519], device='cuda:0')

In [17]:
# Classifier over 768-dimensional inputs with 15 output classes, moved to the GPU.
fc_model = FullyConnectedModel(input_size=768, output_size=15)
fc_model = fc_model.cuda()

# Cross-entropy weighted per class by count_weights.
loss = torch.nn.CrossEntropyLoss(weight=count_weights)

# Adam with L2 weight decay over all model parameters.
optimizer = Adam(params=fc_model.parameters(), lr=1e-4, weight_decay=1e-3)

In [18]:
def _evaluate(model, criterion, loader):
    """Score `model` on every batch of `loader` without tracking gradients.

    Args:
        model: module called as ``model(X)`` to produce per-class scores.
        criterion: loss called as ``criterion(scores, Y)``.
        loader: iterable of ``(X, Y)`` batches; must yield at least one batch.

    Returns:
        ``(accuracy, macro_f1, mean_batch_loss)`` where accuracy is the fraction
        of exact label matches, macro_f1 is the unweighted mean of per-class F1
        over the classes present in the labels or predictions, and
        mean_batch_loss is the average of the per-batch loss values.
    """
    batch_preds, batch_labels = [], []
    loss_sum = 0.0
    # no_grad: evaluation must not build autograd graphs (saves memory/time).
    with torch.no_grad():
        for X, Y in loader:
            scores = model(X)
            batch_preds.append(torch.argmax(scores, dim = 1))
            batch_labels.append(Y)
            # .item() stores a plain float; the loss is computed on THIS batch.
            loss_sum += criterion(scores, Y).item()

    all_preds = torch.cat(batch_preds)
    all_labels = torch.cat(batch_labels)
    accuracy = torch.mean((all_preds == all_labels).float()).item()

    # sklearn's confusion_matrix signature is (y_true, y_pred): rows are true
    # labels, columns are predictions. normalize="true" therefore puts recall
    # on the diagonal and normalize="pred" puts precision there.
    y_true = all_labels.cpu().numpy()
    y_pred = all_preds.cpu().numpy()
    recall = cm(y_true, y_pred, normalize = "true").diagonal()
    precision = cm(y_true, y_pred, normalize = "pred").diagonal()
    # 1e-6 avoids 0/0 for classes with no true positives.
    macro_f1 = np.mean(2 * recall * precision / (recall + precision + 1e-6))

    return accuracy, macro_f1, loss_sum / max(len(loader), 1)


for epoch in range(201):
    start_time = time.time()
    report_epoch = epoch % 10 == 0  # metrics are only computed every 10th epoch
    if report_epoch:
        print(f"starting epoch {epoch}")

    # ---- one optimisation pass over the training set (dropout active) ----
    fc_model.train()
    for X, Y in train_loader:
        optimizer.zero_grad()
        pred = fc_model(X)
        batch_loss = loss(pred, Y)
        batch_loss.backward()
        optimizer.step()

    # Leave the model in eval mode between epochs so dropout is disabled for
    # the metrics below and for any inference run after training.
    fc_model.eval()

    if report_epoch:
        train_accuracy, train_f1, train_loss = _evaluate(fc_model, loss, train_loader)
        val_accuracy, val_f1, val_loss = _evaluate(fc_model, loss, val_loader)

        print(f"train accuracy: {train_accuracy}")
        print(f"train f1: {train_f1}")
        # Uncomment to also report the mean per-batch loss:
        #print(f"train loss: {train_loss}")
        print(f"val accuracy: {val_accuracy}")
        print(f"val f1: {val_f1}")
        #print(f"val loss: {val_loss}")
        # Includes the time spent computing the metrics above.
        print(f"epoch time: {time.time() - start_time}")
        print()

starting epoch 0
train accuracy: 0.9125291705131531
train f1: 0.06361758780764237
val accuracy: 0.9287457466125488
val f1: 0.06420374703180108
epoch time: 3.9809982776641846

starting epoch 10
train accuracy: 0.928019642829895
train f1: 0.23498458170612982
val accuracy: 0.9309049844741821
val f1: 0.15816716731060404
epoch time: 2.70182728767395

starting epoch 20
train accuracy: 0.9450591206550598
train f1: 0.4701304102987126
val accuracy: 0.9324070811271667
val f1: 0.29393001262164464
epoch time: 2.684265613555908

starting epoch 30
train accuracy: 0.9566267132759094
train f1: 0.6268753112060784
val accuracy: 0.9332519769668579
val f1: 0.3689080253038367
epoch time: 2.639676094055176

starting epoch 40
train accuracy: 0.9661020636558533
train f1: 0.7316353709050915
val accuracy: 0.9351295232772827
val f1: 0.39153580844121916
epoch time: 2.7793049812316895

starting epoch 50
train accuracy: 0.9718958735466003
train f1: 0.7952101343934151
val accuracy: 0.9305294752120972
val f1: 0.41494

In [21]:
def _load_pickle(path):
    """Unpickle and return the object stored at `path`, closing the file afterwards."""
    # NOTE(security): pickle.load can execute arbitrary code while
    # deserialising; only load files produced by a trusted pipeline.
    with open(path, "rb") as handle:
        return pickle.load(handle)


test_X = _load_pickle("test_X.pkl")
last_hidden_X = _load_pickle("last_hidden_X.pkl")
ind_converter = _load_pickle("ind_converter.pkl")

# Parse the CoNLL file into test_conll[paragraph][line] = list of tab-separated
# columns. Paragraphs are separated by a blank line.
with open("../data/test_data/anlp-sciner-test-empty.conll", "rt", encoding="utf-8") as conll_file:
    raw_conll = conll_file.read()
test_conll = [
    [line.split('\t') for line in paragraph.split('\n')]
    for paragraph in raw_conll.split('\n\n')
]

ind_to_label = _load_pickle("ind_to_label.pkl")

test_conll[0]

# Switch to eval mode BEFORE any prediction so dropout is disabled even when
# training was interrupted mid-epoch; no_grad avoids building autograd graphs.
fc_model.eval()
with torch.no_grad():
    test_pred = torch.argmax(fc_model(test_X), dim = 1).cpu().numpy()

np.unique(test_pred, return_counts=True)

# Label every token with the majority prediction over the model positions that
# ind_converter maps to that token.
# NOTE(review): the "- 1" skips the final entry, presumably the trailing empty
# chunk produced by split('\n\n') — confirm last_hidden_X has the same length
# as test_conll, otherwise the last real paragraph is left unlabeled.
with torch.no_grad():
    for i in range(len(last_hidden_X)-1):
        pred = torch.argmax(fc_model(last_hidden_X[i]), dim = 1).cpu().numpy()
        for token in range(len(test_conll[i])):
            inds = ind_converter[i][token]
            # Distinct names here: reusing `counts` would overwrite the
            # class-count tensor that count_weights was computed from.
            vote_labels, vote_counts = np.unique(pred[inds], return_counts=True)
            test_conll[i][token][1] = ind_to_label[vote_labels[np.argmax(vote_counts)]]

# Serialise back to CoNLL: one "token<TAB>label" line per token and a blank
# line after each paragraph. The final test_conll entry is skipped, as above.
output_conll = "".join(
    "".join(row[0] + '\t' + row[1] + '\n' for row in test_conll[i]) + '\n'
    for i in range(len(test_conll)-1)
)
with open("test_output.conll", "wt", encoding='utf-8') as out_file:
    chars_written = out_file.write(output_conll)
chars_written

578210