This notebook is used to measure the accuracy of any trained BERT model on the stress-test dataset

In [None]:
#pip install transformers

In [None]:
import requests
import json
import torch
import torch.nn as nn
import os
from tqdm import tqdm
from transformers import BertModel, BertTokenizerFast, AdamW
# AutoTokenizer, AutoModelForQuestionAnswering, BertTokenizer, BertForQuestionAnswering
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ExponentialLR
import matplotlib.pyplot as plt

In [None]:
MODEL_PATH = "bert-base-uncased"
bert_model = BertModel.from_pretrained(MODEL_PATH)
class QAModel(nn.Module):
    def __init__(self):
        super(QAModel, self).__init__()
        self.bert = bert_model
        self.drop_out = nn.Dropout(0.1)
        self.l1 = nn.Linear(768 * 2, 768 * 2)
        self.l2 = nn.Linear(768 * 2, 2)
        self.linear_relu_stack = nn.Sequential(
            self.drop_out,
            self.l1,
            nn.LeakyReLU(),
            self.l2
        )

    def forward(self, input_ids, attention_mask, token_type_ids):
        model_output = self.bert(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids, output_hidden_states=True)
        hidden_states = model_output[2]
        out = torch.cat((hidden_states[-1], hidden_states[-3]), dim=-1)  # taking Start logits from last BERT layer, End Logits from third to last layer
        logits = self.linear_relu_stack(out)

        start_logits, end_logits = logits.split(1, dim=-1)

        start_logits = start_logits.squeeze(-1)
        end_logits = end_logits.squeeze(-1)

        return start_logits, end_logits

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
model = QAModel()
model.to('cuda')
model.load_state_dict(torch.load('/content/drive/MyDrive/OELP_BERT/BERT_SQuad/OELP_final_15k_v2.hdf5'))

<All keys matched successfully>

In [None]:
tokenizerFast = BertTokenizerFast.from_pretrained(MODEL_PATH)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)
def get_answer(question, context):
    inputs = tokenizerFast.encode_plus(question, context, return_tensors='pt').to(device)
    with torch.no_grad():
        output_start, output_end = model(**inputs)

        answer_start = torch.argmax(output_start)
        answer_end = torch.argmax(output_end)

        answer = tokenizerFast.convert_tokens_to_string(tokenizerFast.convert_ids_to_tokens(inputs['input_ids'][0][answer_start:answer_end]))

        return([answer, answer_start, answer_end])

tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

cuda


In [None]:
context = "I am Utsav Patel. I just completed my fine-tuning of the BERT base uncased model. I am glad that it works after 5 attempts."
question = "How many attempts did it take to fine-tune ?"
print(f"Predicted Answer: {get_answer(question, context)[0]}")

Predicted Answer: 5


In [None]:
!wget -nc https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json

--2023-12-06 15:09:04--  https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json
Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.108.153, 185.199.109.153, 185.199.110.153, ...
Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.108.153|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 42123633 (40M) [application/json]
Saving to: ‘train-v2.0.json’


2023-12-06 15:09:05 (219 MB/s) - ‘train-v2.0.json’ saved [42123633/42123633]



In [None]:
def spanMaker(str):
    ans = []
    temp = ""
    for i in range(len(str)):
      if(str[i]==','):
        ans.append(int(temp))
        temp = ""
      else:
        temp += str[i]
    if(temp!=""):
       ans.append(int(temp))
    return ans

In [None]:
import json

# Opening JSON file
f = open('/content/drive/MyDrive/OELP_BERT/BERT_SQuad/MySQuADDataset_With_Ans_New.json')

# returns JSON object as
# a dictionary
data = json.load(f)

In [None]:
class InputDataset(Dataset):
    def __init__(self, encodings):
        self.encodings = encodings
    def __getitem__(self, i):
        return {
            'input_ids': torch.tensor(self.encodings['input_ids'][i]),
            'token_type_ids': torch.tensor(self.encodings['token_type_ids'][i]),
            'attention_mask': torch.tensor(self.encodings['attention_mask'][i]),
            'start_positions': torch.tensor(self.encodings['start_positions'][i]),
            'end_positions': torch.tensor(self.encodings['end_positions'][i])
        }
    def __len__(self):
        return len(self.encodings['input_ids'])

In [None]:

test_questions = []
test_contexts = []
test_answers = []
test_answer_start = []
new_accuracy = 0

print(type(data))

for i in range(len(data)):
    test_contexts.append(data[i]["sentence"])
    test_questions.append(data[i]["question"])
    test_answers.append(data[i]['answer'])
    test_answer_start.append(data[i]['answer_start'])

print((data[0]["answer_start"]))


<class 'list'>
154


In [None]:
def add_answer_end(answers, contexts):
  for answer, context in zip(answers, contexts):
    answer['text'] = answer['text'].lower()
    answer['answer_end'] = answer['answer_start'] + len(answer['text'])

In [None]:
final_answers = []
for i in range(len(test_answers)):
  final_answers.append({ "text" : test_answers[i], "answer_start" : test_answer_start[i]})
add_answer_end(final_answers, test_contexts)

In [None]:
print(test_contexts[0])
print(test_questions[0])
print(test_answers[0])

Beyoncé has worked with Pepsi since 2002, and in 2004 appeared in a Gladiator-themed commercial with Britney Spears, Pink, and Enrique Iglesias. In 2012, Beyoncé signed a $50 million deal to endorse Pepsi. The Center for Science in the Public Interest (CSPINET) wrote Beyoncé an open letter asking her to reconsider the deal because of the unhealthiness of the product and to donate the proceeds to a medical organisation. Nevertheless, NetBase found that Beyoncé's campaign was the most talked about endorsement in April 2013, with a 70 per cent positive audience response to the commercial and print ads.
Who signed a deal for less than $70 million with Pepsi?
beyoncé


In [None]:
print(final_answers)

[{'text': 'beyoncé', 'answer_start': 154, 'answer_end': 161}, {'text': '', 'answer_start': -1, 'answer_end': -1}, {'text': 'beyoncé', 'answer_start': 154, 'answer_end': 161}, {'text': 'netbase', 'answer_start': 437, 'answer_end': 444}, {'text': '3 metres', 'answer_start': 626, 'answer_end': 634}, {'text': '2 metres', 'answer_start': 703, 'answer_end': 711}, {'text': '', 'answer_start': -1, 'answer_end': -1}, {'text': 'beichuan fault', 'answer_start': 304, 'answer_end': 318}, {'text': 'wisconson glaciation', 'answer_start': 11, 'answer_end': 31}, {'text': 'wisconson glaciation', 'answer_start': 11, 'answer_end': 31}, {'text': '', 'answer_start': -1, 'answer_end': -1}, {'text': 'megacity', 'answer_start': 192, 'answer_end': 200}, {'text': 'megacity', 'answer_start': 192, 'answer_end': 200}, {'text': '', 'answer_start': -1, 'answer_end': -1}, {'text': 'buddhism', 'answer_start': 0, 'answer_end': 8}, {'text': '', 'answer_start': -1, 'answer_end': -1}, {'text': 'buddhism', 'answer_start': 0

In [None]:
MAX_LENGTH = 250
tokenizerFast = BertTokenizerFast.from_pretrained(MODEL_PATH)
test_encodings_fast = tokenizerFast(test_questions, test_contexts,  max_length = MAX_LENGTH, truncation=True, padding=True)

In [None]:
def ret_Answer_start_and_end_train(idx):
    ret_start = 0
    ret_end = 0
    answer_encoding_fast = tokenizerFast(final_answers[idx]['text'],  max_length = MAX_LENGTH, truncation=True, padding=True)
    print(answer_encoding_fast['input_ids'])
    for a in range( len(test_encodings_fast['input_ids'][idx]) -  len(answer_encoding_fast['input_ids']) ):
        i = 1
        match = True
        for i in range(1,len(answer_encoding_fast['input_ids']) - 1):

            if (answer_encoding_fast['input_ids'][i] != test_encodings_fast['input_ids'][idx][a + i]):
                match = False
                break
        if match:
            ret_start = a+1
            ret_end = a+i+1 #it was a+i+1 where i was the length of the number of words in answer
            break
    return(ret_start, ret_end)

In [None]:
print(test_encodings_fast['input_ids'][0])
print(tokenizerFast.convert_tokens_to_string(tokenizerFast.convert_ids_to_tokens(test_encodings_fast['input_ids'][0])))
tempo = tokenizerFast(final_answers[0]['text'],  max_length = MAX_LENGTH, truncation=True, padding=True)['input_ids']
print(tempo)
print(tokenizerFast.convert_tokens_to_string(tokenizerFast.convert_ids_to_tokens(tempo)))
print(final_answers[0]['text'])

[101, 2040, 2772, 1037, 3066, 2005, 2625, 2084, 1002, 3963, 2454, 2007, 27237, 1029, 102, 20773, 2038, 2499, 2007, 27237, 2144, 2526, 1010, 1998, 1999, 2432, 2596, 1999, 1037, 5580, 2401, 4263, 1011, 11773, 3293, 2007, 29168, 13957, 1010, 5061, 1010, 1998, 15769, 1045, 17125, 7951, 1012, 1999, 2262, 1010, 20773, 2772, 1037, 1002, 2753, 2454, 3066, 2000, 2203, 5668, 2063, 27237, 1012, 1996, 2415, 2005, 2671, 1999, 1996, 2270, 3037, 1006, 20116, 19265, 2102, 1007, 2626, 20773, 2019, 2330, 3661, 4851, 2014, 2000, 28667, 5644, 18688, 1996, 3066, 2138, 1997, 1996, 4895, 20192, 24658, 9961, 1997, 1996, 4031, 1998, 2000, 21357, 1996, 10951, 2000, 1037, 2966, 5502, 1012, 6600, 1010, 5658, 15058, 2179, 2008, 20773, 1005, 1055, 3049, 2001, 1996, 2087, 5720, 2055, 20380, 1999, 2258, 2286, 1010, 2007, 1037, 3963, 2566, 9358, 3893, 4378, 3433, 2000, 1996, 3293, 1998, 6140, 14997, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [None]:
print(ret_Answer_start_and_end_train(0))
print(final_answers[0]['text'])
print(len(test_encodings_fast['input_ids'][0]))
print(final_answers[0])

[101, 20773, 102]
(15, 16)
beyoncé
250
{'text': 'beyoncé', 'answer_start': 154, 'answer_end': 161}


In [None]:
start_positions = []
end_positions = []
ctr = 0
for h in range(len(test_encodings_fast['input_ids'])):
    s, e = ret_Answer_start_and_end_train(h)
    start_positions.append(s)
    end_positions.append(e)
    if s==0:
        ctr = ctr + 1


test_encodings_fast.update({'start_positions': start_positions, 'end_positions': end_positions})
print(ctr)

[101, 20773, 102]
[101, 102]
[101, 20773, 102]
[101, 5658, 15058, 102]
[101, 1017, 3620, 102]
[101, 1016, 3620, 102]
[101, 102]
[101, 21388, 26200, 2078, 6346, 102]
[101, 15536, 9363, 15551, 1043, 2721, 23247, 102]
[101, 15536, 9363, 15551, 1043, 2721, 23247, 102]
[101, 102]
[101, 13164, 12972, 102]
[101, 13164, 12972, 102]
[101, 102]
[101, 11388, 102]
[101, 102]
[101, 11388, 102]
[101, 102]
[101, 3782, 13144, 102]
[101, 102]
[101, 7065, 2884, 102]
[101, 2236, 14403, 4143, 2015, 102]
[101, 2236, 14403, 4143, 2015, 102]
[101, 102]
[101, 2343, 25398, 102]
[101, 7064, 11620, 102]
[101, 7064, 16864, 102]
[101, 2220, 29529, 102]
[101, 1048, 27268, 7352, 17342, 1010, 24239, 28716, 2102, 1010, 1998, 7327, 14432, 11610, 2247, 2007, 2116, 2060, 7329, 102]
[101, 8915, 2213, 15460, 26029, 5149, 3669, 102]
[101, 2320, 102]
[101, 2321, 2420, 102]
[101, 2254, 102]
[101, 2539, 2420, 102]
[101, 24921, 2661, 102]
[101, 2324, 2661, 102]
[101, 17690, 2401, 102]
[101, 102]
[101, 3347, 12849, 10023, 3676, 

In [None]:
print(test_encodings_fast['start_positions'])

[15, 1, 16, 116, 169, 201, 1, 3, 0, 0, 1, 54, 55, 1, 17, 1, 19, 1, 194, 1, 59, 82, 75, 1, 15, 53, 35, 13, 69, 103, 78, 70, 102, 164, 143, 184, 31, 1, 46, 145, 18, 1, 1, 22, 1, 44]


In [None]:
test_dataset = InputDataset(test_encodings_fast)

In [None]:
test_data_loader = DataLoader(test_dataset, batch_size=4, shuffle=True)

In [None]:
print(len(test_data_loader))

12


In [None]:
acc = []
for batch in test_data_loader:
  input_ids = batch['input_ids'].to(device)
  attention_mask = batch['attention_mask'].to(device)
  token_type_ids = batch['token_type_ids'].to(device)
  start_positions = batch['start_positions'].to(device)
  end_positions = batch['end_positions'].to(device)
  out_start, out_end = model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
  start_pred = torch.argmax(out_start, dim=1)
  end_pred = torch.argmax(out_end, dim=1)

  # for iter in range(16):
  #         print("pred")
  #         print(tokenizerFast.convert_tokens_to_string(tokenizerFast.convert_ids_to_tokens(input_ids[iter][start_pred[iter] : end_pred[iter]])))
  #         print("true ans")
  #         print(tokenizerFast.convert_tokens_to_string(tokenizerFast.convert_ids_to_tokens(input_ids[iter][start_positions[iter] : end_positions[iter]])))
  print(start_pred)
  print(start_positions)

  acc.append(((start_pred == start_positions).sum()/len(start_pred)).item())
  acc.append(((end_pred == end_positions).sum()/len(end_pred)).item())

tensor([21, 50, 53, 55], device='cuda:0')
tensor([ 1, 19, 53,  1], device='cuda:0')
tensor([ 37,  27, 194,  44], device='cuda:0')
tensor([  1,   0, 194,  44], device='cuda:0')
tensor([ 90,  80, 103, 143], device='cuda:0')
tensor([ 46,  15, 103, 143], device='cuda:0')
tensor([ 26, 215,  30, 178], device='cuda:0')
tensor([  0,   3,   1, 102], device='cuda:0')
tensor([43, 17, 63, 18], device='cuda:0')
tensor([16, 17, 78, 18], device='cuda:0')
tensor([145,  62,  31,  13], device='cuda:0')
tensor([145,   1,  31,  13], device='cuda:0')
tensor([54, 70, 20, 50], device='cuda:0')
tensor([54, 70,  1, 15], device='cuda:0')
tensor([134, 164,  22, 184], device='cuda:0')
tensor([ 82, 164,  22, 184], device='cuda:0')
tensor([136,  55, 203, 111], device='cuda:0')
tensor([ 1, 55, 59,  1], device='cuda:0')
tensor([116,  90, 169,  45], device='cuda:0')
tensor([116,  35, 169,   1], device='cuda:0')
tensor([ 69,   0, 211,  19], device='cuda:0')
tensor([ 69,   1, 201,   1], device='cuda:0')
tensor([75, 50],

In [None]:
print(test_answer_start)

[154, -1, 154, 437, 626, 703, -1, 304, 11, 11, -1, 192, 192, -1, 0, -1, 0, -1, 865, -1, 1352, 619, 363, -1, 10, 205, 105, 4, 292, 410, 270, 241, 1542, 672, 601, 751, 20, -1, 108, 624, 0, -1, -1, 17, -1, 98]


In [None]:
print(100*sum(acc)/len(acc))

42.708333333333336


In [None]:
from google.colab import output
output.eval_js('new Audio("https://upload.wikimedia.org/wikipedia/commons/0/05/Beep-09.ogg").play()')

In [None]:
f.close()