In [1]:
import os
import shutil
from collections import Counter
import numpy as np
import json
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoTokenizer, ElectraForQuestionAnswering, DataCollatorWithPadding
from Preprocess.arabertpreprocess import ArabertPreprocessor
import matplotlib.pyplot as plt
import seaborn as sns
import csv
torch.manual_seed(3407)


<torch._C.Generator at 0x7f189cf21d80>

## Read Data and Preprocessing

In [2]:
def add_end_index(answer, context):
  ## 1 if span mathc the context 0 otherwise
  text = answer['text']
  start_idx = answer['answer_start']
  end_idx = start_idx + len(text)
  if text == context[start_idx:end_idx]:
    answer['answer_end'] = end_idx
    return False
  for i in range(1,3):
    if text == context[start_idx-i:end_idx-i]:
      answer['answer_end']= end_idx-1
      answer['answer_start'] = start_idx-1
      return False
  return True

In [3]:
def arabert_preprocess(context,question, answer, arabert_prep):
    answer['text'] = arabert_prep.preprocess(answer['text'])
    context = arabert_prep.preprocess(context)
    question = arabert_prep.preprocess(question)
    res = context.find(answer['text'])
    answer['answer_start'] = res
    return context, question, answer, res

In [4]:
def Read_AAQAD(path,arabert_prep):
  contexts =[]
  answers =[]
  questions =[]
  IDs= []
  cnt = 0
  with open(path) as f:
    aaqad_dict = json.load(f)
    for article in aaqad_dict['data']:
      for passage in article['paragraphs']:
        context = passage['context']
        for qa in passage['qas']:
          question = qa['question']
          if 'plausible_answers' in qa.keys():# there is two cases if the question have no answer then use plausible answer
            access = 'plausible_answers'
            #plausible.append(False)
          else:
            access = 'answers'
            #plausible.append(True)
          for answer in qa[access]:
            context,question, answer, res =  arabert_preprocess(context,question, answer, arabert_prep)
            if res==-1:
              cnt+=1
              continue
            flag = add_end_index(answer, context) #if false dont add the 
            cnt =cnt + flag
            if not flag:
              contexts.append(context)
              answers.append(answer)
              questions.append(question)
              IDs.append(qa['id'])
  return contexts,questions,answers,IDs
            

In [5]:
model_name = "araelectra-base-discriminator"
arabert_prep = ArabertPreprocessor(model_name=model_name)
train_contexts, train_questions, train_answers, train_ids = Read_AAQAD('Data/ASQUAD1.json', arabert_prep)
aqad_train_contexts, aqad_train_questions, aqad_train_answers, aqad_train_ids = Read_AAQAD('Data/AAQAD-train.json', arabert_prep)
arcd_train_contexts, arcd_train_questions, arcd_train_answers, arcd_train_ids = Read_AAQAD('Data/arcd-train.json', arabert_prep)
aqad_val_contexts, aqad_val_questions, aqad_val_answers, aqad_val_ids = Read_AAQAD('Data/AAQAD-dev.json', arabert_prep)
arcd_test_contexts, arcd_test_questions, arcd_test_answers, arcd_test_ids = Read_AAQAD('Data/arcd-test.json', arabert_prep)
aqad_test_contexts, aqad_test_questions, aqad_test_answers, aqad_test_ids = Read_AAQAD('Data/AAQAD-test.json', arabert_prep)


In [6]:
train_contexts = train_contexts +aqad_train_contexts +arcd_train_contexts
train_questions = train_questions+ aqad_train_questions +arcd_train_questions
train_answers = train_answers+ aqad_train_answers+ arcd_train_answers
train_ids = train_ids + aqad_train_ids + arcd_train_ids
print(len(train_contexts), len(train_questions), len(train_answers))

61666 61666 61666


In [7]:
del aqad_train_contexts, aqad_train_questions, aqad_train_answers, aqad_train_ids
del arcd_train_contexts, arcd_train_questions, arcd_train_answers, arcd_train_ids
print(len(train_contexts), len(train_questions), len(train_answers))

61666 61666 61666


In [8]:
print(train_contexts[0])
print(train_questions[0])
print(train_answers[0])

يعتمد ASCII أساس ا على الأبجدية الإنجليزية ، ويقوم بترميز 128 حرف ا محدد ا في أعداد صحيحة من سبعة أجزاء كما هو موضح في مخطط ASCII على اليمين . الأحرف المشفرة هي الأرقام من 0 إلى 9 ، والأحرف الصغيرة ا إلى ز ، والأحرف الكبيرة A إلى Z ، ورموز الترقيم الأساسية ، ورموز التحكم التي نشأت مع أجهزة تيليتيبي ، ومساحة . على سبيل المثال ، سيصبح الحرف الصغير ج 1101010 والعشري 106 . تتضمن ASCII تعريفات ل 128 حرف ا 33 حرف ا تحكم ا غير الطباعة العديد منها الآن قديمة تؤثر على كيفية معالجة النص والمساحة و 95 حرف ا قابلا للطباعة ، بما في ذلك المساحة التي ي عتبر رسم ا غير مرئي 223 .
ما هو ASCII على أساس ؟
{'text': 'الأبجدية الإنجليزية', 'answer_start': 23, 'answer_end': 42}


## Tokenization

In [45]:
#Creating the tokenizer
model_name = model_name = "aubmindlab/araelectra-base-discriminator"

araelectra_tokenizer = AutoTokenizer.from_pretrained(model_name,do_lower_case=False)
train_encodings = araelectra_tokenizer(train_questions, train_contexts, truncation=True)
aqad_val_encodings = araelectra_tokenizer(aqad_val_questions, aqad_val_contexts, truncation=True)
aqad_test_encodings = araelectra_tokenizer(aqad_test_questions, aqad_test_contexts,truncation= True)
arcd_test_encodings = araelectra_tokenizer(arcd_test_questions, arcd_test_contexts,truncation=True)


In [10]:
def index_to_token_position(encodings , answers):
  start_positions = list()
  end_positions = list()
  for i in range(len(answers)):
    start_positions.append(encodings.char_to_token(i, answers[i]['answer_start'], 1))
    end_positions.append(encodings.char_to_token(i, answers[i]['answer_end'], 1))
    #if context truncated
    if start_positions[-1] is None: 
      start_positions[-1] = araelectra_tokenizer.model_max_length
    #if end index is space
    itt = 1
    while end_positions[-1] is None: 
      end_positions[-1] = encodings.char_to_token(i, answers[i]['answer_end']-itt, 1)
      itt = itt + 1 
  encodings.update({'start_positions': torch.tensor(start_positions), 'end_positions': torch.tensor(end_positions)})
  encodings['start_positions'] = encodings['start_positions'].view(len(answers), 1)
  encodings['end_positions'] = encodings['end_positions'].view(len(answers), 1)

In [46]:
index_to_token_position(train_encodings, train_answers)
index_to_token_position(aqad_val_encodings, aqad_val_answers)
index_to_token_position(aqad_test_encodings, aqad_test_answers)
index_to_token_position(arcd_test_encodings, arcd_test_answers)

In [12]:
train_encodings.keys()

dict_keys(['input_ids', 'token_type_ids', 'attention_mask', 'start_positions', 'end_positions'])

In [48]:
def fix_ids(ids):
    return [int(x) for x in ids]

In [49]:
arcd_test_ids = fix_ids(arcd_test_ids)

In [50]:
print(type(arcd_test_ids[0]))
print(type(aqad_test_ids[0]))

<class 'int'>
<class 'int'>


In [51]:
#train_encodings.update({'IDs':torch.tensor(train_ids)})
aqad_val_encodings.update({'IDs':torch.tensor(aqad_val_ids)})
aqad_test_encodings.update({'IDs':torch.tensor(aqad_test_ids)})
arcd_test_encodings.update({'IDs':torch.tensor(arcd_test_ids)})

#train_encodings['IDs'] = train_encodings['IDs'].view(len(train_contexts), 1)
aqad_val_encodings['IDs'] = aqad_val_encodings['IDs'].view(len(aqad_val_contexts), 1)
aqad_test_encodings['IDs'] = aqad_test_encodings['IDs'].view(len(aqad_test_contexts), 1)
arcd_test_encodings['IDs'] = arcd_test_encodings['IDs'].view(len(arcd_test_contexts), 1)

## Dataset and Dataloader

In [14]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from tqdm import tqdm

In [53]:
class AqadDataset(torch.utils.data.Dataset):
    def __init__(self, encodings):
        self.encodings = encodings
    def __getitem__(self, idx):
        return {key: val[idx] for key, val in self.encodings.items()}

    def __len__(self):
        return len(self.encodings.input_ids)

train_dataset = AqadDataset(train_encodings)
aqad_val_dataset = AqadDataset(aqad_val_encodings)
aqad_test_dataset = AqadDataset(aqad_test_encodings)
arcd_test_dataset = AqadDataset(arcd_test_encodings)

In [16]:
data_collator = DataCollatorWithPadding(araelectra_tokenizer)

In [54]:
train_loader = DataLoader(train_dataset, batch_size=8, shuffle= True, collate_fn= data_collator)
aqad_val_loader = DataLoader(aqad_val_dataset, batch_size = 8, shuffle = True, collate_fn = data_collator)
aqad_test_loader = DataLoader(aqad_test_dataset, batch_size = 8, shuffle = True, collate_fn = data_collator)
arcd_test_loader = DataLoader(arcd_test_dataset, batch_size = 8, shuffle = True, collate_fn = data_collator)

In [18]:
for step, batch in enumerate(train_loader):
    print(batch['input_ids'].shape)
    print(batch.keys())
    if step>1:
        break

torch.Size([8, 402])
dict_keys(['input_ids', 'token_type_ids', 'attention_mask', 'start_positions', 'end_positions'])
torch.Size([8, 184])
dict_keys(['input_ids', 'token_type_ids', 'attention_mask', 'start_positions', 'end_positions'])
torch.Size([8, 411])
dict_keys(['input_ids', 'token_type_ids', 'attention_mask', 'start_positions', 'end_positions'])


## Checkpoint Saving and Loading

In [19]:
def save_ckp(state, is_best, checkpoint_path, best_model_path):
    """
    state: checkpoint to save
    is_best: is this the best checkpoint; min validation loss
    checkpoint_path: path to save checkpoint
    best_model_path: path to save best checkpoint
    """
    f_path = checkpoint_path
    # save checkpoint data to the path given, checkpoint_path
    torch.save(state, f_path)
    # if it is a best model, min validation loss
    if is_best:
        best_fpath = best_model_path
        # copy that checkpoint file to best path given, best_model_path
        shutil.copyfile(f_path, best_fpath)

In [35]:
def load_ckp(checkpoint_fpath, model, optimizer):
    """
    checkpoint_path: path to saved checkpoint
    model: model to load checkpoint parameters into       
    optimizer: optimizer defined in previous training
    """
    # load check point
    checkpoint = torch.load(checkpoint_fpath)
    # initialize state_dict from checkpoint to model
    model.load_state_dict(checkpoint['state_dict'])
    # initialize optimizer from checkpoint to optimizer
    optimizer.load_state_dict(checkpoint['optimizer'])
    # initialize valid_loss_min from checkpoint to valid_loss_min
    results = checkpoint['result_dict']
    # return model, optimizer, epoch value, min validation loss 
    return model, optimizer, checkpoint['epoch'], results

In [21]:
def order_exp(base_path, exp_name):
  exp_path = os.path.join(base_path, exp_name)
  if not os.path.exists(exp_path):
    os.mkdir(exp_path)
  curr_ckp_path = os.path.join(exp_path,'curr.pt')
  best_ckp_path = os.path.join(exp_path, 'best.pt')
  return curr_ckp_path, best_ckp_path, exp_path

## Evaluate SQuAD

In [62]:
from __future__ import print_function
from collections import Counter
import string
import re
import argparse
import json
import sys
import nltk
import random
nltk.download('punkt')
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))


def f1_score(prediction, ground_truth):
    prediction_tokens = normalize_answer(prediction).split()
    ground_truth_tokens = normalize_answer(ground_truth).split()
    common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
    num_same = sum(common.values())
    if num_same == 0:
        return 0
    precision = 1.0 * num_same / len(prediction_tokens)
    recall = 1.0 * num_same / len(ground_truth_tokens)
    f1 = (2 * precision * recall) / (precision + recall)
    return f1


def exact_match_score(prediction, ground_truth):
    return (normalize_answer(prediction) == normalize_answer(ground_truth))


def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
    scores_for_ground_truths = []
    for ground_truth in ground_truths:
        score = metric_fn(prediction, ground_truth)
        scores_for_ground_truths.append(score)
    return max(scores_for_ground_truths)

def evaluate_squad(dataset, predictions):
    f1 = exact_match = total = exact_sentence = inclusion = random = 0
    f1_ans = exact_match_ans = total_ans = exact_sentence_ans  = 0
    f1_noans = exact_match_noans = total_noans = exact_sentence_noans = 0
    for article in dataset:
        for paragraph in article['paragraphs']:
            for qa in paragraph['qas']:
                flag = False
                total += 1
                if int(qa['id']) not in predictions:
                    message = 'Unanswered question ' + str(qa['id']) + \
                              ' will receive score 0.'
                    print(message, file=sys.stderr)
                    continue
                ground_truths = list(map(lambda x: arabert_prep.preprocess(x['text']), qa['answers']))
                #print(type(ground_truths))
                if len(ground_truths)==0:
                    flag = True
                    total_noans +=1
                else:
                    total_ans+=1
                if flag:
                    ground_truths = list(map(lambda x: arabert_prep.preprocess(x['text']), qa['plausible_answers']))
                prediction = predictions[qa['id']]
                sents = nltk.sent_tokenize(arabert_prep.preprocess(paragraph['context']))
                indx_g = -1
                indx_p = -1
                i = 0
                for sent in sents:
                    if sent.find(ground_truths[0]) != -1:
                        indx_g = i
                    if sent.find(prediction) != -1:
                        indx_p = i
                    i += 1
                if prediction.find(ground_truths[0]) != -1 or ground_truths[0].find(prediction):
                    inclusion += 1
                if indx_g == indx_p and indx_p != -1:
                    exact_sentence += 1
                    if flag:
                        exact_sentence_noans+=1
                    else:
                        exact_sentence_ans+=1

                curr_exact_match= metric_max_over_ground_truths(
                    exact_match_score, prediction, ground_truths)
                curr_f1 = metric_max_over_ground_truths(
                    f1_score, prediction, ground_truths)
                if flag:
                    f1_noans+=curr_f1
                    exact_match_noans+=curr_exact_match
                else:
                    f1_ans+=curr_f1
                    exact_match_ans+=curr_exact_match
                f1+=curr_f1
                exact_match+=curr_exact_match
    print(f"total examples {total}, total have answers {total_ans} total have no answers {total_noans}")
    exact_sentence = 100 * exact_sentence / total
    exact_match = 100.0 * exact_match / total
    f1 = 100.0 * f1 / total
    if total_ans!=0:
        exact_sentence_ans = 100 * exact_sentence_ans / total_ans
        exact_match_ans = 100.0 * exact_match_ans / total_ans
        f1_ans = 100.0 * f1_ans / total_ans
    if total_noans !=0:
        exact_sentence_noans = 100 * exact_sentence_noans / total_noans
        exact_match_noans = 100.0 * exact_match_noans / total_noans
        f1_noans = 100.0 * f1_noans / total_noans



    return {'exact_match': exact_match, 'f1': f1, 'exact_sentence': exact_sentence,
    'exact_match_ans': exact_match_ans, 'f1_ans': f1_ans, 'exact_sentence_ans': exact_match_ans,
    'exact_match_noans': exact_match_noans, 'f1_noans':f1_noans, 'exact_sentence_noans':exact_sentence_noans}


#evaluation
'''
predict_file_path = 'Data/AAQAD-test.json'
predictions_file_path = 'Predictions.json'
with open(predict_file_path) as dataset_file:
  dataset_json = json.load(dataset_file)
  dataset = dataset_json['data']
with open(predictions_file_path) as prediction_file:
  predictions = json.load(prediction_file)
print(json.dumps(evaluate(dataset, predictions)))'''

[nltk_data] Downloading package punkt to /home/azureuser/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


"\npredict_file_path = 'Data/AAQAD-test.json'\npredictions_file_path = 'Predictions.json'\nwith open(predict_file_path) as dataset_file:\n  dataset_json = json.load(dataset_file)\n  dataset = dataset_json['data']\nwith open(predictions_file_path) as prediction_file:\n  predictions = json.load(prediction_file)\nprint(json.dumps(evaluate(dataset, predictions)))"

In [64]:
def evaluate_squadv1(dataset, predictions):
    f1 = exact_match = total = exact_sentence = inclusion = random = 0
    for article in dataset:
        for paragraph in article['paragraphs']:
            for qa in paragraph['qas']:
                total += 1
                if int(qa['id']) not in predictions:
                    message = 'Unanswered question ' + str(qa['id']) + \
                              ' will receive score 0.'
                    print(message, file=sys.stderr)
                    continue
                ground_truths = list(map(lambda x: x['text'], qa['answers']))
                if len(ground_truths)==0:
                    total -=1
                    continue
                prediction = predictions[qa['id']]
                sents = nltk.sent_tokenize(paragraph['context'])
                indx_g = -1
                indx_p = -1
                i = 0
                for sent in sents:
                    if sent.find(ground_truths[0]) != -1:
                        indx_g = i
                    if sent.find(prediction) != -1:
                        indx_p = i
                    i += 1
                if prediction.find(ground_truths[0]) != -1 or ground_truths[0].find(prediction):
                    inclusion += 1
                if indx_g == indx_p and indx_p != -1:
                    exact_sentence += 1
                exact_match += metric_max_over_ground_truths(
                    exact_match_score, prediction, ground_truths)
                f1 += metric_max_over_ground_truths(
                    f1_score, prediction, ground_truths)
    exact_sentence = 100 * exact_sentence / total
    exact_match = 100.0 * exact_match / total
    f1 = 100.0 * f1 / total
    print('Exact Match {exact_match}, f1 Score {f1}, Same Sentence {exact_sentence} total {total}')
    return {'exact_match': exact_match, 'f1': f1, 'exact_sentence': exact_sentence}

In [3]:
print('yes')
!pwd
torch.randn(3)

yes
/mnt/batch/tasks/shared/LS_root/mounts/clusters/youssefgpu/code/Users/Ziyad2018170918/GP/Arabic-MRC


NameError: name 'torch' is not defined

## Evaluate and Train functions

In [56]:
def evaluate(data_loader, model, log, log_path=None, train_loss=None): 
  model.eval()
  with torch.no_grad():
    #F1 = EM = Total = 0
    total_loss = 0.0
    total_predictions = dict()
    #loop = tqdm(data_loader)
    #loop = tqdm(data_loader, leave=True)
    for batch_idx, batch in enumerate(data_loader):
      #moving tensors to gpu    
      tokens = batch['input_ids'].to(device)
      masks = batch['attention_mask'].to(device)
      tokens_type = batch['token_type_ids'].to(device)
      gt_start = batch['start_positions'].to(device)
      gt_end = batch['end_positions'].to(device)
      IDs = batch['IDs'].to(device)
      outputs = model(tokens, masks, tokens_type, start_positions=gt_start, end_positions=gt_end)
      #calculating loss
      loss = outputs.loss
      #update average total loss 
      total_loss = total_loss + ((1 / (batch_idx + 1)) * (loss - total_loss)) 
      #calculating f1 score and EM
      curr_batch_size = gt_start.shape[0]
      #print(curr_batch_size)
      for i in range(curr_batch_size):
        #print(f"this is tensor index {i}")
        start_gt, end_gt = batch['start_positions'][i], batch['end_positions'][i]
        start_pred, end_pred = torch.argmax(outputs.start_logits[i],dim=0), torch.argmax(outputs.end_logits[i],dim =0)
        total_predictions[IDs[i].item()] = araelectra_tokenizer.decode(tokens[i][start_pred.item():end_pred.item()], skip_special_tokens=True, clean_up_tokenization_spaces=True)
    #saving evaluation results
    #evaluation
    #print(total_predictions[5539])
    with open('Data/AAQAD-dev.json') as dataset_file:
        dataset_json = json.load(dataset_file)
        dataset = dataset_json['data']
    result_dict = evaluate_squad(dataset, total_predictions)
    result_dictv1 = evaluate_squadv1(dataset, total_predictions)
    try:
        result_dict['train_loss'] = train_loss.item()
        result_dict['val_loss'] = total_loss.item()
    except:
        pass
    print(type(result_dict))
    print(result_dict)
    #print(json.dumps(result_dict))
    if(log):
      log_path = os.path.join(log_path,'res.csv')
      if not os.path.exists(log_path):
          with open(log_path,'w') as f:
            writer = csv.DictWriter(f, fieldnames=result_dict.keys())
            writer.writeheader()
      with open(log_path, 'a') as f:
        writer = csv.DictWriter(f, fieldnames=result_dict.keys())
        #writer.writeheader()
        writer.writerow(result_dict)
    model.train()
    return result_dict, result_dictv1

In [25]:
def train(model,start_epoch, num_epochs, optimizer,max_compined_metric, train_loader, val_loader, log, exp_name):
  curr_ckp_path, best_ckp_path, exp_path = order_exp('Runs/AraElectra_CombinedData/train', exp_name)
  model.train()
  for epoch in range(start_epoch,num_epochs):
    total_loss = 0.0
    loop = tqdm(train_loader, leave=True)
    for batch_idx, batch in enumerate(loop):
      tokens = batch['input_ids'].to(device)
      masks = batch['attention_mask'].to(device)
      tokens_type = batch['token_type_ids'].to(device)
      gt_start = batch['start_positions'].to(device)
      gt_end = batch['end_positions'].to(device)
      outputs = model(tokens, masks, tokens_type, start_positions=gt_start, end_positions=gt_end)
      loss = outputs.loss
      loss.backward()
      optimizer.step()
      optimizer.zero_grad()
      total_loss = total_loss + ((1 / (batch_idx + 1)) * (loss - total_loss)) 
      loop.set_description(f'Epoch {epoch}')
      loop.set_postfix(loss=loss.item())

    result_dict = evaluate(val_loader, model , log, exp_path, total_loss)
    checkpoint = {
            'epoch': epoch + 1,
            'result_dict':result_dict,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
    curr_compined_metric = result_dict['exact_match']+1.5*result_dict['f1']+0.7*result_dict['exact_sentence']
    if curr_compined_metric>=max_compined_metric:
      max_compined_metric = curr_compined_metric
      save_ckp(checkpoint, True, curr_ckp_path, best_ckp_path)
    else:
      save_ckp(checkpoint, False, curr_ckp_path, best_ckp_path)
  return model


## Modeling

In [28]:
def freeze(Electra, count=None):
    if count is not None:
	      # We freeze here the embeddings of the model
        for param in Electra.electra.embeddings.parameters():
            param.requires_grad = False

        if count != -1:
	          # if freeze_layer_count == -1, we only freeze the embedding layer
	          # otherwise we freeze the first `freeze_layer_count` encoder layers
            for layer in Electra.electra.encoder.layer[:count]:
                for param in layer.parameters():
                    param.requires_grad = False
    print(sum(p.numel() for p in Electra.parameters()), sum(p.numel() for p in Electra.parameters() if p.requires_grad))

In [29]:
QA_AraElectra = ElectraForQuestionAnswering.from_pretrained(model_name)
freeze(QA_AraElectra, 4)

Some weights of the model checkpoint at aubmindlab/araelectra-base-discriminator were not used when initializing ElectraForQuestionAnswering: ['discriminator_predictions.dense.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForQuestionAnswering were not initialized from the model checkpoint at aubmindlab/araelectra-base-discriminator and are newly initialized: ['qa_outputs.weight'

134604290 56704514


In [30]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
QA_AraElectra.to(device)

ElectraForQuestionAnswering(
  (electra): ElectraModel(
    (embeddings): ElectraEmbeddings(
      (word_embeddings): Embedding(64000, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): ElectraEncoder(
      (layer): ModuleList(
        (0): ElectraLayer(
          (attention): ElectraAttention(
            (self): ElectraSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): ElectraSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768

In [31]:
device

device(type='cuda')

In [32]:
num_epochs = 2
learning_rate = 3e-5
optimizer = torch.optim.AdamW(QA_AraElectra.parameters(), lr=learning_rate)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
QA_AraElectra.to(device)

ElectraForQuestionAnswering(
  (electra): ElectraModel(
    (embeddings): ElectraEmbeddings(
      (word_embeddings): Embedding(64000, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): ElectraEncoder(
      (layer): ModuleList(
        (0): ElectraLayer(
          (attention): ElectraAttention(
            (self): ElectraSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): ElectraSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768

In [29]:
trained_model = train(QA_AraElectra, 0, num_epochs, optimizer, 0, train_loader, aqad_val_loader,True, 'firstFreeze4AdamW')

Epoch 0: 100%|██████████| 7709/7709 [1:23:25<00:00,  1.54it/s, loss=2.72] 
Epoch 1: 100%|██████████| 7709/7709 [1:23:35<00:00,  1.54it/s, loss=0.806]


179
total examples 1926, total have answers 1193 total have no answers 733
416
<class 'dict'>
{'exact_match': 3.790238836967809, 'f1': 22.611938207877447, 'exact_sentence': 57.11318795430945, 'exact_match_ans': 4.694048616932104, 'f1_ans': 25.984081185595617, 'exact_sentence_ans': 4.694048616932104, 'exact_match_noans': 2.319236016371078, 'f1_noans': 17.123579991754934, 'exact_sentence_noans': 56.75306957708049, 'train_loss': 2.4689888954162598, 'val_loss': 2.9085168838500977}
179
total examples 1926, total have answers 1193 total have no answers 733
443
<class 'dict'>
{'exact_match': 3.6863966770508827, 'f1': 24.435995525849044, 'exact_sentence': 59.345794392523366, 'exact_match_ans': 4.861693210393965, 'f1_ans': 27.79402539149263, 'exact_sentence_ans': 4.861693210393965, 'exact_match_noans': 1.7735334242837653, 'f1_noans': 18.970607217918772, 'exact_sentence_noans': 60.43656207366985, 'train_loss': 1.9735907316207886, 'val_loss': 2.9121501445770264}


## Validate using SQuADv1

In [36]:
loadedModel, loadedOptime, a, b = load_ckp('Runs/AraElectra_CombinedData/train/firstFreeze4AdamW/best.pt', QA_AraElectra, optimizer)

In [43]:
resv2, resv1 = evaluate(aqad_val_loader,loadedModel,False,None,None )

179
total examples 1926, total have answers 1193 total have no answers 733
443
Exact Match {exact_match}, f1 Score {f1}, Same Sentence {exact_sentence} total {total}
<class 'dict'>
{'exact_match': 3.6863966770508827, 'f1': 24.435995525849044, 'exact_sentence': 59.345794392523366, 'exact_match_ans': 4.861693210393965, 'f1_ans': 27.79402539149263, 'exact_sentence_ans': 4.861693210393965, 'exact_match_noans': 1.7735334242837653, 'f1_noans': 18.970607217918772, 'exact_sentence_noans': 60.43656207366985}


In [44]:
resv1

{'exact_match': 4.442581726739313,
 'f1': 24.117430584141715,
 'exact_sentence': 48.44928751047779}

In [65]:
resv2, resv1 = evaluate(arcd_test_loader,loadedModel,False,None,None )

Unanswered question 16780 will receive score 0.
Unanswered question 16781 will receive score 0.
Unanswered question 16782 will receive score 0.
Unanswered question 16783 will receive score 0.
Unanswered question 16784 will receive score 0.
Unanswered question 16785 will receive score 0.
Unanswered question 16786 will receive score 0.
Unanswered question 16787 will receive score 0.
Unanswered question 16788 will receive score 0.
Unanswered question 16789 will receive score 0.
Unanswered question 16790 will receive score 0.
Unanswered question 16791 will receive score 0.
Unanswered question 16792 will receive score 0.
Unanswered question 16793 will receive score 0.
Unanswered question 16794 will receive score 0.
Unanswered question 16795 will receive score 0.
Unanswered question 16796 will receive score 0.
Unanswered question 16797 will receive score 0.
Unanswered question 16798 will receive score 0.
Unanswered question 16799 will receive score 0.
Unanswered question 16800 will receive s

total examples 1926, total have answers 0 total have no answers 0
Exact Match {exact_match}, f1 Score {f1}, Same Sentence {exact_sentence} total {total}
<class 'dict'>
{'exact_match': 0.0, 'f1': 0.0, 'exact_sentence': 0.0, 'exact_match_ans': 0, 'f1_ans': 0, 'exact_sentence_ans': 0, 'exact_match_noans': 0, 'f1_noans': 0, 'exact_sentence_noans': 0}


In [66]:
print(resv1)

{'exact_match': 0.0, 'f1': 0.0, 'exact_sentence': 0.0}


In [67]:
resv2, resv1 = evaluate(aqad_test_loader,loadedModel,False,None,None )

Unanswered question 16780 will receive score 0.
Unanswered question 16781 will receive score 0.
Unanswered question 16782 will receive score 0.
Unanswered question 16783 will receive score 0.
Unanswered question 16784 will receive score 0.
Unanswered question 16785 will receive score 0.
Unanswered question 16786 will receive score 0.
Unanswered question 16787 will receive score 0.
Unanswered question 16788 will receive score 0.
Unanswered question 16789 will receive score 0.
Unanswered question 16790 will receive score 0.
Unanswered question 16791 will receive score 0.
Unanswered question 16792 will receive score 0.
Unanswered question 16793 will receive score 0.
Unanswered question 16794 will receive score 0.
Unanswered question 16795 will receive score 0.
Unanswered question 16796 will receive score 0.
Unanswered question 16797 will receive score 0.
Unanswered question 16798 will receive score 0.
Unanswered question 16799 will receive score 0.
Unanswered question 16800 will receive s

total examples 1926, total have answers 0 total have no answers 0
Exact Match {exact_match}, f1 Score {f1}, Same Sentence {exact_sentence} total {total}
<class 'dict'>
{'exact_match': 0.0, 'f1': 0.0, 'exact_sentence': 0.0, 'exact_match_ans': 0, 'f1_ans': 0, 'exact_sentence_ans': 0, 'exact_match_noans': 0, 'f1_noans': 0, 'exact_sentence_noans': 0}


In [69]:
print(resv1)
print(resv2)

{'exact_match': 0.0, 'f1': 0.0, 'exact_sentence': 0.0}
{'exact_match': 0.0, 'f1': 0.0, 'exact_sentence': 0.0, 'exact_match_ans': 0, 'f1_ans': 0, 'exact_sentence_ans': 0, 'exact_match_noans': 0, 'f1_noans': 0, 'exact_sentence_noans': 0}
