## load library

In [None]:
!pip install datasets
!pip install transformers
!pip install nltk
!pip install adamp

In [2]:
import pandas as pd
import numpy as np

from torch.utils.data import Dataset
from torch.utils.data import SequentialSampler,RandomSampler
from torch import nn
import torch
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import RandomSampler
from torch.utils.data import DataLoader,RandomSampler,SubsetRandomSampler
from torch.optim import AdamW,Adam,SGD
from adamp import AdamP,SGDP
from torch.optim.lr_scheduler import CosineAnnealingLR,CosineAnnealingWarmRestarts
import torch.nn.functional as F

import datasets
from datasets import load_dataset,load_from_disk,load_metric,DatasetDict,Dataset,Features,Value,concatenate_datasets,Sequence,ClassLabel
from transformers import get_linear_schedule_with_warmup
from transformers import AutoTokenizer,AutoModel
from transformers import BertConfig

import math
import random
from collections import Counter
from tqdm import tqdm,notebook
import functools
import json
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

import nltk; nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


True

##hyperparameter

In [3]:
max_len = 352
batch_size = 4
device = torch.device('cuda')

num_epochs = 40
learning_rate = 1e-6
hidden_size = 768
hidden_dropout_prob = 0.6
num_choices = 5

k=120

patience = 100
counter = 0
best_val_acc = 0
best_val_loss = np.inf

#fix seed

In [4]:
seed=14

torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

##prepare dataset

In [None]:
file_path = '/content/drive/MyDrive/Problem/data.json'

with open(file_path, "r") as json_file:
    train_datasets = json.load(json_file)
    print(train_datasets)

In [6]:
## dataset class

class EnglishDataset(Dataset):

  def __init__(self,
               dataset):
    self.dataset = dataset

  def __len__(self):
    return len(self.dataset['id'])
  
  def __getitem__(self,idx):
    context = self.dataset['context'][idx]

    question = self.dataset['question'][idx]
   
    options = [self.dataset['opa'][idx],self.dataset['opb'][idx],self.dataset['opc'][idx],self.dataset['opd'][idx],self.dataset['ope'][idx]]

    label = self.dataset['cop'][idx] - 1
    return (context,question,options,label)

In [7]:
#convert batch dataset function

def process_batch(batch,tokenizer,max_len=32):

    expanded_batch = []

    labels = []
    
    for context,question,options,label in batch:
        question_option_pairs = [question+' '+option for option in options]
        contexts = [context]*len(options)
        labels.append(label)
        expanded_batch.extend(zip(contexts,question_option_pairs))

    tokenized_batch = tokenizer.batch_encode_plus(expanded_batch,truncation = True, padding="max_length",max_length=max_len,return_tensors="pt")
    
    return tokenized_batch,torch.tensor(labels)

In [None]:
#no question batch function

#for using only one question dataset

def process_batch(batch,tokenizer,max_len=32):

    expanded_batch = []

    labels = []
    
    for context,question,options,label in batch:
        question_option_pairs = [option for option in options]
        contexts = [context]*len(options)
        labels.append(label)
        expanded_batch.extend(zip(contexts,question_option_pairs))

    tokenized_batch = tokenizer.batch_encode_plus(expanded_batch,truncation = True, padding="max_length",max_length=max_len,return_tensors="pt")
    
    return tokenized_batch,torch.tensor(labels)

## prepare pretrained model

In [8]:
#new pretrained model

model_name_or_path = 'bert-base-multilingual-uncased'

model = AutoModel.from_pretrained(model_name_or_path)

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

Downloading:   0%|          | 0.00/625 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/641M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/851k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.64M [00:00<?, ?B/s]

In [None]:
#load trained model

model_name_or_path = 'bert-base-multilingual-uncased'

model = AutoModel.from_pretrained(model_name_or_path)

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

model.load_state_dict(torch.load('/content/drive/MyDrive/english/model.ckpt'))

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [9]:
#dropout and linear layer and loss

dropout = nn.Dropout(hidden_dropout_prob).to(device)
#dropout = nn.Dropout(hidden_dropout_prob)

linear = nn.Linear(in_features=hidden_size,out_features=1).to(device)

criterion = nn.CrossEntropyLoss()

## train model

In [10]:
#remove special character

new_context = []

for context in train_datasets['context']:

  context = context.replace('\n',' ').replace('  ',' ')

  new_context.append(context)

train_datasets['context'] = new_context

In [11]:
#prepare train dataset

train_dataset = EnglishDataset(train_datasets)


model_collate_fn = functools.partial(process_batch,tokenizer=tokenizer,max_len=max_len)


##EDA

In [None]:
import random
from random import shuffle
random.seed(1)

#stop words list
stop_words = ['i', 'me', 'my', 'myself', 'we', 'our', 
			'ours', 'ourselves', 'you', 'your', 'yours', 
			'yourself', 'yourselves', 'he', 'him', 'his', 
			'himself', 'she', 'her', 'hers', 'herself', 
			'it', 'its', 'itself', 'they', 'them', 'their', 
			'theirs', 'themselves', 'what', 'which', 'who', 
			'whom', 'this', 'that', 'these', 'those', 'am', 
			'is', 'are', 'was', 'were', 'be', 'been', 'being', 
			'have', 'has', 'had', 'having', 'do', 'does', 'did',
			'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or',
			'because', 'as', 'until', 'while', 'of', 'at', 
			'by', 'for', 'with', 'about', 'against', 'between',
			'into', 'through', 'during', 'before', 'after', 
			'above', 'below', 'to', 'from', 'up', 'down', 'in',
			'out', 'on', 'off', 'over', 'under', 'again', 
			'further', 'then', 'once', 'here', 'there', 'when', 
			'where', 'why', 'how', 'all', 'any', 'both', 'each', 
			'few', 'more', 'most', 'other', 'some', 'such', 'no', 
			'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 
			'very', 's', 't', 'can', 'will', 'just', 'don', 
			'should', 'now', '']

#cleaning up text
import re
def get_only_chars(line):

    clean_line = ""

    line = line.replace("’", "")
    line = line.replace("'", "")
    line = line.replace("-", " ") #replace hyphens with spaces
    line = line.replace("\t", " ")
    line = line.replace("\n", " ")
    line = line.lower()

    for char in line:
        if char in 'qwertyuiopasdfghjklzxcvbnm ':
            clean_line += char
        else:
            clean_line += ' '

    clean_line = re.sub(' +',' ',clean_line) #delete extra spaces
    if clean_line[0] == ' ':
        clean_line = clean_line[1:]
    return clean_line

########################################################################
# Synonym replacement
# Replace n words in the sentence with synonyms from wordnet
########################################################################

#for the first time you use wordnet
#import nltk
#nltk.download('wordnet')
from nltk.corpus import wordnet 

def synonym_replacement(words, n):
	new_words = words.copy()
	random_word_list = list(set([word for word in words if word not in stop_words]))
	random.shuffle(random_word_list)
	num_replaced = 0
	for random_word in random_word_list:
		synonyms = get_synonyms(random_word)
		if len(synonyms) >= 1:
			synonym = random.choice(list(synonyms))
			new_words = [synonym if word == random_word else word for word in new_words]
			#print("replaced", random_word, "with", synonym)
			num_replaced += 1
		if num_replaced >= n: #only replace up to n words
			break

	#this is stupid but we need it, trust me
	sentence = ' '.join(new_words)
	new_words = sentence.split(' ')

	return new_words

def get_synonyms(word):
	synonyms = set()
	for syn in wordnet.synsets(word): 
		for l in syn.lemmas(): 
			synonym = l.name().replace("_", " ").replace("-", " ").lower()
			synonym = "".join([char for char in synonym if char in ' qwertyuiopasdfghjklzxcvbnm'])
			synonyms.add(synonym) 
	if word in synonyms:
		synonyms.remove(word)
	return list(synonyms)

########################################################################
# Random deletion
# Randomly delete words from the sentence with probability p
########################################################################

def random_deletion(words, p):

	#obviously, if there's only one word, don't delete it
	if len(words) == 1:
		return words

	#randomly delete words with probability p
	new_words = []
	for word in words:
		r = random.uniform(0, 1)
		if r > p:
			new_words.append(word)

	#if you end up deleting all words, just return a random word
	if len(new_words) == 0:
		rand_int = random.randint(0, len(words)-1)
		return [words[rand_int]]

	return new_words

########################################################################
# Random swap
# Randomly swap two words in the sentence n times
########################################################################

def random_swap(words, n):
	new_words = words.copy()
	for _ in range(n):
		new_words = swap_word(new_words)
	return new_words

def swap_word(new_words):
	random_idx_1 = random.randint(0, len(new_words)-1)
	random_idx_2 = random_idx_1
	counter = 0
	while random_idx_2 == random_idx_1:
		random_idx_2 = random.randint(0, len(new_words)-1)
		counter += 1
		if counter > 3:
			return new_words
	new_words[random_idx_1], new_words[random_idx_2] = new_words[random_idx_2], new_words[random_idx_1] 
	return new_words

########################################################################
# Random insertion
# Randomly insert n words into the sentence
########################################################################

def random_insertion(words, n):
	new_words = words.copy()
	for _ in range(n):
		add_word(new_words)
	return new_words

def add_word(new_words):
	synonyms = []
	counter = 0
	while len(synonyms) < 1:
		random_word = new_words[random.randint(0, len(new_words)-1)]
		synonyms = get_synonyms(random_word)
		counter += 1
		if counter >= 10:
			return
	random_synonym = synonyms[0]
	random_idx = random.randint(0, len(new_words)-1)
	new_words.insert(random_idx, random_synonym)

########################################################################
# main data augmentation function
########################################################################

def eda(sentence, alpha_sr=0.1, alpha_ri=0.1, alpha_rs=0.1, p_rd=0.1, num_aug=9):
	
	sentence = get_only_chars(sentence)
	words = sentence.split(' ')
	words = [word for word in words if word is not '']
	num_words = len(words)
	
	augmented_sentences = []
	num_new_per_technique = int(num_aug/4)+1

	#sr
	if (alpha_sr > 0):
		n_sr = max(1, int(alpha_sr*num_words))
		for _ in range(num_new_per_technique):
			a_words = synonym_replacement(words, n_sr)
			augmented_sentences.append(' '.join(a_words))

	#ri
	if (alpha_ri > 0):
		n_ri = max(1, int(alpha_ri*num_words))
		for _ in range(num_new_per_technique):
			a_words = random_insertion(words, n_ri)
			augmented_sentences.append(' '.join(a_words))

	#rs
	if (alpha_rs > 0):
		n_rs = max(1, int(alpha_rs*num_words))
		for _ in range(num_new_per_technique):
			a_words = random_swap(words, n_rs)
			augmented_sentences.append(' '.join(a_words))

	#rd
	if (p_rd > 0):
		for _ in range(num_new_per_technique):
			a_words = random_deletion(words, p_rd)
			augmented_sentences.append(' '.join(a_words))

	augmented_sentences = [get_only_chars(sentence) for sentence in augmented_sentences]
	shuffle(augmented_sentences)

	#trim so that we have the desired number of augmented sentences
	if num_aug >= 1:
		augmented_sentences = augmented_sentences[:num_aug]
	else:
		keep_prob = num_aug / len(augmented_sentences)
		augmented_sentences = [s for s in augmented_sentences if random.uniform(0, 1) < keep_prob]

	#append the original sentence
	augmented_sentences.append(sentence)

	return augmented_sentences

In [None]:
last_id = train_datasets['id'][-1]

In [None]:
id = last_id

for ind,text in zip(train_datasets['id'],train_datasets['context']):

    
    for augmented in eda(text):

        id += 1
        
        train_datasets['id'].append(id)

        train_datasets['context'].append(augmented)

        train_datasets['question'].append(train_datasets['question'][ind])

        train_datasets['opa'].append(train_datasets['opa'][ind])

        train_datasets['opb'].append(train_datasets['opb'][ind])

        train_datasets['opc'].append(train_datasets['opc'][ind])

        train_datasets['opd'].append(train_datasets['opd'][ind])

        train_datasets['ope'].append(train_datasets['ope'][ind])

        train_datasets['cop'].append(train_datasets['cop'][ind])

        train_datasets['category'].append(train_datasets['category'][ind])
    

    if ind == last_id:

      break

In [12]:
len(train_datasets['id'])

364

In [13]:
Counter(train_datasets['category'])

Counter({'목적': 20,
         '문맥': 13,
         '빈칸': 99,
         '심경': 21,
         '어법': 21,
         '요지': 24,
         '일치': 22,
         '전체 흐름': 20,
         '제목': 38,
         '주장': 18,
         '주제': 25,
         '흐름': 43})

In [None]:
#load test dataset

file_path = '/content/drive/MyDrive/Problem/test_data.json'

with open(file_path, "r") as json_file:
    test_datasets = json.load(json_file)
    print(test_datasets)

In [15]:
test_dataset_count = Counter(test_datasets['category'])

test_dataset_count

Counter({'목적': 1,
         '문맥': 1,
         '빈칸': 4,
         '심경': 1,
         '어법': 1,
         '요지': 1,
         '일치': 1,
         '전체 흐름': 1,
         '제목': 2,
         '주장': 1,
         '주제': 1,
         '흐름': 2})

In [16]:
#prepare validation dataset

index_dict = {}

for key in test_dataset_count.keys():
    
    index_dict[key] = []

In [17]:
for ind,q in zip(train_datasets['id'],train_datasets['question']):

    
    if '목적' in q:
        
        index_dict['목적'].append(ind)
    
    elif '주장' in q:

        index_dict['주장'].append(ind)
    
    elif '요지' in q:

        index_dict['요지'].append(ind)
    
    elif '주제' in q:

        index_dict['주제'].append(ind)
    
    elif '제목' in q:
        
        index_dict['제목'].append(ind)
    
    elif '_' in q:
        
        index_dict['빈칸'].append(ind)
    
    elif '일치' in q:

        index_dict['일치'].append(ind)

    elif '심경' in q:

        index_dict['심경'].append(ind)
    
    elif '어법' in q:

        index_dict['어법'].append(ind)

    elif '전체 흐름' in q:

        index_dict['전체 흐름'].append(ind)
    
    elif '문맥' in q:

        index_dict['문맥'].append(ind)

    else:

        index_dict['흐름'].append(ind)

    


In [18]:
val_index_list = []

for key,count in test_dataset_count.items():
    
    val_index_list.extend(np.random.choice(index_dict[key],count))

In [19]:
val_index_list

[121,
 144,
 354,
 79,
 169,
 58,
 180,
 157,
 73,
 72,
 327,
 266,
 198,
 252,
 223,
 236,
 226]

In [20]:
len(val_index_list)

17

In [21]:
train_index_list = []

for ind in train_datasets['id']:

    if not(ind in val_index_list):
        
        train_index_list.append(ind)

In [22]:
len(train_index_list)

347

In [None]:
#using only one question dataset

index_list = []

for i in range(len(train_dataset)):

    _,question,_,_ = train_dataset[i]

    if '제목' in question:
        
        #continue
        index_list.append(i)
      
    #else:

        #index_list.append(i)

In [23]:
#prepare dataloader

#train_sampler = SubsetRandomSampler(train_index_list)
train_sampler = SequentialSampler(train_index_list)

#train_sampler = RandomSampler(train_dataset)

#val_sampler = SubsetRandomSampler(val_index_list)
val_sampler = SequentialSampler(val_index_list)

train_dataloader = DataLoader(train_dataset,batch_size=batch_size, sampler = train_sampler,collate_fn = model_collate_fn)

val_dataloader = DataLoader(train_dataset,batch_size=batch_size, sampler = val_sampler,collate_fn = model_collate_fn)

In [24]:
#optimizer and scheduler

#optimizer = AdamW(model.parameters(),lr = learning_rate, eps = 1e-8)
#optimizer = Adam(model.parameters(),lr = learning_rate, eps = 1e-8)
#optimizer = SGD(model.parameters(),lr = learning_rate)
optimizer = AdamP(model.parameters(), lr=learning_rate)
"""scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=100,
        num_training_steps=(num_epochs + 1) * math.ceil(len(train_dataset) / batch_size),
    )"""

scheduler = CosineAnnealingLR(optimizer, T_max=100, eta_min=0)

#scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=50, T_mult=2, eta_min=0)

In [None]:
#basic training

model.to(device)

for epoch in notebook.tqdm(range(num_epochs)):
    # train loop
    model.train()

    train_loss = 0
    train_acc = 0
    loss_value = 0

    for idx, (inputs,targets) in notebook.tqdm(enumerate(train_dataloader)):
        
        optimizer.zero_grad()
        
        inputs = inputs.to(device)
        output = model(**inputs)

        pooled_output = output[1]
        #pooled_output = dropout(pooled_output)
        logits = linear(pooled_output)
        reshaped_logits = logits.view(-1,num_choices)

        targets = targets.to(device)
        
        loss = criterion(reshaped_logits, targets)

        loss.backward()
        
        optimizer.step()
        scheduler.step()

        predictions = torch.argmax(reshaped_logits,axis=-1)        
        correct_predictions = torch.sum(predictions==targets)
        accuracy = correct_predictions.cpu().detach().numpy()/predictions.size()[0]

        loss_value += loss
        train_acc += accuracy

        if (idx + 1) % train_log_interval == 0:
            train_loss = loss_value / train_log_interval
            avg_train_acc = train_acc / train_log_interval
            
            current_lr = scheduler.get_last_lr()

            print(
                f"Epoch[{epoch}/{num_epochs}]({idx + 1}/{len(train_dataloader)}) || "
                f"training loss {train_loss:4.4} || training accuracy {avg_train_acc:4.2%} || lr {current_lr}||"
            )

            loss_value = 0
            train_acc = 0
        
        torch.cuda.empty_cache()
    
    #if num_epochs % 5 == 0:

        #torch.save(model.state_dict(),'/content/drive/MyDrive/english/model.ckpt')

    #scheduler.step()

    # val loop
    with torch.no_grad():

        print("Calculating validation results...")
        
        model.eval()
        
        val_loss_items = []

        val_acc = 0
        
        for idx,(inputs,labels) in notebook.tqdm(enumerate(val_dataloader)):

            inputs = inputs.to(device)
            labels = labels.to(device)

            outs = model(**inputs)

            pooled_output = outs[1]
            #pooled_output = dropout(pooled_output)
            logits = linear(pooled_output)
            reshaped_logits = logits.view(-1,num_choices)

            preds = torch.argmax(reshaped_logits, dim=-1)

            correct_predictions = torch.sum(preds==labels)

            accuracy = correct_predictions.cpu().detach().numpy()/preds.size()[0]
            
            val_acc += accuracy
            
            loss_item = criterion(reshaped_logits, labels).item()

            val_loss_items.append(loss_item)

        val_loss = np.sum(val_loss_items) / len(val_dataloader)

        avg_val_acc = val_acc / (idx+1)

        # Callback1: validation accuracy가 향상될수록 모델을 저장합니다.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
        if avg_val_acc > best_val_acc:
            print("New best model for val accuracy! saving the model..")
            torch.save(model.state_dict(), f"result_{epoch:03}_accuracy_{avg_val_acc:4.2%}.ckpt")
            best_val_acc = avg_val_acc
            counter = 0
        else:
            counter += 1
        # Callback2: patience 횟수 동안 성능 향상이 없을 경우 학습을 종료시킵니다.
        if counter > patience:
            print("Early Stopping...")
            break
        
        
        print(
            f"[Val] acc : {avg_val_acc:4.2%}, loss: {val_loss:4.2} ||"
            f"best acc : {best_val_acc:4.2%}, best loss: {best_val_loss:4.2}"
        )

In [25]:
train_log_interval = 80

In [None]:
#dropout training
model.to(device)

for epoch in notebook.tqdm(range(num_epochs)):
    # train loop
    model.train()

    train_loss = 0
    train_acc = 0
    loss_value = 0

    for idx, (inputs,targets) in notebook.tqdm(enumerate(train_dataloader)):
        
        optimizer.zero_grad()
        
        inputs = inputs.to(device)
        output = model(**inputs)

        pooled_output = output[1]
        pooled_output = dropout(pooled_output)
        logits = linear(pooled_output)
        reshaped_logits = logits.view(-1,num_choices)

        targets = targets.to(device)
        
        loss = criterion(reshaped_logits, targets)

        loss.backward()
        
        optimizer.step()
        scheduler.step()

        predictions = torch.argmax(reshaped_logits,axis=-1)        
        correct_predictions = torch.sum(predictions==targets)
        accuracy = correct_predictions.cpu().detach().numpy()/predictions.size()[0]

        loss_value += loss
        train_acc += accuracy

        if (idx + 1) % train_log_interval == 0:
            train_loss = loss_value / train_log_interval
            avg_train_acc = train_acc / train_log_interval
            
            current_lr = scheduler.get_last_lr()

            print(
                f"Epoch[{epoch}/{num_epochs}]({idx + 1}/{len(train_dataloader)}) || "
                f"training loss {train_loss:4.4} || training accuracy {avg_train_acc:4.2%} || lr {current_lr}||"
            )

            loss_value = 0
            train_acc = 0
        
        torch.cuda.empty_cache()

    torch.save(model.state_dict(),'/content/drive/MyDrive/english/model2.ckpt')

    #scheduler.step()

    # val loop
    with torch.no_grad():

        print("Calculating validation results...")
        
        model.eval()
        
        val_loss_items = []

        val_acc = 0
        
        for idx,(inputs,labels) in notebook.tqdm(enumerate(val_dataloader)):

            inputs = inputs.to(device)
            labels = labels.to(device)

            outs = model(**inputs)

            pooled_output = outs[1]
            
            dropout_pooled_output = dropout(pooled_output)

            logits = linear(dropout_pooled_output)
            reshaped_logits = logits.view(-1,num_choices)

            softmax_predict = F.softmax(reshaped_logits)

            for _ in notebook.tqdm(range(k-1)):

                dropout_pooled_output = dropout(pooled_output)

                logits = linear(dropout_pooled_output)
                reshaped_logits = logits.view(-1,num_choices)

                softmax_predict += F.softmax(reshaped_logits)
        
            softmax_predict = softmax_predict/k

            preds = torch.argmax(softmax_predict, dim=-1)

            correct_predictions = torch.sum(preds==labels)

            accuracy = correct_predictions.cpu().detach().numpy()/preds.size()[0]
            
            val_acc += accuracy
            
            loss_item = criterion(reshaped_logits, labels).item()

            val_loss_items.append(loss_item)

        val_loss = np.sum(val_loss_items) / len(val_dataloader)

        avg_val_acc = val_acc / (idx+1)

        # Callback1: validation accuracy가 향상될수록 모델을 저장합니다.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
        if avg_val_acc > best_val_acc:
            print("New best model for val accuracy! saving the model..")
            torch.save(model.state_dict(), f"result_{epoch:03}_accuracy_{avg_val_acc:4.2%}.ckpt")
            best_val_acc = avg_val_acc
            counter = 0
        else:
            counter += 1
        # Callback2: patience 횟수 동안 성능 향상이 없을 경우 학습을 종료시킵니다.
        if counter > patience:
            print("Early Stopping...")
            break
        
        
        print(
            f"[Val] acc : {avg_val_acc:4.2%}, loss: {val_loss:4.2} ||"
            f"best acc : {best_val_acc:4.2%}, best loss: {best_val_loss:4.2}"
        )

  0%|          | 0/40 [00:00<?, ?it/s]

0it [00:00, ?it/s]

Epoch[0/40](80/87) || training loss 1.614 || training accuracy 22.19% || lr [9.549150281252635e-08]||
Calculating validation results...


0it [00:00, ?it/s]



  0%|          | 0/119 [00:00<?, ?it/s]



  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

New best model for val accuracy! saving the model..
[Val] acc : 25.00%, loss:  1.6 ||best acc : 25.00%, best loss:  1.6


0it [00:00, ?it/s]

Epoch[1/40](80/87) || training loss 1.612 || training accuracy 19.38% || lr [7.545207078751851e-07]||
Calculating validation results...


0it [00:00, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

New best model for val accuracy! saving the model..
[Val] acc : 60.00%, loss:  1.6 ||best acc : 60.00%, best loss:  1.6


0it [00:00, ?it/s]

Epoch[2/40](80/87) || training loss 1.613 || training accuracy 23.44% || lr [4.3733338321784766e-07]||
Calculating validation results...


0it [00:00, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

[Val] acc : 30.00%, loss:  1.6 ||best acc : 60.00%, best loss:  1.6


0it [00:00, ?it/s]

In [None]:
#save model
torch.save(model.state_dict(),'/content/drive/MyDrive/english/model.ckpt')

## Test model

In [None]:
#trained model load

model_name_or_path = 'bert-base-multilingual-uncased'

model = AutoModel.from_pretrained(model_name_or_path)

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

model.load_state_dict(torch.load('/content/result_001_accuracy_100.00%.ckpt'))

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [None]:
#load test dataset

file_path = '/content/drive/MyDrive/Problem/test_data.json'

with open(file_path, "r") as json_file:
    test_datasets = json.load(json_file)
    print(test_datasets)

In [None]:
#remove special character

new_context = []

for context in test_datasets['context']:

  context = context.replace('\n',' ').replace('  ',' ')

  new_context.append(context)

test_datasets['context'] = new_context

In [None]:
#prepare inference dataset

inference_dataset = EnglishDataset(test_datasets)

model_collate_fn = functools.partial(
  process_batch,
  tokenizer=tokenizer,
  max_len=max_len
  )

In [None]:
#using only one question dataset

test_index_list = []

for i in range(len(inference_dataset)):

    _,question,_,_ = inference_dataset[i]

    if '제목' in question:

        #continue
        test_index_list.append(i)
    
    #else:

        #test_index_list.append(i)

In [None]:
test_index_list

[4, 6]

In [None]:
#prepare dataloader

#eval_sampler = SubsetRandomSampler(test_index_list)
eval_sampler = SequentialSampler(inference_dataset)

inference_dataloader = DataLoader(inference_dataset,
                            batch_size=1,
                            sampler=eval_sampler,
                            collate_fn=model_collate_fn)

In [None]:
#dropout ensemble inference using different seeds

#device = torch.device('cuda')

#model.to(device)

accuracy_list = []

for i in notebook.tqdm(range(1500,2000)):

    seed=i
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    os.environ['PYTHONHASHSEED'] = str(seed)

    model.eval()

    # 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
    all_predictions = []
    true_labels = []

    for inputs,labels in inference_dataloader:

        with torch.no_grad():

            inputs = inputs.to(device)

            pred = model(**inputs)

            pooled_output = pred[1]
            pooled_output = dropout(pooled_output)
            logits = linear(pooled_output)
            reshaped_logits = logits.view(-1,num_choices)

            top_choices = torch.argmax(reshaped_logits, dim=-1)

            all_predictions.extend(top_choices.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    correct = torch.sum(torch.tensor(all_predictions) == torch.tensor(true_labels))

    accuracy = correct/len(true_labels)

    accuracy_list.append(accuracy.item() * 100)


  0%|          | 0/500 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

In [None]:
max(accuracy_list)

54.54545617103577

In [None]:
accuracy_list.index(max(accuracy_list))

20

In [None]:
all_predictions

[2, 0, 3, 4, 3, 0, 3, 2, 2, 4, 0]

In [None]:
true_labels

[1, 2, 0, 4, 0, 2, 1, 0, 4, 0, 1]

In [None]:
correct = torch.sum(torch.tensor(all_predictions) == torch.tensor(true_labels))

accuracy = correct/len(true_labels)

accuracy.item() * 100

9.090909361839294

In [None]:
#basic inference

model.to(device)

model.eval()

# 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
all_predictions = []
true_labels = []

for inputs,labels in inference_dataloader:

    with torch.no_grad():

        inputs = inputs.to(device)

        pred = model(**inputs)

        pooled_output = pred[1]
        #pooled_output = dropout(pooled_output)
        logits = linear(pooled_output)
        reshaped_logits = logits.view(-1,num_choices)

        top_choices = torch.argmax(reshaped_logits, dim=-1)

        all_predictions.extend(top_choices.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())

print(all_predictions)

[1, 2, 1, 3, 0, 3, 1, 2, 2, 0, 1, 1, 2, 2, 3, 2, 3]


In [None]:
print(true_labels)

[1, 2, 0, 4, 0, 2, 1, 0, 4, 0, 1, 4, 3, 2, 3, 4, 3]


In [None]:
correct = torch.sum(torch.tensor(all_predictions) == torch.tensor(true_labels))

accuracy = correct/len(true_labels)

accuracy.item() * 100

52.941179275512695

In [None]:
#basic dropout inference

model.to(device)

model.eval()

# 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
all_predictions = []
true_labels = []

for inputs,labels in inference_dataloader:

    with torch.no_grad():

        inputs = inputs.to(device)

        pred = model(**inputs)

        pooled_output = pred[1]

        dropout_pooled_output = dropout(pooled_output)

        logits = linear(dropout_pooled_output)
        reshaped_logits = logits.view(-1,num_choices)

        softmax_predict = F.softmax(reshaped_logits)

        for _ in notebook.tqdm(range(k-1)):

            dropout_pooled_output = dropout(pooled_output)

            logits = linear(dropout_pooled_output)
            reshaped_logits = logits.view(-1,num_choices)

            softmax_predict += F.softmax(reshaped_logits)
        
        softmax_predict = softmax_predict/k

        top_choices = torch.argmax(softmax_predict, dim=-1)

        all_predictions.extend(top_choices.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())



  0%|          | 0/119 [00:00<?, ?it/s]



  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

In [None]:
print(all_predictions)

[1, 2, 0, 3, 0, 3, 1, 2, 3, 0, 1, 1, 2, 4, 3, 2, 3]


In [None]:
print(true_labels)

[1, 2, 0, 4, 0, 2, 1, 0, 4, 0, 1, 4, 3, 2, 3, 4, 3]


In [None]:
correct = torch.sum(torch.tensor(all_predictions) == torch.tensor(true_labels))

accuracy = correct/len(true_labels)

accuracy.item() * 100

52.941179275512695

In [None]:
#test time augmentation inference

model.to(device)

model.eval()

all_predictions = []

true_labels = []

for i in range(len(inference_dataset)):
    
    new_dataset = [inference_dataset[i]]

    context,question,options,ans = inference_dataset[i]

    for augmented in eda(context):
        
        new_dataset.append((augmented,question,options,ans))

    
    inference_dataloader = DataLoader(new_dataset,
                            batch_size=1,
                            collate_fn=model_collate_fn)
    
    
    for ind,(inputs,labels) in notebook.tqdm(enumerate(inference_dataloader)):

        with torch.no_grad():

            inputs = inputs.to(device)

            pred = model(**inputs)

            pooled_output = pred[1]

            logits = linear(pooled_output)
            reshaped_logits = logits.view(-1,num_choices)

            if ind == 0:

                softmax_predict = F.softmax(reshaped_logits)

                true_labels.extend(labels.cpu().numpy())
            
            else:

                softmax_predict += F.softmax(reshaped_logits)
            
    softmax_predict = softmax_predict/k

    top_choices = torch.argmax(softmax_predict, dim=-1)

    all_predictions.extend(top_choices.cpu().numpy())

0it [00:00, ?it/s]



0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

In [None]:
print(all_predictions)

[1, 2, 0, 3, 0, 3, 1, 2, 2, 0, 1, 1, 2, 4, 3, 2, 2]


In [None]:
print(true_labels)

[1, 2, 0, 4, 0, 2, 1, 0, 4, 0, 1, 4, 3, 2, 3, 4, 3]


In [None]:
correct = torch.sum(torch.tensor(all_predictions) == torch.tensor(true_labels))

accuracy = correct/len(true_labels)

accuracy.item() * 100

47.05882370471954