## load library

In [None]:
!pip install datasets
!pip install transformers
!pip install nltk
!pip install adamp

In [None]:
import pandas as pd
import numpy as np

from torch.utils.data import Dataset
from torch.utils.data import SequentialSampler,RandomSampler
from torch import nn
import torch
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import RandomSampler,Subset
from torch.utils.data import DataLoader,RandomSampler,SubsetRandomSampler
from torch.optim import AdamW,Adam,SGD
from adamp import AdamP,SGDP
from torch.optim.lr_scheduler import CosineAnnealingLR,CosineAnnealingWarmRestarts
import torch.nn.functional as F

import datasets
from datasets import load_dataset,load_from_disk,load_metric,DatasetDict,Dataset,Features,Value,concatenate_datasets,Sequence,ClassLabel
from transformers import get_linear_schedule_with_warmup
from transformers import AutoTokenizer,AutoModel
from transformers import BertConfig

import math
import random
from collections import Counter
from tqdm import tqdm,notebook
import functools
import json
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

import nltk; nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


True

## Hyperparameters

In [None]:
# --- Input / batching ---
max_len = 352      # initial padded token length per (context, option) pair; reassigned later in the notebook
batch_size = 4     # questions per batch; process_batch expands each one into one row per option
# Fall back to CPU so the notebook still runs on a machine without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# --- Optimisation / model head ---
num_epochs = 50
learning_rate = 1e-6
hidden_size = 768            # in_features of the nn.Linear scoring head
hidden_dropout_prob = 0.6    # p of the standalone nn.Dropout module
num_choices = 5              # logits are reshaped to (-1, num_choices) in the training loops

# Number of stochastic dropout passes averaged per validation batch in the dropout loop.
k=120

# --- Logging ---
# The first training loop reads this name, but the notebook only assigned it in a later
# cell, so a fresh Run All failed with NameError. Same value as that later cell.
train_log_interval = 61

# --- Early-stopping state (mutated by the training loops) ---
patience = 100
counter = 0
best_val_acc = 0
best_val_loss = np.inf

## Fix random seeds

In [None]:
seed = 14

# Seed each RNG used by the notebook: torch (CPU), NumPy, Python's random, torch (CUDA).
for seed_fn in (
    torch.manual_seed,
    np.random.seed,
    random.seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    seed_fn(seed)

# cuDNN: no benchmark-based algorithm selection, deterministic kernels only.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

## Prepare dataset

In [None]:
file_path = '/content/drive/MyDrive/Problem/data.json'

# Read explicitly as UTF-8: the questions and categories are Korean, so the result must
# not depend on the platform's default text encoding.
with open(file_path, "r", encoding="utf-8") as json_file:
    train_datasets = json.load(json_file)

# Print a compact summary instead of dumping the whole dataset into the cell output.
print(list(train_datasets.keys()), len(train_datasets['id']))

In [None]:
## dataset class

# The base class is spelled out in full on purpose: the import cell first imports
# torch.utils.data.Dataset and then rebinds the bare name `Dataset` to the Hugging Face
# `datasets.Dataset`, so `class EnglishDataset(Dataset)` silently subclassed the wrong type.
class EnglishDataset(torch.utils.data.Dataset):
  """Map-style dataset over a columnar dict of multiple-choice reading questions.

  The dict must provide equally long lists under the keys 'id', 'context',
  'question', 'opa', 'opb', 'opc', 'opd', 'ope' and 'cop'.
  """

  def __init__(self,
               dataset):
    self.dataset = dataset

  def __len__(self):
    """Number of rows, taken from the 'id' column."""
    return len(self.dataset['id'])

  def __getitem__(self,idx):
    """Return ``(context, question, [five options], label)`` for row ``idx``.

    ``label`` is ``cop - 1`` (presumably 'cop' is the 1-based correct option,
    turned into a 0-based class index -- TODO confirm against the data).
    """
    context = self.dataset['context'][idx]

    question = self.dataset['question'][idx]

    options = [self.dataset[key][idx] for key in ('opa', 'opb', 'opc', 'opd', 'ope')]

    label = self.dataset['cop'][idx] - 1
    return (context,question,options,label)

In [None]:
#convert batch dataset function

def process_batch(batch,tokenizer,max_len=32):
    """Collate ``(context, question, options, label)`` items into model inputs.

    Every item becomes one ``(context, question + ' ' + option)`` text pair per
    option. All pairs of the batch are tokenized together, truncated and padded
    to ``max_len``.

    Returns:
        ``(tokenized_batch, labels)`` where ``labels`` is a tensor with one
        entry per item of ``batch``.
    """
    text_pairs = []
    answer_ids = []

    for passage, stem, choices, answer in batch:
        answer_ids.append(answer)
        for choice in choices:
            text_pairs.append((passage, stem + ' ' + choice))

    encoded = tokenizer.batch_encode_plus(
        text_pairs,
        truncation=True,
        padding="max_length",
        max_length=max_len,
        return_tensors="pt",
    )

    return encoded, torch.tensor(answer_ids)

In [None]:
#no question batch function

#for using only one question dataset

# NOTE: this redefines process_batch from the previous cell; whichever cell ran last wins.
def process_batch(batch,tokenizer,max_len=32):
    """Collate items into ``(context, option)`` pairs, leaving the question out.

    Every item becomes one ``(context, option)`` text pair per option. All pairs
    of the batch are tokenized together, truncated and padded to ``max_len``.

    Returns:
        ``(tokenized_batch, labels)`` where ``labels`` is a tensor with one
        entry per item of ``batch``.
    """
    text_pairs = []
    answer_ids = []

    for passage, _question, choices, answer in batch:
        answer_ids.append(answer)
        text_pairs.extend((passage, choice) for choice in choices)

    encoded = tokenizer.batch_encode_plus(
        text_pairs,
        truncation=True,
        padding="max_length",
        max_length=max_len,
        return_tensors="pt",
    )

    return encoded, torch.tensor(answer_ids)

## prepare pretrained model

In [None]:
#new pretrained model

# Checkpoint identifier shared by the encoder and its tokenizer.
model_name_or_path = 'bert-base-multilingual-uncased'

# Encoder only: the scoring layer (`linear`) is a separate module defined later in this notebook.
model = AutoModel.from_pretrained(model_name_or_path)

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

Downloading:   0%|          | 0.00/625 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/641M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/851k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.64M [00:00<?, ?B/s]

In [None]:
#load trained model

model_name_or_path = 'bert-base-multilingual-uncased'

model = AutoModel.from_pretrained(model_name_or_path)

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

# map_location='cpu' lets a checkpoint saved from a GPU run load on any machine; the
# training cells move the model to `device` afterwards with model.to(device).
# NOTE: the checkpoint holds only the encoder weights (model.state_dict()); the separate
# `linear` scoring head is not restored by this cell.
model.load_state_dict(torch.load('/content/drive/MyDrive/english/model4.ckpt', map_location='cpu'))

Downloading:   0%|          | 0.00/625 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/641M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/851k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.64M [00:00<?, ?B/s]

<All keys matched successfully>

In [None]:
#dropout and linear layer and loss

# Dropout applied to the pooled encoder output in the "dropout training" loop.
# It is a standalone module, so model.train()/model.eval() do not change its mode.
dropout = nn.Dropout(hidden_dropout_prob).to(device)
#dropout = nn.Dropout(hidden_dropout_prob)

# Scoring head: maps each pooled vector (hidden_size features) to one logit. The training
# loops reshape these logits to (-1, num_choices) before computing the loss.
linear = nn.Linear(in_features=hidden_size,out_features=1).to(device)

# Cross-entropy over the num_choices logits of each question.
criterion = nn.CrossEntropyLoss()

## train model

In [None]:
#remove special character

# Line breaks become spaces, then each pair of adjacent spaces is merged once
# (a single pass, so longer runs of spaces are shortened but not fully collapsed).
train_datasets['context'] = [
    passage.replace('\n', ' ').replace('  ', ' ')
    for passage in train_datasets['context']
]

In [None]:
#prepare train dataset

# Wrap the columnar dict so rows can be fetched by position.
train_dataset = EnglishDataset(train_datasets)


# Collate function handed to the DataLoaders. tokenizer and max_len are bound right here,
# so reassigning max_len later does not change this partial.
model_collate_fn = functools.partial(process_batch,tokenizer=tokenizer,max_len=max_len)


## EDA (Easy Data Augmentation)

In [None]:
import random
from random import shuffle
# NOTE: this re-seeds Python's `random` module with 1, replacing the seed set in the
# "fix seed" cell for everything that runs after this cell.
random.seed(1)

#stop words list
# Words that synonym_replacement never picks for replacement. The trailing '' entry
# also excludes empty tokens.
stop_words = ['i', 'me', 'my', 'myself', 'we', 'our', 
			'ours', 'ourselves', 'you', 'your', 'yours', 
			'yourself', 'yourselves', 'he', 'him', 'his', 
			'himself', 'she', 'her', 'hers', 'herself', 
			'it', 'its', 'itself', 'they', 'them', 'their', 
			'theirs', 'themselves', 'what', 'which', 'who', 
			'whom', 'this', 'that', 'these', 'those', 'am', 
			'is', 'are', 'was', 'were', 'be', 'been', 'being', 
			'have', 'has', 'had', 'having', 'do', 'does', 'did',
			'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or',
			'because', 'as', 'until', 'while', 'of', 'at', 
			'by', 'for', 'with', 'about', 'against', 'between',
			'into', 'through', 'during', 'before', 'after', 
			'above', 'below', 'to', 'from', 'up', 'down', 'in',
			'out', 'on', 'off', 'over', 'under', 'again', 
			'further', 'then', 'once', 'here', 'there', 'when', 
			'where', 'why', 'how', 'all', 'any', 'both', 'each', 
			'few', 'more', 'most', 'other', 'some', 'such', 'no', 
			'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 
			'very', 's', 't', 'can', 'will', 'just', 'don', 
			'should', 'now', '']

#cleaning up text
import re
def get_only_chars(line):
    """Normalise ``line`` to lowercase ASCII letters separated by single spaces.

    Apostrophes are dropped; hyphens, tabs, newlines and every other character
    outside a-z become a space. Runs of spaces are collapsed and one leading
    space is removed (a trailing space, if any, is kept).

    Returns:
        The cleaned string; ``""`` when the input contains no letters.
    """
    line = line.replace("’", "")
    line = line.replace("'", "")
    line = line.replace("-", " ") #replace hyphens with spaces
    line = line.replace("\t", " ")
    line = line.replace("\n", " ")
    line = line.lower()

    # Everything that is not a-z or a space turns into a space (one regex pass instead of
    # building the string character by character).
    clean_line = re.sub('[^a-z ]', ' ', line)

    clean_line = re.sub(' +',' ',clean_line) #delete extra spaces
    # startswith() is safe on an empty string; indexing clean_line[0] raised IndexError
    # for empty input.
    if clean_line.startswith(' '):
        clean_line = clean_line[1:]
    return clean_line

########################################################################
# Synonym replacement
# Replace n words in the sentence with synonyms from wordnet
########################################################################

#for the first time you use wordnet
#import nltk
#nltk.download('wordnet')
from nltk.corpus import wordnet 

def synonym_replacement(words, n):
    """Replace up to ``n`` distinct non-stop-words of ``words`` with a synonym.

    Candidate words are visited in random order. For each one that has at least
    one synonym, a random synonym replaces every occurrence of that word.

    Returns:
        A new token list; ``words`` itself is not modified.
    """
    result = words.copy()
    non_stop = [token for token in words if token not in stop_words]
    candidates = list(set(non_stop))
    random.shuffle(candidates)

    replaced = 0
    for target in candidates:
        options = get_synonyms(target)
        if len(options) >= 1:
            pick = random.choice(list(options))
            result = [pick if token == target else token for token in result]
            replaced += 1
        if replaced >= n: #only replace up to n words
            break

    # get_synonyms can return multi-word synonyms; joining and re-splitting turns each of
    # them into separate tokens.
    return ' '.join(result).split(' ')

def get_synonyms(word):
    """Return the WordNet synonyms of ``word`` as a sorted list of strings.

    Lemma names are lower-cased, '_' and '-' become spaces, and every character
    other than a-z and space is removed. ``word`` itself is excluded.

    The result is sorted so that callers picking ``synonyms[0]`` or calling
    ``random.choice`` get the same answer on every run; the iteration order of
    a set of strings changes between Python processes.
    """
    allowed = set(' qwertyuiopasdfghjklzxcvbnm')
    synonyms = set()
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonym = lemma.name().replace("_", " ").replace("-", " ").lower()
            synonym = "".join(char for char in synonym if char in allowed)
            # A lemma made only of stripped characters would leave an empty "synonym".
            if synonym.strip():
                synonyms.add(synonym)
    synonyms.discard(word)
    return sorted(synonyms)

########################################################################
# Random deletion
# Randomly delete words from the sentence with probability p
########################################################################

def random_deletion(words, p):
    """Drop each word of ``words`` independently with probability ``p``.

    Args:
        words: list of tokens.
        p: deletion probability per token.

    Returns:
        ``words`` unchanged when it has zero or one element; otherwise a new
        list of the kept tokens, or a single randomly chosen token when every
        token was dropped.
    """
    #nothing to delete from an empty or one-word sentence
    #(an empty list used to fall through to random.randint(0, -1) and raise ValueError)
    if len(words) <= 1:
        return words

    #randomly delete words with probability p
    new_words = [word for word in words if random.uniform(0, 1) > p]

    #if you end up deleting all words, just return a random word
    if len(new_words) == 0:
        rand_int = random.randint(0, len(words)-1)
        return [words[rand_int]]

    return new_words

########################################################################
# Random swap
# Randomly swap two words in the sentence n times
########################################################################

def random_swap(words, n):
    """Return a copy of ``words`` after ``n`` consecutive ``swap_word`` calls."""
    # Fold swap_word over n steps, starting from a copy so the caller's list is untouched.
    return functools.reduce(
        lambda current, _step: swap_word(current),
        range(n),
        words.copy(),
    )

def swap_word(new_words):
    """Swap two randomly chosen positions of ``new_words`` in place.

    A second index different from the first is drawn at most four times; the
    list is returned unchanged if the loop gives up (always the case for a
    one-element list).

    Returns:
        The same list object that was passed in.
    """
    # random.randint(0, -1) raises ValueError, so an empty list is returned as is.
    if len(new_words) == 0:
        return new_words

    random_idx_1 = random.randint(0, len(new_words)-1)
    random_idx_2 = random_idx_1
    counter = 0
    while random_idx_2 == random_idx_1:
        random_idx_2 = random.randint(0, len(new_words)-1)
        counter += 1
        # Give up after the fourth draw, whatever that draw returned.
        if counter > 3:
            return new_words
    new_words[random_idx_1], new_words[random_idx_2] = new_words[random_idx_2], new_words[random_idx_1]
    return new_words

########################################################################
# Random insertion
# Randomly insert n words into the sentence
########################################################################

def random_insertion(words, n):
    """Return a copy of ``words`` after ``n`` calls to ``add_word`` on that copy."""
    augmented = words.copy()
    for _attempt in range(n):
        # add_word mutates the list in place and returns nothing.
        add_word(augmented)
    return augmented

def add_word(new_words):
    """Insert a synonym of a randomly chosen word into ``new_words`` in place.

    Up to ten random words are tried until one has a synonym; if none does, the
    list is left unchanged. The first synonym returned by ``get_synonyms`` is
    inserted at a random existing index (never appended after the last word).
    """
    # random.randint(0, -1) raises ValueError; there is nothing to pick from anyway.
    if len(new_words) == 0:
        return

    synonyms = []
    counter = 0
    while len(synonyms) < 1:
        random_word = new_words[random.randint(0, len(new_words)-1)]
        synonyms = get_synonyms(random_word)
        counter += 1
        if counter >= 10:
            return
    random_synonym = synonyms[0]
    random_idx = random.randint(0, len(new_words)-1)
    new_words.insert(random_idx, random_synonym)

########################################################################
# main data augmentation function
########################################################################

def eda(sentence, alpha_sr=0.1, alpha_ri=0.1, alpha_rs=0.1, p_rd=0.1, num_aug=9):
    """Create augmented variants of ``sentence`` with four word-level edits.

    Args:
        sentence: raw text; it is cleaned with ``get_only_chars`` first.
        alpha_sr: fraction of words to replace with synonyms (0 disables).
        alpha_ri: fraction of words used as the number of synonym insertions (0 disables).
        alpha_rs: fraction of words used as the number of random swaps (0 disables).
        p_rd: per-word deletion probability (0 disables).
        num_aug: number of augmented sentences to keep when >= 1; otherwise
            the expected number kept, applied as a per-sentence keep probability.

    Returns:
        The augmented sentences in shuffled order, followed by the cleaned
        original sentence as the last element.
    """
    sentence = get_only_chars(sentence)
    # Compare by value: `is not ''` tests object identity, which only happens to work for
    # interned strings and triggers a SyntaxWarning on current Python versions.
    words = [word for word in sentence.split(' ') if word != '']
    num_words = len(words)

    # Without any word there is nothing to augment, and the helpers below would call
    # random.randint(0, -1); return just the cleaned original.
    if num_words == 0:
        return [sentence]

    augmented_sentences = []
    num_new_per_technique = int(num_aug/4)+1

    #sr
    if (alpha_sr > 0):
        n_sr = max(1, int(alpha_sr*num_words))
        for _ in range(num_new_per_technique):
            a_words = synonym_replacement(words, n_sr)
            augmented_sentences.append(' '.join(a_words))

    #ri
    if (alpha_ri > 0):
        n_ri = max(1, int(alpha_ri*num_words))
        for _ in range(num_new_per_technique):
            a_words = random_insertion(words, n_ri)
            augmented_sentences.append(' '.join(a_words))

    #rs
    if (alpha_rs > 0):
        n_rs = max(1, int(alpha_rs*num_words))
        for _ in range(num_new_per_technique):
            a_words = random_swap(words, n_rs)
            augmented_sentences.append(' '.join(a_words))

    #rd
    if (p_rd > 0):
        for _ in range(num_new_per_technique):
            a_words = random_deletion(words, p_rd)
            augmented_sentences.append(' '.join(a_words))

    augmented_sentences = [get_only_chars(candidate) for candidate in augmented_sentences]
    shuffle(augmented_sentences)

    #trim so that we have the desired number of augmented sentences
    if num_aug >= 1:
        augmented_sentences = augmented_sentences[:num_aug]
    elif augmented_sentences:
        # Guarded by the elif: with every technique disabled the list is empty and the
        # division below would raise ZeroDivisionError.
        keep_prob = num_aug / len(augmented_sentences)
        augmented_sentences = [s for s in augmented_sentences if random.uniform(0, 1) < keep_prob]

    #append the original sentence
    augmented_sentences.append(sentence)

    return augmented_sentences

In [None]:
# Id of the last row before augmentation; the augmentation cell stops at this row and
# numbers the new rows from here.
last_id = train_datasets['id'][-1]

In [None]:
# Rows to augment: everything up to and including the first row whose id is last_id.
# Working with positions fixes three problems of the previous loop: it used the id VALUE
# as a list index (wrong row whenever ids are not exactly 0..N-1), it iterated over lists
# it was appending to, and it shadowed the builtin `id`.
num_original = train_datasets['id'].index(last_id) + 1

# Columns copied unchanged from the source row to each augmented row.
copied_columns = ('question', 'opa', 'opb', 'opc', 'opd', 'ope', 'cop', 'category')

next_id = last_id

for pos in range(num_original):

    text = train_datasets['context'][pos]

    for augmented in eda(text):

        next_id += 1

        train_datasets['id'].append(next_id)

        train_datasets['context'].append(augmented)

        for column in copied_columns:

            train_datasets[column].append(train_datasets[column][pos])

In [None]:
len(train_datasets['id'])

544

In [None]:
Counter(train_datasets['category'])

Counter({'목적': 28,
         '문맥': 23,
         '빈칸': 152,
         '심경': 26,
         '어법': 28,
         '요지': 37,
         '일치': 47,
         '전체 흐름': 29,
         '제목': 53,
         '주장': 25,
         '주제': 39,
         '흐름': 57})

In [None]:
#load test dataset

file_path = '/content/drive/MyDrive/Problem/test_data.json'

# Read explicitly as UTF-8: the questions and categories are Korean, so the result must
# not depend on the platform's default text encoding.
with open(file_path, "r", encoding="utf-8") as json_file:
    test_datasets = json.load(json_file)

# Print a compact summary instead of dumping the whole dataset into the cell output.
print(list(test_datasets.keys()), len(test_datasets['id']))

In [None]:
# Number of test questions per category; later cells use these counts to decide how many
# validation ids to draw for each category.
test_dataset_count = Counter(test_datasets['category'])

test_dataset_count

Counter({'목적': 1,
         '문맥': 1,
         '빈칸': 4,
         '심경': 1,
         '어법': 1,
         '요지': 1,
         '일치': 1,
         '전체 흐름': 1,
         '제목': 2,
         '주장': 1,
         '주제': 1,
         '흐름': 2})

In [None]:
#prepare validation dataset

# One empty id list per category that occurs in the test set.
index_dict = {category: [] for category in test_dataset_count.keys()}

In [None]:
# (keyword, category) rules checked in order: the first keyword found in the question
# decides the category. Questions matching no keyword go to '흐름'.
category_rules = [
    ('목적', '목적'),
    ('주장', '주장'),
    ('요지', '요지'),
    ('주제', '주제'),
    ('제목', '제목'),
    ('_', '빈칸'),
    ('일치', '일치'),
    ('심경', '심경'),
    ('어법', '어법'),
    ('전체 흐름', '전체 흐름'),
    ('문맥', '문맥'),
]

for ind, q in zip(train_datasets['id'], train_datasets['question']):

    matched = next(
        (category for keyword, category in category_rules if keyword in q),
        '흐름',
    )

    index_dict[matched].append(ind)

    


In [None]:
# Keywords checked before '전체 흐름' in the categorisation order; a question containing
# any of them belongs to that earlier category instead.
earlier_keywords = ('목적', '주장', '요지', '주제', '제목', '_', '일치', '심경', '어법')

# Rebuild the '전체 흐름' list instead of appending to it: the categorisation cell above
# already fills this list, so appending here stored every id twice on a normal top-to-bottom
# run (and once more on every re-run of this cell).
index_dict['전체 흐름'] = [
    ind
    for ind, q in zip(train_datasets['id'], train_datasets['question'])
    if '전체 흐름' in q and not any(keyword in q for keyword in earlier_keywords)
]

In [None]:
val_index_list = []

for key,count in test_dataset_count.items():

    candidates = index_dict[key]

    # replace=False: np.random.choice samples WITH replacement by default, which could put
    # the same id into the validation set more than once. The size is capped so a category
    # with fewer candidates than requested contributes all it has instead of raising.
    val_index_list.extend(np.random.choice(candidates, min(count, len(candidates)), replace=False))

In [None]:
val_index_list = []

# Validation ids are drawn from the '흐름' category only.
for key,count in test_dataset_count.items():

    if key != '흐름':

        continue

    candidates = index_dict[key]

    # replace=False: np.random.choice samples WITH replacement by default, which could put
    # the same id into the validation set more than once. The size is capped so the call
    # cannot ask for more ids than exist.
    val_index_list.extend(np.random.choice(candidates, min(count, len(candidates)), replace=False))

In [None]:
val_index_list

[383, 270]

In [None]:
len(val_index_list)

2

In [None]:
# Every id that was not drawn for validation.
train_index_list = [
    ind
    for ind in train_datasets['id']
    if ind not in val_index_list
]

In [None]:
# Keywords that take precedence over '전체 흐름'; a question containing any of them is skipped.
skip_keywords = ('목적', '_', '주제', '요지', '주장', '제목', '일치', '심경', '어법')

train_index_list = []

for ind, q in zip(train_datasets['id'], train_datasets['question']):

    if any(keyword in q for keyword in skip_keywords):

        continue

    if '전체 흐름' in q:

        train_index_list.append(ind)

In [None]:
# Ids of all questions that mention '전체 흐름', with no precedence rules applied.
train_index_list = [
    ind
    for ind, q in zip(train_datasets['id'], train_datasets['question'])
    if '전체 흐름' in q
]

In [None]:
len(train_index_list)

29

In [None]:
#using only one question dataset

# Positions of the items whose question (second tuple field) mentions '제목'.
index_list = [
    i
    for i in range(len(train_dataset))
    if '제목' in train_dataset[i][1]
]

In [None]:
# Subset takes dataset positions; the id lists are used as positions here
# (assumes ids equal row positions -- TODO confirm against the data).
train = Subset(train_dataset,train_index_list)

# The dataloader cell and both validation loops read `val`. It was commented out, so a
# fresh run failed with NameError at SequentialSampler(val).
val = Subset(train_dataset,val_index_list)

In [None]:
train[0]

## Adjust max_len

In [None]:
# One (context, question + ' ' + option) pair per option of every training item.
expanded_batch = [
    (context, question + ' ' + option)
    for context, question, options, label in train
    for option in options
]

In [None]:
# Token count of every pair as encoded by the tokenizer.
batch_len = [len(tokenizer.encode(pair)) for pair in expanded_batch]

max(batch_len)

312

In [None]:
# Longest encoded pair measured in the previous cell.
max_len = 312

# model_collate_fn captured the OLD max_len when functools.partial was built, so the
# reassignment alone never reached the DataLoaders. Rebuild the partial with the new value.
model_collate_fn = functools.partial(process_batch,tokenizer=tokenizer,max_len=max_len)

## Dataloader, optimizer & scheduler

In [None]:
#prepare dataloader

# Both loaders walk their Subset in index order (no shuffling).
train_sampler = SequentialSampler(train)
val_sampler = SequentialSampler(val)

train_dataloader = DataLoader(
    train,
    batch_size=batch_size,
    sampler=train_sampler,
    collate_fn=model_collate_fn,
)

val_dataloader = DataLoader(
    val,
    batch_size=batch_size,
    sampler=val_sampler,
    collate_fn=model_collate_fn,
)

In [None]:
#optimizer and scheduler

# Optimise the encoder AND the scoring head. `linear` is a separate module, so passing
# only model.parameters() left the head at its random initialisation for the whole run.
# NOTE: the checkpoints written by this notebook contain only model.state_dict(); the
# trained head is not saved with them.
trainable_params = list(model.parameters()) + list(linear.parameters())

optimizer = AdamP(trainable_params, lr=learning_rate)

# Cosine schedule with warm restarts. The training loops call scheduler.step() once per
# batch, so T_0 counts batches here.
scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=50, T_mult=2, eta_min=0)

## Training

In [None]:
#basic training

# NOTE: this cell reads train_log_interval, which the notebook assigns in a later cell;
# that assignment must have run before this loop.

model.to(device)

for epoch in notebook.tqdm(range(num_epochs)):
    # train loop
    model.train()

    train_loss = 0
    train_acc = 0
    loss_value = 0

    # total= gives tqdm a real progress bar; enumerate() alone has no length.
    for idx, (inputs,targets) in notebook.tqdm(enumerate(train_dataloader), total=len(train_dataloader)):

        optimizer.zero_grad()

        inputs = inputs.to(device)
        targets = targets.to(device)

        output = model(**inputs)

        # Second element of the model output, used here as the pooled sequence vector.
        pooled_output = output[1]
        #pooled_output = dropout(pooled_output)
        # One logit per (context, option) row, regrouped into one row of num_choices
        # logits per question.
        logits = linear(pooled_output)
        reshaped_logits = logits.view(-1,num_choices)

        loss = criterion(reshaped_logits, targets)

        loss.backward()

        optimizer.step()
        # The learning-rate schedule advances once per batch, not once per epoch.
        scheduler.step()

        predictions = torch.argmax(reshaped_logits, dim=-1)
        accuracy = (predictions == targets).sum().item() / predictions.size(0)

        # .item() stores a plain float. Adding the loss tensor itself turned the running
        # sum into a tensor and kept autograd history alive between batches.
        loss_value += loss.item()
        train_acc += accuracy

        if (idx + 1) % train_log_interval == 0:
            train_loss = loss_value / train_log_interval
            avg_train_acc = train_acc / train_log_interval

            current_lr = scheduler.get_last_lr()

            print(
                f"Epoch[{epoch}/{num_epochs}]({idx + 1}/{len(train_dataloader)}) || "
                f"training loss {train_loss:4.4} || training accuracy {avg_train_acc:4.2%} || lr {current_lr}||"
            )

            loss_value = 0
            train_acc = 0

        torch.cuda.empty_cache()

    # val loop
    with torch.no_grad():

        print("Calculating validation results...")

        model.eval()

        val_loss_items = []

        val_acc = 0

        for idx,(inputs,labels) in notebook.tqdm(enumerate(val_dataloader), total=len(val_dataloader)):

            inputs = inputs.to(device)
            labels = labels.to(device)

            outs = model(**inputs)

            pooled_output = outs[1]
            #pooled_output = dropout(pooled_output)
            logits = linear(pooled_output)
            reshaped_logits = logits.view(-1,num_choices)

            preds = torch.argmax(reshaped_logits, dim=-1)

            accuracy = (preds == labels).sum().item() / preds.size(0)

            val_acc += accuracy

            loss_item = criterion(reshaped_logits, labels).item()

            val_loss_items.append(loss_item)

        val_loss = np.sum(val_loss_items) / len(val_dataloader)

        # Mean of the per-batch accuracies (batches of different size weigh the same).
        avg_val_acc = val_acc / (idx+1)

        # Callback 1: save the model whenever validation accuracy improves.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
        if avg_val_acc > best_val_acc:
            print("New best model for val accuracy! saving the model..")
            torch.save(model.state_dict(), f"result_{epoch:03}_accuracy_{avg_val_acc:4.2%}.ckpt")
            best_val_acc = avg_val_acc
            counter = 0
        else:
            counter += 1
        # Callback 2: stop training when accuracy has not improved for more than `patience` epochs.
        if counter > patience:
            print("Early Stopping...")
            break


        print(
            f"[Val] acc : {avg_val_acc:4.2%}, loss: {val_loss:4.2} ||"
            f"best acc : {best_val_acc:4.2%}, best loss: {best_val_loss:4.2}"
        )

In [None]:
# Print training loss/accuracy every this many batches (also the averaging window).
train_log_interval = 61

In [None]:
# Overrides the value from the hyperparameter cell for the loop below.
num_epochs = 1

In [None]:
# Reset the early-stopping state before the next training run.
# best_val_acc / best_val_loss are NOT reset here and carry over from any earlier run.
patience = 100
counter = 0

In [None]:
#dropout training
model.to(device)

for epoch in notebook.tqdm(range(num_epochs)):
    # train loop
    model.train()

    train_loss = 0
    train_acc = 0
    loss_value = 0

    # total= gives tqdm a real progress bar; enumerate() alone has no length.
    for idx, (inputs,targets) in notebook.tqdm(enumerate(train_dataloader), total=len(train_dataloader)):

        optimizer.zero_grad()

        inputs = inputs.to(device)
        targets = targets.to(device)

        output = model(**inputs)

        # Second element of the model output, used here as the pooled sequence vector.
        pooled_output = output[1]
        pooled_output = dropout(pooled_output)
        # One logit per (context, option) row, regrouped into one row of num_choices
        # logits per question.
        logits = linear(pooled_output)
        reshaped_logits = logits.view(-1,num_choices)

        loss = criterion(reshaped_logits, targets)

        loss.backward()

        optimizer.step()
        # The learning-rate schedule advances once per batch, not once per epoch.
        scheduler.step()

        predictions = torch.argmax(reshaped_logits, dim=-1)
        accuracy = (predictions == targets).sum().item() / predictions.size(0)

        # .item() stores a plain float. Adding the loss tensor itself turned the running
        # sum into a tensor and kept autograd history alive between batches.
        loss_value += loss.item()
        train_acc += accuracy

        if (idx + 1) % train_log_interval == 0:
            train_loss = loss_value / train_log_interval
            avg_train_acc = train_acc / train_log_interval

            current_lr = scheduler.get_last_lr()

            print(
                f"Epoch[{epoch}/{num_epochs}]({idx + 1}/{len(train_dataloader)}) || "
                f"training loss {train_loss:4.4} || training accuracy {avg_train_acc:4.2%} || lr {current_lr}||"
            )

            loss_value = 0
            train_acc = 0

        torch.cuda.empty_cache()

    # Checkpoint of the encoder weights after every epoch (the `linear` head is not included).
    torch.save(model.state_dict(),'/content/drive/MyDrive/english/model4.ckpt')

    # val loop
    with torch.no_grad():

        print("Calculating validation results...")

        model.eval()

        val_loss_items = []

        val_acc = 0

        for idx,(inputs,labels) in notebook.tqdm(enumerate(val_dataloader), total=len(val_dataloader)):

            inputs = inputs.to(device)
            labels = labels.to(device)

            outs = model(**inputs)

            pooled_output = outs[1]

            # `dropout` is a standalone module, so model.eval() above does not switch it
            # off: every pass below applies a fresh random mask to the same pooled output,
            # and the k resulting softmax distributions are averaged.
            dropout_pooled_output = dropout(pooled_output)

            logits = linear(dropout_pooled_output)
            reshaped_logits = logits.view(-1,num_choices)

            # dim is explicit: calling F.softmax without it is deprecated.
            softmax_predict = F.softmax(reshaped_logits, dim=-1)

            for _ in notebook.tqdm(range(k-1)):

                dropout_pooled_output = dropout(pooled_output)

                logits = linear(dropout_pooled_output)
                reshaped_logits = logits.view(-1,num_choices)

                softmax_predict += F.softmax(reshaped_logits, dim=-1)

            softmax_predict = softmax_predict/k

            preds = torch.argmax(softmax_predict, dim=-1)

            accuracy = (preds == labels).sum().item() / preds.size(0)

            val_acc += accuracy

            # NOTE(review): the loss uses the logits of the LAST dropout pass only, not the
            # averaged prediction, so it is a single-sample estimate.
            loss_item = criterion(reshaped_logits, labels).item()

            val_loss_items.append(loss_item)

        val_loss = np.sum(val_loss_items) / len(val_dataloader)

        # Mean of the per-batch accuracies (batches of different size weigh the same).
        avg_val_acc = val_acc / (idx+1)

        # Callback 1: save the model whenever validation accuracy improves.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
        if avg_val_acc > best_val_acc:
            print("New best model for val accuracy! saving the model..")
            torch.save(model.state_dict(), f"result_{epoch:03}_accuracy_{avg_val_acc:4.2%}.ckpt")
            best_val_acc = avg_val_acc
            counter = 0
        else:
            counter += 1
        # Callback 2: stop training when accuracy has not improved for more than `patience` epochs.
        if counter > patience:
            print("Early Stopping...")
            break


        print(
            f"[Val] acc : {avg_val_acc:4.2%}, loss: {val_loss:4.2} ||"
            f"best acc : {best_val_acc:4.2%}, best loss: {best_val_loss:4.2}"
        )

  0%|          | 0/1 [00:00<?, ?it/s]

0it [00:00, ?it/s]

Epoch[0/1](61/61) || training loss 1.31 || training accuracy 45.90% || lr [9.704403844771127e-07]||
Calculating validation results...


0it [00:00, ?it/s]



  0%|          | 0/119 [00:00<?, ?it/s]



  0%|          | 0/119 [00:00<?, ?it/s]

  0%|          | 0/119 [00:00<?, ?it/s]

New best model for val accuracy! saving the model..
[Val] acc : 91.67%, loss: 0.71 ||best acc : 91.67%, best loss: 0.71


In [None]:
#save model
# Writes the encoder weights only; the separate `linear` scoring head is not part of
# model.state_dict() and is therefore not stored in this file.
torch.save(model.state_dict(),'/content/drive/MyDrive/english/model.ckpt')

## Test model

In [None]:
#trained model load

model_name_or_path = 'bert-base-multilingual-uncased'

model = AutoModel.from_pretrained(model_name_or_path)

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

# map_location='cpu' lets a checkpoint saved from a GPU run load on any machine; move the
# model to the target device afterwards with model.to(device).
# NOTE: the checkpoint holds only the encoder weights (model.state_dict()); the separate
# `linear` scoring head is not restored by this cell.
model.load_state_dict(torch.load('/content/drive/MyDrive/english/model_flow.ckpt', map_location='cpu'))

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [None]:
#load test dataset

file_path = '/content/drive/MyDrive/Problem/test_data.json'

# Read explicitly as UTF-8: the questions and categories are Korean, so the result must
# not depend on the platform's default text encoding.
with open(file_path, "r", encoding="utf-8") as json_file:
    test_datasets = json.load(json_file)

# Print a compact summary instead of dumping the whole dataset into the cell output.
print(list(test_datasets.keys()), len(test_datasets['id']))

In [None]:
#remove special character

# Line breaks become spaces, then each pair of adjacent spaces is merged once
# (a single pass, so longer runs of spaces are shortened but not fully collapsed).
test_datasets['context'] = [
    passage.replace('\n', ' ').replace('  ', ' ')
    for passage in test_datasets['context']
]

In [None]:
#prepare inference dataset

# Wrap the test dict so rows can be fetched by position.
inference_dataset = EnglishDataset(test_datasets)

# Rebinds model_collate_fn with the tokenizer loaded for testing and the current max_len.
model_collate_fn = functools.partial(
  process_batch,
  tokenizer=tokenizer,
  max_len=max_len
  )

In [None]:
#using only one question dataset

# Positions of the test items whose question (second tuple field) mentions '제목'.
test_index_list = [
    i
    for i in range(len(inference_dataset))
    if '제목' in inference_dataset[i][1]
]

In [None]:
# Keywords that take precedence over '전체 흐름' in the categorisation order.
earlier_keywords = ('목적', '주장', '요지', '주제', '제목', '_', '일치', '심경', '어법')

test_index_list = []

for ind, q in zip(test_datasets['id'], test_datasets['question']):

    # Keep the id only when '전체 흐름' is the first rule that matches the question.
    if '전체 흐름' in q and not any(keyword in q for keyword in earlier_keywords):

        test_index_list.append(ind)


In [None]:
test_index_list

[14]

In [None]:
# NOTE: rebinds inference_dataset to a Subset of itself, so this cell is not idempotent:
# running it a second time wraps the Subset again with the same index list.
inference_dataset = Subset(inference_dataset,test_index_list)

In [None]:
#prepare dataloader

# Visit the inference samples in dataset order, one sample per batch.
eval_sampler = SequentialSampler(inference_dataset)

inference_dataloader = DataLoader(
    inference_dataset,
    batch_size=1,
    sampler=eval_sampler,
    collate_fn=model_collate_fn,
)

In [None]:
# Inspect the first raw (untokenized) sample of the inference dataset.
inference_dataset[0]

('Introduction of robots into factories, while employment of human workers is being reduced, creates worry and fear. ( ① ) It is the responsibility of management to prevent or, at least, to ease these fears. ( ② ) For example, robots could be introduced only in new plants rather than replacing humans in existing assembly lines. ( ③ ) Workers should be included in the planning for new factories or the introduction of robots into existing plants, so they can participate in the process. ( ④ ) It may be that robots are needed to reduce manufacturing costs so that the company remains competitive, but planning for such cost reductions should be done jointly by labor and management. ( ⑤ ) Since robots are particularly good at highly repetitive simple motions, the replaced human workers should be moved to positions where judgment and decisions beyond the abilities of robots are required.',
 'Retraining current employees for new positions within the company will also greatly reduce their fear o

In [None]:
# Materialise every collated batch so individual batches can be inspected.
a_list = []

# NOTE: after the loop, `a` stays bound to the last batch; the decode cell
# further down reads it, so the loop variable must keep this name.
for a in inference_dataloader:
    
    a_list.append(a)

In [None]:
# Decode one tokenized sequence as a sanity check of the collate function.
# The requested batch (6) and row (5) are clamped to what actually exists, so
# the cell no longer raises IndexError when only a few samples are selected.
batch_idx = min(6, len(a_list) - 1)
batch_input_ids = a_list[batch_idx][0]['input_ids']
row_idx = min(5, len(batch_input_ids) - 1)

tokenizer.decode(batch_input_ids[row_idx])

IndexError: ignored

In [None]:
# `a` is the last batch left over from the loop that filled a_list; its first
# element holds the tokenizer output. Decode the first sequence of that batch.
encoded_inputs = a[0]
first_sequence_ids = encoded_inputs['input_ids'][0]
tokenizer.decode(first_sequence_ids)

'[CLS] classifying things together into groups is something we do all the time, and it isn [UNK] t hard to see why. imagine trying to shop in a supermarket where the food was arranged in random order on the shelves : tomato soup next to the white bread in one aisle, chicken soup in the back next to the 60 - watt light bulbs, one brand of cream cheese in front and another in aisle 8 near the cookies. the task of finding what you want would be time - consuming and extremely difficult, if not impossible. in the case of a supermarket, someone had to design the system of classification. but there is also a ready - made system of classification embodied in our language. the word [UNK] dog, [UNK] for example, groups together a certain class of animals and distinguishes them from other animals. such a grouping may seem too abstract to be called a classification, but this is only because you have already mastered the word. as a child learning to speak, you had to work hard to learn the system o

In [None]:
#dropout ensemble inference using different seeds
#
# For each seed in 1500..1999 the classification head (dropout + linear) is
# applied to every test sample, and the resulting accuracy (in percent) is
# appended to accuracy_list.
# NOTE(review): the seeds only change the result if `dropout` is still in
# training mode — confirm where it is defined.

model.eval()

# The encoder runs in eval mode and only the dropout mask depends on the seed,
# so the pooled outputs are computed once and reused for every seed instead of
# re-running the encoder 500 times.
# NOTE(review): assumes the collate function is deterministic — confirm.
cached_batches = []

with torch.no_grad():

    for inputs,labels in inference_dataloader:

        inputs = inputs.to(device)

        pred = model(**inputs)

        # Index 1 of the encoder output is used as the pooled representation.
        cached_batches.append((pred[1],labels))

# These flags do not depend on the seed; set them once.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

accuracy_list = []

for seed in notebook.tqdm(range(1500,2000)):

    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Predictions and gold labels for this seed only.
    all_predictions = []
    true_labels = []

    with torch.no_grad():

        for pooled_output,labels in cached_batches:

            logits = linear(dropout(pooled_output))

            # One row per question, one column per answer choice.
            reshaped_logits = logits.view(-1,num_choices)

            top_choices = torch.argmax(reshaped_logits, dim=-1)

            all_predictions.extend(top_choices.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    correct = torch.sum(torch.tensor(all_predictions) == torch.tensor(true_labels))

    accuracy = correct/len(true_labels)

    accuracy_list.append(accuracy.item() * 100)


  0%|          | 0/500 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

In [None]:
# Best accuracy (in percent) over all evaluated seeds.
# NOTE(review): this maximum is taken on the same data the seeds are scored on,
# so it is an optimistic estimate — confirm a separate validation split exists.
max(accuracy_list)

54.54545617103577

In [None]:
# Position of the best accuracy in accuracy_list. The seeds were iterated
# starting at 1500, so the corresponding seed is 1500 + this index.
accuracy_list.index(max(accuracy_list))

20

In [None]:
# Predicted choice indices from the most recently executed inference pass.
all_predictions

[2, 0, 3, 4, 3, 0, 3, 2, 2, 4, 0]

In [None]:
# Gold choice indices from the most recently executed inference pass.
true_labels

[1, 2, 0, 4, 0, 2, 1, 0, 4, 0, 1]

In [None]:
# Accuracy in percent: share of positions where prediction and label agree.
predicted_tensor = torch.tensor(all_predictions)
label_tensor = torch.tensor(true_labels)

correct = (predicted_tensor == label_tensor).sum()
accuracy = correct/len(true_labels)

accuracy.item() * 100

9.090909361839294

In [None]:
#basic inference

model.to(device)
model.eval()

# The model predicts every sample of the test dataloader; the predictions and
# the gold labels are stored in these two lists.
all_predictions, true_labels = [], []

with torch.no_grad():

    for inputs,labels in inference_dataloader:

        inputs = inputs.to(device)

        pred = model(**inputs)

        # Index 1 of the encoder output is used as the pooled representation;
        # no dropout is applied in this variant.
        pooled_output = pred[1]

        # One row per question, one column per answer choice.
        reshaped_logits = linear(pooled_output).view(-1,num_choices)

        top_choices = reshaped_logits.argmax(dim=-1)

        all_predictions.extend(top_choices.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())

print(all_predictions)

[4]


In [None]:
# Gold labels gathered by the inference cell above.
print(true_labels)

[3]


In [None]:
# Accuracy in percent: share of positions where prediction and label agree.
predicted_tensor = torch.tensor(all_predictions)
label_tensor = torch.tensor(true_labels)

correct = (predicted_tensor == label_tensor).sum()
accuracy = correct/len(true_labels)

accuracy.item() * 100

66.66666865348816

In [None]:
# Number of dropout forward passes averaged per sample by the dropout
# inference cell below.
k=120

In [None]:
#basic dropout inference
#
# For every test sample the encoder runs once; the dropout + linear head is
# then applied k times and the k softmax distributions are averaged before the
# argmax is taken.
# NOTE(review): the k passes only differ if `dropout` is still in training
# mode — confirm where it is defined.

if k < 1:
    raise ValueError("k must be at least 1 to average dropout passes")

model.to(device)

model.eval()

# The model predicts every sample of the test dataloader; the predictions and
# the gold labels are stored in these two lists.
all_predictions = []
true_labels = []

with torch.no_grad():

    for inputs,labels in notebook.tqdm(inference_dataloader):

        inputs = inputs.to(device)

        pred = model(**inputs)

        # Index 1 of the encoder output is used as the pooled representation.
        pooled_output = pred[1]

        softmax_predict = None

        for _ in range(k):

            logits = linear(dropout(pooled_output))

            # One row per question, one column per answer choice.
            reshaped_logits = logits.view(-1,num_choices)

            # dim is passed explicitly; relying on the implicit softmax
            # dimension is deprecated and emits a warning.
            probabilities = F.softmax(reshaped_logits, dim=-1)

            if softmax_predict is None:
                softmax_predict = probabilities
            else:
                softmax_predict = softmax_predict + probabilities

        softmax_predict = softmax_predict/k

        top_choices = torch.argmax(softmax_predict, dim=-1)

        all_predictions.extend(top_choices.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())

In [None]:
# Predictions gathered by the inference cell above.
print(all_predictions)

[3]


In [None]:
# Gold labels gathered by the inference cell above.
print(true_labels)

[3]


In [None]:
# Accuracy in percent: share of positions where prediction and label agree.
predicted_tensor = torch.tensor(all_predictions)
label_tensor = torch.tensor(true_labels)

correct = (predicted_tensor == label_tensor).sum()
accuracy = correct/len(true_labels)

accuracy.item() * 100

0.0

In [None]:
#test time augmentation inference
#
# Each test sample is scored together with the augmented versions of its
# context returned by eda(); the softmax distributions of all views are
# averaged and the argmax of the average is the prediction.

model.to(device)

model.eval()

all_predictions = []

true_labels = []

for i in notebook.tqdm(range(len(inference_dataset))):

    # Fetch the sample once and reuse it for the original and augmented views.
    original_sample = inference_dataset[i]

    context,question,options,ans = original_sample

    # View 0 is the original sample; the remaining views only differ in the
    # context text.
    tta_samples = [original_sample]
    tta_samples.extend((augmented,question,options,ans) for augmented in eda(context))

    # A local loader is used so the global `inference_dataloader` that other
    # cells rely on is not overwritten.
    tta_dataloader = DataLoader(tta_samples,
                            batch_size=1,
                            collate_fn=model_collate_fn)

    softmax_predict = None

    with torch.no_grad():

        for ind,(inputs,labels) in enumerate(tta_dataloader):

            inputs = inputs.to(device)

            pred = model(**inputs)

            # Index 1 of the encoder output is used as the pooled
            # representation.
            pooled_output = pred[1]

            logits = linear(pooled_output)

            # One row per question, one column per answer choice.
            reshaped_logits = logits.view(-1,num_choices)

            # dim is passed explicitly; relying on the implicit softmax
            # dimension is deprecated and emits a warning.
            probabilities = F.softmax(reshaped_logits, dim=-1)

            if ind == 0:

                # All views share one label, so it is recorded once.
                true_labels.extend(labels.cpu().numpy())

                softmax_predict = probabilities

            else:

                softmax_predict = softmax_predict + probabilities

    # Average over the number of views actually scored.
    softmax_predict = softmax_predict/len(tta_samples)

    top_choices = torch.argmax(softmax_predict, dim=-1)

    all_predictions.extend(top_choices.cpu().numpy())

0it [00:00, ?it/s]



tensor([4], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')


In [None]:
# Predictions gathered by the inference cell above.
print(all_predictions)

[1]


In [None]:
# Gold labels gathered by the inference cell above.
print(true_labels)

[3]


In [None]:
# Accuracy in percent: share of positions where prediction and label agree.
predicted_tensor = torch.tensor(all_predictions)
label_tensor = torch.tensor(true_labels)

correct = (predicted_tensor == label_tensor).sum()
accuracy = correct/len(true_labels)

accuracy.item() * 100

0.0