뉴스 토픽 분류 AI 경진대회의 코드를 주로 참고해 대회를 진행했습니다.

In [None]:
import os 
import re 
import copy 
import time
import json
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder


from googletrans import Translator
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torch.utils.data import Dataset, DataLoader
from torch.nn.modules.loss import _WeightedLoss
from transformers import logging, AutoTokenizer, AutoModelForSequenceClassification, AutoModel
logging.set_verbosity_error()


import warnings
warnings.filterwarnings(action='ignore')

In [None]:
# KLUE data load
json_train_path = 'klue-nli-v1.1_train.json'
json_test_path = 'klue-nli-v1.1_dev.json'

with open(json_train_path, 'r', encoding="utf-8") as f:
    json_train = json.load(f)
with open(json_test_path, 'r', encoding="utf-8") as f:
    json_test = json.load(f)
    
json_train_df = pd.DataFrame(json_train)[['premise','hypothesis','gold_label']]
json_test_df = pd.DataFrame(json_test)[['premise','hypothesis','gold_label']]        
json_train_df.rename(columns = {'gold_label' : 'label'}, inplace = True)
json_test_df.rename(columns = {'gold_label' : 'label'}, inplace = True)
df1 = pd.concat([json_train_df, json_test_df]).reset_index(drop=True)

df_grp1 = df1.groupby(df1.columns.tolist()) # 전체 열 비교
df_di1 = df_grp1.groups # 딕셔너리로 만들기 
idx_T1 = [x[0] for x in df_di1.values() if len(x) == 1] # 중복X 인덱스 검토
idx_F1 = [x[0] for x in df_di1.values() if not len(x) == 1] # 중복O 인덱스 검토
df_concated1 = pd.concat([df1.loc[idx_T1,:], df1.loc[idx_F1,:]])
df_concated1 = df_concated1.dropna(how='any') # Null 값이 존재하는 행 제거
df_concated1 = df_concated1.reset_index(drop=True)

## Back-Translation

문장을 번역할 때, papago와 google translator를 사용
일차적으로 papago를 사용하여 번역을 진행한 후, 번역이 안되는 경우 부가적으로 google translator를 이용하여 번역을 시도
1-step 번역이 진행됨에 따라 공백 또는 번역이 이루어지지 않는 데이터를 nan값으로 처리 후 재번역 시도
2-step 번역이 진행됨에 따라 일부분만 번역된 경우 또한 재번역 시도
3-step 재번역에도 일부분 번역이 안되는 단어의 경우 문장에서 분리 후 번역기로 해당 단어만 번역 시도
4-step 한글 -> 영어로 번역 시, 번역된 문장이 기존 문장의 길이에 대한 비율 0.5이하이면 재번역 시도
BackTranslation을 진행할 때 papago API를 이용한 것이 아니라 오래 걸림.

In [None]:
def chrome_setting(path):
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    driver = webdriver.Chrome(path, options=chrome_options)
    return driver

def google_translation(dataset, raw_text_list, sk, tk):
    raw_trans_list = []
    for idx in tqdm(raw_text_list):
        translator = Translator()
        translator.raise_Exception = True
        trans = translator.translate(dataset.iloc[idx], src=sk, dest=tk)
        time.sleep(1.5)
        raw_trans_list.append(trans.text)
    
    return raw_trans_list

def papago_translation(text_data, sk, tk, driver, save_name=None, index_list=None, mode='save'):
    target_present = EC.presence_of_element_located((By.XPATH, '//*[@id="txtTarget"]'))
    trans_list = []
    
    if index_list is not None:
        final_index = index_list
    else:
        final_index = range(len(text_data))
    
    for idx in tqdm(final_index): 
        try:
            driver.get('https://papago.naver.com/?sk='+sk+'&tk='+tk+'&st='+text_data.iloc[idx])
            time.sleep(1.5)
            element=WebDriverWait(driver, 10).until(target_present)
            backtrans = element.text 

            if (backtrans=='')|(backtrans==' '):
                element=WebDriverWait(driver, 10).until(target_present)
                backtrans = element.text 
                time.sleep(0.1)
                trans_list.append(backtrans)
            else:
                trans_list.append(backtrans)
        except:
            trans_list.append('')
    
        if mode == 'save':
            if idx%100==0:
                np.save(save_name+'_{}_{}.npy'.format(0,(final_index[-1] + 1)),trans_list)
    
    driver.close()
    driver.quit()  
    os.system('killall chrome') 

    if mode == 'save':    
        np.save(save_name+'_{}_{}.npy'.format(0,(final_index[-1] + 1)),trans_list)
        print(save_name+'_{}_to_{} is translated!'.format(sk, tk))
    else:    
        return trans_list
    
    
def nan_list_retranslation(raw_array_df, train, col_name, sk, tk, path):
    
    print('nan_list re-translation.')
    # 번역이 안 된 문장이 존재하는 경우 재번역  
    raw_array_df[col_name].replace('', np.nan, inplace=True)
    raw_array_df[col_name].replace(' ', np.nan, inplace=True)
    nan_list = raw_array_df[raw_array_df[col_name].isnull()].index

    if len(nan_list) != 0:
        count = 0
        
        while len(nan_list) != 0:
            driver = chrome_setting(path)
            if count < 2:
                re_trans_list = papago_translation(train[col_name], sk, tk, driver, index_list=nan_list, mode='retry') # 원본 데이터 재번역
                
            elif count >= 2:
                re_trans_list = google_translation(train[col_name], nan_list, sk, tk) # google-translator로 재번역
            
            for idx, value in zip(nan_list, re_trans_list): 
                raw_array_df[col_name].iloc[idx] = value   
            
            driver.quit()
            raw_array_df[col_name].replace('', np.nan, inplace=True)
            raw_array_df[col_name].replace(' ', np.nan, inplace=True)

            nan_list = raw_array_df[raw_array_df[col_name].isnull()].index
            count+=1
            if count == 4:
                break
        os.system('killall chrome')
        
    return raw_array_df, nan_list


def hangul_list_retranslation(raw_array_df, train, col_name, sk, tk, path):
    
    print('hangul_list re-translation.')
    # sk -> tk 번역된 문장에 sk가 존재하는 경우 재번역 
    hangul_ind=[]
    for i in range(0,len(raw_array_df)):
        temp=re.findall('[a-zA-Z]',str(raw_array_df[col_name][i]))
        if len(temp)!=0:
            hangul_ind.append(i)

    if len(hangul_ind) != 0:
        count = 0
        
        while len(hangul_ind) != 0:
            driver = chrome_setting(path)
            
            if count < 2:
                re_trans_list = papago_translation(train[col_name], sk, tk, driver, index_list=hangul_ind, mode='retry') # 원본 데이터 재번역
                            
            elif count >= 2:
                re_trans_list = google_translation(train[col_name], hangul_ind, sk, tk) # google-translator로 재번역

            for idx, value in zip(hangul_ind, re_trans_list): 
                raw_array_df[col_name].iloc[idx] = value  
            
            driver.quit()
            hangul_ind=[]
            for i in range(0,len(raw_array_df)):
                temp=re.findall('[a-zA-Z]',str(raw_array_df[col_name][i]))
                if len(temp)!=0:
                    hangul_ind.append(i)
            count+=1
            if count == 4:
                break
            
        os.system('killall chrome')
        
    return raw_array_df, hangul_ind


def hangul_word_translation(raw_array_df, col_name, sk, tk, path):
    print('hangul_word re-translation.')
    hangul_ind=[]
    for i in range(0,len(raw_array_df)):
        temp=re.findall('[a-zA-Z]',str(raw_array_df[col_name][i]))
        if len(temp)!=0:
            hangul_ind.append(i)
                    
    if len(hangul_ind) != 0:
        count = 0
        while len(hangul_ind) != 0:
            
            if count < 1:
                for idx in tqdm(hangul_ind):
                    dictt = {}
                    words_raw = re.sub('[^A-Z a-z]', ' ', raw_array_df[col_name].iloc[idx])
                    words = words_raw.split("  ")
                    words = [x.strip() for x in words if x.strip()]
                    
                    transResult_list = []
                    for text in words: 
                        driver = chrome_setting(path)
                        driver.get('https://papago.naver.com/?sk=' + sk + '&tk=' + tk + '&hn=0&st=')
                        
                        time.sleep(1)
                        driver.find_element_by_xpath('//*[@id="sourceEditArea"]/label').send_keys(text)
                        driver.find_element_by_xpath('//*[@id="btnTranslate"]').click()
                        time.sleep(1.5)
                        transResult = driver.find_element_by_xpath('//*[@id="txtTarget"]').text
                        time.sleep(1)
                        transResult_list.append(transResult)
                        driver.quit()
                        os.system('killall chrome')
                        
                    dictt['word'] = words
                    dictt['translated_word'] = transResult_list

                    for i in range(len(dictt['word'])):
                        raw_array_df[col_name].iloc[idx] = raw_array_df[col_name].iloc[idx].replace(dictt['word'][i],dictt['translated_word'][i])
                
            elif count >= 1:
                for idx in tqdm(hangul_ind):
                    dictt = {}
                    words_raw = re.sub('[^A-Z a-z]', ' ', raw_array_df[col_name].iloc[idx])
                    words = words_raw.split("  ")
                    words = [x.strip() for x in words if x.strip()]
                    
                    transResult_list = []
                    for text in words: 
                        translator = Translator()
                        translator.raise_Exception = True
                        trans = translator.translate(text, src=sk, dest=tk)
                        time.sleep(1.5)
                        transResult_list.append(trans.text)
                        os.system('killall chrome')
                    dictt['word'] = words
                    dictt['translated_word'] = transResult_list 
                    
                    for i in range(len(dictt['word'])):
                        raw_array_df[col_name].iloc[idx] = raw_array_df[col_name].iloc[idx].replace(dictt['word'][i],dictt['translated_word'][i])
                            
            hangul_ind=[]
            for i in range(0,len(raw_array_df)):
                temp=re.findall('[a-zA-Z]',str(raw_array_df[col_name][i]))
                if len(temp)!=0:
                    hangul_ind.append(i)
                    
            count+=1
            if count >= 2:
                break
            
        os.system('killall chrome')
    return raw_array_df

def len_retranslation(raw_array_df, train, col_name, sk, tk, path):
# Attempt to re-translate a translated sentence if the translated sentence has a ratio of less than 0.5 to the length of an existing sentence
    print('len rate re-translation.')    
    retrans_ind=[]
    for i in range(0,len(raw_array_df)):
        if len(raw_array_df[col_name][i])/len(df_concated1[col_name][i])<=0.5:
            retrans_ind.append(i)
            
    retrans_ind=list(set(retrans_ind))
    count = 0
    
    while len(retrans_ind) != 0:
        driver = chrome_setting(path)
        raw_trans_list = google_translation(train[col_name], retrans_ind, sk, tk)
        
        for idx, value in zip(retrans_ind, raw_trans_list): 
            raw_array_df[col_name].iloc[idx] = value  
            
        retrans_ind=[]
        for i in range(0,len(raw_array_df)):
            if len(raw_array_df[col_name][i])/len(train[col_name][i])<=0.5:
                retrans_ind.append(i)
        
        retrans_ind=list(set(retrans_ind))   
        driver.quit()
        
        count+=1
        if count >= 2:
            break
        
        os.system('killall chrome')
    return raw_array_df

한글 -> 영어 번역

영어로 번역된 데이터 npy로 저장 후, 번역이 제대로 진행되지 않은 부분을 위해 저장된 npy 로드하여 재번역 시도

In [None]:
for column in ['premise', 'hypothesis']:    
    print('Col_name : '+column)

    set_setting = {'path':'/chromedriver', # your chrome driver path
                    'col_name':column,
                    'sk':'ko',
                    'tk':'en',
                    'final_save_name':'to_eng_{}'.format(column)}

    path = set_setting['path']
    col_name = set_setting['col_name']
    sk = set_setting['sk']
    tk = set_setting['tk']
    final_save_name = set_setting['final_save_name']

    driver = chrome_setting(path)
    back_translation_file = 'to_eng_{}_0_27996.npy'.format(col_name)
    
    # to_eng_premise_0_27996.npy, to_eng_hypothesis_0_27996.npy 생성 
    papago_translation(df_concated1[col_name], sk, tk, driver, save_name=final_save_name) 

    raw_array = np.load('{}'.format(back_translation_file))
    raw_array_df = pd.DataFrame(raw_array, columns=[col_name])

    nan_list = [1]
    hangul_ind = [1]
    driver.quit()
    os.system('killall chrome')
    
    # retry
    for ii in range(2):
        raw_array_df, nan_list = nan_list_retranslation(raw_array_df, df_concated1, col_name, sk, tk, path)
        raw_array_df, hangul_ind = hangul_list_retranslation(raw_array_df, df_concated1, col_name, sk, tk, path)
        raw_array_df = hangul_word_translation(raw_array_df, col_name, sk, tk, path)
        raw_array_df = len_retranslation(raw_array_df, df_concated1, col_name, sk, tk, path) 
    
    #  ko -> en 번역된 데이터 csv 파일로 저장. ex) to_eng_premise.csv, to_eng_hypothesis.csv
    raw_array_df.to_csv('{}.csv'.format(final_save_name), index=False) 

영어 -> 한글 번역

한글로 번역된 데이터 npy로 저장 후, 번역이 제대로 진행되지 않은 부분을 위해 저장된 npy 로드하여 재번역 시도

같은 의미의 문장이라도 한글 대비 영어 문장이 길다고 판단되어 길이에 대한 비율에 따른 재번역은 진행 X

In [None]:
for column in ['premise', 'hypothesis']:    
    print('Col_name : '+column)

    set_setting = {'path':'/chromedriver', # your chrome driver path
                    'col_name':column,
                    'sk':'en',
                    'tk':'ko',
                    'final_save_name':'to_kor_{}'.format(column)}

    path = set_setting['path']
    col_name = set_setting['col_name']
    sk = set_setting['sk']
    tk = set_setting['tk']
    final_save_name = set_setting['final_save_name']

    driver = chrome_setting(path)
    eng_data = pd.read_csv('to_eng_{}_0_27996.csv'.format(col_name))
    back_translation_file = 'to_kor_{}_0_27996.npy'.format(col_name)
    
    # to_kor_premise_0_27996.npy, to_kor_hypothesis_0_27996.npy 생성 
    papago_translation(eng_data[col_name], sk, tk, driver, save_name=final_save_name)

    raw_array = np.load('{}'.format(back_translation_file))
    raw_array_df = pd.DataFrame(raw_array, columns=[col_name])

    nan_list = [1]
    hangul_ind = [1]
    driver.quit()
    os.system('killall chrome')
    
    # retry
    for ii in range(2):
        raw_array_df, nan_list = nan_list_retranslation(raw_array_df, eng_data, col_name, sk, tk, path)
        raw_array_df, hangul_ind = hangul_list_retranslation(raw_array_df, eng_data, col_name, sk, tk, path)
        raw_array_df = hangul_word_translation(raw_array_df, col_name, sk, tk, path)
    
    #  en -> ko 번역된 데이터 csv 파일로 저장. ex) to_kor_premise.csv, to_kor_hypothesis.csv
    raw_array_df.to_csv('{}.csv'.format(final_save_name), index=False)

### Sentence BERT(SBERT)
번역된 데이터와 원본 데이터와의 유사도를 구해 유사도가 0.9 이상인 데이터만을 학습에 사용

In [None]:
def pytorch_cos_sim(a: Tensor, b: Tensor):
    """
    Computes the cosine similarity cos_sim(a[i], b[j]) for all i and j.
    :return: Matrix with res[i][j]  = cos_sim(a[i], b[j])
    """
    return cos_sim(a, b)

def cos_sim(a: Tensor, b: Tensor):
    """
    Computes the cosine similarity cos_sim(a[i], b[j]) for all i and j.
    :return: Matrix with res[i][j]  = cos_sim(a[i], b[j])
    """
    if not isinstance(a, torch.Tensor):
        a = torch.tensor(a)

    if not isinstance(b, torch.Tensor):
        b = torch.tensor(b)

    if len(a.shape) == 1:
        a = a.unsqueeze(0)

    if len(b.shape) == 1:
        b = b.unsqueeze(0)

    a_norm = torch.nn.functional.normalize(a, p=2, dim=1)
    b_norm = torch.nn.functional.normalize(b, p=2, dim=1)
    return torch.mm(a_norm, b_norm.transpose(0, 1))

In [None]:
back_premise = pd.read_csv('to_kor_premise.csv')
bcak_hypothesis = pd.read_csv('to_kor_hypothesis.csv')

sim_data = copy.copy(df_concated1)
sim_data['back_premise'] = back_premise
sim_data['back_hypothesis'] = bcak_hypothesis

sim_data.replace('', np.nan, inplace=True)
sim_data.replace(' ', np.nan, inplace=True)
nan_list = [index for index, row in sim_data.iterrows() if row.isnull().any()]

hangul_ind=[]
for i in range(0,len(sim_data)):
    temp=re.findall('[a-zA-Z]',str(sim_data.drop(columns=['label']).iloc[i].values))
    if len(temp)!=0:
        hangul_ind.append(i)
        
sim_data.drop(index = hangul_ind, inplace=True)
sim_data = sim_data[sim_data['back_premise'].apply(lambda x: len(x)>=19)] # 'premise' 글자 수 18 미만인 데이터 제거 
sim_data = sim_data[sim_data['back_premise'].apply(lambda x: len(x)<=90)] # 'premise' 글자 수 89 초과인 데이터 제거
sim_data = sim_data[sim_data['back_hypothesis'].apply(lambda x: len(x)>=5)] # 'hypothesis' 글자 수 5 미만인 데이터 제거
sim_data = sim_data[sim_data['back_hypothesis'].apply(lambda x: len(x)<=103)] # 'hypothesis' 글자 수 103 초과인 데이터 제거

sim_data = sim_data.reset_index(drop=True)
sim_data[['back_premise', 'back_hypothesis']] = sim_data[['back_premise', 'back_hypothesis']].applymap(lambda x: x.strip() if isinstance(x, str) else x)

In [None]:
#Mean Pooling - Take attention mask into account for correct averaging
def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[0] #First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

# Load model from HuggingFace Hub
tokenizer = AutoTokenizer.from_pretrained('bespin-global/klue-sentence-roberta-base')
model = AutoModel.from_pretrained('bespin-global/klue-sentence-roberta-base')

results={}
# for col_name in ['premise']:
for col_name in ['premise', 'hypothesis']:

    # Tokenize sentences
    corpus_input = tokenizer(list(sim_data[col_name]), max_length=103, padding='max_length', truncation=True, return_tensors='pt')
    queries_input = tokenizer(list(sim_data['back_'+col_name]), max_length=103, padding='max_length', truncation=True, return_tensors='pt')

    # Compute token embeddings
    with torch.no_grad():
        corpus_output = model(**corpus_input)
        queries_output = model(**queries_input)

    # Perform pooling. In this case, mean pooling.
    corpus_embeddings = mean_pooling(corpus_output, corpus_input['attention_mask'])
    queries_embeddings = mean_pooling(queries_output, corpus_input['attention_mask'])
    results['corpus'] = corpus_embeddings
    results['queries'] = queries_embeddings
    
    cos_similarity = torch.diagonal(pytorch_cos_sim(results['corpus'], results['queries']), 0)
    sim_data[col_name+'_cos'] = cos_similarity

In [None]:
condition = (sim_data.premise_cos >= 0.9) & (sim_data.hypothesis_cos >= 0.9) # 조건식 작성
back_train = sim_data[condition]
back_train = back_train.reset_index(drop=True)
# Augmentation data save to csv file
back_train[['back_premise', 'back_hypothesis', 'label']].to_csv('./data/Augmented_data.csv', index=False)

## Model

DACON TRAIN DATA와 KLUE DATA를 기본적으로 사용해 모델을 구축했습니다.
 
Model 1. Roberta-large + 10 epoch 학습

Model 2. Koelectra + 10 epoch 학습

Model 3. Dacon data + Klue data + KORNLI data 
( 앞 20000개 데이터 중 length가 100보다 작은 KORNLI 데이터 추가 ),
Roberta + 10 epoch 학습

을 학습시켜 보팅한 값을 결과로 제출했습니다.


### SEED : 42 / COLAB PRO GPU 점검 / SET LIBRARY

In [1]:
!nvidia-smi

Fri Mar  4 05:13:14 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P0    27W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
pip install transformers



In [4]:
# ignore warning message
import warnings 
warnings.filterwarnings("ignore")

# get dir
import os
os.chdir('/content/drive/MyDrive/nli')

# seed, preprocessing, for time check
from tqdm import tqdm, tqdm_notebook
import time
import random
import pandas as pd 
import numpy as np 
import re
from collections import Counter

# modeling
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset

from transformers import AutoTokenizer, ElectraForSequenceClassification, AutoModel, AutoModelForSequenceClassification
from transformers import AdamWeightDecay, AdamW
from transformers.optimization import get_cosine_schedule_with_warmup, get_linear_schedule_with_warmup

In [5]:
def get_seed(seed:int = 2022):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  
    torch.backends.cudnn.deterministic = True 
    torch.backends.cudnn.benchmark = True  

get_seed(42)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


전처리 전에 KLUE data를 불러와서 추가해줍니다.

KLUE DATA NOTICE : https://klue-benchmark.com/tasks/68/data/download

KLUE LINK : https://aistages-prod-server-public.s3.amazonaws.com/app/Competitions/000068/data/klue-nli-v1.1.tar.gz

In [6]:
pd.read_json('./data/klue-nli-v1.1/klue-nli-v1.1_train.json', encoding='utf8')[['premise','hypothesis','gold_label']].to_csv('./data/klue_train.csv', index=False)
pd.read_json('./data/klue-nli-v1.1/klue-nli-v1.1_dev.json', encoding='utf8')[['premise','hypothesis','gold_label']].to_csv('./data/klue_dev.csv', index=False)

Dacon data와 KLUE data를 불러오는 코드를 작성. ( prep_dat )

Dacon data와 KLUE data에 길이가 KORNLI 앞의 20000개 데이터 중 100보다 작은 데이터를 추가로 불러오는 코드를 작성. ( prep_dat2 )

NOTE) 제일 긴 문장의 경우 길이가 102인 것을 확인하였고, Tokenizer의 max_length를 100으로 설정.

In [10]:
def prep_dat():

    ## get train
    train = pd.read_csv('./data/Augmented_data.csv')
    train.drop(['index'], axis=1, inplace=True)

    ## using KLUE data 
    klue_train = pd.read_csv('./data/klue_train.csv')
    klue_dev = pd.read_csv('./data/klue_dev.csv')
    klue_train.columns = ['premise', 'hypothesis', 'label']
    klue_dev.columns = ['premise', 'hypothesis', 'label']

    ## using 20000 KORNLI data
    # kornli_train = pd.read_csv('snli_1.0_train.ko.tsv.txt', delimiter = '\t')
    # kornli_train.columns = ['premise', 'hypothesis', 'label']
    # kornli_train = kornli_train.loc[kornli_train['premise'].str.len() < 100]
    # kornli_train = kornli_train.loc[kornli_train['hypothesis'].str.len() < 100]
    # kornli_train = kornli_train[:20001] 

    ## get test and submission
    test = pd.read_csv('./data/test_data.csv')
    sub = pd.read_csv('./data/sample_submission.csv')

    ## concat
    train = pd.concat([train,klue_train], ignore_index=True)
    train = pd.concat([train,klue_dev], ignore_index=True, join='inner')
    # train = pd.concat([train,kornli_train], ignore_index=True, join='inner')
    train.dropna(inplace=True)
    train.drop_duplicates(inplace=True,ignore_index=True)

    ## matching label
    label_match = {"entailment" : 0, "contradiction" : 1, "neutral" : 2}
    train['label'] = train['label'].map(label_match)

    ## re : 1
    train['premise'] = train['premise'].map(lambda x: re.sub('[-=+.,#/\?:^$.@*\"※~&%ㆍ!』\\‘|\(\)\[\]\<\>`\'…》]', '', x))
    train['hypothesis'] = train['hypothesis'].map(lambda x: re.sub('[-=+.,#/\?:^$.@*\"※~&%ㆍ!』\\‘|\(\)\[\]\<\>`\'…》]', '', x))
    test['premise'] = test['premise'].map(lambda x: re.sub('[-=+.,#/\?:^$.@*\"※~&%ㆍ!』\\‘|\(\)\[\]\<\>`\'…》]', '', x))
    test['hypothesis'] = test['hypothesis'].map(lambda x: re.sub('[-=+.,#/\?:^$.@*\"※~&%ㆍ!』\\‘|\(\)\[\]\<\>`\'…》]', '', x))

    ## re : 2
    train['premise'] = train['premise'].map(lambda x: re.sub('[-=+,#:;//●<>▲\?:^$.☆!★()Ⅰ@*\"※~>`\'…》→←]', '', x))
    train['hypothesis'] = train['hypothesis'].map(lambda x: re.sub('[-=+,#:;//●<>▲\?:^$.☆!★()Ⅰ@*\"※~>`\'…》→←]', '', x))
    test['premise'] = test['premise'].map(lambda x: re.sub('[-=+,#:;//●<>▲\?:^$.☆!★()Ⅰ@*\"※~>`\'…》→←]', '', x))
    test['hypothesis'] = test['hypothesis'].map(lambda x: re.sub('[-=+,#:;//●<>▲\?:^$.☆!★()Ⅰ@*\"※~>`\'…》→←]', '', x))

    return train,test,sub 

def prep_dat2():

    ## get train
    train = pd.read_csv('./data/train_data.csv')
    train.drop(['index'], axis=1, inplace=True)

    ## using KLUE data 
    klue_train = pd.read_csv('./data/klue_train.csv')
    klue_dev = pd.read_csv('./data/klue_dev.csv')
    klue_train.columns = ['premise', 'hypothesis', 'label']
    klue_dev.columns = ['premise', 'hypothesis', 'label']

    # using 20000 KORNLI data conditioning on length < 100
    kornli_train = pd.read_csv('./data/snli_1.0_train.ko.tsv.txt', delimiter = '\t')
    kornli_train.columns = ['premise', 'hypothesis', 'label']
    kornli_train = kornli_train.loc[kornli_train['premise'].str.len() < 100]
    kornli_train = kornli_train.loc[kornli_train['hypothesis'].str.len() < 100]
    kornli_train = kornli_train[:20001] 

    ## get test and submission
    test = pd.read_csv('./data/test_data.csv')
    sub = pd.read_csv('./data/sample_submission.csv')

    ## concat
    train = pd.concat([train,klue_train], ignore_index=True)
    train = pd.concat([train,klue_dev], ignore_index=True, join='inner')
    train = pd.concat([train,kornli_train], ignore_index=True, join='inner')
    train.dropna(inplace=True)
    train.drop_duplicates(inplace=True,ignore_index=True)

    ## matching label
    label_match = {"entailment" : 0, "contradiction" : 1, "neutral" : 2}
    train['label'] = train['label'].map(label_match)

    ## re : 1
    train['premise'] = train['premise'].map(lambda x: re.sub('[-=+.,#/\?:^$.@*\"※~&%ㆍ!』\\‘|\(\)\[\]\<\>`\'…》]', '', x))
    train['hypothesis'] = train['hypothesis'].map(lambda x: re.sub('[-=+.,#/\?:^$.@*\"※~&%ㆍ!』\\‘|\(\)\[\]\<\>`\'…》]', '', x))
    test['premise'] = test['premise'].map(lambda x: re.sub('[-=+.,#/\?:^$.@*\"※~&%ㆍ!』\\‘|\(\)\[\]\<\>`\'…》]', '', x))
    test['hypothesis'] = test['hypothesis'].map(lambda x: re.sub('[-=+.,#/\?:^$.@*\"※~&%ㆍ!』\\‘|\(\)\[\]\<\>`\'…》]', '', x))

    ## re : 2
    train['premise'] = train['premise'].map(lambda x: re.sub('[-=+,#:;//●<>▲\?:^$.☆!★()Ⅰ@*\"※~>`\'…》→←]', '', x))
    train['hypothesis'] = train['hypothesis'].map(lambda x: re.sub('[-=+,#:;//●<>▲\?:^$.☆!★()Ⅰ@*\"※~>`\'…》→←]', '', x))
    test['premise'] = test['premise'].map(lambda x: re.sub('[-=+,#:;//●<>▲\?:^$.☆!★()Ⅰ@*\"※~>`\'…》→←]', '', x))
    test['hypothesis'] = test['hypothesis'].map(lambda x: re.sub('[-=+,#:;//●<>▲\?:^$.☆!★()Ⅰ@*\"※~>`\'…》→←]', '', x))
    
    return train,test,sub 


def correct_token(dat):
    dat['token_sen'] = "[CLS]" + dat["premise"] + "[SEP]" + " " + dat["hypothesis"] + "[SEP]"
    dat = dat[['token_sen','label']]
    return dat 

In [11]:
## get DATA
train, test, sub = prep_dat()
correct_train, correct_test = correct_token(train), correct_token(test)

## token size를 100으로 결정
max_len = np.max(train['premise'].str.len())
min_len = np.min(train['premise'].str.len())
mean_len = np.mean(train['premise'].str.len())

print('Max Premise Length: ', max_len)
print('Min Premise Length: ', min_len)
print('Mean Premise Lenght: ', mean_len, '\n')

max_len = np.max(train['hypothesis'].str.len())
min_len = np.min(train['hypothesis'].str.len())
mean_len = np.mean(train['hypothesis'].str.len())

print('Max Hypothesis Length: ', max_len)
print('Min Hypothesis Length: ', min_len)
print('Mean Hypothesis Lenght: ', mean_len)

Max Premise Length:  89
Min Premise Length:  18
Mean Premise Lenght:  44.161737391055865 

Max Hypothesis Length:  102
Min Hypothesis Length:  4
Mean Hypothesis Lenght:  23.937312473210458


In [12]:
class NLIDataset(Dataset):
  
  def __init__(self, dataset, option, modelname):
    self.dataset = dataset 
    self.option = option
    self.tokenizer = AutoTokenizer.from_pretrained(modelname)
  
  def __len__(self):
    return len(self.dataset)
  
  def __getitem__(self, idx):
    row = self.dataset.iloc[idx, 0:2].values  
    text = row[0]

    inputs = self.tokenizer(
        text, 
        return_tensors='pt',
        max_length=100, 
        truncation=True,
        pad_to_max_length=True,
        add_special_tokens=False
        )
    
    input_ids = inputs['input_ids'][0]
    attention_mask = inputs['attention_mask'][0]
    
    if self.option =='train':
        y = row[1]
        return input_ids, attention_mask, y

    return input_ids, attention_mask


## Model 1. Dacon data와 Klue data를 사용, Roberta-large + 10 epoch 학습

In [None]:
## DEFINE MODEL PARA
model_name = 'klue/roberta-large'
# model_name = 'monologg/koelectra-base-v3-discriminator'
num_epochs = 10 
batch_size = 10 
lr = 1e-5

## get DATA
train, test, sub = prep_dat()
correct_train, correct_test = correct_token(train), correct_token(test)
train_dataset = NLIDataset(correct_train, 'train', model_name)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

## DECLARE MODEL
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
model = nn.DataParallel(model).to(device)

optimizer = AdamW(model.parameters(), lr=lr, eps = 1e-8)
warmup_ratio = 0.1
total_steps = len(train_loader) * num_epochs
scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=500, num_training_steps=total_steps) 


## TRAINING PROCESS
start_time = time.time()
for epoch in tqdm(range(num_epochs)):
    print('-------------------------------------- Training Begin! --------------------------------------')
    batch_num = 0; eq_sum = 0; y_len =0

    model.train()  
    
    for input_ids_batch, attention_masks_batch, y_batch in train_loader:
        optimizer.zero_grad()  
        y_batch = y_batch.to(device)  
        y_pred = model(input_ids_batch.to(device), attention_mask = attention_masks_batch.to(device))[0] 
        loss = F.cross_entropy(y_pred, y_batch) 
        loss.backward()  
        optimizer.step() 
        scheduler.step()
        _, predicted = torch.max(y_pred, 1)
        eq_sum += (predicted == y_batch).sum()
        y_len += len(y_batch)

        # CHECK PROCESS
        batch_num += 1
        if batch_num % 100 == 0:
            acc = eq_sum.float() / y_len
            print(f'BATCH_NUM : {batch_num}, TRAIN_LOSS : {loss.item():.4f}, TRAIN_ACC : {acc.item():.4f}')
        if batch_num % 1000 == 0:
            print(f'FOR 1000 EPOCH : {time.time() - start_time} SECONDS!')
            start_time = time.time()    

Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'class

-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 1.0321, TRAIN_ACC : 0.3460
BATCH_NUM : 200, TRAIN_LOSS : 1.0845, TRAIN_ACC : 0.3435
BATCH_NUM : 300, TRAIN_LOSS : 0.6294, TRAIN_ACC : 0.4023
BATCH_NUM : 400, TRAIN_LOSS : 0.4136, TRAIN_ACC : 0.4838
BATCH_NUM : 500, TRAIN_LOSS : 0.4041, TRAIN_ACC : 0.5480
BATCH_NUM : 600, TRAIN_LOSS : 0.3009, TRAIN_ACC : 0.5957
BATCH_NUM : 700, TRAIN_LOSS : 0.2262, TRAIN_ACC : 0.6300
BATCH_NUM : 800, TRAIN_LOSS : 0.2258, TRAIN_ACC : 0.6548
BATCH_NUM : 900, TRAIN_LOSS : 0.3027, TRAIN_ACC : 0.6737
BATCH_NUM : 1000, TRAIN_LOSS : 0.4486, TRAIN_ACC : 0.6919
FOR 1000 EPOCH : 382.2381942272186 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.6432, TRAIN_ACC : 0.7072
BATCH_NUM : 1200, TRAIN_LOSS : 0.3845, TRAIN_ACC : 0.7218
BATCH_NUM : 1300, TRAIN_LOSS : 0.3789, TRAIN_ACC : 0.7335
BATCH_NUM : 1400, TRAIN_LOSS : 0.2094, TRAIN_ACC : 0.7446
BATCH_NUM : 1500, TRAIN_LOSS : 0.1762, TRAIN_ACC : 0.7527
B

 10%|█         | 1/10 [17:49<2:40:25, 1069.50s/it]

BATCH_NUM : 2800, TRAIN_LOSS : 0.2412, TRAIN_ACC : 0.8115
-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.2114, TRAIN_ACC : 0.9520
BATCH_NUM : 200, TRAIN_LOSS : 0.0328, TRAIN_ACC : 0.9450
BATCH_NUM : 300, TRAIN_LOSS : 0.0067, TRAIN_ACC : 0.9383
BATCH_NUM : 400, TRAIN_LOSS : 0.3061, TRAIN_ACC : 0.9373
BATCH_NUM : 500, TRAIN_LOSS : 0.2947, TRAIN_ACC : 0.9348
BATCH_NUM : 600, TRAIN_LOSS : 0.0263, TRAIN_ACC : 0.9343
BATCH_NUM : 700, TRAIN_LOSS : 0.6833, TRAIN_ACC : 0.9337
BATCH_NUM : 800, TRAIN_LOSS : 0.1554, TRAIN_ACC : 0.9334
BATCH_NUM : 900, TRAIN_LOSS : 0.1559, TRAIN_ACC : 0.9329
BATCH_NUM : 1000, TRAIN_LOSS : 0.2226, TRAIN_ACC : 0.9358
FOR 1000 EPOCH : 687.5083591938019 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0404, TRAIN_ACC : 0.9359
BATCH_NUM : 1200, TRAIN_LOSS : 0.4801, TRAIN_ACC : 0.9368
BATCH_NUM : 1300, TRAIN_LOSS : 0.2868, TRAIN_ACC : 0.9372
BATCH_NUM : 1400, TRAIN_LOSS : 0.3518, TRAIN_ACC : 0.9371
B

 20%|██        | 2/10 [35:38<2:22:34, 1069.35s/it]

BATCH_NUM : 2800, TRAIN_LOSS : 0.0740, TRAIN_ACC : 0.9342
-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0103, TRAIN_ACC : 0.9690
BATCH_NUM : 200, TRAIN_LOSS : 0.0048, TRAIN_ACC : 0.9745
BATCH_NUM : 300, TRAIN_LOSS : 0.0022, TRAIN_ACC : 0.9710
BATCH_NUM : 400, TRAIN_LOSS : 0.1883, TRAIN_ACC : 0.9713
BATCH_NUM : 500, TRAIN_LOSS : 0.0287, TRAIN_ACC : 0.9718
BATCH_NUM : 600, TRAIN_LOSS : 0.2509, TRAIN_ACC : 0.9738
BATCH_NUM : 700, TRAIN_LOSS : 0.0052, TRAIN_ACC : 0.9733
BATCH_NUM : 800, TRAIN_LOSS : 0.0096, TRAIN_ACC : 0.9725
BATCH_NUM : 900, TRAIN_LOSS : 0.3013, TRAIN_ACC : 0.9713
BATCH_NUM : 1000, TRAIN_LOSS : 0.0034, TRAIN_ACC : 0.9715
FOR 1000 EPOCH : 687.231698513031 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0493, TRAIN_ACC : 0.9709
BATCH_NUM : 1200, TRAIN_LOSS : 0.0137, TRAIN_ACC : 0.9709
BATCH_NUM : 1300, TRAIN_LOSS : 0.0601, TRAIN_ACC : 0.9708
BATCH_NUM : 1400, TRAIN_LOSS : 0.2031, TRAIN_ACC : 0.9702
BA

 30%|███       | 3/10 [53:27<2:04:44, 1069.18s/it]

BATCH_NUM : 2800, TRAIN_LOSS : 0.0551, TRAIN_ACC : 0.9697
-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0018, TRAIN_ACC : 0.9870
BATCH_NUM : 200, TRAIN_LOSS : 0.0506, TRAIN_ACC : 0.9840
BATCH_NUM : 300, TRAIN_LOSS : 0.0409, TRAIN_ACC : 0.9820
BATCH_NUM : 400, TRAIN_LOSS : 0.0052, TRAIN_ACC : 0.9845
BATCH_NUM : 500, TRAIN_LOSS : 0.0077, TRAIN_ACC : 0.9834
BATCH_NUM : 600, TRAIN_LOSS : 0.0111, TRAIN_ACC : 0.9842
BATCH_NUM : 700, TRAIN_LOSS : 0.0163, TRAIN_ACC : 0.9831
BATCH_NUM : 800, TRAIN_LOSS : 0.2081, TRAIN_ACC : 0.9825
BATCH_NUM : 900, TRAIN_LOSS : 0.1103, TRAIN_ACC : 0.9824
BATCH_NUM : 1000, TRAIN_LOSS : 0.6945, TRAIN_ACC : 0.9823
FOR 1000 EPOCH : 687.0909006595612 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0093, TRAIN_ACC : 0.9818
BATCH_NUM : 1200, TRAIN_LOSS : 0.0038, TRAIN_ACC : 0.9819
BATCH_NUM : 1300, TRAIN_LOSS : 0.0056, TRAIN_ACC : 0.9823
BATCH_NUM : 1400, TRAIN_LOSS : 0.0160, TRAIN_ACC : 0.9830
B

 40%|████      | 4/10 [1:11:16<1:46:54, 1069.03s/it]

BATCH_NUM : 2800, TRAIN_LOSS : 0.0027, TRAIN_ACC : 0.9826
-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0022, TRAIN_ACC : 0.9900
BATCH_NUM : 200, TRAIN_LOSS : 0.0588, TRAIN_ACC : 0.9925
BATCH_NUM : 300, TRAIN_LOSS : 0.0008, TRAIN_ACC : 0.9933
BATCH_NUM : 400, TRAIN_LOSS : 0.0007, TRAIN_ACC : 0.9938
BATCH_NUM : 500, TRAIN_LOSS : 0.0024, TRAIN_ACC : 0.9930
BATCH_NUM : 600, TRAIN_LOSS : 0.0015, TRAIN_ACC : 0.9928
BATCH_NUM : 700, TRAIN_LOSS : 0.0006, TRAIN_ACC : 0.9930
BATCH_NUM : 800, TRAIN_LOSS : 0.0104, TRAIN_ACC : 0.9923
BATCH_NUM : 900, TRAIN_LOSS : 0.0028, TRAIN_ACC : 0.9917
BATCH_NUM : 1000, TRAIN_LOSS : 0.0013, TRAIN_ACC : 0.9917
FOR 1000 EPOCH : 687.0102813243866 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0073, TRAIN_ACC : 0.9915
BATCH_NUM : 1200, TRAIN_LOSS : 0.1239, TRAIN_ACC : 0.9912
BATCH_NUM : 1300, TRAIN_LOSS : 0.0038, TRAIN_ACC : 0.9906
BATCH_NUM : 1400, TRAIN_LOSS : 0.0018, TRAIN_ACC : 0.9904
B

 50%|█████     | 5/10 [1:29:05<1:29:04, 1068.93s/it]

BATCH_NUM : 2800, TRAIN_LOSS : 0.0020, TRAIN_ACC : 0.9900
-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0005, TRAIN_ACC : 0.9990
BATCH_NUM : 200, TRAIN_LOSS : 0.0027, TRAIN_ACC : 0.9955
BATCH_NUM : 300, TRAIN_LOSS : 0.6747, TRAIN_ACC : 0.9947
BATCH_NUM : 400, TRAIN_LOSS : 0.4422, TRAIN_ACC : 0.9935
BATCH_NUM : 500, TRAIN_LOSS : 0.0034, TRAIN_ACC : 0.9944
BATCH_NUM : 600, TRAIN_LOSS : 0.0029, TRAIN_ACC : 0.9937
BATCH_NUM : 700, TRAIN_LOSS : 0.8175, TRAIN_ACC : 0.9937
BATCH_NUM : 800, TRAIN_LOSS : 0.0018, TRAIN_ACC : 0.9935
BATCH_NUM : 900, TRAIN_LOSS : 0.4916, TRAIN_ACC : 0.9932
BATCH_NUM : 1000, TRAIN_LOSS : 0.0024, TRAIN_ACC : 0.9931
FOR 1000 EPOCH : 686.8720238208771 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0010, TRAIN_ACC : 0.9932
BATCH_NUM : 1200, TRAIN_LOSS : 0.0018, TRAIN_ACC : 0.9930
BATCH_NUM : 1300, TRAIN_LOSS : 0.0090, TRAIN_ACC : 0.9929
BATCH_NUM : 1400, TRAIN_LOSS : 0.0084, TRAIN_ACC : 0.9930
B

 60%|██████    | 6/10 [1:46:53<1:11:15, 1068.76s/it]

BATCH_NUM : 2800, TRAIN_LOSS : 0.0011, TRAIN_ACC : 0.9930
-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0760, TRAIN_ACC : 0.9990
BATCH_NUM : 200, TRAIN_LOSS : 0.0015, TRAIN_ACC : 0.9980
BATCH_NUM : 300, TRAIN_LOSS : 0.0178, TRAIN_ACC : 0.9980
BATCH_NUM : 400, TRAIN_LOSS : 0.0003, TRAIN_ACC : 0.9980
BATCH_NUM : 500, TRAIN_LOSS : 0.0011, TRAIN_ACC : 0.9982
BATCH_NUM : 600, TRAIN_LOSS : 0.0011, TRAIN_ACC : 0.9982
BATCH_NUM : 700, TRAIN_LOSS : 0.0006, TRAIN_ACC : 0.9980
BATCH_NUM : 800, TRAIN_LOSS : 0.0013, TRAIN_ACC : 0.9974
BATCH_NUM : 900, TRAIN_LOSS : 0.0007, TRAIN_ACC : 0.9976
BATCH_NUM : 1000, TRAIN_LOSS : 0.0006, TRAIN_ACC : 0.9977
FOR 1000 EPOCH : 687.0638766288757 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0003, TRAIN_ACC : 0.9978
BATCH_NUM : 1200, TRAIN_LOSS : 0.0003, TRAIN_ACC : 0.9979
BATCH_NUM : 1300, TRAIN_LOSS : 0.0169, TRAIN_ACC : 0.9977
BATCH_NUM : 1400, TRAIN_LOSS : 0.0013, TRAIN_ACC : 0.9976
B

 70%|███████   | 7/10 [2:04:42<53:26, 1068.81s/it]  

BATCH_NUM : 2800, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9972
-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0005, TRAIN_ACC : 0.9990
BATCH_NUM : 200, TRAIN_LOSS : 0.0003, TRAIN_ACC : 0.9995
BATCH_NUM : 300, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9990
BATCH_NUM : 400, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9988
BATCH_NUM : 500, TRAIN_LOSS : 0.0014, TRAIN_ACC : 0.9988
BATCH_NUM : 600, TRAIN_LOSS : 0.0017, TRAIN_ACC : 0.9987
BATCH_NUM : 700, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9989
BATCH_NUM : 800, TRAIN_LOSS : 0.0021, TRAIN_ACC : 0.9984
BATCH_NUM : 900, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9983
BATCH_NUM : 1000, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9985
FOR 1000 EPOCH : 686.9569048881531 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0004, TRAIN_ACC : 0.9985
BATCH_NUM : 1200, TRAIN_LOSS : 0.0006, TRAIN_ACC : 0.9984
BATCH_NUM : 1300, TRAIN_LOSS : 0.0003, TRAIN_ACC : 0.9985
BATCH_NUM : 1400, TRAIN_LOSS : 0.0004, TRAIN_ACC : 0.9985
B

 80%|████████  | 8/10 [2:22:30<35:37, 1068.67s/it]

BATCH_NUM : 2800, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9985
-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0002, TRAIN_ACC : 1.0000
BATCH_NUM : 200, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9985
BATCH_NUM : 300, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9983
BATCH_NUM : 400, TRAIN_LOSS : 0.0001, TRAIN_ACC : 0.9988
BATCH_NUM : 500, TRAIN_LOSS : 0.0004, TRAIN_ACC : 0.9990
BATCH_NUM : 600, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9992
BATCH_NUM : 700, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9990
BATCH_NUM : 800, TRAIN_LOSS : 0.0043, TRAIN_ACC : 0.9989
BATCH_NUM : 900, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9990
BATCH_NUM : 1000, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9990
FOR 1000 EPOCH : 686.956282377243 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0001, TRAIN_ACC : 0.9991
BATCH_NUM : 1200, TRAIN_LOSS : 0.0004, TRAIN_ACC : 0.9992
BATCH_NUM : 1300, TRAIN_LOSS : 0.0003, TRAIN_ACC : 0.9991
BATCH_NUM : 1400, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9991
BA

 90%|█████████ | 9/10 [2:40:19<17:48, 1068.76s/it]

BATCH_NUM : 2800, TRAIN_LOSS : 0.0003, TRAIN_ACC : 0.9992
-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0002, TRAIN_ACC : 1.0000
BATCH_NUM : 200, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9995
BATCH_NUM : 300, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9997
BATCH_NUM : 400, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9998
BATCH_NUM : 500, TRAIN_LOSS : 0.0003, TRAIN_ACC : 0.9996
BATCH_NUM : 600, TRAIN_LOSS : 0.0001, TRAIN_ACC : 0.9995
BATCH_NUM : 700, TRAIN_LOSS : 0.0001, TRAIN_ACC : 0.9996
BATCH_NUM : 800, TRAIN_LOSS : 0.0004, TRAIN_ACC : 0.9996
BATCH_NUM : 900, TRAIN_LOSS : 0.0003, TRAIN_ACC : 0.9997
BATCH_NUM : 1000, TRAIN_LOSS : 0.0004, TRAIN_ACC : 0.9996
FOR 1000 EPOCH : 687.1064350605011 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0005, TRAIN_ACC : 0.9995
BATCH_NUM : 1200, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9996
BATCH_NUM : 1300, TRAIN_LOSS : 0.0006, TRAIN_ACC : 0.9996
BATCH_NUM : 1400, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9996
B

100%|██████████| 10/10 [2:58:08<00:00, 1068.88s/it]

BATCH_NUM : 2800, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9996





In [None]:
torch.save(model, './model/upload_model1.pth')

In [None]:
test_dataset = NLIDataset(correct_test,'test', model_name)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0)

pred = []; ans = []

m = torch.load('./model/upload_model1.pth')
m.eval()

with torch.no_grad():
    for input_ids_batch, attention_masks_batch in tqdm(test_loader):
        y_pred = m(
            input_ids_batch.to(device),
            attention_mask=attention_masks_batch.to(device)
            )[0].detach().cpu().numpy()
        ans.extend(y_pred.argmax(axis=1))
        
pred.append(ans); pred = np.array(pred).T

pred_mode = []
for i in tqdm(range(len(test_dataset))):
    cnt = Counter(pred[i])
    pred_mode.append(cnt.most_common()[0][0])

label_match = {0:"entailment" , 1: "contradiction" , 2:"neutral"}
sub['label'] = [label_match[_] for _ in pred_mode]
sub.to_csv('./sub/upload_model1.csv', index=False)

100%|██████████| 27/27 [00:17<00:00,  1.56it/s]
100%|██████████| 1666/1666 [00:00<00:00, 114122.33it/s]


## Model 2. Dacon data와 Klue data를 사용, Koelectra + 10 epoch 학습

In [9]:
## DEFINE MODEL PARA
# model_name = 'klue/roberta-large'
model_name = 'monologg/koelectra-base-v3-discriminator'
num_epochs = 10 
batch_size = 10 
lr = 1e-5

## get DATA
train, test, sub = prep_dat()
correct_train, correct_test = correct_token(train), correct_token(test)
train_dataset = NLIDataset(correct_train, 'train', model_name)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

## DECLARE MODEL
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
model = nn.DataParallel(model).to(device)

optimizer = AdamW(model.parameters(), lr=lr, eps = 1e-8)
warmup_ratio = 0.1
total_steps = len(train_loader) * num_epochs
scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=500, num_training_steps=total_steps) 


## TRAINING PROCESS
start_time = time.time()
for epoch in tqdm(range(num_epochs)):
    print('-------------------------------------- Training Begin! --------------------------------------')
    batch_num = 0; eq_sum = 0; y_len =0

    model.train()  
    
    for input_ids_batch, attention_masks_batch, y_batch in train_loader:
        optimizer.zero_grad()  
        y_batch = y_batch.to(device)  
        y_pred = model(input_ids_batch.to(device), attention_mask = attention_masks_batch.to(device))[0] 
        loss = F.cross_entropy(y_pred, y_batch) 
        loss.backward()  
        optimizer.step() 
        scheduler.step()
        _, predicted = torch.max(y_pred, 1)
        eq_sum += (predicted == y_batch).sum()
        y_len += len(y_batch)

        # CHECK PROCESS
        batch_num += 1
        if batch_num % 100 == 0:
            acc = eq_sum.float() / y_len
            print(f'BATCH_NUM : {batch_num}, TRAIN_LOSS : {loss.item():.4f}, TRAIN_ACC : {acc.item():.4f}')
        if batch_num % 1000 == 0:
            print(f'FOR 1000 EPOCH : {time.time() - start_time} SECONDS!')
            start_time = time.time()    

Downloading:   0%|          | 0.00/61.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/467 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/257k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/431M [00:00<?, ?B/s]

Some weights of the model checkpoint at monologg/koelectra-base-v3-discriminator were not used when initializing ElectraForSequenceClassification: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at monologg/koelectra-base-v3-discriminator and are newly initialized: 

-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 1.0897, TRAIN_ACC : 0.3740
BATCH_NUM : 200, TRAIN_LOSS : 1.0884, TRAIN_ACC : 0.3945
BATCH_NUM : 300, TRAIN_LOSS : 1.0945, TRAIN_ACC : 0.4280
BATCH_NUM : 400, TRAIN_LOSS : 0.8247, TRAIN_ACC : 0.4953
BATCH_NUM : 500, TRAIN_LOSS : 0.7507, TRAIN_ACC : 0.5518
BATCH_NUM : 600, TRAIN_LOSS : 0.4409, TRAIN_ACC : 0.5918
BATCH_NUM : 700, TRAIN_LOSS : 0.4934, TRAIN_ACC : 0.6181
BATCH_NUM : 800, TRAIN_LOSS : 0.5567, TRAIN_ACC : 0.6410
BATCH_NUM : 900, TRAIN_LOSS : 0.5560, TRAIN_ACC : 0.6613
BATCH_NUM : 1000, TRAIN_LOSS : 0.3700, TRAIN_ACC : 0.6787
FOR 1000 EPOCH : 120.06734585762024 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.6708, TRAIN_ACC : 0.6896
BATCH_NUM : 1200, TRAIN_LOSS : 0.4304, TRAIN_ACC : 0.7027
BATCH_NUM : 1300, TRAIN_LOSS : 0.6446, TRAIN_ACC : 0.7115
BATCH_NUM : 1400, TRAIN_LOSS : 0.4112, TRAIN_ACC : 0.7198
BATCH_NUM : 1500, TRAIN_LOSS : 0.7172, TRAIN_ACC : 0.7269


 10%|█         | 1/10 [05:35<50:22, 335.87s/it]

BATCH_NUM : 2800, TRAIN_LOSS : 0.4995, TRAIN_ACC : 0.7829
-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.3337, TRAIN_ACC : 0.9200
BATCH_NUM : 200, TRAIN_LOSS : 0.1282, TRAIN_ACC : 0.9110
BATCH_NUM : 300, TRAIN_LOSS : 0.4090, TRAIN_ACC : 0.9067
BATCH_NUM : 400, TRAIN_LOSS : 0.1892, TRAIN_ACC : 0.9073
BATCH_NUM : 500, TRAIN_LOSS : 0.4635, TRAIN_ACC : 0.9052
BATCH_NUM : 600, TRAIN_LOSS : 0.1488, TRAIN_ACC : 0.9032
BATCH_NUM : 700, TRAIN_LOSS : 0.1936, TRAIN_ACC : 0.9049
BATCH_NUM : 800, TRAIN_LOSS : 0.2813, TRAIN_ACC : 0.9074
BATCH_NUM : 900, TRAIN_LOSS : 0.2850, TRAIN_ACC : 0.9094
BATCH_NUM : 1000, TRAIN_LOSS : 0.3214, TRAIN_ACC : 0.9083
FOR 1000 EPOCH : 215.60003638267517 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0308, TRAIN_ACC : 0.9090
BATCH_NUM : 1200, TRAIN_LOSS : 0.6107, TRAIN_ACC : 0.9092
BATCH_NUM : 1300, TRAIN_LOSS : 0.4883, TRAIN_ACC : 0.9089
BATCH_NUM : 1400, TRAIN_LOSS : 0.2295, TRAIN_ACC : 0.9086


 20%|██        | 2/10 [11:11<44:44, 335.59s/it]

BATCH_NUM : 2800, TRAIN_LOSS : 0.1851, TRAIN_ACC : 0.9097
-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0496, TRAIN_ACC : 0.9610
BATCH_NUM : 200, TRAIN_LOSS : 0.0347, TRAIN_ACC : 0.9575
BATCH_NUM : 300, TRAIN_LOSS : 0.1066, TRAIN_ACC : 0.9570
BATCH_NUM : 400, TRAIN_LOSS : 0.0994, TRAIN_ACC : 0.9558
BATCH_NUM : 500, TRAIN_LOSS : 0.0263, TRAIN_ACC : 0.9562
BATCH_NUM : 600, TRAIN_LOSS : 0.1533, TRAIN_ACC : 0.9560
BATCH_NUM : 700, TRAIN_LOSS : 0.0139, TRAIN_ACC : 0.9553
BATCH_NUM : 800, TRAIN_LOSS : 0.0115, TRAIN_ACC : 0.9551
BATCH_NUM : 900, TRAIN_LOSS : 0.0214, TRAIN_ACC : 0.9544
BATCH_NUM : 1000, TRAIN_LOSS : 0.1084, TRAIN_ACC : 0.9550
FOR 1000 EPOCH : 215.5657937526703 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.2092, TRAIN_ACC : 0.9552
BATCH_NUM : 1200, TRAIN_LOSS : 0.2789, TRAIN_ACC : 0.9541
BATCH_NUM : 1300, TRAIN_LOSS : 0.0283, TRAIN_ACC : 0.9540
BATCH_NUM : 1400, TRAIN_LOSS : 0.2041, TRAIN_ACC : 0.9536
B

 30%|███       | 3/10 [16:46<39:08, 335.47s/it]

BATCH_NUM : 2800, TRAIN_LOSS : 0.0245, TRAIN_ACC : 0.9511
-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0482, TRAIN_ACC : 0.9710
BATCH_NUM : 200, TRAIN_LOSS : 0.0105, TRAIN_ACC : 0.9715
BATCH_NUM : 300, TRAIN_LOSS : 0.0101, TRAIN_ACC : 0.9727
BATCH_NUM : 400, TRAIN_LOSS : 0.0796, TRAIN_ACC : 0.9713
BATCH_NUM : 500, TRAIN_LOSS : 0.0115, TRAIN_ACC : 0.9730
BATCH_NUM : 600, TRAIN_LOSS : 0.0109, TRAIN_ACC : 0.9717
BATCH_NUM : 700, TRAIN_LOSS : 0.2488, TRAIN_ACC : 0.9704
BATCH_NUM : 800, TRAIN_LOSS : 0.0035, TRAIN_ACC : 0.9713
BATCH_NUM : 900, TRAIN_LOSS : 0.0103, TRAIN_ACC : 0.9706
BATCH_NUM : 1000, TRAIN_LOSS : 0.0094, TRAIN_ACC : 0.9719
FOR 1000 EPOCH : 215.5764021873474 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0997, TRAIN_ACC : 0.9717
BATCH_NUM : 1200, TRAIN_LOSS : 0.3649, TRAIN_ACC : 0.9721
BATCH_NUM : 1300, TRAIN_LOSS : 0.0191, TRAIN_ACC : 0.9715
BATCH_NUM : 1400, TRAIN_LOSS : 0.1102, TRAIN_ACC : 0.9719
B

 40%|████      | 4/10 [22:21<33:32, 335.42s/it]

BATCH_NUM : 2800, TRAIN_LOSS : 0.0130, TRAIN_ACC : 0.9707
-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0024, TRAIN_ACC : 0.9800
BATCH_NUM : 200, TRAIN_LOSS : 0.0537, TRAIN_ACC : 0.9810
BATCH_NUM : 300, TRAIN_LOSS : 0.0031, TRAIN_ACC : 0.9823
BATCH_NUM : 400, TRAIN_LOSS : 0.0142, TRAIN_ACC : 0.9830
BATCH_NUM : 500, TRAIN_LOSS : 0.0119, TRAIN_ACC : 0.9830
BATCH_NUM : 600, TRAIN_LOSS : 0.0166, TRAIN_ACC : 0.9828
BATCH_NUM : 700, TRAIN_LOSS : 0.0131, TRAIN_ACC : 0.9837
BATCH_NUM : 800, TRAIN_LOSS : 0.2060, TRAIN_ACC : 0.9836
BATCH_NUM : 900, TRAIN_LOSS : 0.0027, TRAIN_ACC : 0.9837
BATCH_NUM : 1000, TRAIN_LOSS : 0.0035, TRAIN_ACC : 0.9832
FOR 1000 EPOCH : 215.61085844039917 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0271, TRAIN_ACC : 0.9830
BATCH_NUM : 1200, TRAIN_LOSS : 0.0081, TRAIN_ACC : 0.9833
BATCH_NUM : 1300, TRAIN_LOSS : 0.0145, TRAIN_ACC : 0.9832
BATCH_NUM : 1400, TRAIN_LOSS : 0.0033, TRAIN_ACC : 0.9838


 50%|█████     | 5/10 [27:57<27:56, 335.39s/it]

BATCH_NUM : 2800, TRAIN_LOSS : 0.0323, TRAIN_ACC : 0.9819
-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0144, TRAIN_ACC : 0.9920
BATCH_NUM : 200, TRAIN_LOSS : 0.0022, TRAIN_ACC : 0.9900
BATCH_NUM : 300, TRAIN_LOSS : 0.0093, TRAIN_ACC : 0.9897
BATCH_NUM : 400, TRAIN_LOSS : 0.0041, TRAIN_ACC : 0.9908
BATCH_NUM : 500, TRAIN_LOSS : 0.0204, TRAIN_ACC : 0.9908
BATCH_NUM : 600, TRAIN_LOSS : 0.1473, TRAIN_ACC : 0.9913
BATCH_NUM : 700, TRAIN_LOSS : 0.0411, TRAIN_ACC : 0.9910
BATCH_NUM : 800, TRAIN_LOSS : 0.0051, TRAIN_ACC : 0.9913
BATCH_NUM : 900, TRAIN_LOSS : 0.0032, TRAIN_ACC : 0.9893
BATCH_NUM : 1000, TRAIN_LOSS : 0.0116, TRAIN_ACC : 0.9895
FOR 1000 EPOCH : 215.52324795722961 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0059, TRAIN_ACC : 0.9895
BATCH_NUM : 1200, TRAIN_LOSS : 0.2883, TRAIN_ACC : 0.9893
BATCH_NUM : 1300, TRAIN_LOSS : 0.0110, TRAIN_ACC : 0.9895
BATCH_NUM : 1400, TRAIN_LOSS : 0.0049, TRAIN_ACC : 0.9893


 60%|██████    | 6/10 [33:32<22:21, 335.34s/it]

BATCH_NUM : 2800, TRAIN_LOSS : 0.0011, TRAIN_ACC : 0.9894
-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0017, TRAIN_ACC : 0.9950
BATCH_NUM : 200, TRAIN_LOSS : 0.1589, TRAIN_ACC : 0.9935
BATCH_NUM : 300, TRAIN_LOSS : 0.0193, TRAIN_ACC : 0.9953
BATCH_NUM : 400, TRAIN_LOSS : 0.0014, TRAIN_ACC : 0.9948
BATCH_NUM : 500, TRAIN_LOSS : 0.0272, TRAIN_ACC : 0.9940
BATCH_NUM : 600, TRAIN_LOSS : 0.0014, TRAIN_ACC : 0.9938
BATCH_NUM : 700, TRAIN_LOSS : 0.0021, TRAIN_ACC : 0.9940
BATCH_NUM : 800, TRAIN_LOSS : 0.1257, TRAIN_ACC : 0.9935
BATCH_NUM : 900, TRAIN_LOSS : 0.0026, TRAIN_ACC : 0.9937
BATCH_NUM : 1000, TRAIN_LOSS : 0.0200, TRAIN_ACC : 0.9934
FOR 1000 EPOCH : 215.51435661315918 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0031, TRAIN_ACC : 0.9932
BATCH_NUM : 1200, TRAIN_LOSS : 0.0027, TRAIN_ACC : 0.9931
BATCH_NUM : 1300, TRAIN_LOSS : 0.4970, TRAIN_ACC : 0.9929
BATCH_NUM : 1400, TRAIN_LOSS : 0.2846, TRAIN_ACC : 0.9926


 70%|███████   | 7/10 [39:07<16:45, 335.31s/it]

BATCH_NUM : 2800, TRAIN_LOSS : 0.2977, TRAIN_ACC : 0.9928
-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0020, TRAIN_ACC : 0.9930
BATCH_NUM : 200, TRAIN_LOSS : 0.0411, TRAIN_ACC : 0.9955
BATCH_NUM : 300, TRAIN_LOSS : 0.0027, TRAIN_ACC : 0.9947
BATCH_NUM : 400, TRAIN_LOSS : 0.0012, TRAIN_ACC : 0.9933
BATCH_NUM : 500, TRAIN_LOSS : 0.0023, TRAIN_ACC : 0.9940
BATCH_NUM : 600, TRAIN_LOSS : 0.0012, TRAIN_ACC : 0.9938
BATCH_NUM : 700, TRAIN_LOSS : 0.2974, TRAIN_ACC : 0.9940
BATCH_NUM : 800, TRAIN_LOSS : 0.0007, TRAIN_ACC : 0.9945
BATCH_NUM : 900, TRAIN_LOSS : 0.0017, TRAIN_ACC : 0.9944
BATCH_NUM : 1000, TRAIN_LOSS : 0.0103, TRAIN_ACC : 0.9946
FOR 1000 EPOCH : 215.50969338417053 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0126, TRAIN_ACC : 0.9948
BATCH_NUM : 1200, TRAIN_LOSS : 0.0011, TRAIN_ACC : 0.9946
BATCH_NUM : 1300, TRAIN_LOSS : 0.0231, TRAIN_ACC : 0.9948
BATCH_NUM : 1400, TRAIN_LOSS : 0.2426, TRAIN_ACC : 0.9948


 80%|████████  | 8/10 [44:42<11:10, 335.29s/it]

BATCH_NUM : 2800, TRAIN_LOSS : 0.0066, TRAIN_ACC : 0.9946
-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0010, TRAIN_ACC : 0.9970
BATCH_NUM : 200, TRAIN_LOSS : 0.0301, TRAIN_ACC : 0.9980
BATCH_NUM : 300, TRAIN_LOSS : 0.0010, TRAIN_ACC : 0.9980
BATCH_NUM : 400, TRAIN_LOSS : 0.0125, TRAIN_ACC : 0.9978
BATCH_NUM : 500, TRAIN_LOSS : 0.0117, TRAIN_ACC : 0.9972
BATCH_NUM : 600, TRAIN_LOSS : 0.0008, TRAIN_ACC : 0.9975
BATCH_NUM : 700, TRAIN_LOSS : 0.0063, TRAIN_ACC : 0.9973
BATCH_NUM : 800, TRAIN_LOSS : 0.0024, TRAIN_ACC : 0.9975
BATCH_NUM : 900, TRAIN_LOSS : 0.0007, TRAIN_ACC : 0.9974
BATCH_NUM : 1000, TRAIN_LOSS : 0.0007, TRAIN_ACC : 0.9975
FOR 1000 EPOCH : 215.48284316062927 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0010, TRAIN_ACC : 0.9975
BATCH_NUM : 1200, TRAIN_LOSS : 0.0006, TRAIN_ACC : 0.9975
BATCH_NUM : 1300, TRAIN_LOSS : 0.0009, TRAIN_ACC : 0.9973
BATCH_NUM : 1400, TRAIN_LOSS : 0.0024, TRAIN_ACC : 0.9969


 90%|█████████ | 9/10 [50:18<05:35, 335.27s/it]

BATCH_NUM : 2800, TRAIN_LOSS : 0.0007, TRAIN_ACC : 0.9961
-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0007, TRAIN_ACC : 0.9960
BATCH_NUM : 200, TRAIN_LOSS : 0.0010, TRAIN_ACC : 0.9960
BATCH_NUM : 300, TRAIN_LOSS : 0.0007, TRAIN_ACC : 0.9967
BATCH_NUM : 400, TRAIN_LOSS : 0.0100, TRAIN_ACC : 0.9958
BATCH_NUM : 500, TRAIN_LOSS : 0.0006, TRAIN_ACC : 0.9960
BATCH_NUM : 600, TRAIN_LOSS : 0.0053, TRAIN_ACC : 0.9962
BATCH_NUM : 700, TRAIN_LOSS : 0.0018, TRAIN_ACC : 0.9961
BATCH_NUM : 800, TRAIN_LOSS : 0.0011, TRAIN_ACC : 0.9959
BATCH_NUM : 900, TRAIN_LOSS : 0.0010, TRAIN_ACC : 0.9956
BATCH_NUM : 1000, TRAIN_LOSS : 0.0010, TRAIN_ACC : 0.9955
FOR 1000 EPOCH : 215.49077439308167 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0066, TRAIN_ACC : 0.9955
BATCH_NUM : 1200, TRAIN_LOSS : 0.0016, TRAIN_ACC : 0.9953
BATCH_NUM : 1300, TRAIN_LOSS : 0.0039, TRAIN_ACC : 0.9955
BATCH_NUM : 1400, TRAIN_LOSS : 0.0517, TRAIN_ACC : 0.9956


100%|██████████| 10/10 [55:53<00:00, 335.34s/it]

BATCH_NUM : 2800, TRAIN_LOSS : 0.0008, TRAIN_ACC : 0.9964





In [10]:
torch.save(model, './model/upload_model2.pth')

In [11]:
test_dataset = NLIDataset(correct_test,'test', model_name)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0)

pred = []; ans = []

m = torch.load('./model/upload_model2.pth')
m.eval()

with torch.no_grad():
    for input_ids_batch, attention_masks_batch in tqdm(test_loader):
        y_pred = m(
            input_ids_batch.to(device),
            attention_mask=attention_masks_batch.to(device)
            )[0].detach().cpu().numpy()
        ans.extend(y_pred.argmax(axis=1))
        
pred.append(ans); pred = np.array(pred).T

pred_mode = []
for i in tqdm(range(len(test_dataset))):
    cnt = Counter(pred[i])
    pred_mode.append(cnt.most_common()[0][0])

label_match = {0:"entailment" , 1: "contradiction" , 2:"neutral"}
sub['label'] = [label_match[_] for _ in pred_mode]
sub.to_csv('./sub/upload_model2.csv', index=False)

100%|██████████| 27/27 [00:05<00:00,  4.58it/s]
100%|██████████| 1666/1666 [00:00<00:00, 218529.85it/s]


## Model 3. Dacon data + Klue data
## + KORNLI data ( 앞 20000개 데이터 중 length가 100보다 작은 데이터 사용 )
## Roberta + 10 epoch 학습

In [12]:
## DEFINE MODEL PARA
model_name = 'klue/roberta-large'
# model_name = 'monologg/koelectra-base-v3-discriminator'
num_epochs = 10 
batch_size = 10 
lr = 1e-5

## get DATA
train, test, sub = prep_dat2() ##
correct_train, correct_test = correct_token(train), correct_token(test)
train_dataset = NLIDataset(correct_train, 'train', model_name)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

## DECLARE MODEL
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
model = nn.DataParallel(model).to(device)

optimizer = AdamW(model.parameters(), lr=lr, eps = 1e-8)
warmup_ratio = 0.1
total_steps = len(train_loader) * num_epochs
scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=500, num_training_steps=total_steps) 


## TRAINING PROCESS
start_time = time.time()
for epoch in tqdm(range(num_epochs)):
    print('-------------------------------------- Training Begin! --------------------------------------')
    batch_num = 0; eq_sum = 0; y_len =0

    model.train()  
    
    for input_ids_batch, attention_masks_batch, y_batch in train_loader:
        optimizer.zero_grad()  
        y_batch = y_batch.to(device)  
        y_pred = model(input_ids_batch.to(device), attention_mask = attention_masks_batch.to(device))[0] 
        loss = F.cross_entropy(y_pred, y_batch) 
        loss.backward()  
        optimizer.step() 
        scheduler.step()
        _, predicted = torch.max(y_pred, 1)
        eq_sum += (predicted == y_batch).sum()
        y_len += len(y_batch)

        # CHECK PROCESS
        batch_num += 1
        if batch_num % 100 == 0:
            acc = eq_sum.float() / y_len
            print(f'BATCH_NUM : {batch_num}, TRAIN_LOSS : {loss.item():.4f}, TRAIN_ACC : {acc.item():.4f}')
        if batch_num % 1000 == 0:
            print(f'FOR 1000 EPOCH : {time.time() - start_time} SECONDS!')
            start_time = time.time()    

Downloading:   0%|          | 0.00/375 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/243k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/734k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/173 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/547 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.25G [00:00<?, ?B/s]

Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'cla

-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 1.0846, TRAIN_ACC : 0.3170
BATCH_NUM : 200, TRAIN_LOSS : 1.1617, TRAIN_ACC : 0.3340
BATCH_NUM : 300, TRAIN_LOSS : 1.1788, TRAIN_ACC : 0.3847
BATCH_NUM : 400, TRAIN_LOSS : 0.7565, TRAIN_ACC : 0.4558
BATCH_NUM : 500, TRAIN_LOSS : 0.7649, TRAIN_ACC : 0.5086
BATCH_NUM : 600, TRAIN_LOSS : 1.3399, TRAIN_ACC : 0.5450
BATCH_NUM : 700, TRAIN_LOSS : 0.6682, TRAIN_ACC : 0.5781
BATCH_NUM : 800, TRAIN_LOSS : 0.5782, TRAIN_ACC : 0.6079
BATCH_NUM : 900, TRAIN_LOSS : 0.3331, TRAIN_ACC : 0.6261
BATCH_NUM : 1000, TRAIN_LOSS : 0.3545, TRAIN_ACC : 0.6440
FOR 1000 EPOCH : 379.8486626148224 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.9257, TRAIN_ACC : 0.6575
BATCH_NUM : 1200, TRAIN_LOSS : 0.3650, TRAIN_ACC : 0.6703
BATCH_NUM : 1300, TRAIN_LOSS : 0.2244, TRAIN_ACC : 0.6812
BATCH_NUM : 1400, TRAIN_LOSS : 0.2978, TRAIN_ACC : 0.6903
BATCH_NUM : 1500, TRAIN_LOSS : 0.4648, TRAIN_ACC : 0.6977
B

 10%|█         | 1/10 [30:21<4:33:16, 1821.86s/it]

-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.4888, TRAIN_ACC : 0.9100
BATCH_NUM : 200, TRAIN_LOSS : 0.0198, TRAIN_ACC : 0.9130
BATCH_NUM : 300, TRAIN_LOSS : 0.0281, TRAIN_ACC : 0.9100
BATCH_NUM : 400, TRAIN_LOSS : 0.0148, TRAIN_ACC : 0.9060
BATCH_NUM : 500, TRAIN_LOSS : 0.0715, TRAIN_ACC : 0.9030
BATCH_NUM : 600, TRAIN_LOSS : 0.3656, TRAIN_ACC : 0.9025
BATCH_NUM : 700, TRAIN_LOSS : 0.2610, TRAIN_ACC : 0.9011
BATCH_NUM : 800, TRAIN_LOSS : 0.1104, TRAIN_ACC : 0.9009
BATCH_NUM : 900, TRAIN_LOSS : 0.3646, TRAIN_ACC : 0.9012
BATCH_NUM : 1000, TRAIN_LOSS : 0.1289, TRAIN_ACC : 0.9003
FOR 1000 EPOCH : 682.270968914032 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.1434, TRAIN_ACC : 0.9014
BATCH_NUM : 1200, TRAIN_LOSS : 0.3273, TRAIN_ACC : 0.9014
BATCH_NUM : 1300, TRAIN_LOSS : 0.1582, TRAIN_ACC : 0.9011
BATCH_NUM : 1400, TRAIN_LOSS : 0.0970, TRAIN_ACC : 0.9013
BATCH_NUM : 1500, TRAIN_LOSS : 0.2126, TRAIN_ACC : 0.9013
BA

 20%|██        | 2/10 [1:00:43<4:02:53, 1821.63s/it]

-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.2116, TRAIN_ACC : 0.9480
BATCH_NUM : 200, TRAIN_LOSS : 0.0403, TRAIN_ACC : 0.9420
BATCH_NUM : 300, TRAIN_LOSS : 0.0404, TRAIN_ACC : 0.9450
BATCH_NUM : 400, TRAIN_LOSS : 0.1430, TRAIN_ACC : 0.9445
BATCH_NUM : 500, TRAIN_LOSS : 0.1009, TRAIN_ACC : 0.9442
BATCH_NUM : 600, TRAIN_LOSS : 0.0185, TRAIN_ACC : 0.9432
BATCH_NUM : 700, TRAIN_LOSS : 0.0341, TRAIN_ACC : 0.9431
BATCH_NUM : 800, TRAIN_LOSS : 0.2330, TRAIN_ACC : 0.9425
BATCH_NUM : 900, TRAIN_LOSS : 0.0756, TRAIN_ACC : 0.9420
BATCH_NUM : 1000, TRAIN_LOSS : 0.1168, TRAIN_ACC : 0.9422
FOR 1000 EPOCH : 682.3611679077148 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0443, TRAIN_ACC : 0.9425
BATCH_NUM : 1200, TRAIN_LOSS : 0.0411, TRAIN_ACC : 0.9425
BATCH_NUM : 1300, TRAIN_LOSS : 0.0144, TRAIN_ACC : 0.9418
BATCH_NUM : 1400, TRAIN_LOSS : 0.0554, TRAIN_ACC : 0.9410
BATCH_NUM : 1500, TRAIN_LOSS : 0.0738, TRAIN_ACC : 0.9406
B

 30%|███       | 3/10 [1:31:04<3:32:31, 1821.64s/it]

-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0026, TRAIN_ACC : 0.9700
BATCH_NUM : 200, TRAIN_LOSS : 0.5979, TRAIN_ACC : 0.9645
BATCH_NUM : 300, TRAIN_LOSS : 0.2268, TRAIN_ACC : 0.9657
BATCH_NUM : 400, TRAIN_LOSS : 0.1932, TRAIN_ACC : 0.9668
BATCH_NUM : 500, TRAIN_LOSS : 0.3070, TRAIN_ACC : 0.9666
BATCH_NUM : 600, TRAIN_LOSS : 0.2925, TRAIN_ACC : 0.9665
BATCH_NUM : 700, TRAIN_LOSS : 0.1496, TRAIN_ACC : 0.9657
BATCH_NUM : 800, TRAIN_LOSS : 0.0113, TRAIN_ACC : 0.9660
BATCH_NUM : 900, TRAIN_LOSS : 0.0220, TRAIN_ACC : 0.9664
BATCH_NUM : 1000, TRAIN_LOSS : 0.0057, TRAIN_ACC : 0.9658
FOR 1000 EPOCH : 682.3740196228027 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0212, TRAIN_ACC : 0.9646
BATCH_NUM : 1200, TRAIN_LOSS : 0.0344, TRAIN_ACC : 0.9642
BATCH_NUM : 1300, TRAIN_LOSS : 0.0165, TRAIN_ACC : 0.9648
BATCH_NUM : 1400, TRAIN_LOSS : 0.0263, TRAIN_ACC : 0.9648
BATCH_NUM : 1500, TRAIN_LOSS : 1.4373, TRAIN_ACC : 0.9653
B

 40%|████      | 4/10 [2:01:26<3:02:09, 1821.62s/it]

-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0044, TRAIN_ACC : 0.9740
BATCH_NUM : 200, TRAIN_LOSS : 0.0144, TRAIN_ACC : 0.9785
BATCH_NUM : 300, TRAIN_LOSS : 0.0719, TRAIN_ACC : 0.9780
BATCH_NUM : 400, TRAIN_LOSS : 0.0037, TRAIN_ACC : 0.9775
BATCH_NUM : 500, TRAIN_LOSS : 0.0066, TRAIN_ACC : 0.9784
BATCH_NUM : 600, TRAIN_LOSS : 0.2317, TRAIN_ACC : 0.9783
BATCH_NUM : 700, TRAIN_LOSS : 0.1285, TRAIN_ACC : 0.9787
BATCH_NUM : 800, TRAIN_LOSS : 0.0205, TRAIN_ACC : 0.9788
BATCH_NUM : 900, TRAIN_LOSS : 0.0183, TRAIN_ACC : 0.9779
BATCH_NUM : 1000, TRAIN_LOSS : 0.0706, TRAIN_ACC : 0.9775
FOR 1000 EPOCH : 682.3713254928589 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.2185, TRAIN_ACC : 0.9775
BATCH_NUM : 1200, TRAIN_LOSS : 0.0075, TRAIN_ACC : 0.9771
BATCH_NUM : 1300, TRAIN_LOSS : 0.1930, TRAIN_ACC : 0.9768
BATCH_NUM : 1400, TRAIN_LOSS : 0.0027, TRAIN_ACC : 0.9771
BATCH_NUM : 1500, TRAIN_LOSS : 0.0205, TRAIN_ACC : 0.9771
B

 50%|█████     | 5/10 [2:31:48<2:31:47, 1821.58s/it]

-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0053, TRAIN_ACC : 0.9890
BATCH_NUM : 200, TRAIN_LOSS : 0.0530, TRAIN_ACC : 0.9900
BATCH_NUM : 300, TRAIN_LOSS : 0.0024, TRAIN_ACC : 0.9890
BATCH_NUM : 400, TRAIN_LOSS : 0.0101, TRAIN_ACC : 0.9888
BATCH_NUM : 500, TRAIN_LOSS : 0.0008, TRAIN_ACC : 0.9884
BATCH_NUM : 600, TRAIN_LOSS : 0.0074, TRAIN_ACC : 0.9880
BATCH_NUM : 700, TRAIN_LOSS : 0.0192, TRAIN_ACC : 0.9876
BATCH_NUM : 800, TRAIN_LOSS : 0.0183, TRAIN_ACC : 0.9879
BATCH_NUM : 900, TRAIN_LOSS : 0.0207, TRAIN_ACC : 0.9874
BATCH_NUM : 1000, TRAIN_LOSS : 0.0048, TRAIN_ACC : 0.9876
FOR 1000 EPOCH : 682.3995735645294 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0037, TRAIN_ACC : 0.9873
BATCH_NUM : 1200, TRAIN_LOSS : 0.0019, TRAIN_ACC : 0.9876
BATCH_NUM : 1300, TRAIN_LOSS : 0.4104, TRAIN_ACC : 0.9873
BATCH_NUM : 1400, TRAIN_LOSS : 0.0032, TRAIN_ACC : 0.9873
BATCH_NUM : 1500, TRAIN_LOSS : 0.0073, TRAIN_ACC : 0.9875
B

 60%|██████    | 6/10 [3:02:10<2:01:26, 1821.71s/it]

-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0004, TRAIN_ACC : 0.9940
BATCH_NUM : 200, TRAIN_LOSS : 0.0008, TRAIN_ACC : 0.9925
BATCH_NUM : 300, TRAIN_LOSS : 0.0006, TRAIN_ACC : 0.9927
BATCH_NUM : 400, TRAIN_LOSS : 0.0003, TRAIN_ACC : 0.9918
BATCH_NUM : 500, TRAIN_LOSS : 0.0018, TRAIN_ACC : 0.9916
BATCH_NUM : 600, TRAIN_LOSS : 0.0003, TRAIN_ACC : 0.9915
BATCH_NUM : 700, TRAIN_LOSS : 0.0003, TRAIN_ACC : 0.9917
BATCH_NUM : 800, TRAIN_LOSS : 0.0127, TRAIN_ACC : 0.9915
BATCH_NUM : 900, TRAIN_LOSS : 0.0331, TRAIN_ACC : 0.9918
BATCH_NUM : 1000, TRAIN_LOSS : 0.0009, TRAIN_ACC : 0.9917
FOR 1000 EPOCH : 682.5283813476562 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0169, TRAIN_ACC : 0.9915
BATCH_NUM : 1200, TRAIN_LOSS : 0.0047, TRAIN_ACC : 0.9915
BATCH_NUM : 1300, TRAIN_LOSS : 0.0030, TRAIN_ACC : 0.9915
BATCH_NUM : 1400, TRAIN_LOSS : 0.0100, TRAIN_ACC : 0.9916
BATCH_NUM : 1500, TRAIN_LOSS : 0.0005, TRAIN_ACC : 0.9916
B

 70%|███████   | 7/10 [3:32:31<1:31:05, 1821.77s/it]

-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0048, TRAIN_ACC : 0.9960
BATCH_NUM : 200, TRAIN_LOSS : 0.0022, TRAIN_ACC : 0.9950
BATCH_NUM : 300, TRAIN_LOSS : 0.0010, TRAIN_ACC : 0.9960
BATCH_NUM : 400, TRAIN_LOSS : 0.0353, TRAIN_ACC : 0.9963
BATCH_NUM : 500, TRAIN_LOSS : 0.0056, TRAIN_ACC : 0.9968
BATCH_NUM : 600, TRAIN_LOSS : 0.0004, TRAIN_ACC : 0.9967
BATCH_NUM : 700, TRAIN_LOSS : 0.0004, TRAIN_ACC : 0.9964
BATCH_NUM : 800, TRAIN_LOSS : 0.0065, TRAIN_ACC : 0.9964
BATCH_NUM : 900, TRAIN_LOSS : 0.0003, TRAIN_ACC : 0.9967
BATCH_NUM : 1000, TRAIN_LOSS : 0.0003, TRAIN_ACC : 0.9966
FOR 1000 EPOCH : 682.4692211151123 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0001, TRAIN_ACC : 0.9965
BATCH_NUM : 1200, TRAIN_LOSS : 0.0006, TRAIN_ACC : 0.9959
BATCH_NUM : 1300, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9962
BATCH_NUM : 1400, TRAIN_LOSS : 0.0007, TRAIN_ACC : 0.9963
BATCH_NUM : 1500, TRAIN_LOSS : 0.0008, TRAIN_ACC : 0.9963
B

 80%|████████  | 8/10 [4:02:53<1:00:43, 1821.81s/it]

-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0001, TRAIN_ACC : 0.9980
BATCH_NUM : 200, TRAIN_LOSS : 0.0019, TRAIN_ACC : 0.9970
BATCH_NUM : 300, TRAIN_LOSS : 0.0004, TRAIN_ACC : 0.9967
BATCH_NUM : 400, TRAIN_LOSS : 0.0059, TRAIN_ACC : 0.9968
BATCH_NUM : 500, TRAIN_LOSS : 0.0003, TRAIN_ACC : 0.9966
BATCH_NUM : 600, TRAIN_LOSS : 0.0564, TRAIN_ACC : 0.9967
BATCH_NUM : 700, TRAIN_LOSS : 0.0006, TRAIN_ACC : 0.9969
BATCH_NUM : 800, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9970
BATCH_NUM : 900, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9967
BATCH_NUM : 1000, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9969
FOR 1000 EPOCH : 682.4601016044617 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0001, TRAIN_ACC : 0.9972
BATCH_NUM : 1200, TRAIN_LOSS : 0.0001, TRAIN_ACC : 0.9972
BATCH_NUM : 1300, TRAIN_LOSS : 0.0031, TRAIN_ACC : 0.9970
BATCH_NUM : 1400, TRAIN_LOSS : 0.0009, TRAIN_ACC : 0.9970
BATCH_NUM : 1500, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9971
B

 90%|█████████ | 9/10 [4:33:15<30:21, 1821.84s/it]  

-------------------------------------- Training Begin! --------------------------------------
BATCH_NUM : 100, TRAIN_LOSS : 0.0001, TRAIN_ACC : 0.9980
BATCH_NUM : 200, TRAIN_LOSS : 0.0057, TRAIN_ACC : 0.9970
BATCH_NUM : 300, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9977
BATCH_NUM : 400, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9980
BATCH_NUM : 500, TRAIN_LOSS : 0.0001, TRAIN_ACC : 0.9976
BATCH_NUM : 600, TRAIN_LOSS : 0.0388, TRAIN_ACC : 0.9978
BATCH_NUM : 700, TRAIN_LOSS : 0.0001, TRAIN_ACC : 0.9976
BATCH_NUM : 800, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9974
BATCH_NUM : 900, TRAIN_LOSS : 0.0001, TRAIN_ACC : 0.9973
BATCH_NUM : 1000, TRAIN_LOSS : 0.0001, TRAIN_ACC : 0.9975
FOR 1000 EPOCH : 682.4787013530731 SECONDS!
BATCH_NUM : 1100, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9975
BATCH_NUM : 1200, TRAIN_LOSS : 0.0001, TRAIN_ACC : 0.9977
BATCH_NUM : 1300, TRAIN_LOSS : 0.0005, TRAIN_ACC : 0.9978
BATCH_NUM : 1400, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9979
BATCH_NUM : 1500, TRAIN_LOSS : 0.0002, TRAIN_ACC : 0.9979
B

100%|██████████| 10/10 [5:03:37<00:00, 1821.76s/it]


In [13]:
torch.save(model, './model/upload_model3.pth')

In [14]:
test_dataset = NLIDataset(correct_test,'test', model_name)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0)

pred = []; ans = []

m = torch.load('./model/upload_model3.pth')
m.eval()

with torch.no_grad():
    for input_ids_batch, attention_masks_batch in tqdm(test_loader):
        y_pred = m(
            input_ids_batch.to(device),
            attention_mask=attention_masks_batch.to(device)
            )[0].detach().cpu().numpy()
        ans.extend(y_pred.argmax(axis=1))
        
pred.append(ans); pred = np.array(pred).T

pred_mode = []
for i in tqdm(range(len(test_dataset))):
    cnt = Counter(pred[i])
    pred_mode.append(cnt.most_common()[0][0])

label_match = {0:"entailment" , 1: "contradiction" , 2:"neutral"}
sub['label'] = [label_match[_] for _ in pred_mode]
sub.to_csv('./sub/upload_model3.csv', index=False)

100%|██████████| 27/27 [00:17<00:00,  1.58it/s]
100%|██████████| 1666/1666 [00:00<00:00, 207196.75it/s]


## Voting ( Model 1 ~ 3)

In [10]:
def adj_softmax(x):
    y = np.exp(x - np.max(x))
    f_x = y / np.sum(np.exp(x))
    return f_x

## model 1

model_name = 'klue/roberta-large'
test_dataset = NLIDataset(correct_test,'test', model_name)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0)

m = torch.load('./model/upload_model1.pth')
m.eval()
roberta_large_probs = np.empty((0,3), float)

with torch.no_grad():
    for input_ids_batch, attention_masks_batch in tqdm(test_loader):
        y_pred = m(input_ids_batch.to(device), attention_mask=attention_masks_batch.to(device))[0].detach().cpu().numpy()
        roberta_large_probs = np.vstack((roberta_large_probs, adj_softmax(np.array(y_pred)))) 

## model 2

model_name = 'monologg/koelectra-base-v3-discriminator'
test_dataset = NLIDataset(correct_test,'test', model_name)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0)

m = torch.load('./model/upload_model2.pth')
m.eval()
koelectra_probs = np.empty((0,3), float)

with torch.no_grad():
    for input_ids_batch, attention_masks_batch in tqdm(test_loader):
        y_pred = m(input_ids_batch.to(device), attention_mask=attention_masks_batch.to(device))[0].detach().cpu().numpy()
        koelectra_probs = np.vstack((koelectra_probs, adj_softmax(np.array(y_pred)))) 

## model 3

model_name = 'klue/roberta-large'
test_dataset = NLIDataset(correct_test,'test', model_name)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0)

m = torch.load('./model/upload_model3.pth')
m.eval()
roberta_kornli_probs = np.empty((0,3), float)

with torch.no_grad():
    for input_ids_batch, attention_masks_batch in tqdm(test_loader):
        y_pred = m(input_ids_batch.to(device), attention_mask=attention_masks_batch.to(device))[0].detach().cpu().numpy()
        roberta_kornli_probs = np.vstack((roberta_kornli_probs, adj_softmax(np.array(y_pred))))

Downloading:   0%|          | 0.00/375 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/243k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/734k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/173 [00:00<?, ?B/s]

100%|██████████| 27/27 [00:17<00:00,  1.55it/s]


Downloading:   0%|          | 0.00/61.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/467 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/257k [00:00<?, ?B/s]

100%|██████████| 27/27 [00:06<00:00,  4.45it/s]
100%|██████████| 27/27 [00:17<00:00,  1.56it/s]


In [11]:
roberta_class = [np.argmax(_) for _ in roberta_large_probs]
roberta_kornli_class = [np.argmax(_) for _ in roberta_kornli_probs]
koelectra_class = [np.argmax(_) for _ in koelectra_probs]

pred = []
for roberta, roberta_kornli, koelectra in zip(roberta_class, roberta_kornli_class, koelectra_class):
    counter = Counter([roberta, roberta_kornli, koelectra])
    pred.append(counter.most_common(1)[0][0])

label_match = {0:"entailment" , 1: "contradiction" , 2:"neutral"}

sub['label'] = [label_match[_] for _ in pred]

sub

Unnamed: 0,index,label
0,0,contradiction
1,1,neutral
2,2,entailment
3,3,contradiction
4,4,contradiction
...,...,...
1661,1661,neutral
1662,1662,entailment
1663,1663,neutral
1664,1664,neutral


In [12]:
sub.to_csv('./sub/upload_ens.csv', index=False)