<a href="https://colab.research.google.com/github/ttogle918/AI_practice/blob/main/MLP/Bert_classifier_4label.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pytorch-transformers transformers

In [None]:
# Mount Google Drive so the competition archive stored under MyDrive is readable
# from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import random
import pandas as pd
import numpy as np
import os

from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import f1_score

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import AdamW

import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from transformers import AutoTokenizer, AutoModel, BertConfig
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler

from tqdm.auto import tqdm
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings(action='ignore') 

In [None]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Hyper-parameters shared by the whole notebook.
CFG = dict(
    EPOCHS=10,           # number of passes over the training split
    LEARNING_RATE=1e-4,  # initial learning rate handed to the optimizer
    BATCH_SIZE=64,       # samples per batch for every DataLoader
    SEED=41,             # seed for the RNGs and for the train/validation split
)

# Load data

In [None]:
# Location of the competition archive on the mounted Google Drive.
path = '/content/drive/MyDrive/Colab Notebooks/dataset/dacon/open.zip'

import zipfile
from zipfile import ZipFile

# Unpack every member of the archive onto the local Colab disk and remember the
# member names.
with ZipFile(path) as obj:
  namelist = obj.namelist()
  obj.extractall('/content/competition_data')

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices), sets
    ``PYTHONHASHSEED`` in ``os.environ`` and configures cuDNN for reproducible runs.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one (a no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and possibly pick different kernels from run
    # to run, which works against the determinism requested on the line above.
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Seed 고정

In [None]:
# Directory the competition archive was extracted into; the CSV reads below use it.
data_path = '/content/competition_data'

In [None]:
# Load the full training CSV and preview its first rows.
all_df = pd.read_csv(f'{data_path}/train.csv')
all_df.head()

Unnamed: 0,ID,문장,유형,극성,시제,확실성,label
0,TRAIN_00000,0.75%포인트 금리 인상은 1994년 이후 28년 만에 처음이다.,사실형,긍정,현재,확실,사실형-긍정-현재-확실
1,TRAIN_00001,이어 ＂앞으로 전문가들과 함께 4주 단위로 상황을 재평가할 예정＂이라며 ＂그 이전이...,사실형,긍정,과거,확실,사실형-긍정-과거-확실
2,TRAIN_00002,정부가 고유가 대응을 위해 7월부터 연말까지 유류세 인하 폭을 30%에서 37%까지...,사실형,긍정,미래,확실,사실형-긍정-미래-확실
3,TRAIN_00003,"서울시는 올해 3월 즉시 견인 유예시간 60분을 제공하겠다고 밝혔지만, 하루 만에 ...",사실형,긍정,과거,확실,사실형-긍정-과거-확실
4,TRAIN_00004,익사한 자는 사다리에 태워 거꾸로 놓고 소금으로 코를 막아 가득 채운다.,사실형,긍정,현재,확실,사실형-긍정-현재-확실


In [None]:
# Load the test CSV and preview the frame that was just read (the original cell
# displayed `all_df` again instead of `test`).
test = pd.read_csv(f'{data_path}/test.csv')
test.head()

Unnamed: 0,ID,문장,유형,극성,시제,확실성,label
0,TRAIN_00000,0.75%포인트 금리 인상은 1994년 이후 28년 만에 처음이다.,사실형,긍정,현재,확실,사실형-긍정-현재-확실
1,TRAIN_00001,이어 ＂앞으로 전문가들과 함께 4주 단위로 상황을 재평가할 예정＂이라며 ＂그 이전이...,사실형,긍정,과거,확실,사실형-긍정-과거-확실
2,TRAIN_00002,정부가 고유가 대응을 위해 7월부터 연말까지 유류세 인하 폭을 30%에서 37%까지...,사실형,긍정,미래,확실,사실형-긍정-미래-확실
3,TRAIN_00003,"서울시는 올해 3월 즉시 견인 유예시간 60분을 제공하겠다고 밝혔지만, 하루 만에 ...",사실형,긍정,과거,확실,사실형-긍정-과거-확실
4,TRAIN_00004,익사한 자는 사다리에 태워 거꾸로 놓고 소금으로 코를 막아 가득 채운다.,사실형,긍정,현재,확실,사실형-긍정-현재-확실


In [None]:
all_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16541 entries, 0 to 16540
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   ID      16541 non-null  object
 1   문장      16541 non-null  object
 2   유형      16541 non-null  object
 3   극성      16541 non-null  object
 4   시제      16541 non-null  object
 5   확실성     16541 non-null  object
 6   label   16541 non-null  object
dtypes: object(7)
memory usage: 904.7+ KB


In [None]:
# Print the distinct values of each of the four label columns.
for column in ['유형', '극성', '시제', '확실성']:
    print(all_df[column].unique())

['사실형' '추론형' '예측형' '대화형']
['긍정' '부정' '미정']
['현재' '과거' '미래']
['확실' '불확실']


In [None]:
# Re-split the provided training data into training / validation sets.
# The whole frame is split (not only the sentence column) because the cells below read
# the 유형 / 극성 / 시제 / 확실성 columns from both `train` and `val`.
train, val = train_test_split(all_df, test_size=0.2, random_state=CFG['SEED'])
# Work on explicit copies so the columns added later live on independent frames.
train, val = train.copy(), val.copy()

In [None]:
# train = all_df.copy()   # train data 전체 넣기 ( 검증셋 구분 안함 )

# Preprocessing

In [None]:
# Checkpoint name used for the tokenizer here and later passed to BaseModel.
model_name = 'klue/bert-base'
tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModel.from_pretrained(model_name)

Downloading:   0%|          | 0.00/289 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/425 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/248k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/495k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/125 [00:00<?, ?B/s]

In [None]:
# 2. Label Encoding (type, polarity, tense, certainty)
# One encoder per label column: fitted on the training split, then reused unchanged
# to transform the validation split.
type_le = preprocessing.LabelEncoder()
polarity_le = preprocessing.LabelEncoder()
tense_le = preprocessing.LabelEncoder()
certainty_le = preprocessing.LabelEncoder()

for column, encoder in (
    ("유형", type_le),
    ("극성", polarity_le),
    ("시제", tense_le),
    ("확실성", certainty_le),
):
    train[column] = encoder.fit_transform(train[column].values)
    val[column] = encoder.transform(val[column].values)

In [None]:
# Zero-filled one-hot buffers for the training split: one row per sample and one
# column per distinct encoded value of the corresponding label column.
n_train = len(train)
li_type = np.zeros((n_train, train['유형'].nunique()))
li_polarity = np.zeros((n_train, train['극성'].nunique()))
li_tense = np.zeros((n_train, train['시제'].nunique()))
li_certainty = np.zeros((n_train, train['확실성'].nunique()))

li_type.shape, li_polarity.shape, li_tense.shape, li_certainty.shape

((13232, 4), (13232, 3), (13232, 3), (13232, 2))

In [None]:
# Set the 1 of every row at the position given by that sample's encoded class, then
# turn each matrix into a list of per-sample row arrays.
one_hot_targets = (li_type, li_polarity, li_tense, li_certainty)
for row, codes in enumerate(train[['유형', '극성', '시제', '확실성']].to_numpy()):
  for matrix, code in zip(one_hot_targets, codes):
    matrix[row][code] = 1

li_type, li_polarity, li_tense, li_certainty = (list(matrix) for matrix in one_hot_targets)

In [None]:
# Store every one-hot row on the training frame as its string representation.
train['유형_li'] = list(map(str, li_type))
train['극성_li'] = list(map(str, li_polarity))
train['시제_li'] = list(map(str, li_tense))
train['확실성_li'] = list(map(str, li_certainty))


In [None]:
# Parse the "[0. 1. 0. 0.]"-style strings stored in the *_li columns back into lists of
# ints. Tokens are converted explicitly instead of rewriting the text and calling eval()
# on it; values that are already lists are kept, so re-running this cell is harmless.
for column in ['유형_li', '극성_li', '시제_li', '확실성_li']:
    train[column] = train[column].apply(
        lambda text: text if isinstance(text, list)
        else [int(float(token)) for token in text.strip('[]').split()]
    )

In [None]:
train.head()

Unnamed: 0,ID,문장,유형,극성,시제,확실성,label,유형_li,극성_li,시제_li,확실성_li
6724,TRAIN_06724,용산구청 관계자는 ＂재정이 열악한 지자체로서는 1800억원을 마련할 수 없다＂며 서...,1,0,0,1,사실형-긍정-과거-확실,"[0, 1, 0, 0]","[1, 0, 0]","[1, 0, 0]","[0, 1]"
15546,TRAIN_15546,부산시는 이처럼 부산이 가파른 상승세를 보이는 이유에 대해 지난해부터 추진하고 있는...,1,0,0,1,사실형-긍정-과거-확실,"[0, 1, 0, 0]","[1, 0, 0]","[1, 0, 0]","[0, 1]"
1153,TRAIN_01153,"그러나 미숙아, 만성호흡기질환, 선천 심장병, 선천 면역결핍질환, 암환자 등의 고위...",1,0,2,1,사실형-긍정-현재-확실,"[0, 1, 0, 0]","[1, 0, 0]","[0, 0, 1]","[0, 1]"
9134,TRAIN_09134,탁구 종목에서 중국 대표팀 위상이 뛰어나기 때문이다.,3,0,2,1,추론형-긍정-현재-확실,"[0, 0, 0, 1]","[1, 0, 0]","[0, 0, 1]","[0, 1]"
14896,TRAIN_14896,이 논문에 따르면 ＇BT-11＇은 뇌의 신경전달물질인 아세틸콜린을 분해하는 효소의 ...,1,0,2,1,사실형-긍정-현재-확실,"[0, 1, 0, 0]","[1, 0, 0]","[0, 0, 1]","[0, 1]"


In [None]:
# Zero-filled one-hot buffers for the validation split. The widths come from the fitted
# encoders rather than from the values present in `val`, so every encoded class has a
# column even when it does not occur in the validation split.
li_type = np.zeros((len(val), len(type_le.classes_)))
li_polarity = np.zeros((len(val), len(polarity_le.classes_)))
li_tense = np.zeros((len(val), len(tense_le.classes_)))
li_certainty = np.zeros((len(val), len(certainty_le.classes_)))

li_type.shape, li_polarity.shape, li_tense.shape, li_certainty.shape

((3309, 4), (3309, 3), (3309, 3), (3309, 2))

In [None]:
# Set the 1 of every row at the position given by that sample's encoded class, then
# turn each matrix into a list of per-sample row arrays.
one_hot_targets = (li_type, li_polarity, li_tense, li_certainty)
for row, codes in enumerate(val[['유형', '극성', '시제', '확실성']].to_numpy()):
  for matrix, code in zip(one_hot_targets, codes):
    matrix[row][code] = 1

li_type, li_polarity, li_tense, li_certainty = (list(matrix) for matrix in one_hot_targets)

In [None]:
# Store every one-hot row on the validation frame as its string representation,
# then display the frame.
val['유형_li'] = list(map(str, li_type))
val['극성_li'] = list(map(str, li_polarity))
val['시제_li'] = list(map(str, li_tense))
val['확실성_li'] = list(map(str, li_certainty))
val

Unnamed: 0,ID,문장,유형,극성,시제,확실성,label,유형_li,극성_li,시제_li,확실성_li
16300,TRAIN_16300,장교 출신 예비역 최 모씨(29)는 ＂정당하게 병역의무를 다한 사람은 마치 비양심인...,1,0,0,1,사실형-긍정-과거-확실,[0. 1. 0. 0.],[1. 0. 0.],[1. 0. 0.],[0. 1.]
12866,TRAIN_12866,배 대표는 ＂1998년 아버지에게 누룩 회사인 ＇한국발효＇를 받아 누룩 사업을 하다...,1,0,0,1,사실형-긍정-과거-확실,[0. 1. 0. 0.],[1. 0. 0.],[1. 0. 0.],[0. 1.]
10862,TRAIN_10862,바이러스라는 전혀 예상치 못했던 돌발 변수 출현으로 투자자 모집과 거래 완료까지 소...,1,0,2,1,사실형-긍정-현재-확실,[0. 1. 0. 0.],[1. 0. 0.],[0. 0. 1.],[0. 1.]
4092,TRAIN_04092,참가자들은 총 3개까지 출품 희망 부문을 복수 선택할 수 있다.,3,0,1,0,추론형-긍정-미래-불확실,[0. 0. 0. 1.],[1. 0. 0.],[0. 1. 0.],[1. 0.]
265,TRAIN_00265,샤론 최씨도 ＂그렇다. 영화감독을 하고 싶다＂고 말하자 진행자들은 ＂내년 시상식에서...,1,0,0,1,사실형-긍정-과거-확실,[0. 1. 0. 0.],[1. 0. 0.],[1. 0. 0.],[0. 1.]
...,...,...,...,...,...,...,...,...,...,...,...
3288,TRAIN_03288,"이번 소집 명단에서 한국과 일본을 제외한 유럽파는 정우영(SC프라이부르크, 분데스리...",1,0,2,1,사실형-긍정-현재-확실,[0. 1. 0. 0.],[1. 0. 0.],[0. 0. 1.],[0. 1.]
11475,TRAIN_11475,"건선은 대개 팔꿈치, 무릎, 사지의 바깥 부분, 둔부, 두피 등 외부의 자극을 받기...",1,0,0,1,사실형-긍정-과거-확실,[0. 1. 0. 0.],[1. 0. 0.],[1. 0. 0.],[0. 1.]
2653,TRAIN_02653,다른 경찰관은 김창룡 경찰청장을 직접 언급하며 ＂(청장의) 잔여 임기 동안 경찰국 ...,1,0,0,1,사실형-긍정-과거-확실,[0. 1. 0. 0.],[1. 0. 0.],[1. 0. 0.],[0. 1.]
11171,TRAIN_11171,코로나19 이후 지속된 초저금리와 증시 부진이 발목을 잡았다.,1,0,0,1,사실형-긍정-과거-확실,[0. 1. 0. 0.],[1. 0. 0.],[1. 0. 0.],[0. 1.]


In [None]:
# Parse the "[0. 1. 0. 0.]"-style strings stored in the *_li columns back into lists of
# ints. Tokens are converted explicitly instead of rewriting the text and calling eval()
# on it; values that are already lists are kept, so re-running this cell is harmless.
for column in ['유형_li', '극성_li', '시제_li', '확실성_li']:
    val[column] = val[column].apply(
        lambda text: text if isinstance(text, list)
        else [int(float(token)) for token in text.strip('[]').split()]
    )

In [None]:
val.head()

Unnamed: 0,ID,문장,유형,극성,시제,확실성,label,유형_li,극성_li,시제_li,확실성_li
16300,TRAIN_16300,장교 출신 예비역 최 모씨(29)는 ＂정당하게 병역의무를 다한 사람은 마치 비양심인...,1,0,0,1,사실형-긍정-과거-확실,"[0, 1, 0, 0]","[1, 0, 0]","[1, 0, 0]","[0, 1]"
12866,TRAIN_12866,배 대표는 ＂1998년 아버지에게 누룩 회사인 ＇한국발효＇를 받아 누룩 사업을 하다...,1,0,0,1,사실형-긍정-과거-확실,"[0, 1, 0, 0]","[1, 0, 0]","[1, 0, 0]","[0, 1]"
10862,TRAIN_10862,바이러스라는 전혀 예상치 못했던 돌발 변수 출현으로 투자자 모집과 거래 완료까지 소...,1,0,2,1,사실형-긍정-현재-확실,"[0, 1, 0, 0]","[1, 0, 0]","[0, 0, 1]","[0, 1]"
4092,TRAIN_04092,참가자들은 총 3개까지 출품 희망 부문을 복수 선택할 수 있다.,3,0,1,0,추론형-긍정-미래-불확실,"[0, 0, 0, 1]","[1, 0, 0]","[0, 1, 0]","[1, 0]"
265,TRAIN_00265,샤론 최씨도 ＂그렇다. 영화감독을 하고 싶다＂고 말하자 진행자들은 ＂내년 시상식에서...,1,0,0,1,사실형-긍정-과거-확실,"[0, 1, 0, 0]","[1, 0, 0]","[1, 0, 0]","[0, 1]"


# Train

# dataset

In [None]:
import re
class CustomDataset(Dataset):
    """Sentence dataset that optionally carries four parallel label sequences.

    Args:
        sentence: Indexable collection of sentences.
        st_labels: Mapping with the keys 'type', 'polarity', 'tense' and 'certainty',
            each indexable by sample position, or None for unlabeled data.
    """

    # Order in which the labels are appended to a labeled sample.
    _LABEL_KEYS = ('type', 'polarity', 'tense', 'certainty')

    def __init__(self, sentence, st_labels):
        self.sentence = sentence
        self.st_labels = st_labels

    def __len__(self):
        """Number of sentences in the dataset."""
        return len(self.sentence)

    def __getitem__(self, index):
        """Return the bare sentence when unlabeled, else (sentence, type, polarity, tense, certainty)."""
        if self.st_labels is None:
            return self.sentence[index]
        labels = tuple(self.st_labels[key][index] for key in self._LABEL_KEYS)
        return (self.sentence[index], *labels)

    def cleaning(self, sentence) :
        """Drop every character that is not a Hangul syllable, an ASCII letter or a digit."""
        disallowed = '[^가-힣A-Za-z0-9]'
        return re.sub(disallowed, '', sentence)

In [None]:
def make_dataloader(dataset, tok_model, batch_size, s='train') :
  """Build a DataLoader over ``dataset`` and (re)load the module-level tokenizer.

  Args:
      dataset: Dataset to iterate over.
      tok_model: Checkpoint name passed to ``AutoTokenizer.from_pretrained``; the result
          is stored in the global ``tokenizer``.
      batch_size: Number of samples per batch.
      s: 'train' selects random sampling; any other value selects sequential order.

  Returns:
      The configured DataLoader, which collates with ``custom_collate_fn``.
  """
  global tokenizer
  tokenizer = AutoTokenizer.from_pretrained(tok_model)

  # Only the sampling order differs between the training loader and the others.
  sampler_cls = RandomSampler if s == 'train' else SequentialSampler
  dataloader = DataLoader(
      dataset,
      batch_size=batch_size,
      sampler=sampler_cls(dataset),
      collate_fn=custom_collate_fn,
  )
  print(f'batch_size : {batch_size}')
  return dataloader

In [None]:
def custom_collate_fn(batch):
    """Tokenize a batch of sentences and stack its labels into tensors.

    Args:
        batch: List of samples. Each sample is either a bare sentence string (what
            ``CustomDataset`` yields when it has no labels) or a tuple
            ``(sentence, type, polarity, tense, certainty)``; a tuple whose labels are
            None is treated as unlabeled.

    Returns:
        ``(tensorized_input, type, polarity, tense, certainty)``: the output of the
        module-level ``tokenizer`` followed by four ``torch.Tensor`` label batches,
        which are empty when the batch carries no labels.
    """
    input1_list = []
    target_lists = ([], [], [], [])  # type, polarity, tense, certainty

    for sample in batch:
        # Unlabeled datasets yield plain strings; indexing or unpacking them would act
        # on the characters of the sentence, so they are handled before any unpacking.
        if isinstance(sample, str):
            input1_list.append(sample)
            continue

        _input1, *_targets = sample
        input1_list.append(_input1)
        if _targets and _targets[0] is not None:
            for _target_list, _target in zip(target_lists, _targets):
                _target_list.append(_target)

    tensorized_input = tokenizer(
        input1_list,
        add_special_tokens=True,
        padding="longest",  # pad shorter sentences with [PAD] up to the longest one in the batch
        truncation=True, # drop the tokens beyond max_length
        max_length=512,
        return_tensors='pt' # return the tokenized batch as tensors
    )

    return (tensorized_input, *(torch.Tensor(_target_list) for _target_list in target_lists))

In [None]:
# Positional (index-free) arrays of the one-hot label lists of the training split.
train_type, train_polarity, train_tense, train_certainty = (
    train[column].values
    for column in ("유형_li", "극성_li", "시제_li", "확실성_li")
)

# Grouped under the keys CustomDataset looks up.
train_labels = dict(
    type=train_type,
    polarity=train_polarity,
    tense=train_tense,
    certainty=train_certainty,
)

In [None]:
# Positional (index-free) arrays of the one-hot label lists of the validation split.
val_type, val_polarity, val_tense, val_certainty = (
    val[column].values
    for column in ("유형_li", "극성_li", "시제_li", "확실성_li")
)

# Grouped under the keys CustomDataset looks up.
val_labels = dict(
    type=val_type,
    polarity=val_polarity,
    tense=val_tense,
    certainty=val_certainty,
)

In [None]:
# Wrap the sentences (as positional arrays) and their label dicts into datasets.
train_dataset = CustomDataset(train['문장'].values, train_labels)
val_dataset = CustomDataset(val['문장'].values, val_labels)

In [None]:
# 'train' selects random sampling inside make_dataloader; any other value ('val' here)
# selects sequential order.
train_loader = make_dataloader(train_dataset, model_name, CFG['BATCH_SIZE'], 'train')

val_loader = make_dataloader(val_dataset, model_name, CFG['BATCH_SIZE'], 'val')

batch_size : 64
batch_size : 64


# Model definition

In [None]:
class BaseModel(nn.Module):
    """Pretrained encoder followed by four independent classification heads.

    The heads are stacks of Dropout and Linear layers with no activation in between.

    Args:
        model_name: Checkpoint identifier handed to ``AutoModel.from_pretrained``.
    """

    def __init__(self, model_name):
        super().__init__()
        self.feature_extract = AutoModel.from_pretrained(model_name)
        self.input_dim = self.feature_extract.config.hidden_size
        width = self.input_dim

        # Sentence type head: 4 outputs.
        self.type_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(width, 256),
            nn.Linear(256, 128),
            nn.Dropout(p=0.3),
            nn.Linear(128, 4),
        )
        # Polarity head: 3 outputs.
        self.polarity_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(width, 128),
            nn.Dropout(p=0.3),
            nn.Linear(128, 3),
        )
        # Tense head: 3 outputs.
        self.tense_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(width, 128),
            nn.Linear(128, 32),
            nn.Linear(32, 3),
        )
        # Certainty head: 2 outputs.
        self.certainty_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(width, 256),
            nn.Linear(256, 256),
            nn.Linear(256, 2),
        )

    def forward(self, x):
        """Encode the tokenizer output ``x`` and apply every head to the result.

        Returns:
            Tuple ``(type_output, polarity_output, tense_output, certainty_output)``.
        """
        # NOTE(review): the heads receive the whole 'last_hidden_state', not a pooled
        # sentence vector. Assuming the usual (batch, tokens, hidden) layout, every
        # output keeps a token axis — confirm this is intended.
        encoded = self.feature_extract(**x)['last_hidden_state']
        # Classify sentence type, polarity, tense and certainty separately.
        heads = (
            self.type_classifier,
            self.polarity_classifier,
            self.tense_classifier,
            self.certainty_classifier,
        )
        return tuple(head(encoded) for head in heads)

# train

In [None]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    """Train ``model`` for ``CFG['EPOCHS']`` epochs and return it with its best weights.

    After every epoch the model is evaluated with ``validation``; the weights of the
    epoch with the lowest validation loss are snapshotted and loaded back into the model
    before it is returned.

    NOTE(review): this definition rebinds the module-level name ``train``, which earlier
    cells use for the training DataFrame; those cells must run before this one.

    Args:
        model: Module returning four logit tensors (type, polarity, tense, certainty).
        optimizer: Optimizer over the model's parameters.
        train_loader: Iterable of ``(sentence, type, polarity, tense, certainty)`` batches.
        val_loader: Iterable of validation batches, forwarded to ``validation``.
        scheduler: Optional scheduler stepped with the validation loss, or None.
        device: Device the model and the batches are moved to.

    Returns:
        ``model`` itself, holding the weights of the best epoch (or the final weights if
        no epoch ever improved on the initial infinite loss).
    """
    model.to(device)

    criterion = {
        'type' : nn.CrossEntropyLoss().to(device),
        'polarity' : nn.CrossEntropyLoss().to(device),
        'tense' : nn.CrossEntropyLoss().to(device),
        'certainty' : nn.CrossEntropyLoss().to(device)
    }

    best_loss = float('inf')
    best_state = None  # CPU snapshot of the weights from the best epoch seen so far

    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for sentence, type_label, polarity_label, tense_label, certainty_label in tqdm(iter(train_loader)):
            sentence = sentence.to(device)
            type_label = type_label.to(device)
            polarity_label = polarity_label.to(device)
            tense_label = tense_label.to(device)
            certainty_label = certainty_label.to(device)

            optimizer.zero_grad()

            type_logit, polarity_logit, tense_logit, certainty_logit = model(sentence)

            # Equal-weight sum of the four per-task losses.
            loss = 0.25 * criterion['type'](type_logit, type_label.long()) + 0.25 * criterion['polarity'](polarity_logit, polarity_label.long()) + 0.25 * criterion['tense'](tense_logit, tense_label.long()) + 0.25 * criterion['certainty'](certainty_logit, certainty_label.long())

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        val_loss, val_type_f1, val_polarity_f1, val_tense_f1, val_certainty_f1 = validation(model, val_loader, criterion, device)
        print(f'Epoch : [{epoch}] Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}] 유형 F1 : [{val_type_f1:.5f}] 극성 F1 : [{val_polarity_f1:.5f}] 시제 F1 : [{val_tense_f1:.5f}] 확실성 F1 : [{val_certainty_f1:.5f}]')

        if scheduler is not None:
            scheduler.step(val_loss)

        if best_loss > val_loss:
            best_loss = val_loss
            # Snapshot the weights. Keeping a reference to `model` would only alias the
            # object that later epochs keep updating, so the "best" model would always
            # be the last one.
            best_state = {
                name: tensor.detach().cpu().clone()
                for name, tensor in model.state_dict().items()
            }

    if best_state is not None:
        model.load_state_dict(best_state)
    return model

In [None]:
def validation(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader`` with gradient tracking disabled.

    Args:
        model: Module returning four logit tensors (type, polarity, tense, certainty).
        val_loader: Iterable of ``(sentence, type, polarity, tense, certainty)`` batches.
        criterion: Mapping with one loss callable under each of the keys 'type',
            'polarity', 'tense' and 'certainty'.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, type_f1, polarity_f1, tense_f1, certainty_f1)``: the mean of the
        per-batch losses followed by the weighted F1 score of every task.
    """
    tasks = ('type', 'polarity', 'tense', 'certainty')
    predictions = {task: [] for task in tasks}
    references = {task: [] for task in tasks}
    batch_losses = []

    model.eval()
    with torch.no_grad():
        for sentence, type_label, polarity_label, tense_label, certainty_label in tqdm(iter(val_loader)):
            sentence = sentence.to(device)
            labels = {
                'type': type_label.to(device),
                'polarity': polarity_label.to(device),
                'tense': tense_label.to(device),
                'certainty': certainty_label.to(device),
            }

            type_logit, polarity_logit, tense_logit, certainty_logit = model(sentence)
            logits = {
                'type': type_logit,
                'polarity': polarity_logit,
                'tense': tense_logit,
                'certainty': certainty_logit,
            }

            # Equal-weight sum of the four per-task losses, added in task order.
            weighted = [0.25 * criterion[task](logits[task], labels[task].long()) for task in tasks]
            loss = weighted[0] + weighted[1] + weighted[2] + weighted[3]
            batch_losses.append(loss.item())

            for task in tasks:
                predictions[task] += logits[task].argmax(1).detach().cpu().numpy().tolist()
                references[task] += labels[task].detach().cpu().numpy().tolist()

    scores = [f1_score(references[task], predictions[task], average='weighted') for task in tasks]
    return (np.mean(batch_losses), *scores)

# run

In [None]:
model_name
# NOTE(review): `config` is not referenced by any other cell visible in this notebook;
# BaseModel loads its encoder directly from `model_name`.
config = BertConfig.from_pretrained(model_name)
config.max_length = 512


In [None]:
# Build the multi-head model, its optimizer and the learning-rate scheduler.
model = BaseModel(model_name)
# train() switches the model to train mode at the start of every epoch.
model.eval()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
# train() steps this scheduler with the validation loss after every epoch.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-8, verbose=True)


Downloading:   0%|          | 0.00/445M [00:00<?, ?B/s]

Some weights of the model checkpoint at klue/bert-base were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)


  0%|          | 0/207 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

Epoch : [1] Train Loss : [0.39329] Val Loss : [0.16006] 유형 F1 : [0.80726] 극성 F1 : [0.96661] 시제 F1 : [0.88148] 확실성 F1 : [0.92171]


  0%|          | 0/207 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

Epoch : [2] Train Loss : [0.14819] Val Loss : [0.14668] 유형 F1 : [0.85817] 극성 F1 : [0.97586] 시제 F1 : [0.89640] 확실성 F1 : [0.91893]


  0%|          | 0/207 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

Epoch : [3] Train Loss : [0.11174] Val Loss : [0.15803] 유형 F1 : [0.87731] 극성 F1 : [0.97243] 시제 F1 : [0.89354] 확실성 F1 : [0.92778]


  0%|          | 0/207 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

Epoch : [4] Train Loss : [0.07697] Val Loss : [0.18077] 유형 F1 : [0.87718] 극성 F1 : [0.97590] 시제 F1 : [0.89520] 확실성 F1 : [0.92149]


  0%|          | 0/207 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

Epoch : [5] Train Loss : [0.04903] Val Loss : [0.21960] 유형 F1 : [0.86918] 극성 F1 : [0.97379] 시제 F1 : [0.89562] 확실성 F1 : [0.92756]
Epoch 00005: reducing learning rate of group 0 to 5.0000e-05.


  0%|          | 0/207 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

Epoch : [6] Train Loss : [0.02391] Val Loss : [0.27299] 유형 F1 : [0.88041] 극성 F1 : [0.97133] 시제 F1 : [0.89173] 확실성 F1 : [0.92419]


  0%|          | 0/207 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

Epoch : [7] Train Loss : [0.01346] Val Loss : [0.29494] 유형 F1 : [0.87981] 극성 F1 : [0.97472] 시제 F1 : [0.89418] 확실성 F1 : [0.92500]


  0%|          | 0/207 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

Epoch : [8] Train Loss : [0.00860] Val Loss : [0.31734] 유형 F1 : [0.87899] 극성 F1 : [0.97333] 시제 F1 : [0.89362] 확실성 F1 : [0.92434]
Epoch 00008: reducing learning rate of group 0 to 2.5000e-05.


  0%|          | 0/207 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

Epoch : [9] Train Loss : [0.00455] Val Loss : [0.32422] 유형 F1 : [0.88186] 극성 F1 : [0.97619] 시제 F1 : [0.89664] 확실성 F1 : [0.92499]


  0%|          | 0/207 [00:00<?, ?it/s]

  0%|          | 0/52 [00:00<?, ?it/s]

Epoch : [10] Train Loss : [0.00260] Val Loss : [0.34114] 유형 F1 : [0.87995] 극성 F1 : [0.97585] 시제 F1 : [0.89601] 확실성 F1 : [0.92494]


In [None]:
import gc
gc.collect()

179

# inference

In [None]:
test_dataset = CustomDataset(test['문장'].values, None)
test_loader = make_dataloader(test_dataset, model_name, CFG['BATCH_SIZE'], 'val')

batch_size : 64


In [None]:
def inference(model, test_loader, device):
    """Run ``model`` over ``test_loader`` and collect the argmax of every output.

    Args:
        model: Module returning four logit tensors (type, polarity, tense, certainty).
        test_loader: Iterable of 5-item batches whose first item is the tokenized input;
            the remaining four items are ignored.
        device: Device the model and the batches are moved to.

    Returns:
        ``(type_preds, polarity_preds, tense_preds, certainty_preds)``: four lists, each
        extended per batch with ``logit.argmax(1)`` converted to nested Python lists.
    """
    model.to(device)
    model.eval()

    collected = ([], [], [], [])  # type, polarity, tense, certainty

    with torch.no_grad():
        for sentence, _, _, _, _ in tqdm(test_loader):
            type_logit, polarity_logit, tense_logit, certainty_logit = model(sentence.to(device))
            batch_logits = (type_logit, polarity_logit, tense_logit, certainty_logit)
            for bucket, logit in zip(collected, batch_logits):
                bucket.extend(logit.argmax(1).detach().cpu().numpy().tolist())

    type_preds, polarity_preds, tense_preds, certainty_preds = collected
    return type_preds, polarity_preds, tense_preds, certainty_preds

In [None]:
type_preds, polarity_preds, tense_preds, certainty_preds = inference(infer_model, test_loader, device)

  0%|          | 0/111 [00:00<?, ?it/s]

In [None]:
# inference() yields one list per sample (the argmax over dim 1 of each logit tensor).
# Stack those lists into a 2-D array and take the argmax along axis 1 to obtain a single
# encoded class index per sample.
type_preds = np.argmax(np.asarray(type_preds), axis=1)
polarity_preds = np.argmax(np.asarray(polarity_preds), axis=1)
tense_preds = np.argmax(np.asarray(tense_preds), axis=1)
certainty_preds = np.argmax(np.asarray(certainty_preds), axis=1)

In [None]:
# Map the encoded class indices back to the original label strings with the encoders
# fitted on the training split.
type_preds = type_le.inverse_transform(type_preds)
polarity_preds = polarity_le.inverse_transform(polarity_preds)
tense_preds = tense_le.inverse_transform(tense_preds)
certainty_preds = certainty_le.inverse_transform(certainty_preds)

In [None]:
type_preds

array(['사실형', '사실형', '사실형', ..., '사실형', '추론형', '사실형'], dtype=object)

In [None]:
type_le.inverse_transform([1])

array(['사실형'], dtype=object)

In [None]:
# Join the four decoded labels of every test sentence into one
# 'type-polarity-tense-certainty' string.
predictions = [
    '-'.join((type_pred, polarity_pred, tense_pred, certainty_pred))
    for type_pred, polarity_pred, tense_pred, certainty_pred
    in zip(type_preds, polarity_preds, tense_preds, certainty_preds)
]