In [None]:
!pip install transformers
!pip install datasets
!pip install sentencepiece

https://beomi.github.io/2020/02/24/Pytorch-with-TPU-on-Colab/

In [None]:
!unzip open.zip

Archive:  open.zip
  inflating: sample_submission.csv   
  inflating: test.csv                
  inflating: train.csv               


In [None]:
import random
import pandas as pd
import numpy as np
import os
import re

from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import f1_score

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from tqdm.auto import tqdm

import warnings
warnings.filterwarnings(action='ignore') 

In [None]:
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
CFG = {
    'EPOCHS':10,
    'LEARNING_RATE':1e-4,
    'BATCH_SIZE':256,
    'SEED':41
}

## 증강 데이터 불러오기 및 레이블 인코딩

In [None]:
df_train_aug = pd.read_csv('df_train_aug.csv')
df_val_aug = pd.read_csv('df_val_aug.csv')

In [None]:
# Shuffle the rows; seeding with CFG['SEED'] keeps the order reproducible
# across kernel restarts.
df_train_aug = df_train_aug.sample(frac=1, random_state=CFG['SEED'])
df_val_aug = df_val_aug.sample(frac=1, random_state=CFG['SEED'])

In [None]:
len(df_val_aug)

7946

In [None]:
df_train_aug.index

Int64Index([19864,   799, 29485, 16796, 20458, 14073, 19907, 17694,  1456,
            19153,
            ...
            12458, 24897, 12647, 16765,  9606, 29310,    72, 30544, 15299,
            19933],
           dtype='int64', length=30708)

In [None]:
import random
def under_sampling(df):
  """Randomly drop about half of the rows labelled positive ('긍정') and certain ('확실').

  One uniform draw from the ``random`` module is consumed per row, in row
  order; a row is removed when its draw is above 0.5 and it carries both
  labels. Rows are selected by position, so a shuffled or duplicated index is
  handled correctly.

  Args:
    df: DataFrame with the columns '극성' and '확실성'.

  Returns:
    A new DataFrame without the dropped rows and with a fresh 0..n-1 index.
    The input frame is not modified.
  """
  # Rows that are candidates for removal (positive polarity AND certain).
  is_target = ((df['극성'] == '긍정') & (df['확실성'] == '확실')).to_numpy()
  # One draw per row, whether or not it is a candidate, so the random stream
  # is consumed row by row.
  draws = [random.uniform(0, 1) for _ in range(len(df))]
  keep_positions = [
      pos for pos, (hit, r) in enumerate(zip(is_target, draws))
      if not (hit and r > 0.5)
  ]
  return df.iloc[keep_positions].reset_index(drop=True)

In [None]:
df_train_aug_under = under_sampling(df_train_aug)
df_val_aug_under = under_sampling(df_val_aug)

In [None]:
df_train_aug.유형.value_counts(normalize=True), df_train_aug_under.유형.value_counts(normalize=True)

In [None]:
df_train_aug.극성.value_counts(normalize=True), df_train_aug_under.극성.value_counts(normalize=True)

In [None]:
df_train_aug.시제.value_counts(normalize=True), df_train_aug_under.시제.value_counts(normalize=True)

In [None]:
df_train_aug.확실성.value_counts(normalize=True), df_train_aug_under.확실성.value_counts(normalize=True)

In [None]:
def label_enc(df):
  """Label-encode the four target columns of ``df`` in place.

  A separate ``LabelEncoder`` is fitted per column ('유형', '극성', '시제',
  '확실성') and the column is overwritten with its integer codes.

  Returns:
    The fitted encoders as (type_le, polarity_le, tense_le, certainty_le).
  """
  fitted = []
  for column in ("유형", "극성", "시제", "확실성"):
    encoder = preprocessing.LabelEncoder()
    df[column] = encoder.fit_transform(df[column].values)
    fitted.append(encoder)

  type_le, polarity_le, tense_le, certainty_le = fitted
  return type_le, polarity_le, tense_le, certainty_le


In [None]:
len(df_train_aug)

30708

In [None]:
# Encode train and validation together so both share one label mapping per
# column, then split back at the original train length (not a hard-coded count).
# NOTE: label_enc rewrites the columns in place, so this cell should run only
# once per freshly loaded pair of frames.
n_train = len(df_train_aug)
df_aug = pd.concat([df_train_aug.reset_index(drop=True), df_val_aug.reset_index(drop=True)])

type_le, polarity_le, tense_le, certainty_le = label_enc(df_aug)

df_train_aug = df_aug.iloc[:n_train]
df_val_aug = df_aug.iloc[n_train:]

In [None]:
# NOTE(review): df_aug was already label-encoded in place by the previous cell.
# Running label_enc on it again refits the encoders on the integer codes, so the
# rebound type_le / polarity_le / tense_le / certainty_le no longer hold the
# original string classes. Confirm this cell is intentional before using the
# encoders for inverse_transform.
type_le, polarity_le, tense_le, certainty_le = label_enc(df_aug)

In [None]:
# NOTE(review): `df` is not defined in any cell shown above, so this raises
# NameError on a fresh kernel; it also rebinds the four encoders. Presumably a
# leftover from the non-augmented pipeline — confirm before Run All.
type_le, polarity_le, tense_le, certainty_le = label_enc(df)

# 토큰화 및 모델 불러오기

## 데이터셋 분리

In [None]:
from datasets import Dataset, DatasetDict, load_dataset

In [None]:
def make_dataset(df):
  """Convert a pandas DataFrame into a Hugging Face ``datasets.Dataset``.

  The class is imported locally under an explicit alias because the bare name
  ``Dataset`` is also imported from ``torch.utils.data`` in this notebook;
  whichever import ran last would otherwise decide which class is used here.

  Args:
    df: DataFrame whose columns become the dataset features.

  Returns:
    A ``datasets.Dataset`` with one feature per column; the index is not kept.
  """
  from datasets import Dataset as HFDataset

  raw_dict = df.to_dict("list")  # column name -> list of values
  raw_ds = HFDataset.from_dict(raw_dict)  # dict of columns -> Dataset

  return raw_ds

#ds = make_dataset(df)

In [None]:
#ds_train = make_dataset(df_train_aug_under)
#ds_val = make_dataset(df_val_aug_under)
ds_train = make_dataset(df_train_aug)
ds_val = make_dataset(df_val_aug)

In [None]:
# NOTE(review): df_test is not defined in any cell shown above (test.csv is
# extracted but never read here) — presumably loaded elsewhere; confirm before
# Run All.
ds_test = make_dataset(df_test)

In [None]:
ds_val

Dataset({
    features: ['문장', '유형', '극성', '시제', '확실성'],
    num_rows: 7946
})

## 토큰화 CLS 토큰 hidden state 만들기

In [None]:
from huggingface_hub import notebook_login
notebook_login()

Token is valid.
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
from transformers import AutoModel, AutoTokenizer
model_ckpt = "snunlp/KR-BERT-char16424"
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
model = AutoModel.from_pretrained(model_ckpt).to(device)


Some weights of the model checkpoint at snunlp/KR-BERT-char16424 were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
# AutoConfig is only imported much further down the notebook; import it here so
# this cell works on a fresh kernel.
from transformers import AutoConfig

config = AutoConfig.from_pretrained(model_ckpt)

In [None]:
config

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
model

In [None]:
tokenizer.vocab

In [None]:
tokenizer.model_max_length

In [None]:
tokenizer.model_input_names

In [None]:
def tokenize(batch, max_length=512):
  """Tokenize the '문장' (sentence) column of a batch with the global tokenizer.

  The tokenizer of this checkpoint reports no maximum length (see the
  "no maximum length is provided" warning in the run log), so
  ``truncation=True`` alone does nothing. Passing ``max_length`` explicitly
  makes truncation effective.

  Args:
    batch: Mapping with a '문장' entry holding the sentences to encode.
    max_length: Upper bound on the encoded length. 512 is the usual BERT
      position limit — TODO confirm against config.max_position_embeddings.

  Returns:
    The tokenizer output (input_ids and attention_mask; token_type_ids are
    suppressed), padded to the longest sequence in the batch.
  """
  return tokenizer(batch['문장'], padding=True, truncation=True, max_length=max_length, return_token_type_ids=False)

def encode(dataset):
  """Apply ``tokenize`` to ``dataset`` and return the mapped dataset.

  ``batch_size=None`` hands the whole dataset to ``tokenize`` as one batch,
  so padding is computed over all rows at once.
  """
  return dataset.map(tokenize, batched=True, batch_size=None)


In [None]:
#ds_encoded = encode(ds)

Map:   0%|          | 0/16506 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [None]:
ds_train_encoded = encode(ds_train)
ds_val_encoded = encode(ds_val)


Map:   0%|          | 0/30708 [00:00<?, ? examples/s]

Map:   0%|          | 0/7946 [00:00<?, ? examples/s]

In [None]:
ds_test_encoded = encode(ds_test)

Map:   0%|          | 0/7090 [00:00<?, ? examples/s]

In [None]:
ds_test_encoded

Dataset({
    features: ['ID', '문장', 'input_ids', 'attention_mask'],
    num_rows: 7090
})

In [None]:
ds_val_encoded

Dataset({
    features: ['문장', '유형', '극성', '시제', '확실성', 'input_ids', 'attention_mask'],
    num_rows: 7946
})

In [None]:
def extract_hidden_states(batch):
  """Run the global encoder on a batch and return the first-token hidden state.

  Only the entries named in ``tokenizer.model_input_names`` are moved to
  ``device`` and passed to ``model``; the forward pass runs without gradient
  tracking.

  Returns:
    ``{"hidden_state": array}`` where the array is ``last_hidden_state[:, 0]``
    copied to CPU as NumPy.
  """
  model_inputs = {}
  for name, tensor in batch.items():
    if name in tokenizer.model_input_names:
      model_inputs[name] = tensor.to(device)

  with torch.no_grad():
    outputs = model(**model_inputs)

  # Position 0 of every sequence (the [CLS] slot for BERT-style inputs).
  first_token = outputs.last_hidden_state[:, 0]
  return {"hidden_state": first_token.cpu().numpy()}

#ds_encoded.set_format("torch", columns=['input_ids', 'attention_mask'])

#hidden = ds_encoded.map(extract_hidden_states, batched=True)

In [None]:
#hidden = hidden.train_test_split(test_size=0.2, seed=1)
#train_hidden = hidden['train']
#val_hidden = hidden['test']

In [None]:
ds_train_encoded.set_format("torch", columns=['input_ids', 'attention_mask', '유형', '극성', '시제', '확실성'])
ds_val_encoded.set_format("torch", columns=['input_ids', 'attention_mask', '유형', '극성', '시제', '확실성'])


In [None]:
ds_train_encoded

Dataset({
    features: ['문장', '유형', '극성', '시제', '확실성', 'input_ids', 'attention_mask'],
    num_rows: 30708
})

In [None]:
train_hidden = ds_train_encoded.map(extract_hidden_states, batched=True)
val_hidden = ds_val_encoded.map(extract_hidden_states, batched=True)

Map:   0%|          | 0/30708 [00:00<?, ? examples/s]

Map:   0%|          | 0/7946 [00:00<?, ? examples/s]

In [None]:
ds_test_encoded.set_format("torch", columns=['input_ids', 'attention_mask'])
test_hidden = ds_test_encoded.map(extract_hidden_states, batched = True)

Map:   0%|          | 0/7090 [00:00<?, ? examples/s]

In [None]:
train_hidden

Dataset({
    features: ['문장', '유형', '극성', '시제', '확실성', 'input_ids', 'attention_mask', 'hidden_state'],
    num_rows: 30708
})

In [None]:
val_hidden

Dataset({
    features: ['문장', '유형', '극성', '시제', '확실성', 'input_ids', 'attention_mask', 'hidden_state'],
    num_rows: 7946
})

In [None]:
train_hidden.save_to_disk("train_hidden_aug")
val_hidden.save_to_disk("val_hidden_aug")
#test_hidden.save_to_disk("test_hidden")

Saving the dataset (0/1 shards):   0%|          | 0/30708 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/7946 [00:00<?, ? examples/s]

In [None]:
# NOTE(review): the save cell above writes "train_hidden_aug" and
# "val_hidden_aug" (the test save is commented out), so the directories named
# here are not produced by the visible cells; the recorded output below lists
# train_hidden_aug instead. Confirm the intended paths.
!zip -r /content/train_hdn.zip /content/train_hidden/
!zip -r /content/val_hdn.zip /content/val_hidden/
!zip -r /content/test_hdn.zip /content/test_hidden/

  adding: content/train_hidden_aug/ (stored 0%)
  adding: content/train_hidden_aug/data-00000-of-00001.arrow (deflated 31%)
  adding: content/train_hidden_aug/state.json (deflated 42%)
  adding: content/train_hidden_aug/dataset_info.json (deflated 75%)


In [None]:
!zip -r /content/train_hdn_aug.zip /content/train_hidden_aug/
!zip -r /content/val_hdn_aug.zip /content/val_hidden_aug/

In [None]:
# NOTE(review): train_hidden_aug_30000 / val_hidden_aug_8000 are not created by
# any cell shown above, and the train archive is named "..._3000" while its
# directory is "..._30000" — confirm both before relying on these archives.
!zip -r /content/train_hdn_aug_3000.zip /content/train_hidden_aug_30000/
!zip -r /content/val_hdn_aug_8000.zip /content/val_hidden_aug_8000/

  adding: content/train_hidden_aug_30000/ (stored 0%)
  adding: content/train_hidden_aug_30000/data-00000-of-00001.arrow (deflated 37%)
  adding: content/train_hidden_aug_30000/state.json (deflated 42%)
  adding: content/train_hidden_aug_30000/dataset_info.json (deflated 74%)
  adding: content/val_hidden_aug_8000/ (stored 0%)
  adding: content/val_hidden_aug_8000/data-00000-of-00001.arrow (deflated 26%)
  adding: content/val_hidden_aug_8000/state.json (deflated 43%)
  adding: content/val_hidden_aug_8000/dataset_info.json (deflated 74%)


## CustomModel

https://heekangpark.github.io/nlp/huggingface-bert

In [None]:
from typing import Optional, Sequence
from torch import Tensor

In [None]:
class FocalLoss(nn.Module):
    """ Focal Loss, as described in https://arxiv.org/abs/1708.02002.
    It is essentially an enhancement to cross entropy loss and is
    useful for classification tasks when there is a large class imbalance.
    x is expected to contain raw, unnormalized scores for each class.
    y is expected to contain class labels.
    Shape:
        - x: (batch_size, C) or (batch_size, C, d1, d2, ..., dK), K > 0.
        - y: (batch_size,) or (batch_size, d1, d2, ..., dK), K > 0.
    """

    def __init__(self,
                 alpha: Optional[Tensor] = None,
                 gamma: float = 0.,
                 reduction: str = 'mean',
                 ignore_index: int = -100):
        """Constructor.
        Args:
            alpha (Tensor, optional): Weights for each class. Defaults to None.
            gamma (float, optional): A constant, as described in the paper.
                Defaults to 0.
            reduction (str, optional): 'mean', 'sum' or 'none'.
                Defaults to 'mean'.
            ignore_index (int, optional): class label to ignore.
                Defaults to -100.
        Raises:
            ValueError: if ``reduction`` is not 'mean', 'sum' or 'none'.
        """
        if reduction not in ('mean', 'sum', 'none'):
            raise ValueError(
                'Reduction must be one of: "mean", "sum", "none".')

        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.ignore_index = ignore_index
        self.reduction = reduction

        # Per-sample weighted NLL; the reduction is applied in forward() after
        # the focal term has been multiplied in.
        self.nll_loss = nn.NLLLoss(
            weight=alpha, reduction='none', ignore_index=ignore_index)

    def __repr__(self):
        arg_keys = ['alpha', 'gamma', 'ignore_index', 'reduction']
        arg_vals = [self.__dict__[k] for k in arg_keys]
        arg_strs = [f'{k}={v!r}' for k, v in zip(arg_keys, arg_vals)]
        arg_str = ', '.join(arg_strs)
        return f'{type(self).__name__}({arg_str})'

    def forward(self, x: Tensor, y: Tensor) -> Tensor:
        """Compute the focal loss of logits ``x`` against class labels ``y``.

        Returns:
            A scalar for 'mean' / 'sum', or one loss value per non-ignored
            target for 'none'. When every target equals ``ignore_index`` the
            result is a scalar zero on ``x``'s device that is still connected
            to ``x`` in the autograd graph.
        """
        if x.ndim > 2:
            # (N, C, d1, d2, ..., dK) --> (N * d1 * ... * dK, C)
            c = x.shape[1]
            x = x.permute(0, *range(2, x.ndim), 1).reshape(-1, c)
            # (N, d1, d2, ..., dK) --> (N * d1 * ... * dK,)
            y = y.reshape(-1)

        unignored_mask = y != self.ignore_index
        y = y[unignored_mask]
        x = x[unignored_mask]
        if len(y) == 0:
            # Sum over an empty selection is 0 with x's dtype/device and keeps
            # the graph, so .backward() and device-side accumulation still work.
            return x.sum()

        # compute weighted cross entropy term: -alpha * log(pt)
        # (alpha is already part of self.nll_loss)
        log_p = F.log_softmax(x, dim=-1)
        ce = self.nll_loss(log_p, y)

        # get true class column from each row (index built on x's device)
        all_rows = torch.arange(len(x), device=x.device)
        log_pt = log_p[all_rows, y]

        # compute focal term: (1 - pt)^gamma
        pt = log_pt.exp()
        focal_term = (1 - pt)**self.gamma

        # the full loss: -alpha * ((1 - pt)^gamma) * log(pt)
        loss = focal_term * ce

        if self.reduction == 'mean':
            loss = loss.mean()
        elif self.reduction == 'sum':
            loss = loss.sum()

        return loss


def focal_loss(alpha: Optional[Sequence] = None,
               gamma: float = 0.,
               reduction: str = 'mean',
               ignore_index: int = -100,
               device='cpu',
               dtype=torch.float32) -> FocalLoss:
    """Build a FocalLoss, converting the class weights to a tensor first.

    Args:
        alpha (Sequence, optional): Per-class weights. A non-Tensor value is
            wrapped with ``torch.tensor``; the result is moved to ``device``
            and cast to ``dtype``. ``None`` is passed through unchanged.
        gamma (float, optional): Focusing constant from the paper.
            Defaults to 0.
        reduction (str, optional): 'mean', 'sum' or 'none'.
            Defaults to 'mean'.
        ignore_index (int, optional): Class label to ignore.
            Defaults to -100.
        device (str, optional): Device for ``alpha``. Defaults to 'cpu'.
        dtype (torch.dtype, optional): dtype for ``alpha``.
            Defaults to torch.float32.
    Returns:
        A FocalLoss object
    """
    weights = alpha
    if weights is not None:
        as_tensor = weights if isinstance(weights, Tensor) else torch.tensor(weights)
        weights = as_tensor.to(device=device, dtype=dtype)

    return FocalLoss(
        alpha=weights,
        gamma=gamma,
        reduction=reduction,
        ignore_index=ignore_index)
        
def compute_metrics(pred):
    """Average the weighted F1 score over the four classification tasks.

    Args:
        pred: Object exposing ``label_ids`` (indexed as ``labels[:, i]``, one
            column per task) and ``predictions`` (indexed as ``preds[i]``, one
            entry per task).
            NOTE(review): ``preds[i]`` is passed to ``f1_score`` as ``y_pred``,
            so it presumably already holds class ids rather than logits —
            confirm against the caller.

    Returns:
        ``{'f1-sum': value}`` where ``value`` is the mean of the four per-task
        weighted F1 scores (the key name is kept for existing consumers).
    """
    labels = pred.label_ids
    preds = pred.predictions
    task_scores = [
        f1_score(y_true=labels[::, task], y_pred=preds[task], average='weighted')
        for task in range(4)
    ]
    return {
        'f1-sum': sum(task_scores)/4
    }

In [None]:
from transformers import AutoConfig

config=AutoConfig.from_pretrained(model_ckpt)

In [None]:
class CustomModel(nn.Module):
    """Multi-task classifier over a fixed-size sentence vector (LeakyReLU trunk).

    A shared Linear -> BatchNorm1d -> LeakyReLU trunk projects the input to 512
    features; four independent Dropout -> Linear heads emit logits for type
    (4 classes), polarity (3), tense (3) and certainty (2).
    """

    def __init__(self, input_dim=768):
        super().__init__()
        # Sub-modules are created in the order trunk, type, polarity, tense,
        # certainty, which fixes parameter order and seeded initialisation.
        self.feature_extract = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(),
        )
        self.type_classifier = self._make_head(4)
        self.polarity_classifier = self._make_head(3)
        self.tense_classifier = self._make_head(3)
        self.certainty_classifier = self._make_head(2)

    @staticmethod
    def _make_head(num_classes):
        """Build one Dropout(0.3) -> Linear(512, num_classes) head."""
        return nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return (type, polarity, tense, certainty) logits for batch ``x``."""
        shared = self.feature_extract(x)
        # Heads run in the order tense, type, polarity, certainty.
        tense_logits = self.tense_classifier(shared)
        type_logits = self.type_classifier(shared)
        polarity_logits = self.polarity_classifier(shared)
        certainty_logits = self.certainty_classifier(shared)

        return type_logits, polarity_logits, tense_logits, certainty_logits

### Activation Function

In [None]:
class CustomModel_Elu(nn.Module):
    """Multi-task classifier over a fixed-size sentence vector (ELU trunk).

    A shared Linear -> BatchNorm1d -> ELU trunk projects the input to 512
    features; four independent Dropout -> Linear heads emit logits for type
    (4 classes), polarity (3), tense (3) and certainty (2).
    """

    def __init__(self, input_dim=768):
        super().__init__()
        # Sub-modules are created in the order trunk, type, polarity, tense,
        # certainty, which fixes parameter order and seeded initialisation.
        self.feature_extract = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.BatchNorm1d(512),
            nn.ELU(),
        )
        self.type_classifier = self._make_head(4)
        self.polarity_classifier = self._make_head(3)
        self.tense_classifier = self._make_head(3)
        self.certainty_classifier = self._make_head(2)

    @staticmethod
    def _make_head(num_classes):
        """Build one Dropout(0.3) -> Linear(512, num_classes) head."""
        return nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return (type, polarity, tense, certainty) logits for batch ``x``."""
        shared = self.feature_extract(x)
        # Heads run in the order tense, type, polarity, certainty.
        tense_logits = self.tense_classifier(shared)
        type_logits = self.type_classifier(shared)
        polarity_logits = self.polarity_classifier(shared)
        certainty_logits = self.certainty_classifier(shared)

        return type_logits, polarity_logits, tense_logits, certainty_logits

In [None]:
class CustomModel_tanh(nn.Module):
    """Multi-task classifier over a fixed-size sentence vector (Tanh trunk).

    A shared Linear -> BatchNorm1d -> Tanh trunk projects the input to 512
    features; four independent Dropout -> Linear heads emit logits for type
    (4 classes), polarity (3), tense (3) and certainty (2).
    """

    def __init__(self, input_dim=768):
        super().__init__()
        # Sub-modules are created in the order trunk, type, polarity, tense,
        # certainty, which fixes parameter order and seeded initialisation.
        self.feature_extract = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.BatchNorm1d(512),
            nn.Tanh(),
        )
        self.type_classifier = self._make_head(4)
        self.polarity_classifier = self._make_head(3)
        self.tense_classifier = self._make_head(3)
        self.certainty_classifier = self._make_head(2)

    @staticmethod
    def _make_head(num_classes):
        """Build one Dropout(0.3) -> Linear(512, num_classes) head."""
        return nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return (type, polarity, tense, certainty) logits for batch ``x``."""
        shared = self.feature_extract(x)
        # Heads run in the order tense, type, polarity, certainty.
        tense_logits = self.tense_classifier(shared)
        type_logits = self.type_classifier(shared)
        polarity_logits = self.polarity_classifier(shared)
        certainty_logits = self.certainty_classifier(shared)

        return type_logits, polarity_logits, tense_logits, certainty_logits

In [None]:
class CustomModel_Gelu(nn.Module):
    """Multi-task classifier over a fixed-size sentence vector (GELU trunk).

    A shared Linear -> BatchNorm1d -> GELU trunk projects the input to 512
    features; four independent Dropout -> Linear heads emit logits for type
    (4 classes), polarity (3), tense (3) and certainty (2).
    """

    def __init__(self, input_dim=768):
        super().__init__()
        # Sub-modules are created in the order trunk, type, polarity, tense,
        # certainty, which fixes parameter order and seeded initialisation.
        self.feature_extract = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.BatchNorm1d(512),
            nn.GELU(),
        )
        self.type_classifier = self._make_head(4)
        self.polarity_classifier = self._make_head(3)
        self.tense_classifier = self._make_head(3)
        self.certainty_classifier = self._make_head(2)

    @staticmethod
    def _make_head(num_classes):
        """Build one Dropout(0.3) -> Linear(512, num_classes) head."""
        return nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return (type, polarity, tense, certainty) logits for batch ``x``."""
        shared = self.feature_extract(x)
        # Heads run in the order tense, type, polarity, certainty.
        tense_logits = self.tense_classifier(shared)
        type_logits = self.type_classifier(shared)
        polarity_logits = self.polarity_classifier(shared)
        certainty_logits = self.certainty_classifier(shared)

        return type_logits, polarity_logits, tense_logits, certainty_logits

### Ensemble

In [None]:
class CustomModel_a(nn.Module):  # chain order: tense -> type -> polarity -> certainty
    """Chained multi-task head: tense -> type -> polarity -> certainty.

    The first stage maps the input vector to 512 features. Each later stage
    receives the previous stage's 512 features concatenated with the original
    input, and each stage's features feed the classifier of one task.

    Args:
        input_dim: Width of the input vector. Defaults to 768.
    """

    def __init__(self, input_dim=768):
        super(CustomModel_a, self).__init__()
        # Stages 2-4 consume [previous 512-d features, raw input]; deriving the
        # width from input_dim keeps non-default input sizes working.
        chained_dim = 512 + input_dim

        self.feature_extract_first = nn.Sequential(  # input -> tense features
            nn.Linear(in_features=input_dim, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU()
        )

        self.feature_extract_second = nn.Sequential(  # tense features -> type features
            nn.Linear(in_features=chained_dim, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU()
        )

        self.feature_extract_third = nn.Sequential(  # type features -> polarity features
            nn.Linear(in_features=chained_dim, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU()
        )

        # Attribute name (including its spelling) is kept: it is part of the
        # state_dict keys.
        self.feature_extract_forth = nn.Sequential(  # polarity features -> certainty features
            nn.Linear(in_features=chained_dim, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU()
        )

        self.type_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=4),
        )
        self.polarity_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=3),
        )
        self.tense_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=3),
        )
        self.certainty_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=2),
        )

    def forward(self, t):
        """Return (type, polarity, tense, certainty) logits for batch ``t``."""
        t1 = self.feature_extract_first(t)
        tense_output = self.tense_classifier(t1)

        # Each following stage sees the previous features plus the raw input.
        prediction_vector = torch.cat((t1, t), dim=1)
        t2 = self.feature_extract_second(prediction_vector)
        type_output = self.type_classifier(t2)

        prediction_vector = torch.cat((t2, t), dim=1)
        t3 = self.feature_extract_third(prediction_vector)
        polarity_output = self.polarity_classifier(t3)

        prediction_vector = torch.cat((t3, t), dim=1)
        t4 = self.feature_extract_forth(prediction_vector)
        certainty_output = self.certainty_classifier(t4)

        return type_output, polarity_output, tense_output, certainty_output

In [None]:
class CustomModel_b(nn.Module):  # chain order: polarity -> tense -> certainty -> type
    """Chained multi-task head: polarity -> tense -> certainty -> type.

    The first stage (Linear -> BatchNorm1d -> LeakyReLU) maps the input vector
    to 512 features. Each later stage is a two-layer Linear/LeakyReLU block
    without batch norm that receives the previous stage's 512 features
    concatenated with the original input; each stage feeds one task classifier.

    Args:
        input_dim: Width of the input vector. Defaults to 768.
    """

    def __init__(self, input_dim=768):
        super(CustomModel_b, self).__init__()
        # Stages 2-4 consume [previous 512-d features, raw input]; deriving the
        # width from input_dim keeps non-default input sizes working.
        chained_dim = 512 + input_dim

        self.feature_extract_first = nn.Sequential(  # input -> polarity features
            nn.Linear(in_features=input_dim, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU()
        )

        self.feature_extract_second = nn.Sequential(  # polarity features -> tense features
            nn.Linear(in_features=chained_dim, out_features=512),
            nn.LeakyReLU(),
            nn.Linear(in_features=512, out_features=512),
            nn.LeakyReLU()
        )

        self.feature_extract_third = nn.Sequential(  # tense features -> certainty features
            nn.Linear(in_features=chained_dim, out_features=512),
            nn.LeakyReLU(),
            nn.Linear(in_features=512, out_features=512),
            nn.LeakyReLU()
        )

        # Attribute name (including its spelling) is kept: it is part of the
        # state_dict keys.
        self.feature_extract_forth = nn.Sequential(  # certainty features -> type features
            nn.Linear(in_features=chained_dim, out_features=512),
            nn.LeakyReLU(),
            nn.Linear(in_features=512, out_features=512),
            nn.LeakyReLU()
        )

        self.type_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=4),
        )
        self.polarity_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=3),
        )
        self.tense_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=3),
        )
        self.certainty_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=2),
        )

    def forward(self, t):
        """Return (type, polarity, tense, certainty) logits for batch ``t``."""
        t1 = self.feature_extract_first(t)
        polarity_output = self.polarity_classifier(t1)

        # Each following stage sees the previous features plus the raw input.
        prediction_vector = torch.cat((t1, t), dim=1)
        t2 = self.feature_extract_second(prediction_vector)
        tense_output = self.tense_classifier(t2)

        prediction_vector = torch.cat((t2, t), dim=1)
        t3 = self.feature_extract_third(prediction_vector)
        certainty_output = self.certainty_classifier(t3)

        prediction_vector = torch.cat((t3, t), dim=1)
        t4 = self.feature_extract_forth(prediction_vector)
        type_output = self.type_classifier(t4)

        return type_output, polarity_output, tense_output, certainty_output

In [None]:
class CustomModel_c(nn.Module):  # chain order: certainty -> type -> polarity -> tense
    """Chained multi-task head: certainty -> type -> polarity -> tense.

    The first stage maps the input vector to 512 features. Each later stage
    receives the previous stage's 512 features concatenated with the original
    input, and each stage's features feed the classifier of one task.

    Args:
        input_dim: Width of the input vector. Defaults to 768.
    """

    def __init__(self, input_dim=768):
        super(CustomModel_c, self).__init__()
        # Stages 2-4 consume [previous 512-d features, raw input]; deriving the
        # width from input_dim keeps non-default input sizes working.
        chained_dim = 512 + input_dim

        self.feature_extract_first = nn.Sequential(  # input -> certainty features
            nn.Linear(in_features=input_dim, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU()
        )

        self.feature_extract_second = nn.Sequential(  # certainty features -> type features
            nn.Linear(in_features=chained_dim, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU()
        )

        self.feature_extract_third = nn.Sequential(  # type features -> polarity features
            nn.Linear(in_features=chained_dim, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU()
        )

        # Attribute name (including its spelling) is kept: it is part of the
        # state_dict keys.
        self.feature_extract_forth = nn.Sequential(  # polarity features -> tense features
            nn.Linear(in_features=chained_dim, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU()
        )

        self.type_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=4),
        )
        self.polarity_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=3),
        )
        self.tense_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=3),
        )
        self.certainty_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=2),
        )

    def forward(self, t):
        """Return (type, polarity, tense, certainty) logits for batch ``t``."""
        t1 = self.feature_extract_first(t)
        certainty_output = self.certainty_classifier(t1)

        # Each following stage sees the previous features plus the raw input.
        prediction_vector = torch.cat((t1, t), dim=1)
        t2 = self.feature_extract_second(prediction_vector)
        type_output = self.type_classifier(t2)

        prediction_vector = torch.cat((t2, t), dim=1)
        t3 = self.feature_extract_third(prediction_vector)
        polarity_output = self.polarity_classifier(t3)

        prediction_vector = torch.cat((t3, t), dim=1)
        t4 = self.feature_extract_forth(prediction_vector)
        tense_output = self.tense_classifier(t4)

        return type_output, polarity_output, tense_output, certainty_output

In [None]:
class CustomModel_d(nn.Module):  # chain order: polarity -> type -> tense -> certainty
    """Chained multi-task head: polarity -> type -> tense -> certainty.

    The first stage maps the input vector to 512 features. Each later stage
    receives the previous stage's 512 features concatenated with the original
    input, and each stage's features feed the classifier of one task.

    Args:
        input_dim: Width of the input vector. Defaults to 768.
    """

    def __init__(self, input_dim=768):
        super(CustomModel_d, self).__init__()
        # Stages 2-4 consume [previous 512-d features, raw input]; deriving the
        # width from input_dim keeps non-default input sizes working.
        chained_dim = 512 + input_dim

        self.feature_extract_first = nn.Sequential(  # input -> polarity features
            nn.Linear(in_features=input_dim, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU()
        )

        self.feature_extract_second = nn.Sequential(  # polarity features -> type features
            nn.Linear(in_features=chained_dim, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU()
        )

        self.feature_extract_third = nn.Sequential(  # type features -> tense features
            nn.Linear(in_features=chained_dim, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU()
        )

        # Attribute name (including its spelling) is kept: it is part of the
        # state_dict keys.
        self.feature_extract_forth = nn.Sequential(  # tense features -> certainty features
            nn.Linear(in_features=chained_dim, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU()
        )

        self.type_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=4),
        )
        self.polarity_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=3),
        )
        self.tense_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=3),
        )
        self.certainty_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=2),
        )

    def forward(self, t):
        """Return (type, polarity, tense, certainty) logits for batch ``t``."""
        t1 = self.feature_extract_first(t)
        polarity_output = self.polarity_classifier(t1)

        # Each following stage sees the previous features plus the raw input.
        prediction_vector = torch.cat((t1, t), dim=1)
        t2 = self.feature_extract_second(prediction_vector)
        type_output = self.type_classifier(t2)

        prediction_vector = torch.cat((t2, t), dim=1)
        t3 = self.feature_extract_third(prediction_vector)
        tense_output = self.tense_classifier(t3)

        prediction_vector = torch.cat((t3, t), dim=1)
        t4 = self.feature_extract_forth(prediction_vector)
        certainty_output = self.certainty_classifier(t4)

        return type_output, polarity_output, tense_output, certainty_output

In [None]:
class CustomModel_e(nn.Module):  # chain order: type -> tense -> certainty -> polarity
    """Chained multi-task head: type -> tense -> certainty -> polarity.

    The first stage maps the input vector to 512 features. Each later stage
    receives the previous stage's 512 features concatenated with the original
    input, and each stage's features feed the classifier of one task.

    Args:
        input_dim: Width of the input vector. Defaults to 768.
    """

    def __init__(self, input_dim=768):
        super(CustomModel_e, self).__init__()
        # Stages 2-4 consume [previous 512-d features, raw input]; deriving the
        # width from input_dim keeps non-default input sizes working.
        chained_dim = 512 + input_dim

        self.feature_extract_first = nn.Sequential(  # input -> type features
            nn.Linear(in_features=input_dim, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU()
        )

        self.feature_extract_second = nn.Sequential(  # type features -> tense features
            nn.Linear(in_features=chained_dim, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU()
        )

        self.feature_extract_third = nn.Sequential(  # tense features -> certainty features
            nn.Linear(in_features=chained_dim, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU()
        )

        # Attribute name (including its spelling) is kept: it is part of the
        # state_dict keys.
        self.feature_extract_forth = nn.Sequential(  # certainty features -> polarity features
            nn.Linear(in_features=chained_dim, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU()
        )

        self.type_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=4),
        )
        self.polarity_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=3),
        )
        self.tense_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=3),
        )
        self.certainty_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=2),
        )

    def forward(self, t):
        """Return (type, polarity, tense, certainty) logits for batch ``t``."""
        t1 = self.feature_extract_first(t)
        type_output = self.type_classifier(t1)

        # Each following stage sees the previous features plus the raw input.
        prediction_vector = torch.cat((t1, t), dim=1)
        t2 = self.feature_extract_second(prediction_vector)
        tense_output = self.tense_classifier(t2)

        prediction_vector = torch.cat((t2, t), dim=1)
        t3 = self.feature_extract_third(prediction_vector)
        certainty_output = self.certainty_classifier(t3)

        prediction_vector = torch.cat((t3, t), dim=1)
        t4 = self.feature_extract_forth(prediction_vector)
        polarity_output = self.polarity_classifier(t4)

        return type_output, polarity_output, tense_output, certainty_output

In [None]:
class CustomModel_f(nn.Module):
    """Cascaded four-head classifier; head order: tense -> certainty -> polarity -> type.

    Every stage is Linear -> BatchNorm1d -> LeakyReLU and emits a 512-d vector.
    That vector feeds the stage's own classifier head and, concatenated with the
    raw input, the next stage.

    Args:
        input_dim: width of the input feature vector (default 768). The
            skip-connected stages are sized as `512 + input_dim`, so any input
            width works, not only 768.
    """

    def __init__(self, input_dim=768):
        super(CustomModel_f, self).__init__()
        # Stages 2-4 consume [previous 512-d stage vector, raw input]; their width
        # must follow `input_dim` or forward() fails for any input_dim != 768.
        skip_dim = 512 + input_dim

        self.feature_extract_first = nn.Sequential(  # input -> tense vec
            nn.Linear(in_features=input_dim, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU()
        )

        self.feature_extract_second = nn.Sequential(  # tense vec -> certainty vec
            nn.Linear(in_features=skip_dim, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU()
        )

        self.feature_extract_third = nn.Sequential(  # certainty vec -> polarity vec
            nn.Linear(in_features=skip_dim, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU()
        )

        self.feature_extract_forth = nn.Sequential(  # polarity vec -> type vec
            nn.Linear(in_features=skip_dim, out_features=512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU()
        )

        # One Dropout + Linear head per task: 4 type, 3 polarity, 3 tense, 2 certainty classes.
        self.type_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=4),
        )
        self.polarity_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=3),
        )
        self.tense_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=3),
        )
        self.certainty_classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(in_features=512, out_features=2),
        )

    def forward(self, t):
        """Return `(type, polarity, tense, certainty)` logits for a 2-D input `t`.

        `t` must have `input_dim` columns; it is concatenated (dim 1) with each
        stage's output before entering the next stage.
        """
        t1 = self.feature_extract_first(t)
        tense_output = self.tense_classifier(t1)

        prediction_vector = torch.cat((t1, t), dim=1)
        t2 = self.feature_extract_second(prediction_vector)
        certainty_output = self.certainty_classifier(t2)

        prediction_vector = torch.cat((t2, t), dim=1)
        t3 = self.feature_extract_third(prediction_vector)
        polarity_output = self.polarity_classifier(t3)

        prediction_vector = torch.cat((t3, t), dim=1)
        t4 = self.feature_extract_forth(prediction_vector)
        type_output = self.type_classifier(t4)

        # Return order is fixed (type, polarity, tense, certainty) regardless of cascade order.
        return type_output, polarity_output, tense_output, certainty_output

In [None]:
class CustomModel_g(nn.Module):
    """Sequential four-head classifier; head order: type -> certainty -> polarity -> tense.

    Four Linear -> BatchNorm1d -> LeakyReLU stages are chained, each producing a
    512-d vector. Every stage output feeds its own Dropout + Linear head and is
    passed on (without re-injecting the input) to the next stage.
    """

    def __init__(self, input_dim=768):
        super().__init__()
        width = 512

        def make_stage(in_width):
            # Feature block: Linear -> BatchNorm1d -> LeakyReLU.
            return nn.Sequential(
                nn.Linear(in_features=in_width, out_features=width),
                nn.BatchNorm1d(width),
                nn.LeakyReLU(),
            )

        def make_head(num_classes):
            # Task head: Dropout followed by the logit layer.
            return nn.Sequential(
                nn.Dropout(p=0.3),
                nn.Linear(in_features=width, out_features=num_classes),
            )

        # Stages are created first, then heads; this order determines seeded
        # initialisation and state_dict key order.
        self.feature_extract_first = make_stage(input_dim)  # input -> type vec
        self.feature_extract_second = make_stage(width)     # type vec -> certainty vec
        self.feature_extract_third = make_stage(width)      # certainty vec -> polarity vec
        self.feature_extract_forth = make_stage(width)      # polarity vec -> tense vec

        self.type_classifier = make_head(4)
        self.polarity_classifier = make_head(3)
        self.tense_classifier = make_head(3)
        self.certainty_classifier = make_head(2)

    def forward(self, t):
        """Return `(type, polarity, tense, certainty)` logits for input `t`."""
        type_vec = self.feature_extract_first(t)
        type_output = self.type_classifier(type_vec)

        certainty_vec = self.feature_extract_second(type_vec)
        certainty_output = self.certainty_classifier(certainty_vec)

        polarity_vec = self.feature_extract_third(certainty_vec)
        polarity_output = self.polarity_classifier(polarity_vec)

        tense_vec = self.feature_extract_forth(polarity_vec)
        tense_output = self.tense_classifier(tense_vec)

        return type_output, polarity_output, tense_output, certainty_output

In [None]:
class CustomModel_h(nn.Module):
    """Sequential four-head classifier; head order: type -> tense -> certainty -> polarity.

    Four Linear -> BatchNorm1d -> LeakyReLU stages are chained, each producing a
    512-d vector that feeds its own Dropout + Linear head and the next stage.

    NOTE(review): this class was previously labelled "확 시 극 유" (certainty, tense,
    polarity, type), which does not match the wiring. The code computes
    type -> tense -> certainty -> polarity; confirm which order was intended.
    """

    def __init__(self, input_dim=768):
        super().__init__()
        width = 512

        def make_stage(in_width):
            # Feature block: Linear -> BatchNorm1d -> LeakyReLU.
            return nn.Sequential(
                nn.Linear(in_features=in_width, out_features=width),
                nn.BatchNorm1d(width),
                nn.LeakyReLU(),
            )

        def make_head(num_classes):
            # Task head: Dropout followed by the logit layer.
            return nn.Sequential(
                nn.Dropout(p=0.3),
                nn.Linear(in_features=width, out_features=num_classes),
            )

        # Stages are created first, then heads; this order determines seeded
        # initialisation and state_dict key order.
        self.feature_extract_first = make_stage(input_dim)  # input -> type vec
        self.feature_extract_second = make_stage(width)     # type vec -> tense vec
        self.feature_extract_third = make_stage(width)      # tense vec -> certainty vec
        self.feature_extract_forth = make_stage(width)      # certainty vec -> polarity vec

        self.type_classifier = make_head(4)
        self.polarity_classifier = make_head(3)
        self.tense_classifier = make_head(3)
        self.certainty_classifier = make_head(2)

    def forward(self, t):
        """Return `(type, polarity, tense, certainty)` logits for input `t`."""
        type_vec = self.feature_extract_first(t)
        type_output = self.type_classifier(type_vec)

        tense_vec = self.feature_extract_second(type_vec)
        tense_output = self.tense_classifier(tense_vec)

        certainty_vec = self.feature_extract_third(tense_vec)
        certainty_output = self.certainty_classifier(certainty_vec)

        polarity_vec = self.feature_extract_forth(certainty_vec)
        polarity_output = self.polarity_classifier(polarity_vec)

        return type_output, polarity_output, tense_output, certainty_output

In [None]:
class CustomModel_i(nn.Module):
    """Sequential four-head classifier; head order: polarity -> type -> certainty -> tense.

    Four Linear -> BatchNorm1d -> LeakyReLU stages are chained, each producing a
    512-d vector that feeds its own Dropout + Linear head and the next stage.
    """

    def __init__(self, input_dim=768):
        super().__init__()
        width = 512

        def make_stage(in_width):
            # Feature block: Linear -> BatchNorm1d -> LeakyReLU.
            return nn.Sequential(
                nn.Linear(in_features=in_width, out_features=width),
                nn.BatchNorm1d(width),
                nn.LeakyReLU(),
            )

        def make_head(num_classes):
            # Task head: Dropout followed by the logit layer.
            return nn.Sequential(
                nn.Dropout(p=0.3),
                nn.Linear(in_features=width, out_features=num_classes),
            )

        # Stages are created first, then heads; this order determines seeded
        # initialisation and state_dict key order.
        self.feature_extract_first = make_stage(input_dim)  # input -> polarity vec
        self.feature_extract_second = make_stage(width)     # polarity vec -> type vec
        self.feature_extract_third = make_stage(width)      # type vec -> certainty vec
        self.feature_extract_forth = make_stage(width)      # certainty vec -> tense vec

        self.type_classifier = make_head(4)
        self.polarity_classifier = make_head(3)
        self.tense_classifier = make_head(3)
        self.certainty_classifier = make_head(2)

    def forward(self, t):
        """Return `(type, polarity, tense, certainty)` logits for input `t`."""
        polarity_vec = self.feature_extract_first(t)
        polarity_output = self.polarity_classifier(polarity_vec)

        type_vec = self.feature_extract_second(polarity_vec)
        type_output = self.type_classifier(type_vec)

        certainty_vec = self.feature_extract_third(type_vec)
        certainty_output = self.certainty_classifier(certainty_vec)

        tense_vec = self.feature_extract_forth(certainty_vec)
        tense_output = self.tense_classifier(tense_vec)

        return type_output, polarity_output, tense_output, certainty_output

In [None]:
class CustomModel_j(nn.Module):
    """Sequential four-head classifier; head order: tense -> certainty -> type -> polarity.

    Four Linear -> BatchNorm1d -> LeakyReLU stages are chained, each producing a
    512-d vector that feeds its own Dropout + Linear head and the next stage.
    """

    def __init__(self, input_dim=768):
        super().__init__()
        width = 512

        def make_stage(in_width):
            # Feature block: Linear -> BatchNorm1d -> LeakyReLU.
            return nn.Sequential(
                nn.Linear(in_features=in_width, out_features=width),
                nn.BatchNorm1d(width),
                nn.LeakyReLU(),
            )

        def make_head(num_classes):
            # Task head: Dropout followed by the logit layer.
            return nn.Sequential(
                nn.Dropout(p=0.3),
                nn.Linear(in_features=width, out_features=num_classes),
            )

        # Stages are created first, then heads; this order determines seeded
        # initialisation and state_dict key order.
        self.feature_extract_first = make_stage(input_dim)  # input -> tense vec
        self.feature_extract_second = make_stage(width)     # tense vec -> certainty vec
        self.feature_extract_third = make_stage(width)      # certainty vec -> type vec
        self.feature_extract_forth = make_stage(width)      # type vec -> polarity vec

        self.type_classifier = make_head(4)
        self.polarity_classifier = make_head(3)
        self.tense_classifier = make_head(3)
        self.certainty_classifier = make_head(2)

    def forward(self, t):
        """Return `(type, polarity, tense, certainty)` logits for input `t`."""
        tense_vec = self.feature_extract_first(t)
        tense_output = self.tense_classifier(tense_vec)

        certainty_vec = self.feature_extract_second(tense_vec)
        certainty_output = self.certainty_classifier(certainty_vec)

        type_vec = self.feature_extract_third(certainty_vec)
        type_output = self.type_classifier(type_vec)

        polarity_vec = self.feature_extract_forth(type_vec)
        polarity_output = self.polarity_classifier(polarity_vec)

        return type_output, polarity_output, tense_output, certainty_output

### train, validation

In [None]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    """Train `model` on the four-task objective and return the best checkpoint.

    Each epoch runs one pass over `train_loader`, then `validation(...)` on
    `val_loader`, prints the metrics and, if given, steps `scheduler` with the
    validation loss.

    Args:
        model: module mapping a hidden-state batch to
            `(type, polarity, tense, certainty)` logits.
        optimizer: optimizer over `model`'s parameters.
        train_loader: yields `(type_label, polarity_label, tense_label,
            certainty_label, hidden_state)` batches.
        val_loader: same batch layout, used for validation.
        scheduler: object with `step(val_loss)`, or None to disable scheduling.
        device: device the model and batches are moved to.

    Returns:
        A deep copy of the model taken at the epoch with the lowest validation
        loss, or None if the validation loss never fell below the initial
        sentinel (for example when it is NaN every epoch).
    """
    import copy  # local import; used to snapshot the best weights

    model.to(device)

    criterion = {
        'type' : FocalLoss().to(device),
        'polarity' : FocalLoss().to(device),
        'tense' : FocalLoss().to(device),
        'certainty' : FocalLoss().to(device)
    }

    best_loss = 999999
    best_model = None

    # Runs CFG['EPOCHS'] + 40 epochs, numbered 1 .. CFG['EPOCHS'] + 40.
    for epoch in range(1, CFG['EPOCHS']+41):
        model.train()
        train_loss = []
        for type_label, polarity_label, tense_label, certainty_label, hidden_state in tqdm(iter(train_loader)):
            type_label = type_label.to(device)
            polarity_label = polarity_label.to(device)
            tense_label = tense_label.to(device)
            certainty_label = certainty_label.to(device)
            hidden_state = hidden_state.to(device)

            optimizer.zero_grad()

            type_logit, polarity_logit, tense_logit, certainty_logit = model(hidden_state)

            # Training weights the polarity loss lower (0.16) than the other three
            # tasks (0.28 each); validation() uses equal 0.25 weights.
            loss = 0.28 * criterion['type'](type_logit, type_label) + \
                    0.16 * criterion['polarity'](polarity_logit, polarity_label) + \
                    0.28 * criterion['tense'](tense_logit, tense_label) + \
                    0.28 * criterion['certainty'](certainty_logit, certainty_label)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        val_loss, val_type_f1, val_polarity_f1, val_tense_f1, val_certainty_f1 = validation(model, val_loader, criterion, device)
        print(f'Epoch : [{epoch}] Train Loss : [{np.mean(train_loss):.5f}] Val Loss : [{val_loss:.5f}] 유형 F1 : [{val_type_f1:.5f}] 극성 F1 : [{val_polarity_f1:.5f}] 시제 F1 : [{val_tense_f1:.5f}] 확실성 F1 : [{val_certainty_f1:.5f}]')

        if scheduler is not None:
            scheduler.step(val_loss)

        if best_loss > val_loss:
            best_loss = val_loss
            # Snapshot the weights. Storing `model` itself would only alias the
            # live network, so later epochs would overwrite the "best" model.
            best_model = copy.deepcopy(model)

    return best_model

In [None]:
def validation(model, val_loader, criterion, device):
    """Evaluate `model` on `val_loader` without gradient tracking.

    Args:
        model: module returning `(type, polarity, tense, certainty)` logits.
        val_loader: yields `(type_label, polarity_label, tense_label,
            certainty_label, hidden_state)` batches.
        criterion: dict with keys 'type', 'polarity', 'tense', 'certainty'
            mapping to loss callables `(logits, labels) -> loss`.
        device: device the batches are moved to.

    Returns:
        `(mean_batch_loss, type_f1, polarity_f1, tense_f1, certainty_f1)`, where
        the loss is the equal-weight (0.25 each) sum of the four task losses
        averaged over batches and each F1 is the weighted F1 over all samples.
    """
    model.eval()
    task_names = ('type', 'polarity', 'tense', 'certainty')
    batch_losses = []
    predicted = {name: [] for name in task_names}
    expected = {name: [] for name in task_names}

    with torch.no_grad():
        for type_label, polarity_label, tense_label, certainty_label, hidden_state in tqdm(iter(val_loader)):
            labels = {
                'type': type_label.to(device),
                'polarity': polarity_label.to(device),
                'tense': tense_label.to(device),
                'certainty': certainty_label.to(device),
            }
            hidden_state = hidden_state.to(device)

            type_logit, polarity_logit, tense_logit, certainty_logit = model(hidden_state)
            logits = {
                'type': type_logit,
                'polarity': polarity_logit,
                'tense': tense_logit,
                'certainty': certainty_logit,
            }

            # Equal-weight combination of the four task losses.
            terms = [0.25 * criterion[name](logits[name], labels[name]) for name in task_names]
            loss = terms[0] + terms[1] + terms[2] + terms[3]
            batch_losses.append(loss.item())

            # Accumulate arg-max predictions and ground truth as plain Python lists.
            for name in task_names:
                predicted[name] += logits[name].argmax(1).detach().cpu().numpy().tolist()
                expected[name] += labels[name].detach().cpu().numpy().tolist()

    scores = [f1_score(expected[name], predicted[name], average='weighted') for name in task_names]

    return (np.mean(batch_losses), *scores)

In [None]:
from torch.utils.data import Dataset

class MyDataset(Dataset):
  """Map-style dataset over a pandas view of a dataset holding `hidden_state`.

  The wrapped `dataset` is switched to pandas format (a side effect on the
  object passed in) and fully materialised as a DataFrame in `self.df`.
  Items are `(type, polarity, tense, certainty, hidden_state)` when the '유형'
  label column is present, otherwise just the `hidden_state` tensor.
  """

  def __init__(self, dataset):
    dataset.set_format(type = 'pandas')
    self.df = dataset[:]

  def __len__(self):
    return len(self.df)

  def __getitem__(self, index):
    frame = self.df

    # Unlabelled data (e.g. a test set): only the feature tensor is returned.
    if '유형' not in frame.columns:
      return torch.tensor(frame['hidden_state'][index])

    labels = [frame[column][index] for column in ('유형', '극성', '시제', '확실성')]
    hidden_state = torch.tensor(frame['hidden_state'][index])
    return (*labels, hidden_state)


In [None]:
import random
def under_sampling(df, threshold=0.4, seed=42):
  """Randomly drop rows whose labels are ('긍정', '확실') to reduce that majority combination.

  One uniform draw in [0, 1] is made per row, in row order. A row is dropped
  when its draw exceeds `threshold` and its '극성' is '긍정' and its '확실성' is
  '확실'. Rows are selected by position in a single pass, so the frame is not
  copied once per dropped row and a non-unique index is handled correctly.

  Args:
    df: DataFrame with '극성' and '확실성' columns; it is not modified.
    threshold: draws above this value drop a matching row (default 0.4).
    seed: value passed to `random.seed` so repeated calls give the same result.
      Note that this reseeds the global `random` generator.

  Returns:
    A new DataFrame with the surviving rows and a fresh 0..n-1 index.
  """
  random.seed(seed)
  keep_positions = []
  for position, (polarity, certainty) in enumerate(zip(df['극성'], df['확실성'])):
    # Draw for every row, matching or not, so the sequence depends only on row count.
    r = random.uniform(0, 1)
    is_majority = polarity == '긍정' and certainty == '확실'
    if not (r > threshold and is_majority):
      keep_positions.append(position)

  return df.iloc[keep_positions].reset_index(drop=True)

# Down-sample the augmented train/validation frames; under_sampling seeds the
# RNG itself, so these results are repeatable for the same input frames.
df_train_aug_under = under_sampling(df_train_aug)
df_val_aug_under = under_sampling(df_val_aug)

In [None]:
# NOTE(review): repeats the two calls at the end of the previous cell; with the
# fixed seed inside under_sampling this looks redundant — confirm and remove.
df_train_aug_under = under_sampling(df_train_aug)
df_val_aug_under = under_sampling(df_val_aug)

In [None]:
# NOTE(review): `df` is not assigned in the visible part of the notebook —
# confirm which frame this determinism check is meant to run on.
df1 = under_sampling(df)

# Check that the result is identical to the previous run of the function.
df2 = under_sampling(df)
assert df1.equals(df2)

In [None]:
def label_enc(df):
  """Label-encode the four target columns of `df` in place.

  A separate `LabelEncoder` is fitted on each of '유형', '극성', '시제' and
  '확실성', and the column is overwritten with its integer codes.

  Returns:
    The fitted encoders as `(type_le, polarity_le, tense_le, certainty_le)`.
  """
  fitted = []
  for column in ("유형", "극성", "시제", "확실성"):
    encoder = preprocessing.LabelEncoder()
    df[column] = encoder.fit_transform(df[column].values)
    fitted.append(encoder)

  type_le, polarity_le, tense_le, certainty_le = fitted
  return type_le, polarity_le, tense_le, certainty_le

### k-fold

In [None]:
from sklearn.model_selection import train_test_split, KFold

In [None]:
k = 5
kf = KFold(n_splits=k)

In [None]:
kf_generator = kf.split(ds)

In [None]:
train_index, val_index = next(iter(kf_generator))

In [None]:
# Build train/validation datasets for a single fold: `train_index` / `val_index`
# come from one `next(...)` on the KFold generator, so only the first split is used.
dataset_t = MyDataset(ds.select_columns(['유형', '극성', '시제', '확실성', 'hidden_state']).select(train_index))
dataset_v = MyDataset(ds.select_columns(['유형', '극성', '시제', '확실성', 'hidden_state']).select(val_index))

# NOTE(review): the training loader is not shuffled here, unlike the train-val
# split section below (shuffle=True) — confirm this is intentional.
train_loader = DataLoader(dataset_t, batch_size=256, shuffle=False)
val_loader = DataLoader(dataset_v, batch_size=256, shuffle=False)

model = CustomModel()
model.eval()  # no lasting effect: train() calls model.train() at the start of every epoch
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
# Multiply the learning rate by 0.5 when the validation loss plateaus (patience=2).
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-8, verbose=True)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)


### train-val split

In [None]:
#!unzip train_hdn.zip
#!unzip val_hdn.zip
!unzip test_hdn.zip
!unzip train_hdn_aug.zip
!unzip val_hdn_aug.zip

Archive:  test_hdn.zip
   creating: content/test_hidden/
  inflating: content/test_hidden/data-00000-of-00001.arrow  
  inflating: content/test_hidden/dataset_info.json  
  inflating: content/test_hidden/state.json  
Archive:  train_hdn_aug.zip
   creating: content/train_hidden_aug/
  inflating: content/train_hidden_aug/data-00000-of-00001.arrow  
  inflating: content/train_hidden_aug/state.json  
  inflating: content/train_hidden_aug/dataset_info.json  
Archive:  val_hdn_aug.zip
   creating: content/val_hidden_aug/
  inflating: content/val_hidden_aug/data-00000-of-00001.arrow  
  inflating: content/val_hidden_aug/state.json  
  inflating: content/val_hidden_aug/dataset_info.json  


In [None]:
!unzip ds_hidden.zip


Archive:  ds_hidden.zip
replace content/ds_hidden/dataset_info.json? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
# Imported in this cell because it runs before the cell that imports
# `load_from_disk`; without it a fresh kernel raises NameError here.
from datasets import load_from_disk

ds_hidden = load_from_disk("/content/content/ds_hidden")

In [None]:
from datasets import load_from_disk

train_hidden = load_from_disk("/content/content/train_hidden_aug")
val_hidden = load_from_disk("/content/content/val_hidden_aug")
test_hidden = load_from_disk("/content/content/test_hidden")

#train_hidden = load_from_disk("/content/content/train_hidden_aug_30000")
#val_hidden = load_from_disk("/content/content/val_hidden_aug_8000")


In [None]:
train_hidden

Dataset({
    features: ['문장', '유형', '극성', '시제', '확실성', 'input_ids', 'attention_mask', 'hidden_state'],
    num_rows: 18698
})

In [None]:
val_hidden

Dataset({
    features: ['문장', '유형', '극성', '시제', '확실성', 'input_ids', 'attention_mask', 'hidden_state'],
    num_rows: 4879
})

In [None]:
from datasets import Dataset, DatasetDict, load_dataset

train_hidden.set_format(type='pandas')
val_hidden.set_format(type='pandas')
train_hidden_df=train_hidden[:]
val_hidden_df=val_hidden[:]

df_hidden = pd.concat([train_hidden_df, val_hidden_df], axis=0)


In [None]:
def label_enc(df):
  """Label-encode '유형', '극성', '시제' and '확실성' in `df` in place.

  NOTE(review): a function with the same name and behaviour is defined earlier
  in the notebook; this definition shadows it.

  Returns:
    The fitted encoders as `(type_le, polarity_le, tense_le, certainty_le)`.
  """
  def encode(column):
    # Fit a fresh encoder on one column and overwrite it with integer codes.
    encoder = preprocessing.LabelEncoder()
    df[column] = encoder.fit_transform(df[column].values)
    return encoder

  return encode("유형"), encode("극성"), encode("시제"), encode("확실성")

In [None]:
# Encode labels on the concatenated frame so train and validation share one mapping.
type_le, polarity_le, tense_le, certainty_le = label_enc(df_hidden)

# Split back at the boundary used by the concat (train rows first, then validation).
# Derived from the train frame's length instead of a hard-coded row count, so the
# split stays correct if the dataset size changes.
n_train_rows = len(train_hidden_df)
train_df_hidden = df_hidden.iloc[:n_train_rows]
val_df_hidden = df_hidden.iloc[n_train_rows:]

In [None]:
def to_ds(df):
  """Convert a pandas DataFrame into a `Dataset` via a column -> list mapping."""
  # {column name: list of that column's values}
  columns_as_lists = df.to_dict("list")
  return Dataset.from_dict(columns_as_lists)

#ds=to_ds(df)

In [None]:
from datasets import Dataset, DatasetDict, load_dataset

train_hidden = to_ds(train_df_hidden)
val_hidden = to_ds(val_df_hidden)

In [None]:
train_hidden['유형'][0]

tensor(1)

In [None]:
train_hidden.set_format("torch", columns=['input_ids', 'attention_mask', '유형', '극성', '시제', '확실성'])
val_hidden.set_format("torch", columns=['input_ids', 'attention_mask', '유형', '극성', '시제', '확실성'])

In [None]:
dataset_t = MyDataset(train_hidden.select_columns(['유형', '극성', '시제', '확실성', 'hidden_state']))
dataset_v = MyDataset(val_hidden.select_columns(['유형', '극성', '시제', '확실성', 'hidden_state']))

train_loader = DataLoader(dataset_t, batch_size=512, shuffle=True)
val_loader = DataLoader(dataset_v, batch_size=512, shuffle=True)


In [None]:
model = CustomModel()
model.eval()  # no lasting effect: train() calls model.train() at the start of every epoch
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
# Multiply the learning rate by 0.5 when the validation loss plateaus (patience=2).
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-8, verbose=True)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

In [None]:
# ELU-activation variant; `optimizer`, `scheduler` and `infer_model` are rebound by this cell.
model_Elu = CustomModel_Elu()
model_Elu.eval()  # no lasting effect: train() calls model.train() at the start of every epoch
optimizer = torch.optim.Adam(params = model_Elu.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-8, verbose=True)

infer_model = train(model_Elu, optimizer, train_loader, val_loader, scheduler, device)

In [None]:
# tanh-activation variant; `optimizer`, `scheduler` and `infer_model` are rebound by this cell.
model_tanh = CustomModel_tanh()
model_tanh.eval()  # no lasting effect: train() calls model.train() at the start of every epoch
optimizer = torch.optim.Adam(params = model_tanh.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-8, verbose=True)

infer_model = train(model_tanh, optimizer, train_loader, val_loader, scheduler, device)

In [None]:
# GELU-activation variant; `optimizer`, `scheduler` and `infer_model` are rebound by this cell.
model_Gelu = CustomModel_Gelu()
model_Gelu.eval()  # no lasting effect: train() calls model.train() at the start of every epoch
optimizer = torch.optim.Adam(params = model_Gelu.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2,threshold_mode='abs',min_lr=1e-8, verbose=True)

infer_model = train(model_Gelu, optimizer, train_loader, val_loader, scheduler, device)

### inference

In [None]:
# The test set has no label columns, so MyDataset yields only hidden_state tensors.
dataset_test = MyDataset(test_hidden.select_columns(['hidden_state']))

# Batch size
batch_size = 256

# Data loader; shuffle=False keeps predictions in the dataset's row order.
test_loader = DataLoader(dataset_test, batch_size=batch_size, shuffle=False)


In [None]:
def inference(model1,model2, model3, test_loader, device):
    """Predict class indices with a three-model ensemble that averages raw logits.

    Each model is moved to `device` and put in eval mode. For every batch the
    three models' logits are averaged per task and the arg-max class index is
    taken.

    Args:
        model1, model2, model3: modules returning
            `(type, polarity, tense, certainty)` logits.
        test_loader: yields batches of hidden-state tensors.
        device: device used for the models and batches.

    Returns:
        `(type_preds, polarity_preds, tense_preds, certainty_preds)`, each a flat
        list of predicted class indices in loader order.
    """
    ensemble = (model1, model2, model3)
    for member in ensemble:
        member.to(device)
        member.eval()

    # One accumulator per task, in return order: type, polarity, tense, certainty.
    collected = ([], [], [], [])

    with torch.no_grad():
        for hidden_state in tqdm(test_loader):
            hidden_state = hidden_state.to(device)

            per_model = [member(hidden_state) for member in ensemble]

            for task_idx, bucket in enumerate(collected):
                first, second, third = (outputs[task_idx] for outputs in per_model)
                # Class index with the highest mean logit across the three models.
                mean_logit = (first + second + third) / 3
                bucket.extend(torch.argmax(mean_logit, dim=1).tolist())

    type_preds, polarity_preds, tense_preds, certainty_preds = collected
    return type_preds, polarity_preds, tense_preds, certainty_preds

type_preds, polarity_preds, tense_preds, certainty_preds = inference(model_Elu, model_tanh, model_Gelu, test_loader, device) # results are weird....

  0%|          | 0/28 [00:00<?, ?it/s]

In [None]:
type_preds, polarity_preds, tense_preds, certainty_preds = inference(model_Elu, model_tanh, model_Gelu, test_loader, device)

  0%|          | 0/28 [00:00<?, ?it/s]

In [None]:
type_preds

#### submission

In [None]:
type_preds = type_le.inverse_transform(type_preds)
polarity_preds = polarity_le.inverse_transform(polarity_preds)
tense_preds = tense_le.inverse_transform(tense_preds)
certainty_preds = certainty_le.inverse_transform(certainty_preds)

In [None]:
type_preds

array(['사실형', '사실형', '사실형', ..., '사실형', '추론형', '사실형'], dtype=object)

In [None]:
# Join the four decoded labels of each sample into one 'type-polarity-tense-certainty' string.
predictions = [
    type_pred+'-'+polarity_pred+'-'+tense_pred+'-'+certainty_pred
    for type_pred, polarity_pred, tense_pred, certainty_pred
    in zip(type_preds, polarity_preds, tense_preds, certainty_preds)
]

In [None]:
submit = pd.read_csv('sample_submission.csv')
submit['label'] = predictions

In [None]:
submit

Unnamed: 0,ID,label
0,TEST_0000,사실형-긍정-현재-확실
1,TEST_0001,사실형-긍정-현재-확실
2,TEST_0002,사실형-긍정-과거-확실
3,TEST_0003,사실형-긍정-과거-확실
4,TEST_0004,사실형-긍정-과거-확실
...,...,...
7085,TEST_7085,사실형-긍정-현재-확실
7086,TEST_7086,사실형-긍정-현재-확실
7087,TEST_7087,사실형-긍정-현재-확실
7088,TEST_7088,추론형-긍정-미래-확실


In [None]:
submit.to_csv('submit_bert_focal_aug_func_ens4.csv', index=False, encoding='utf-8-sig')