In [None]:
# Mount Google Drive at /content/drive (Colab only) so the Drive paths used below resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# 설치
!pip install transformers
!pip install sentencepiece
!pip install 'git+https://github.com/SKTBrain/KoBERT.git#egg=kobert_tokenizer&subdirectory=kobert_hf'
!pip install transformers torch

Collecting kobert_tokenizer
  Cloning https://github.com/SKTBrain/KoBERT.git to /tmp/pip-install-2o4tr_uj/kobert-tokenizer_c8991f3736314d3ab21b0c888315a046
  Running command git clone --filter=blob:none --quiet https://github.com/SKTBrain/KoBERT.git /tmp/pip-install-2o4tr_uj/kobert-tokenizer_c8991f3736314d3ab21b0c888315a046
  Resolved https://github.com/SKTBrain/KoBERT.git to commit 47a69af87928fc24e20f571fe10c3cc9dd9af9a3
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: kobert_tokenizer
  Building wheel for kobert_tokenizer (setup.py) ... [?25l[?25hdone
  Created wheel for kobert_tokenizer: filename=kobert_tokenizer-0.1-py3-none-any.whl size=4632 sha256=b3c8639acf1ab4b20bcebc0230ab52aa33fc1645930c13232586ef5aea8450e9
  Stored in directory: /tmp/pip-ephem-wheel-cache-zvamw9hp/wheels/e9/1a/3f/a864970e8a169c176befa3c4a1e07aa612f69195907a4045fe
Successfully built kobert_tokenizer
Installing collected packages: kobert_tokenizer
Successfully ins

In [None]:
# 모듈 불러오기
import pandas as pd
import numpy as np

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from kobert_tokenizer import KoBERTTokenizer  # 대문자 'BERT'로 수정
from transformers import BertTokenizerFast, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup, Trainer, TrainingArguments

from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report, confusion_matrix
from sklearn.model_selection import StratifiedKFold
from itertools import product

In [None]:
# Load the preprocessed review data from Google Drive.
# NOTE(review): later cells use manual_train, auto_train and test, but no visible cell
# creates them — presumably they are derived from reviews_df; confirm and add that step.
reviews_df = pd.read_csv('/content/drive/MyDrive/Final/전처리/불용어_처리/spelled_bareun_final.csv')

## max_len

In [None]:
# Sentence lengths of the cleaned review text.
# NOTE(review): get_sentence_length_distribution is neither defined nor imported in the
# visible cells, so this fails on a fresh kernel. Also confirm which unit it measures
# (characters, words or tokens): max_len is later passed to the tokenizer as max_length.
lengths = get_sentence_length_distribution(reviews_df['new_content_cleaned_spelled'])

# Longest observed length
max_len = max(lengths)
max_len

156

# Hugging Face KoBERT 파인튜닝

In [None]:
# Keep only the rows that have a sentiment label (drop rows whose label is NaN).
# .copy() turns each filtered result into an independent DataFrame, so assigning a
# column to it later does not trigger pandas' SettingWithCopyWarning.
#auto_train_labeled = auto_train[auto_train['sentiment_label'].notna()]
manual_train_labeled = manual_train[manual_train['sentiment_label'].notna()].copy()
test_labeled = test[test['sentiment_label'].notna()].copy()

In [None]:
# Remap labels to the 0-based class ids used by the classifier:
# -1 -> 0 (negative), 0 -> 1 (neutral), 1 -> 2 (positive).
# NOTE: this cell is not idempotent. Any value that is not a key of the mapping
# (including an already remapped 2) becomes NaN, so run it once per fresh load.
label_mapping = {-1: 0, 0: 1, 1: 2}

# assign() returns a new DataFrame instead of writing into a frame that may be a
# slice of another one, which avoids pandas' SettingWithCopyWarning.
manual_train_labeled = manual_train_labeled.assign(
    sentiment_label=manual_train_labeled['sentiment_label'].map(label_mapping)
)
test_labeled = test_labeled.assign(
    sentiment_label=test_labeled['sentiment_label'].map(label_mapping)
)

# Apply the same mapping to auto_train in case it carries labels as well
# (rows without a label stay NaN).
auto_train = auto_train.assign(
    sentiment_label=auto_train['sentiment_label'].map(label_mapping)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  auto_train['sentiment_label'] = auto_train['sentiment_label'].map({-1: 0, 0: 1, 1: 2})


In [None]:
# Load the KoBERT tokenizer and a 3-class sequence-classification model from the
# same Hugging Face checkpoint.
checkpoint = 'skt/kobert-base-v1'
tokenizer = KoBERTTokenizer.from_pretrained(checkpoint)
model = BertForSequenceClassification.from_pretrained(checkpoint, num_labels=3)

# Use the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
model.to(device)

# Show where the model parameters ended up.
print(next(model.parameters()).device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'XLNetTokenizer'. 
The class this function is called from is 'KoBERTTokenizer'.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at skt/kobert-base-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cuda:0


In [None]:
# 데이터셋 정의
class SentimentDataset(Dataset):
    """Dataset that tokenizes one review per item for sequence classification.

    Each item is a dict holding ``input_ids`` and ``attention_mask`` (the
    tokenizer output flattened to 1-D) and, when ``has_labels`` is true,
    ``labels`` (a scalar ``torch.long`` tensor).

    Args:
        df: DataFrame with a ``new_content_cleaned_spelled`` text column and,
            when ``has_labels`` is true, a ``sentiment_label`` column.
        tokenizer: object exposing a Hugging Face style ``encode_plus`` method.
        max_len: length every sequence is padded or truncated to.
        has_labels: whether ``sentiment_label`` is read and returned as ``labels``.

    Raises:
        ValueError: from ``__getitem__`` when ``has_labels`` is true and the
            label of the requested row is missing (NaN).
    """

    # Columns this dataset reads from the DataFrame.
    TEXT_COLUMN = 'new_content_cleaned_spelled'
    LABEL_COLUMN = 'sentiment_label'

    def __init__(self, df, tokenizer, max_len, has_labels=True):
        self.df = df
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.has_labels = has_labels  # whether items carry a label

    def __len__(self):
        """Return the number of rows in the wrapped DataFrame."""
        return len(self.df)

    def __getitem__(self, index):
        """Tokenize the review at positional ``index`` and return its tensors."""
        # Column-first lookup reads a single cell instead of materialising the
        # whole row as a Series on every access.
        review = self.df[self.TEXT_COLUMN].iloc[index]

        # pandas reads empty CSV cells as NaN (a float), which is not valid
        # tokenizer input; treat missing text as an empty string and coerce any
        # other non-string value to str.
        if not isinstance(review, str):
            review = '' if pd.isna(review) else str(review)

        encoding = self.tokenizer.encode_plus(
            review,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )

        # return_tensors='pt' adds a leading batch axis; flatten it away so the
        # DataLoader can stack items itself.
        item = {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
        }

        # Only labelled datasets return 'labels'.
        if self.has_labels:
            label = self.df[self.LABEL_COLUMN].iloc[index]
            # Fail with a clear message instead of casting NaN to an integer.
            if pd.isna(label):
                raise ValueError(
                    f"Missing {self.LABEL_COLUMN} at row {index}; "
                    "drop unlabeled rows or pass has_labels=False."
                )
            item['labels'] = torch.tensor(int(label), dtype=torch.long)

        return item

In [None]:
# score 계산 함수
def compute_metrics(pred):
    """Compute accuracy and weighted precision/recall/F1 for an evaluation run.

    Args:
        pred: object exposing ``label_ids`` (true class ids) and ``predictions``
            (per-class scores; the arg-max over the last axis is taken as the
            predicted class).

    Returns:
        dict with the keys ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    # zero_division=0 yields the same score scikit-learn already falls back to when
    # a class has no predicted samples, but without emitting a warning on every
    # evaluation.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='weighted', zero_division=0
    )
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

In [None]:
# Load the KoBERT tokenizer
tokenizer = KoBERTTokenizer.from_pretrained('skt/kobert-base-v1')

# Hyperparameter values to search over
learning_rates = [1e-5, 2e-5]
batch_sizes = [8]
num_train_epochs = [2, 4]
weight_decays = [0.01, 0.1]

# Every combination to try: (learning_rate, batch_size, num_epochs, weight_decay)
param_grid = list(product(learning_rates, batch_sizes, num_train_epochs, weight_decays))

# One summary dict per hyperparameter combination
results_list = []

# Best mean F1 seen so far, the corresponding model, and where it is saved
best_f1_score = 0
best_model = None
best_model_path = '/content/drive/MyDrive/best_model'

# 5-fold split stratified on sentiment_label. With a fixed integer random_state each
# split() call yields the same folds, so every combination is scored on identical data.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Train and evaluate every hyperparameter combination with cross-validation
for params in param_grid:
    learning_rate, batch_size, num_epochs, weight_decay = params
    print(f"Training with lr={learning_rate}, batch_size={batch_size}, epochs={num_epochs}, weight_decay={weight_decay}")

    # Per-fold scores of this combination
    accuracy_list = []
    f1_list = []

    for fold, (train_index, val_index) in enumerate(skf.split(manual_train_labeled, manual_train_labeled['sentiment_label'])):
        print(f"Fold {fold + 1}")

        # Start every fold from the pretrained checkpoint. Reusing one model across
        # folds lets it train on rows that later serve as validation data, which
        # leaks labels and inflates the validation scores of the later folds.
        model = BertForSequenceClassification.from_pretrained('skt/kobert-base-v1', num_labels=3)

        # Positional split of the labelled data for this fold
        train_data = manual_train_labeled.iloc[train_index]
        val_data = manual_train_labeled.iloc[val_index]

        train_dataset = SentimentDataset(train_data, tokenizer, max_len=max_len)
        val_dataset = SentimentDataset(val_data, tokenizer, max_len=max_len)

        # Output and log directories are unique per combination and fold.
        # The checkpoint with the best validation F1 is reloaded when training ends.
        training_args = TrainingArguments(
            output_dir=f'./results_lr_{learning_rate}_bs_{batch_size}_epochs_{num_epochs}_wd_{weight_decay}_fold_{fold + 1}',
            num_train_epochs=num_epochs,
            per_device_train_batch_size=batch_size,
            per_device_eval_batch_size=batch_size,
            warmup_steps=500,
            weight_decay=weight_decay,
            logging_dir=f'./logs_lr_{learning_rate}_bs_{batch_size}_epochs_{num_epochs}_wd_{weight_decay}_fold_{fold + 1}',
            logging_steps=10,
            learning_rate=learning_rate,
            evaluation_strategy="epoch",
            save_strategy="epoch",
            load_best_model_at_end=True,
            metric_for_best_model="f1"
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            compute_metrics=compute_metrics
        )

        trainer.train()

        # Score this fold on its held-out validation split
        results = trainer.evaluate()
        accuracy_list.append(results['eval_accuracy'])
        f1_list.append(results['eval_f1'])

        # Release the trainer before the next fold builds a fresh model and trainer.
        del trainer
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    # Average the fold scores of this combination
    mean_accuracy = sum(accuracy_list) / len(accuracy_list)
    mean_f1 = sum(f1_list) / len(f1_list)

    result_dict = {
        'learning_rate': learning_rate,
        'batch_size': batch_size,
        'num_epochs': num_epochs,
        'weight_decay': weight_decay,
        'mean_accuracy': mean_accuracy,
        'mean_f1': mean_f1
    }

    results_list.append(result_dict)

    # Keep the model of the best combination so far.
    # `model` is the one trained in the last fold of this combination, i.e. on 4/5 of
    # the labelled data; retrain on all labelled rows with the winning hyperparameters
    # if a model fitted on the full set is needed.
    if mean_f1 > best_f1_score:
        best_f1_score = mean_f1
        best_model = model

        best_model.save_pretrained(best_model_path)
        tokenizer.save_pretrained(best_model_path)
        print(f"New best model saved with F1 score: {best_f1_score}")

# Collect the per-combination summaries in a DataFrame
results_df = pd.DataFrame(results_list)

# Report the combination with the highest mean F1
best_row = results_df.loc[results_df['mean_f1'].idxmax()]
print("Best hyperparameters based on mean F1:")
print(best_row)

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'XLNetTokenizer'. 
The class this function is called from is 'KoBERTTokenizer'.


Training with lr=1e-05, batch_size=8, epochs=2, weight_decay=0.01


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at skt/kobert-base-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.092,0.188993,0.94917,0.944671,0.940217,0.94917
2,0.089,0.194248,0.953838,0.949484,0.945184,0.953838


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


Fold 2


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.107,0.134095,0.962137,0.958532,0.956163,0.962137
2,0.0357,0.129512,0.969398,0.965095,0.960973,0.969398


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


Fold 3


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.2468,0.064042,0.985477,0.984305,0.984407,0.985477
2,0.1417,0.054309,0.987033,0.986265,0.98664,0.987033


Fold 4




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0434,0.04009,0.989621,0.989741,0.989965,0.989621
2,0.0608,0.032073,0.993254,0.993233,0.993327,0.993254


Fold 5




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.092,0.011416,0.997405,0.997398,0.997403,0.997405
2,0.0003,0.010391,0.996886,0.996993,0.997279,0.996886


New best model saved with F1 score: 0.9782950734520405
Training with lr=1e-05, batch_size=8, epochs=2, weight_decay=0.1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at skt/kobert-base-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1117,0.198862,0.945021,0.941083,0.937332,0.945021
2,0.1407,0.222746,0.946058,0.941491,0.936978,0.946058


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


Fold 2


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1276,0.141969,0.963174,0.959575,0.957213,0.963174
2,0.0675,0.144544,0.967842,0.965251,0.964224,0.967842


  _warn_prf(average, modifier, msg_start, len(result))


Fold 3




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.2341,0.104547,0.978216,0.97639,0.978165,0.978216
2,0.0636,0.081921,0.982884,0.981504,0.982904,0.982884


Fold 4




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1283,0.037704,0.991697,0.991542,0.991525,0.991697
2,0.0933,0.041393,0.991178,0.990769,0.990991,0.991178


Fold 5




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0689,0.037363,0.99014,0.989724,0.9903,0.99014
2,0.0014,0.045257,0.991178,0.9911,0.991175,0.991178


Training with lr=1e-05, batch_size=8, epochs=4, weight_decay=0.01


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at skt/kobert-base-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1012,0.210344,0.938797,0.936712,0.937299,0.938797
2,0.1187,0.188554,0.95332,0.949205,0.945238,0.95332
3,0.0942,0.199086,0.94917,0.945416,0.94206,0.94917
4,0.0036,0.215042,0.953838,0.950301,0.946793,0.953838


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Fold 2




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0912,0.093766,0.97666,0.973192,0.978279,0.97666
2,0.0051,0.10335,0.97666,0.975583,0.975,0.97666
3,0.0015,0.09042,0.982365,0.981949,0.982225,0.982365
4,0.001,0.093754,0.982365,0.981716,0.981846,0.982365


Fold 3




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0892,0.056774,0.990145,0.989808,0.990152,0.990145
2,0.0012,0.038061,0.993257,0.99319,0.993274,0.993257
3,0.0005,0.034485,0.993776,0.99377,0.993856,0.993776
4,0.0122,0.026847,0.995332,0.995301,0.995335,0.995332


Fold 4




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0006,0.024082,0.995848,0.995826,0.995848,0.995848
2,0.1094,0.002349,0.999481,0.999482,0.999483,0.999481
3,0.0001,0.001636,0.999481,0.999488,0.99951,0.999481
4,0.0001,0.000492,1.0,1.0,1.0,1.0


Fold 5




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0833,0.012758,0.998443,0.998471,0.998552,0.998443
2,0.0002,0.023957,0.995848,0.995894,0.996087,0.995848


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0833,0.012758,0.998443,0.998471,0.998552,0.998443
2,0.0002,0.023957,0.995848,0.995894,0.996087,0.995848
3,0.0,7.2e-05,1.0,1.0,1.0,1.0
4,0.0,0.007472,0.998962,0.998991,0.999071,0.998962


New best model saved with F1 score: 0.9855100930815123
Training with lr=1e-05, batch_size=8, epochs=4, weight_decay=0.1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at skt/kobert-base-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0756,0.182493,0.948133,0.944355,0.940939,0.948133
2,0.0774,0.17624,0.952801,0.948649,0.944626,0.952801
3,0.0255,0.185464,0.95695,0.95293,0.949188,0.95695
4,0.0023,0.211619,0.95695,0.952882,0.948884,0.95695


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


Fold 2


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0136,0.109093,0.975622,0.972122,0.977082,0.975622
2,0.0175,0.091273,0.977178,0.97492,0.976577,0.977178
3,0.0008,0.103053,0.97666,0.976115,0.976451,0.97666
4,0.001,0.09417,0.977178,0.977297,0.977695,0.977178


Fold 3




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1552,0.034359,0.990664,0.990422,0.990657,0.990664
2,0.0215,0.024373,0.994295,0.994244,0.994292,0.994295
3,0.0004,0.019562,0.996369,0.996369,0.996369,0.996369
4,0.0002,0.019205,0.996369,0.996343,0.996386,0.996369


Fold 4




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.001,0.014705,0.997405,0.997467,0.997638,0.997405
2,0.0292,0.010876,0.997924,0.997986,0.998157,0.997924
3,0.0001,0.020721,0.99533,0.995307,0.995376,0.99533
4,0.0001,0.009296,0.997405,0.99743,0.997515,0.997405


Fold 5




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0003,0.011369,0.994811,0.994766,0.994842,0.994811
2,0.0002,0.009539,0.996886,0.996907,0.996942,0.996886
3,0.0,0.006528,0.998962,0.998991,0.999071,0.998962
4,0.0,0.007504,0.998962,0.998991,0.999071,0.998962


Training with lr=2e-05, batch_size=8, epochs=2, weight_decay=0.01


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at skt/kobert-base-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0994,0.181586,0.94917,0.945822,0.943478,0.94917
2,0.0409,0.190142,0.951245,0.94641,0.941735,0.951245


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


Fold 2


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1257,0.123745,0.967842,0.96399,0.961213,0.967842
2,0.0655,0.11379,0.974585,0.972954,0.973064,0.974585


  _warn_prf(average, modifier, msg_start, len(result))


Fold 3




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.3037,0.075507,0.981328,0.979999,0.981289,0.981328
2,0.0357,0.047397,0.990145,0.989862,0.990282,0.990145


Fold 4




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.096,0.057703,0.982356,0.983069,0.984648,0.982356
2,0.1962,0.027866,0.993773,0.993781,0.993795,0.993773


Fold 5




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1307,0.017728,0.994292,0.994333,0.994432,0.994292
2,0.1137,0.00643,0.998443,0.998436,0.998444,0.998443


Training with lr=2e-05, batch_size=8, epochs=2, weight_decay=0.1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at skt/kobert-base-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1384,0.217967,0.944502,0.939237,0.934419,0.944502
2,0.1649,0.207042,0.952801,0.94798,0.943318,0.952801


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))


Fold 2


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1118,0.143182,0.9611,0.957766,0.956296,0.9611
2,0.049,0.126958,0.969917,0.967909,0.967014,0.969917


  _warn_prf(average, modifier, msg_start, len(result))


Fold 3




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1414,0.069046,0.985477,0.983811,0.98556,0.985477
2,0.0023,0.079626,0.983921,0.98255,0.983947,0.983921


Fold 4




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1092,0.070839,0.982875,0.982311,0.982315,0.982875
2,0.0693,0.041609,0.99014,0.989846,0.989816,0.99014


Fold 5




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0478,0.037548,0.992735,0.992419,0.99278,0.992735
2,0.0008,0.039022,0.991178,0.991066,0.991023,0.991178


Training with lr=2e-05, batch_size=8, epochs=4, weight_decay=0.01


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at skt/kobert-base-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0916,0.206382,0.947095,0.942126,0.937392,0.947095
2,0.0481,0.203364,0.954876,0.950023,0.945356,0.954876
3,0.0596,0.207311,0.951763,0.949578,0.949108,0.951763
4,0.0024,0.238796,0.953838,0.951685,0.949962,0.953838


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Fold 2




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1143,0.086959,0.981328,0.979667,0.982092,0.981328
2,0.0151,0.089319,0.981328,0.980098,0.979926,0.981328
3,0.0011,0.091951,0.982365,0.98098,0.981111,0.982365
4,0.001,0.088086,0.982884,0.981488,0.981614,0.982884


Fold 3




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0404,0.032814,0.993257,0.993143,0.993173,0.993257
2,0.0423,0.035424,0.993776,0.993731,0.993763,0.993776
3,0.0363,0.042031,0.991701,0.991609,0.991762,0.991701
4,0.0005,0.036721,0.993257,0.99314,0.993282,0.993257


Fold 4




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0021,0.010509,0.997924,0.997928,0.997936,0.997924
2,0.0009,0.01298,0.997405,0.997386,0.997385,0.997405
3,0.0003,0.010637,0.998443,0.998434,0.998442,0.998443
4,0.0004,0.01263,0.997405,0.997391,0.997404,0.997405


Fold 5




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0016,0.019875,0.996367,0.996595,0.997175,0.996367
2,0.0878,0.036156,0.993773,0.994297,0.995672,0.993773
3,0.001,0.011917,0.997405,0.997435,0.997518,0.997405
4,0.0003,0.00966,0.997924,0.997952,0.998033,0.997924


Training with lr=2e-05, batch_size=8, epochs=4, weight_decay=0.1


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at skt/kobert-base-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fold 1


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0964,0.239014,0.929979,0.929708,0.936101,0.929979
2,0.0767,0.182952,0.953838,0.9487,0.944384,0.953838
3,0.052,0.218394,0.951245,0.949637,0.949272,0.951245
4,0.0047,0.224156,0.95332,0.952521,0.951865,0.95332


  _warn_prf(average, modifier, msg_start, len(result))


Fold 2




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0051,0.084776,0.979772,0.978322,0.979012,0.979772
2,0.0377,0.077425,0.983921,0.982759,0.984159,0.983921
3,0.0015,0.098944,0.981328,0.980661,0.982547,0.981328
4,0.062,0.090373,0.98444,0.984005,0.984923,0.98444


Fold 3




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0442,0.044284,0.990145,0.989972,0.990208,0.990145
2,0.0011,0.035704,0.993776,0.993684,0.993779,0.993776
3,0.0007,0.018548,0.995851,0.99584,0.995847,0.995851
4,0.0022,0.021558,0.995332,0.995325,0.995334,0.995332


Fold 4




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.032,0.012944,0.998443,0.998449,0.998473,0.998443
2,0.1049,0.002592,0.999481,0.999488,0.99951,0.999481
3,0.0002,0.009971,0.997924,0.997927,0.997955,0.997924
4,0.0002,0.005368,0.998962,0.998969,0.998991,0.998962


Fold 5




Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0023,0.009586,0.997924,0.997918,0.997928,0.997924
2,0.0006,0.009406,0.997405,0.997401,0.997417,0.997405
3,0.0001,0.011867,0.997405,0.997401,0.997417,0.997405
4,0.0,0.012185,0.997924,0.997924,0.997924,0.997924


New best model saved with F1 score: 0.9859557854482773
Best hyperparameters based on mean F1:
learning_rate    0.000020
batch_size       8.000000
num_epochs       4.000000
weight_decay     0.100000
mean_accuracy    0.986203
mean_f1          0.985956
Name: 7, dtype: float64


In [None]:
# Display the grid-search summary: one row per hyperparameter combination.
results_df = pd.DataFrame(results_list)
results_df

Unnamed: 0,learning_rate,batch_size,num_epochs,weight_decay,mean_accuracy,mean_f1
0,1e-05,8,2,0.01,0.980186,0.978295
1,1e-05,8,2,0.1,0.975932,0.974177
2,1e-05,8,4,0.01,0.986307,0.98551
3,1e-05,8,4,0.1,0.985477,0.984715
4,2e-05,8,2,0.01,0.981638,0.980289
5,2e-05,8,2,0.1,0.978214,0.976393
6,2e-05,8,4,0.01,0.985373,0.984658
7,2e-05,8,4,0.1,0.986203,0.985956


## Auto Train만 이용하여 Fine Tuning

In [None]:
# Load the tokenizer from the directory the grid search saved its best model to
best_model_path = '/content/drive/MyDrive/best_model'
tokenizer = KoBERTTokenizer.from_pretrained(best_model_path)

# Load the saved best model from the same directory
new_model = BertForSequenceClassification.from_pretrained(best_model_path)

In [None]:
# Wrap auto_train as an unlabelled dataset (items carry no 'labels').
auto_train_dataset = SentimentDataset(auto_train, tokenizer, has_labels=False, max_len=max_len)

# Training arguments built from the best grid-search hyperparameters.
# This cell only runs inference; the same training_args object is reused by the
# fine-tuning cell further down.
training_args = TrainingArguments(
    output_dir='./results_final',
    logging_dir='./logs_final',
    learning_rate=2e-5,                # best learning rate
    weight_decay=0.1,                  # best weight decay
    num_train_epochs=4,                # best number of epochs
    per_device_train_batch_size=8,     # best batch size
    evaluation_strategy="no",          # no evaluation during training
    save_strategy="epoch",             # save a checkpoint after every epoch
    load_best_model_at_end=False,      # nothing to select without evaluation
)

# Trainer around the loaded best model.
trainer = Trainer(model=new_model, args=training_args, train_dataset=auto_train_dataset)

# Run the model over the unlabelled data.
predictions = trainer.predict(auto_train_dataset)

# The arg-max class of each row becomes its pseudo-label.
pseudo_labels = predictions.predictions.argmax(axis=1)



In [None]:
# Confidence of a sample = its highest class probability.
# predictions.predictions holds the model's raw scores (logits), which are unbounded,
# so they are converted to probabilities with a softmax before being compared with a
# probability threshold.
logits = predictions.predictions
shifted_logits = logits - logits.max(axis=1, keepdims=True)  # row max subtracted for numerical stability
exp_scores = np.exp(shifted_logits)
probabilities = exp_scores / exp_scores.sum(axis=1, keepdims=True)
confidences = probabilities.max(axis=1)

# Keep only samples whose top-class probability reaches the threshold
confidence_threshold = 0.8
high_confidence_indices = np.where(confidences >= confidence_threshold)[0]

# Select the high-confidence rows (positional indices) into a new DataFrame
high_confidence_auto_train = auto_train.iloc[high_confidence_indices].copy()

# Attach the matching pseudo-labels
high_confidence_auto_train['sentiment_label'] = pseudo_labels[high_confidence_indices]

# Labelled dataset built from the pseudo-labelled rows
auto_train_dataset_with_confident_labels = SentimentDataset(
    high_confidence_auto_train, tokenizer, max_len=max_len, has_labels=True
)

In [None]:
# Fine-tune the model on the high-confidence pseudo-labelled samples.
# training_args is the object created in the earlier pseudo-labelling cell.
trainer = Trainer(
    model=new_model,
    args=training_args,
    train_dataset=auto_train_dataset_with_confident_labels  # only the high-confidence samples
)

# Run training
trainer.train()

Step,Training Loss
500,0.0979
1000,0.103
1500,0.1087
2000,0.0883
2500,0.0876
3000,0.1167
3500,0.0825
4000,0.0792
4500,0.0885
5000,0.0652


TrainOutput(global_step=19276, training_loss=0.04196019135809806, metrics={'train_runtime': 2427.5978, 'train_samples_per_second': 63.518, 'train_steps_per_second': 7.94, 'total_flos': 1.2361487701870944e+16, 'train_loss': 0.04196019135809806, 'epoch': 4.0})

In [None]:
# Remap the test labels to the 0-based class ids: -1 -> 0, 0 -> 1, 1 -> 2.
# assign() returns a new DataFrame instead of writing into `test` in place, which
# avoids pandas' SettingWithCopyWarning when `test` is a slice of another frame.
# NOTE: not idempotent — running this cell twice turns an already remapped 2 into NaN.
test = test.assign(sentiment_label=test['sentiment_label'].map({-1: 0, 0: 1, 1: 2}))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['sentiment_label'] = test['sentiment_label'].map({-1: 0, 0: 1, 1: 2})


## test set 예측하기

In [None]:
# Load the tokenizer stored with the fine-tuned model.
# NOTE(review): this directory is written by the save cell that appears further down
# the notebook (output_dir = ".../best_model_second"); on a fresh top-to-bottom run it
# will not exist yet when this cell executes — confirm the intended cell order.
best_model_path = '/content/drive/MyDrive/best_model_second'
tokenizer = KoBERTTokenizer.from_pretrained(best_model_path)

# Load the saved model from the same directory
new_model = BertForSequenceClassification.from_pretrained(best_model_path)

# Training arguments (same values as in the pseudo-labelling cell)
training_args = TrainingArguments(
    output_dir='./results_final',
    num_train_epochs=4,      # best number of epochs
    per_device_train_batch_size=8,  # best batch size
    learning_rate=2e-5,  # best learning rate
    weight_decay=0.1,    # best weight decay
    logging_dir='./logs_final',
    evaluation_strategy="no",  # no evaluation during training
    save_strategy="epoch",  # save a checkpoint after every epoch
    load_best_model_at_end=False  # do not reload a best checkpoint at the end
)

# Trainer without any dataset attached; the following cells only call predict() on it
trainer = Trainer(
    model=new_model,  # model loaded from best_model_path
    args=training_args,
)



In [None]:
# Wrap the labelled test set and run the model over it.
test_dataset = SentimentDataset(test, tokenizer, max_len=max_len, has_labels=True)
predictions, labels, _ = trainer.predict(test_dataset)

# Predicted class = arg-max over the class axis.
preds = predictions.argmax(axis=1)

# Display names for class ids 0, 1, 2.
class_names = ['Negative', 'Neutral', 'Positive']

# Per-class precision / recall / F1.
print("Classification Report:")
print(classification_report(labels, preds, target_names=class_names))

# Rows are true classes, columns are predicted classes.
print("Confusion Matrix:")
print(confusion_matrix(labels, preds))

Classification Report:
              precision    recall  f1-score   support

    Negative       0.81      0.87      0.84       554
     Neutral       0.32      0.15      0.20        60
    Positive       0.98      0.98      0.98      4356

    accuracy                           0.95      4970
   macro avg       0.70      0.67      0.67      4970
weighted avg       0.95      0.95      0.95      4970

Confusion Matrix:
[[ 482   10   62]
 [  21    9   30]
 [  92    9 4255]]


In [None]:
# Destination directory for the model and tokenizer
# NOTE(review): in top-to-bottom order new_model was last bound by the cell that loads
# it from this same directory, so this would write back what was just read — confirm
# that this cell is meant to run right after the pseudo-label fine-tuning instead.
output_dir = "/content/drive/MyDrive/best_model_second"

# Save the model
new_model.save_pretrained(output_dir)

# Save the tokenizer
tokenizer.save_pretrained(output_dir)

print(f"모델과 토크나이저가 {output_dir}에 저장되었습니다.")

모델과 토크나이저가 /content/drive/MyDrive/best_model_second에 저장되었습니다.


In [None]:
# Wrap auto_train as an unlabelled SentimentDataset
auto_train_dataset = SentimentDataset(auto_train, tokenizer, max_len=max_len, has_labels=False)

# Run inference
predictions = trainer.predict(auto_train_dataset)  # predicted logits

# The arg-max class of each row becomes its pseudo-label
pseudo_labels = np.argmax(predictions.predictions, axis=1)

In [None]:
# Store the predicted class ids as labels, shifted back from the 0/1/2 class ids to
# the original -1/0/1 coding.
# assign() builds a new DataFrame instead of writing into auto_train in place, which
# avoids pandas' SettingWithCopyWarning when auto_train is a slice of another frame.
auto_train = auto_train.assign(sentiment_label=pseudo_labels - 1)

# Write the labelled data to Google Drive
auto_train.to_csv('/content/drive/MyDrive/auto_train_labeled.csv', encoding='utf-8-sig')