# ABSA Model (최종 버전)

- kykim/electra-kor-base : ACD
- klue/roberta-base : ASC
- back translation & stratifiedKFold 적용
- spacing 적용
- max_len = 128 로 수정

In [None]:
!pip install transformers



In [None]:
!pip install datasets



In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# 모듈 import 및 전역 변수 설정

In [None]:
import json
import os
from tqdm import trange, tqdm
import re
import random

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import AdamW
from torch.amp import autocast, GradScaler
from transformers import AutoModel, AutoConfig, AutoTokenizer
from transformers import get_linear_schedule_with_warmup

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score
import pandas as pd
import numpy as np
import copy
from collections import Counter

In [None]:
# Token-id constants for padding and sentence open/close markers.
# NOTE(review): none of these three constants is referenced in the visible part
# of this file — confirm they are still needed.
PADDING_TOKEN = 0
S_OPEN_TOKEN = 1
S_CLOSE_TOKEN = 2

# When True, train_sentiment_analysis evaluates on the dev set after every epoch;
# best-checkpoint saving and early stopping only happen inside that evaluation branch.
do_eval=True

# Path configuration (everything lives under the mounted Google Drive folder)
BASE_DIR = '/content/drive/MyDrive/ABSA'
DATA_DIR = os.path.join(BASE_DIR, 'data')
SAVED_MODEL_DIR = os.path.join(BASE_DIR, 'saved_model')
ACD_MODEL_DIR = os.path.join(BASE_DIR, 'saved_model/ACD')
ASC_MODEL_DIR = os.path.join(BASE_DIR, 'saved_model/ASC')
pred_result_DIR = os.path.join(BASE_DIR, 'pred_result')
final_output_DIR = os.path.join(BASE_DIR, 'final_output')

# Raw labelled data and its ABSA-format conversion
base_data_path = os.path.join(DATA_DIR, 'base_data.jsonl')
converted_base_data_path = os.path.join(DATA_DIR, 'converted_base_data.jsonl')

# Train / dev / test splits written by split_jsonl_file
train_data_path = os.path.join(DATA_DIR, 'train.jsonl')
dev_data_path = os.path.join(DATA_DIR, 'dev.jsonl')
test_data_path = os.path.join(DATA_DIR, 'test.jsonl')

# NOTE(review): train_sentiment_analysis saves checkpoints as 'best_model.pt',
# while these paths point at 'best_model_last.pt' — confirm the file is renamed
# or copied somewhere outside the visible code.
acd_best_model_path = os.path.join(ACD_MODEL_DIR, 'best_model_last.pt')
asc_best_model_path = os.path.join(ASC_MODEL_DIR, 'best_model_last.pt')

# Unlabelled data to run inference on, and where the final predictions go
raw_data_path = os.path.join(DATA_DIR, 'rawdata_spaced_final.jsonl')
converted_raw_data_path = os.path.join(DATA_DIR, 'converted_raw_data.jsonl')
final_output_path = os.path.join(final_output_DIR, 'final_spaced_output.jsonl')

# Hyperparameters
max_len = 128               # token length every encoded (sentence, aspect) pair is cut/padded to
batch_size = 16
acd_base_model = 'kykim/electra-kor-base'   # backbone for aspect category detection
asc_base_model = 'klue/roberta-base'        # backbone for aspect sentiment classification
learning_rate = 3e-6
eps = 1e-8                  # AdamW epsilon
num_train_epochs = 30
dropout_prob = 0.1
label_smoothing = 0.1       # used by the loss built inside ABSA_Model
max_grad_norm = 1.0         # gradient-clipping norm used in train_one_epoch
threshold = 0.95            # presumably the minimum ACD confidence passed to predict_from_korean_form — verify at the call site

# Aspect categories the models decide on (one binary decision per category)
entity_property_pair = [
    '세정', '자극', '거품', '향', '가격', '머릿결', '탈모', '쿨링'
]

# ACD labels: a True/False decision for each (sentence, aspect) pair
tf_id_to_name = ['False', 'True']
tf_name_to_id = {name: idx for idx, name in enumerate(tf_id_to_name)}

# ASC labels (multi-class classification)
polarity_id_to_name = ['positive', 'negative', 'neutral']
polarity_name_to_id = {name: idx for idx, name in enumerate(polarity_id_to_name)}

# Compute device: GPU when available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Extra special tokens added to both tokenizers
# (they look like anonymisation placeholders — TODO confirm against the data)
special_tokens_dict = {
    'additional_special_tokens': [
        '&name&', '&affiliation&',
        '&social-security-num&',
        '&tel-num&', '&card-num&', '&bank-account&',
        '&num&', '&online-account&'
    ]
}

json 및 jsonl 파일 read, write 함수

In [None]:
def jsonload(fname, encoding="utf-8"):
    """Read the JSON document stored in *fname* and return the decoded object.

    Args:
        fname: Path of the JSON file to read.
        encoding: Text encoding used to open the file.

    Returns:
        Whatever Python object the JSON document decodes to.
    """
    with open(fname, encoding=encoding) as source:
        parsed = json.load(source)
    return parsed

# Save a JSON-serialisable object to a file in a human-readable layout
def jsondump(j, fname):
    """Write *j* to *fname* as pretty-printed (2-space indent) JSON.

    Non-ASCII characters are written as-is rather than as \\u escapes.

    Args:
        j: JSON-serialisable object.
        fname: Destination file path (overwritten if it exists).
    """
    with open(fname, "w", encoding="UTF8") as sink:
        sink.write(json.dumps(j, ensure_ascii=False, indent=2))

# Read one or more jsonl files into a single list
def jsonlload(fname_list, encoding="utf-8"):
    """Load every JSON record from one or several jsonl files.

    Args:
        fname_list: A single path (str or os.PathLike) or an iterable of paths.
            A path that is not an existing file is looked up inside DATA_DIR.
        encoding: Text encoding used to open the files.

    Returns:
        A list with the decoded records of all files, in file order.
    """
    if isinstance(fname_list, (str, os.PathLike)):
        fname_list = [fname_list]

    json_list = []
    for fname in fname_list:
        path = fname if os.path.isfile(fname) else os.path.join(DATA_DIR, fname)
        with open(path, encoding=encoding) as f:
            # Blank / whitespace-only lines (e.g. a trailing newline) carry no
            # record; skipping them avoids a JSONDecodeError.
            json_list.extend(json.loads(line) for line in f if line.strip())
    return json_list

# Write a list of JSON objects to a jsonl file (one object per line)
def jsonldump(jlist, fname):
    """Serialise each item of *jlist* as one JSON line in *fname*.

    Args:
        jlist: Iterable of JSON-serialisable objects.
        fname: Destination file path (overwritten if it exists).
    """
    with open(fname, "w", encoding="utf-8") as sink:
        sink.writelines(
            json.dumps(record, ensure_ascii=False) + "\n" for record in jlist
        )

# Split the records of a jsonl file into train / dev / test files
def split_jsonl_file(jsonl_path, output_dir, train_ratio=0.7, dev_ratio=0.15, test_ratio=0.15, seed=42):
    """Randomly split a jsonl file and write train.jsonl, dev.jsonl and test.jsonl.

    Args:
        jsonl_path: Source jsonl file.
        output_dir: Directory the three split files are written to (created if missing).
        train_ratio: Fraction of records for the training split.
        dev_ratio: Fraction of records for the dev split.
        test_ratio: Fraction of records for the test split.
        seed: Random state forwarded to train_test_split for reproducibility.

    Raises:
        ValueError: If the three ratios do not sum to 1.
    """
    # Without this check mismatched ratios are silently reinterpreted:
    # (1 - train_ratio) is split by dev:test proportion, whatever their sum is.
    if abs(train_ratio + dev_ratio + test_ratio - 1.0) > 1e-6:
        raise ValueError(
            "train_ratio, dev_ratio and test_ratio must sum to 1 "
            f"(got {train_ratio} + {dev_ratio} + {test_ratio})"
        )

    with open(jsonl_path, 'r', encoding='utf-8') as f:
        # Skip blank lines (e.g. a trailing newline) instead of failing to parse them.
        lines = [json.loads(line) for line in f if line.strip()]

    # First carve off the training portion, then divide the remainder into dev/test.
    train_data, temp_data = train_test_split(lines, test_size=(1 - train_ratio), random_state=seed)
    dev_data, test_data = train_test_split(temp_data, test_size=test_ratio / (dev_ratio + test_ratio), random_state=seed)

    os.makedirs(output_dir, exist_ok=True)
    jsonldump(train_data, os.path.join(output_dir, 'train.jsonl'))
    jsonldump(dev_data, os.path.join(output_dir, 'dev.jsonl'))
    jsonldump(test_data, os.path.join(output_dir, 'test.jsonl'))

    print(f"데이터 분할 완료: train={len(train_data)}, dev={len(dev_data)}, test={len(test_data)}")

def split_jsonl_file_train_dev_only(jsonl_path, output_dir, train_ratio=0.85, dev_ratio=0.15, seed=42):
    """Randomly split a jsonl file into train.jsonl and dev.jsonl only.

    Args:
        jsonl_path: Source jsonl file.
        output_dir: Directory the split files are written to (created if missing).
        train_ratio: Fraction of records for the training split.
        dev_ratio: Fraction of records for the dev split.
        seed: Random state forwarded to train_test_split.
    """
    ratio_gap = abs(train_ratio + dev_ratio - 1.0)
    assert ratio_gap < 1e-6, "train과 dev의 비율 합이 1이 되어야 합니다."

    records = []
    with open(jsonl_path, 'r', encoding='utf-8') as src:
        for raw_line in src:
            records.append(json.loads(raw_line))

    train_data, dev_data = train_test_split(records, test_size=dev_ratio, random_state=seed)

    os.makedirs(output_dir, exist_ok=True)
    for file_name, rows in (('train.jsonl', train_data), ('dev.jsonl', dev_data)):
        jsonldump(rows, os.path.join(output_dir, file_name))

    print(f"데이터 분할 완료: train={len(train_data)}, dev={len(dev_data)}")

def ensure_annotation(data):
    """Give every sample without annotations a single "없음" (none) placeholder.

    Samples whose "annotation" key is missing, None or empty are modified in
    place; the same *data* object is returned for convenience.
    """
    for record in data:
        if record.get("annotation"):
            continue  # already has at least one annotation
        record["annotation"] = [["없음", [None, 0, 0], None]]
    return data

def truncate_left(input_ids, attention_mask, max_len, pad_token_id=0):
    """Force a token sequence to exactly *max_len* items.

    Sequences longer than *max_len* keep only their last *max_len* tokens (the
    left side is dropped); shorter ones are right-padded with *pad_token_id*
    and a 0 attention mask.

    Args:
        input_ids: List of token ids.
        attention_mask: List of mask values aligned with *input_ids*.
        max_len: Target length.
        pad_token_id: Id used for padding positions.

    Returns:
        Tuple ``(input_ids, attention_mask)`` as new lists.
    """
    if len(input_ids) > max_len:
        return input_ids[-max_len:], attention_mask[-max_len:]

    shortfall = max_len - len(input_ids)
    padded_ids = input_ids + [pad_token_id] * shortfall
    padded_mask = attention_mask + [0] * shortfall
    return padded_ids, padded_mask

def convert_to_absa_format(data, aspects=None):
    """Convert span-labelled records into the ABSA annotation format.

    Each input item is a dict with a "text" string and a list of
    ``[start, end, "<aspect>-<sentiment>"]`` labels stored under "entities" or
    "label". Labels that are malformed, use an unknown aspect, or use a
    sentiment other than 긍정/부정/중립 are skipped.

    Args:
        data: Iterable of input records.
        aspects: Optional collection of accepted aspect names. Defaults to the
            module-level ``entity_property_pair``.

    Returns:
        A list of ``{"sentence_form": str, "annotation": [[aspect,
        [word, start, end], polarity], ...]}`` dicts, one per input item.
    """
    allowed_aspects = entity_property_pair if aspects is None else aspects
    sentiment_map = {"긍정": "positive", "부정": "negative", "중립": "neutral"}
    converted = []

    for item in data:
        sentence = item.get("text", "")
        label_list = item.get("entities") or item.get("label") or []
        annos = []

        for label_item in label_list:
            # Only the operations that can fail on malformed labels are guarded,
            # and only for the error types malformed data produces; anything
            # else is a real bug and should surface.
            try:
                start, end, full_label = label_item
                aspect, separator, sentiment_ko = full_label.partition("-")
                word = sentence[start:end]
            except (TypeError, ValueError, AttributeError):
                continue

            if not separator:
                continue  # label has no "<aspect>-<sentiment>" structure

            # Strip both halves so "세정 - 긍정" is treated like "세정-긍정".
            aspect = aspect.strip()
            if aspect not in allowed_aspects:
                continue

            polarity = sentiment_map.get(sentiment_ko.strip())
            if polarity is None:
                continue

            annos.append([aspect, [word, start, end], polarity])

        converted.append({
            "sentence_form": sentence,
            "annotation": annos
        })

    return converted

In [None]:
# Load the labelled base data, convert it to the ABSA annotation format and
# persist the result.
base_data = jsonlload(base_data_path)
converted_base_data = convert_to_absa_format(base_data)
jsonldump(converted_base_data, converted_base_data_path)
# Reload the written file; as the last expression of the notebook cell its
# value is shown as the cell output (sanity check of the conversion).
jsonlload(converted_base_data_path)

[{'sentence_form': '바오밥 신제품 나와서 사봤어요 시 카라인이라서 그런지 두피 세정이 잘 되는 느낌이에요',
  'annotation': [['세정', [' 두피 세정이 잘 되는 느낌이', 29, 45], 'positive']]},
 {'sentence_form': '이렇게 예쁜 샴푸는 처음 이 야 종류도 다양하고 저는 탈모샴푸로 비컨피던트 구매해 봤는데 감을 때 시원하고 향도 시원해서 여름에 사용하기 참 좋더라구 요 향기 좋고 세정력 좋은 샴푸로 추천합니다',
  'annotation': [['세정', [' 때 시', 52, 56], 'positive'],
   ['향', ['하고 향도 시', 57, 64], 'positive'],
   ['향', ['라구 요 ', 81, 86], 'positive'],
   ['세정', ['기 좋고 세정력 ', 87, 96], 'positive']]},
 {'sentence_form': '케이스부터 고급 지네요 지루성 두피염이라 아무거나 못 쓰는 편이라 신중히 고르는 편이예요노모어 오일이 조금 더 비싸서 좋지 않을까 했는데 오일샴푸라 린 언미로 겟했어요 그런데 딱 원하는 제품이네요 뾰루지가 잘 나는 두피인데 염증 완화에 도움을 준데요 합성계면 활성제, 합성 방부제 없고 그 외에도 전제품 EWG 98 프로 이상 유해성분 제외라고 하니 믿고 써봅니다향이 시원한 느낌이고  거품도 잘 나며 개운한 느낌입니다',
  'annotation': [['향', [' 제외라고 하니 믿고', 187, 198], 'positive'],
   ['거품', ['써봅니다향이 ', 199, 206], 'positive'],
   ['세정', ['원한 느낌이고 ', 207, 215], 'positive']]},
 {'sentence_form': '좋아요.리필팩도 들어 있어서 원플러스 원처럼 구입하게 되서 좋아요.',
  'annotation': [['가격', ['서 원플러스 원처럼 구입하게 되서 좋', 14, 34], 'positive'

# 데이터 전처리

- len 200 이하로 자르고 띄어쓰기 수행하는데, 속성 주변 기준으로 리뷰 자르게 되면
    - truncate_and_spacing 수정 필요

In [None]:
# 1. Replace everything except Hangul, Latin letters, digits, whitespace and a
#    hand-picked set of punctuation marks with a space
def clean_review(text):
    """Blank out disallowed characters in *text* and trim the result.

    Allowed: Hangul jamo/syllables, a-z, A-Z, 0-9, whitespace and ``!?.,%+=~&()``.
    Every other character becomes a single space.
    """
    keep_symbols = re.escape("!?.,%+=~&()")
    disallowed = re.compile(rf"[^ㄱ-ㅎㅏ-ㅣ가-힣a-zA-Z0-9\s{keep_symbols}]")
    return disallowed.sub(' ', text).strip()

# 2. Remove parenthesised side remarks
def del_bracket(text):
    """Drop every ``( ... )`` group, including nested ones, then stray brackets.

    Each removed group or leftover bracket is replaced by a space; the result
    is stripped of leading/trailing whitespace.
    """
    innermost_group = re.compile(r'\([^()]*\)')

    # Peel innermost groups repeatedly so nested brackets collapse from the
    # inside out; stop once a pass removes nothing.
    while True:
        text, removed = innermost_group.subn(' ', text)
        if removed == 0:
            break

    # Unbalanced brackets that never formed a group
    for leftover in "()":
        text = text.replace(leftover, " ")

    return text.strip()

# 3. Shorten meaningless repeated characters
# Any character repeated three or more times in a row
dupchars_pattern = re.compile(r'(.)\1{2,}')
# One of the symbols !?~%+=& repeated two or more times in a row
dupsymbols_pattern = re.compile(r'([!?~%+=&])\1{1,}')
# Any run of whitespace
doublespace_pattern = re.compile(r'\s+')

def contract_dupchars(text, n = 3):
    """Collapse repeated characters, repeated symbols and whitespace runs.

    Args:
        text: Input string.
        n: Number of copies each run of 3+ identical characters is replaced
            with; when ``n <= 0`` this step is skipped.

    Returns:
        The contracted string with repeated ``!?~%+=&`` reduced to one symbol,
        whitespace runs reduced to one space, and outer whitespace stripped.
    """
    if n > 0:
        text = dupchars_pattern.sub(lambda run: run.group(1) * n, text)

    text = dupsymbols_pattern.sub(lambda run: run.group(1), text)
    return doublespace_pattern.sub(' ', text).strip()

# 4. Remove sponsorship-disclosure sentences
# Compiled once at import time instead of on every call.
# Within each alternation the longer variant comes BEFORE its prefix
# ("입니다타" before "입니다", "입니다요" before "입니다"): regex alternation takes
# the first alternative that matches, so with the prefix first the longer typo
# variants could never match and a stray "타" / "요" stayed in the text.
_SPONSORED_PATTERN = re.compile(
    r'''
        (판매자(에게|로부터)|업체로부터|본\s상품\s후기는).{0,70}?
        (후기(입니다타|입니다|에요|입니\s?다)?|
         리뷰(입니다요|입니다|했습니다|적었습니다|하였습니다)?|
         작성하였습니다|기남겨요|기랍니다|흐기입니다|전달합니다|올립니다|것\s?입니다)
        [!.~\s]{0,2}
    ''',
    flags=re.VERBOSE,
)

def del_sponsored(text):
    """Replace sponsorship-disclosure phrases in *text* with a space.

    A phrase starts with a sponsor marker (e.g. "판매자에게", "업체로부터",
    "본 상품 후기는"), spans at most 70 further characters and ends with one of
    the known closing expressions plus up to two trailing ``!``, ``.``, ``~``
    or whitespace characters.

    Returns:
        The text with such phrases removed and outer whitespace stripped.
    """
    return _SPONSORED_PATTERN.sub(' ', text).strip()

# Full preprocessing pipeline
def preprocessing(form):
    """Run all cleaning steps on a review string, in a fixed order.

    Order: clean_review -> del_bracket -> contract_dupchars -> del_sponsored.
    """
    for step in (clean_review, del_bracket, contract_dupchars, del_sponsored):
        form = step(form)
    return form

# 모델 정의

- ACD 를 위한 kykim/electra-kor-base
- ASC 를 위한 klue/roberta-base

In [None]:
class AttentionPooling(nn.Module):
    """Pool a sequence of hidden states into one vector with learned attention.

    A two-layer scorer (Linear -> Tanh -> Linear) assigns one score per
    position; positions whose attention mask is 0 are pushed to -1e4 before
    the softmax so they receive (near-)zero weight.
    """

    def __init__(self, hidden_size):
        super().__init__()
        # Attribute name and layer order are part of the state_dict layout.
        self.attention = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.Tanh(),
            nn.Linear(hidden_size, 1)
        )

    def forward(self, hidden_states, attention_mask):
        """Return the attention-weighted sum of *hidden_states* over dim 1."""
        raw_scores = self.attention(hidden_states).squeeze(-1)
        is_padding = attention_mask == 0
        masked_scores = raw_scores.masked_fill(is_padding, -1e4)
        position_weights = masked_scores.softmax(dim=-1).unsqueeze(-1)
        return (hidden_states * position_weights).sum(dim=1)

class SimpleClassifier(nn.Module):
    """Classification head: dropout -> dense -> LayerNorm -> tanh -> dropout -> output."""

    def __init__(self, hidden_size, num_labels, dropout_prob=dropout_prob):
        super().__init__()
        # Attribute names and creation order are kept stable because they
        # define the state_dict keys of saved checkpoints.
        self.dense = nn.Linear(hidden_size, hidden_size)
        self.dropout = nn.Dropout(dropout_prob)
        self.norm = nn.LayerNorm(hidden_size)
        self.act = nn.Tanh()
        self.output = nn.Linear(hidden_size, num_labels)

    def forward(self, x):
        """Map pooled features *x* to ``num_labels`` logits."""
        pipeline = (
            self.dropout,
            self.dense,
            self.norm,
            self.act,
            self.dropout,
            self.output,
        )
        for layer in pipeline:
            x = layer(x)
        return x

class ABSA_Model(nn.Module):
    """Pretrained transformer backbone + attention pooling + classification head.

    Args:
        base_model: Hugging Face model name or path for the backbone.
        num_labels: Number of output classes.
        tokenizer_len: If given, the backbone's token embeddings are resized to
            this vocabulary size (needed after adding special tokens).
        dropout_prob: Dropout probability of the classification head.
    """

    def __init__(self, base_model, num_labels, tokenizer_len=None, dropout_prob=dropout_prob):
        super().__init__()

        backbone_config = AutoConfig.from_pretrained(base_model)
        self.backbone = AutoModel.from_pretrained(base_model, config=backbone_config)
        if tokenizer_len is not None:
            self.backbone.resize_token_embeddings(tokenizer_len)

        hidden_size = backbone_config.hidden_size
        self.attn_pool = AttentionPooling(hidden_size)
        self.classifier = SimpleClassifier(hidden_size, num_labels, dropout_prob)

        # Built-in loss (label-smoothed cross entropy), used only when labels
        # are passed to forward().
        self.loss_fn = nn.CrossEntropyLoss(label_smoothing=label_smoothing)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return ``(loss, logits)``; ``loss`` is None when *labels* is None."""
        encoder_out = self.backbone(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        pooled_output = self.attn_pool(encoder_out.last_hidden_state, attention_mask)
        logits = self.classifier(pooled_output)

        loss = None if labels is None else self.loss_fn(logits, labels)
        return loss, logits

# 데이터 파싱 및 tokenization 함수 정의


In [None]:
def tokenize_and_align_labels(tokenizer, form, annotations, max_len):
    """Build ACD and ASC training samples for one sentence.

    For every aspect in ``entity_property_pair`` the sentence is encoded
    together with the aspect name and cut/padded to *max_len* with
    truncate_left. The first annotation naming that aspect yields an ACD
    'True' sample and, if its polarity is known, an ASC sample; an aspect with
    no such annotation yields an ACD 'False' sample.

    Args:
        tokenizer: Tokenizer called as ``tokenizer(form, pair, ...)``.
        form: Preprocessed sentence text.
        annotations: List of ``[aspect, span, polarity]`` annotations.
        max_len: Fixed token length of every sample.

    Returns:
        ``(entity_dict, polarity_dict)``, each with 'input_ids',
        'attention_mask' and 'label' lists. Both are empty when *form* is
        empty or not a string.
    """
    entity_dict = {'input_ids': [], 'attention_mask': [], 'label': []}
    polarity_dict = {'input_ids': [], 'attention_mask': [], 'label': []}

    if not form or not isinstance(form, str):
        return entity_dict, polarity_dict

    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = 0

    def add_sample(bucket, ids, mask, label):
        bucket['input_ids'].append(ids)
        bucket['attention_mask'].append(mask)
        bucket['label'].append(label)

    for pair in entity_property_pair:
        encoded = tokenizer(
            form,
            pair,
            padding=False,
            truncation=False,
            return_tensors='pt',
            add_special_tokens=True
        )
        input_ids, attention_mask = truncate_left(
            encoded['input_ids'][0].tolist(),
            encoded['attention_mask'][0].tolist(),
            max_len,
            pad_token_id,
        )

        for annotation in annotations:
            if len(annotation) < 3:
                continue

            entity_property, _, polarity = annotation
            if polarity == '------------':
                continue
            if entity_property == '없음' or entity_property != pair:
                continue

            # First annotation for this aspect: positive ACD sample ...
            add_sample(entity_dict, input_ids, attention_mask, tf_name_to_id['True'])

            # ... and an ASC sample when the polarity label is recognised.
            polarity_id = polarity_name_to_id.get(polarity)
            if polarity_id is not None:
                add_sample(polarity_dict, input_ids, attention_mask, polarity_id)
            break
        else:
            # No annotation mentioned this aspect: negative ACD sample.
            add_sample(entity_dict, input_ids, attention_mask, tf_name_to_id['False'])

    return entity_dict, polarity_dict

def get_dataset(raw_data, tokenizer, max_len):
    """Turn annotated utterances into ACD/ASC tensor datasets plus class weights.

    Each utterance is preprocessed; texts shorter than 10 characters after
    preprocessing are skipped.

    Args:
        raw_data: Iterable of dicts with 'sentence_form' and 'annotation'.
        tokenizer: Tokenizer forwarded to tokenize_and_align_labels.
        max_len: Fixed token length of every sample.

    Returns:
        ``(entity_dataset, polarity_dataset, entity_weights, polarity_weights)``
        where the weights are ``total / class_count`` per class (0.0 for a
        class that never occurs).

    Raises:
        ValueError: If no ACD samples or no ASC samples were produced.
    """
    columns = ('input_ids', 'attention_mask', 'label')
    entity_cols = {name: [] for name in columns}
    polarity_cols = {name: [] for name in columns}

    for utterance in raw_data:
        form = preprocessing(utterance.get('sentence_form', ''))
        if len(form) < 10:
            continue
        annotations = utterance.get('annotation', [])

        entity_dict, polarity_dict = tokenize_and_align_labels(tokenizer, form, annotations, max_len)
        for name in columns:
            entity_cols[name].extend(entity_dict[name])
            polarity_cols[name].extend(polarity_dict[name])

    if not entity_cols['input_ids']:
        raise ValueError("No entity data found. Check preprocessing or filtering conditions.")
    if not polarity_cols['input_ids']:
        raise ValueError("No polarity data found. Check preprocessing or filtering conditions.")

    def compute_class_weight(labels, label_size):
        # Inverse-frequency weight per class id; absent classes get 0.0.
        counts = Counter(labels)
        total = sum(counts.values())
        weights = [
            (total / counts[class_id]) if counts[class_id] > 0 else 0.0
            for class_id in range(label_size)
        ]
        return torch.tensor(weights, dtype=torch.float)

    def to_tensor_dataset(cols):
        return TensorDataset(
            *(torch.tensor(cols[name], dtype=torch.long) for name in columns)
        )

    entity_dataset = to_tensor_dataset(entity_cols)
    polarity_dataset = to_tensor_dataset(polarity_cols)

    entity_weights = compute_class_weight(entity_cols['label'], len(tf_name_to_id))
    polarity_weights = compute_class_weight(polarity_cols['label'], len(polarity_name_to_id))

    return entity_dataset, polarity_dataset, entity_weights, polarity_weights

- tokenizer = acd_tokenizer or asc_tokenizer 로 task 에 맞게 할당

- return 값 중 필요한 것만 쓰기
    - 필요 없는 건 _ 로 무시

# 모델 학습 및 최적화

- 콜백

In [None]:
def evaluation(y_true, y_pred, label_len):
    """Print accuracy, macro accuracy and F1 scores for class-id predictions.

    Args:
        y_true: Sequence of gold class ids.
        y_pred: Sequence of predicted class ids, aligned with *y_true*.
        label_len: Number of classes.
    """
    per_class_total = [0] * label_len
    per_class_hit = [0] * label_len

    for idx, truth in enumerate(y_true):
        per_class_total[truth] += 1
        if truth == y_pred[idx]:
            per_class_hit[truth] += 1

    # Per-class accuracy; a class that never occurs counts as 0.
    per_class_acc = [
        hit / total if total > 0 else 0
        for hit, total in zip(per_class_hit, per_class_total)
    ]
    print(f'Accuracy: {sum(per_class_hit) / sum(per_class_total):.4f}')
    print(f'Macro Accuracy: {sum(per_class_acc) / label_len:.4f}')
    print('F1 (per class):', f1_score(y_true, y_pred, average=None))
    print('F1 Micro:', f1_score(y_true, y_pred, average='micro'))
    print('F1 Macro:', f1_score(y_true, y_pred, average='macro'))

# Shared gradient scaler for mixed-precision training
scaler = GradScaler()
def train_one_epoch(model, dataloader, optimizer, scheduler, loss_fn):
    """Train *model* for one pass over *dataloader* with mixed precision.

    Args:
        model: Module whose forward returns ``(loss, logits)``.
        dataloader: Yields ``(input_ids, attention_mask, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        scheduler: LR scheduler stepped once per batch.
        loss_fn: Loss applied to ``(logits, labels)``; this is the loss that
            is optimised.

    Returns:
        Mean training loss over the batches of this epoch.
    """
    model.train()
    total_loss = 0.0
    # Mixed precision only on GPU; on CPU the forward pass stays in full
    # precision instead of requesting CUDA autocast on a machine without CUDA.
    use_amp = device.type == "cuda"

    for batch in dataloader:
        input_ids, attention_mask, labels = [b.to(device) for b in batch]

        optimizer.zero_grad()
        with autocast(device_type=device.type, enabled=use_amp):
            # Labels are not passed to the model: its built-in loss was
            # computed and immediately overwritten by loss_fn, so skipping it
            # avoids wasted work without changing the optimised objective.
            _, logits = model(input_ids, attention_mask)
            loss = loss_fn(logits, labels)

        scaler.scale(loss).backward()
        # Gradients are still multiplied by the loss scale here. Unscale them
        # first so max_grad_norm is compared against the true gradient norm.
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        total_loss += loss.item()

    return total_loss / len(dataloader)

def evaluate_model(model, dataloader):
    """Run *model* over *dataloader* without gradients and score it.

    Args:
        model: Module whose forward returns ``(loss, logits)``.
        dataloader: Yields ``(input_ids, attention_mask, labels)`` batches.

    Returns:
        ``(macro_f1, preds, labels)`` where ``preds`` and ``labels`` are flat
        lists of class ids.
    """
    model.eval()
    predicted_ids = []
    gold_ids = []

    with torch.no_grad():
        for batch in dataloader:
            input_ids, attention_mask, label_ids = (tensor.to(device) for tensor in batch)
            logits = model(input_ids, attention_mask)[1]
            predicted_ids += logits.argmax(dim=-1).tolist()
            gold_ids += label_ids.tolist()

    macro_f1 = f1_score(gold_ids, predicted_ids, average='macro')
    return macro_f1, predicted_ids, gold_ids

def get_optimizer_scheduler(model, dataloader):
    """Create the AdamW optimizer and linear-decay LR schedule for *model*.

    Biases and normalisation weights are excluded from weight decay; all other
    parameters use a decay of 0.01. The schedule has no warm-up and spans
    ``len(dataloader) * num_train_epochs`` steps.

    Returns:
        ``(optimizer, scheduler)``.
    """
    # 'gamma'/'beta' are legacy LayerNorm parameter names and are kept for
    # older checkpoints. Current backbones name these parameters
    # 'LayerNorm.weight'/'LayerNorm.bias', and the classification head in this
    # file uses 'norm.weight', so those patterns are listed too; otherwise the
    # normalisation weights would be decayed.
    no_decay = ['bias', 'gamma', 'beta', 'LayerNorm.weight', 'norm.weight']

    decay_params, no_decay_params = [], []
    for name, param in model.named_parameters():
        if any(pattern in name for pattern in no_decay):
            no_decay_params.append(param)
        else:
            decay_params.append(param)

    grouped_parameters = [
        {'params': decay_params, 'weight_decay': 0.01},
        {'params': no_decay_params, 'weight_decay': 0.0}
    ]
    optimizer = AdamW(grouped_parameters, lr=learning_rate, eps=eps)
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0,
                                                num_training_steps=len(dataloader) * num_train_epochs)
    return optimizer, scheduler

class EarlyStopping:
    """Track a validation score and flag when it has stopped improving.

    Args:
        patience: Number of consecutive non-improving steps after which
            ``should_stop`` becomes True.
        mode: 'max' if higher scores are better, 'min' if lower are better.
    """

    def __init__(self, patience=4, mode='max'):
        self.patience, self.mode = patience, mode
        self.counter = 0           # consecutive steps without improvement
        self.best_score = None     # best score seen so far
        self.should_stop = False

    def _is_improvement(self, score):
        # The very first score always counts as an improvement.
        if self.best_score is None:
            return True
        if self.mode == 'max':
            return score > self.best_score
        if self.mode == 'min':
            return score < self.best_score
        return False

    def step(self, score):
        """Record a new score and update ``counter`` / ``should_stop``."""
        if self._is_improvement(score):
            self.best_score = score
            self.counter = 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.should_stop = True

def _run_task_epoch(tag, epoch, model, train_loader, dev_loader, optimizer, scheduler,
                    loss_fn, early_stop, save_path):
    """Train one task for one epoch, then (if do_eval) evaluate, checkpoint and update early stopping."""
    train_loss = train_one_epoch(model, train_loader, optimizer, scheduler, loss_fn)
    print(f"[{tag}] Epoch {epoch+1} | Train Loss: {train_loss:.4f}")

    if not do_eval:
        return

    f1, _, _ = evaluate_model(model, dev_loader)
    print(f"[{tag}] Dev F1_macro: {f1:.4f}")
    # Save only when the dev score beats the best one seen so far.
    if f1 > (early_stop.best_score or 0):
        torch.save(model.state_dict(), save_path)
        print(f"Saved best {tag.lower()} model")
    early_stop.step(f1)
    if early_stop.should_stop:
        print(f"Early stopping triggered ({tag})")


def train_sentiment_analysis(train_data, dev_data):
    """Train the ACD (entity) and ASC (polarity) models side by side.

    Each epoch trains and evaluates the entity model, then the polarity model.
    When do_eval is on, the best checkpoint of each task (by dev macro-F1) is
    written to ``best_model.pt`` in ACD_MODEL_DIR / ASC_MODEL_DIR. A task whose
    early stopping has triggered is no longer trained; training ends when both
    tasks have stopped or num_train_epochs is reached.

    Args:
        train_data: List of annotated training utterances.
        dev_data: List of annotated dev utterances.
    """
    print('train_sentiment_analysis START')

    acd_tokenizer = AutoTokenizer.from_pretrained(acd_base_model)
    asc_tokenizer = AutoTokenizer.from_pretrained(asc_base_model)
    acd_tokenizer.add_special_tokens(special_tokens_dict)
    asc_tokenizer.add_special_tokens(special_tokens_dict)

    train_data = ensure_annotation(train_data)
    dev_data = ensure_annotation(dev_data)

    # The two tasks use different tokenizers, so each dataset is built with its
    # own tokenizer and only the matching outputs are kept.
    entity_train, _, entity_weights, _ = get_dataset(train_data, acd_tokenizer, max_len)
    _, polarity_train, _, polarity_weights = get_dataset(train_data, asc_tokenizer, max_len)
    entity_dev, _, _, _ = get_dataset(dev_data, acd_tokenizer, max_len)
    _, polarity_dev, _, _ = get_dataset(dev_data, asc_tokenizer, max_len)

    print("Entity Class Weights:")
    for i, (name, weight) in enumerate(zip(tf_id_to_name, entity_weights.tolist())):
        print(f"  - {name} (class {i}): weight = {weight:.4f}")

    print("Polarity Class Weights:")
    for i, (name, weight) in enumerate(zip(polarity_id_to_name, polarity_weights.tolist())):
        print(f"  - {name} (class {i}): weight = {weight:.4f}")

    entity_train_loader = DataLoader(entity_train, shuffle=True, batch_size=batch_size)
    entity_dev_loader = DataLoader(entity_dev, shuffle=False, batch_size=batch_size)
    polarity_train_loader = DataLoader(polarity_train, shuffle=True, batch_size=batch_size)
    polarity_dev_loader = DataLoader(polarity_dev, shuffle=False, batch_size=batch_size)

    entity_model = ABSA_Model(acd_base_model,  len(tf_name_to_id), len(acd_tokenizer)).to(device)
    polarity_model = ABSA_Model(asc_base_model,  len(polarity_name_to_id), len(asc_tokenizer)).to(device)

    # Class-weighted losses to counter the label imbalance.
    entity_loss_fn = torch.nn.CrossEntropyLoss(weight=entity_weights.to(device))
    polarity_loss_fn = torch.nn.CrossEntropyLoss(weight=polarity_weights.to(device))

    entity_opt, entity_sched = get_optimizer_scheduler(entity_model, entity_train_loader)
    polarity_opt, polarity_sched = get_optimizer_scheduler(polarity_model, polarity_train_loader)

    early_stop_entity = EarlyStopping(patience=4, mode='max')
    early_stop_polarity = EarlyStopping(patience=4, mode='max')

    # torch.save does not create missing directories.
    os.makedirs(ACD_MODEL_DIR, exist_ok=True)
    os.makedirs(ASC_MODEL_DIR, exist_ok=True)
    entity_ckpt = os.path.join(ACD_MODEL_DIR, 'best_model.pt')
    polarity_ckpt = os.path.join(ASC_MODEL_DIR, 'best_model.pt')

    for epoch in trange(num_train_epochs, desc="Epoch"):
        # A task that already hit early stopping is skipped instead of being
        # trained further while waiting for the other task to finish.
        if not early_stop_entity.should_stop:
            _run_task_epoch("Entity", epoch, entity_model, entity_train_loader, entity_dev_loader,
                            entity_opt, entity_sched, entity_loss_fn, early_stop_entity, entity_ckpt)

        if not early_stop_polarity.should_stop:
            _run_task_epoch("Polarity", epoch, polarity_model, polarity_train_loader, polarity_dev_loader,
                            polarity_opt, polarity_sched, polarity_loss_fn, early_stop_polarity, polarity_ckpt)

        if early_stop_entity.should_stop and early_stop_polarity.should_stop:
            break

    print("Training complete.")

In [None]:
# Split the converted base data 70 / 15 / 15 into train.jsonl, dev.jsonl and
# test.jsonl inside DATA_DIR.
split_jsonl_file(
    jsonl_path=converted_base_data_path,
    output_dir=DATA_DIR,
    train_ratio=0.7,
    dev_ratio=0.15,
    test_ratio=0.15
)

데이터 분할 완료: train=1098, dev=235, test=236


In [None]:
# Load the train/dev splits and train both the ACD and ASC models.
train_data = jsonlload(train_data_path)
dev_data = jsonlload(dev_data_path)
train_sentiment_analysis(train_data, dev_data)

train_sentiment_analysis START


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/80.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/620 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/344k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/375 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/248k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/752k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (624 > 512). Running this sequence through the model will result in indexing errors


Entity Class Weights:
  - False (class 0): weight = 1.3846
  - True (class 1): weight = 3.6000
Polarity Class Weights:
  - positive (class 0): weight = 1.1885
  - negative (class 1): weight = 9.1729
  - neutral (class 2): weight = 20.1653


pytorch_model.bin:   0%|          | 0.00/473M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/473M [00:00<?, ?B/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


config.json:   0%|          | 0.00/546 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/443M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Epoch:   0%|          | 0/20 [00:00<?, ?it/s][A

[Entity] Epoch 1 | Train Loss: 0.6890
[Entity] Dev F1_macro: 0.5555
Saved best entity model
[Polarity] Epoch 1 | Train Loss: 1.0818
[Polarity] Dev F1_macro: 0.3333



Epoch:   5%|▌         | 1/20 [01:11<22:43, 71.75s/it][A

Saved best polarity model
[Entity] Epoch 2 | Train Loss: 0.6335
[Entity] Dev F1_macro: 0.6972
Saved best entity model
[Polarity] Epoch 2 | Train Loss: 1.0008
[Polarity] Dev F1_macro: 0.4655



Epoch:  10%|█         | 2/20 [01:55<16:39, 55.55s/it][A

Saved best polarity model
[Entity] Epoch 3 | Train Loss: 0.3629
[Entity] Dev F1_macro: 0.9116
Saved best entity model
[Polarity] Epoch 3 | Train Loss: 0.9059
[Polarity] Dev F1_macro: 0.5240



Epoch:  15%|█▌        | 3/20 [02:41<14:23, 50.79s/it][A

Saved best polarity model
[Entity] Epoch 4 | Train Loss: 0.2373
[Entity] Dev F1_macro: 0.9302
Saved best entity model
[Polarity] Epoch 4 | Train Loss: 0.8477
[Polarity] Dev F1_macro: 0.5926



Epoch:  20%|██        | 4/20 [03:25<12:50, 48.13s/it][A

Saved best polarity model
[Entity] Epoch 5 | Train Loss: 0.2549
[Entity] Dev F1_macro: 0.9446
Saved best entity model
[Polarity] Epoch 5 | Train Loss: 0.7669



Epoch:  25%|██▌       | 5/20 [04:08<11:37, 46.49s/it][A

[Polarity] Dev F1_macro: 0.5852
[Entity] Epoch 6 | Train Loss: 0.3016
[Entity] Dev F1_macro: 0.9427
[Polarity] Epoch 6 | Train Loss: 0.7400



Epoch:  30%|███       | 6/20 [04:50<10:28, 44.89s/it][A

[Polarity] Dev F1_macro: 0.5542
[Entity] Epoch 7 | Train Loss: 0.3337
[Entity] Dev F1_macro: 0.9438
[Polarity] Epoch 7 | Train Loss: 0.6634



Epoch:  35%|███▌      | 7/20 [05:32<09:29, 43.83s/it][A

[Polarity] Dev F1_macro: 0.5791
[Entity] Epoch 8 | Train Loss: 0.2923
[Entity] Dev F1_macro: 0.9384
[Polarity] Epoch 8 | Train Loss: 0.5587



Epoch:  40%|████      | 8/20 [06:13<08:37, 43.09s/it][A

[Polarity] Dev F1_macro: 0.5816
Early stopping triggered (Polarity)
[Entity] Epoch 9 | Train Loss: 0.2804


Epoch:  40%|████      | 8/20 [06:47<10:10, 50.89s/it]

[Entity] Dev F1_macro: 0.9430
Early stopping triggered (Entity)
Training complete.





# 모델 평가

학습된 모델을 바탕으로 국어원 데이터 형태를 만드는 방법 예시

In [None]:
def _classify_pair(tokenizer, model, form, pair, max_len):
    """Encode (form, pair), run *model* on it and return ``(pred_id, confidence)``."""
    # Same fallback as tokenize_and_align_labels: pad with 0 if the tokenizer
    # defines no pad token.
    pad_token_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else 0

    encoded = tokenizer(
        form,
        pair,
        padding=False,
        truncation=False,
        return_tensors='pt',
        add_special_tokens=True
    )
    input_ids = encoded['input_ids'][0].tolist()
    attention_mask = encoded['attention_mask'][0].tolist()
    input_ids, attention_mask = truncate_left(input_ids, attention_mask, max_len, pad_token_id)

    input_ids = torch.tensor([input_ids]).to(device)
    attention_mask = torch.tensor([attention_mask]).to(device)

    with torch.no_grad():
        _, logits = model(input_ids, attention_mask)

    probs = torch.softmax(logits, dim=-1)
    confidence, pred = torch.max(probs, dim=-1)
    return pred.item(), confidence.item()


def _print_confidence_stats(title, confidences):
    """Print max / min / mean / median of a non-empty list of confidences."""
    print(title)
    print(f"  - max: {max(confidences):.4f}")
    print(f"  - min: {min(confidences):.4f}")
    print(f"  - mean: {np.mean(confidences):.4f}")
    print(f"  - median: {np.median(confidences):.4f}")


def predict_from_korean_form(acd_tokenizer, asc_tokenizer, acd_best_model, asc_best_model, data, max_len, threshold):
    """Annotate every sentence in *data* with predicted (aspect, polarity) pairs.

    For each aspect in ``entity_property_pair`` the ACD model decides whether
    the aspect is mentioned; only when it predicts 'True' with confidence of at
    least *threshold* is the ASC model asked for the polarity. Sentences with
    no accepted aspect, and sentences that are empty or not strings, receive
    the single "없음" placeholder annotation.

    Args:
        acd_tokenizer: Tokenizer matching *acd_best_model*.
        asc_tokenizer: Tokenizer matching *asc_best_model*.
        acd_best_model: Trained aspect-detection model.
        asc_best_model: Trained polarity model.
        data: List of dicts with a 'sentence_form' key; modified in place.
        max_len: Fixed token length used for encoding.
        threshold: Minimum ACD confidence for accepting an aspect.

    Returns:
        The same *data* list, with each item's 'annotation' overwritten.
    """
    acd_confidences_all = []
    asc_confidences_all = []

    acd_best_model.eval()
    asc_best_model.eval()

    for sentence in data:
        raw_form = sentence.get('sentence_form', '')
        # Validate before preprocessing: the regex-based cleaning raises on
        # non-string input, which made the later type check unreachable.
        form = preprocessing(raw_form) if isinstance(raw_form, str) else raw_form

        if not isinstance(form, str) or not form.strip():
            print(f"Invalid sentence skipped: {form}")
            # Same placeholder as any other sentence without aspects, so every
            # output record has a uniform shape.
            sentence['annotation'] = [["없음", [None, 0, 0], None]]
            continue

        sentence['annotation'] = []

        for pair in entity_property_pair:
            # Step 1: aspect category detection (ACD)
            acd_pred, acd_confidence = _classify_pair(acd_tokenizer, acd_best_model, form, pair, max_len)
            acd_confidences_all.append(acd_confidence)

            if tf_id_to_name[acd_pred] != 'True' or acd_confidence < threshold:
                continue

            # Step 2: aspect sentiment classification (ASC)
            asc_pred, asc_confidence = _classify_pair(asc_tokenizer, asc_best_model, form, pair, max_len)
            asc_confidences_all.append(asc_confidence)

            if 0 <= asc_pred < len(polarity_id_to_name):
                polarity = polarity_id_to_name[asc_pred]
            else:
                polarity = "UNKNOWN"

            sentence['annotation'].append([
                pair,
                [None, 0, 0],
                polarity
            ])

        if not sentence['annotation']:
            sentence['annotation'] = [["없음", [None, 0, 0], None]]

    if acd_confidences_all:
        _print_confidence_stats("Confidence 값 분포 (ACD 단계):", acd_confidences_all)

    if asc_confidences_all:
        _print_confidence_stats("\nConfidence 값 분포 (ASC 단계):", asc_confidences_all)

    return data

F1 score 계산 - acd 성능 및 전체 성능 (absa) 에 대한 F1 score 따로 계산

In [None]:
def evaluation_f1(true_data, pred_data):
    """Micro-averaged precision/recall/F1 for category detection and full ABSA.

    Both arguments are sequences of dicts that may carry an 'annotation' list.
    Only annotations with exactly three fields whose first field is not '없음'
    are scored. The ACD score compares the first field alone; the ABSA score
    compares the (first field, third field) pair.

    Samples are paired positionally with ``zip``; a length mismatch only
    prints a warning and the surplus samples are ignored.

    Returns:
        A dict with keys 'ACD result' and 'entire ABSA result', each mapping
        to {'Precision', 'Recall', 'F1'} rounded to four decimals.
    """
    def _collect(sample):
        # Gather the category set and the (category, polarity) set of a sample.
        categories, pairs = set(), set()
        for entry in sample.get('annotation', []):
            if len(entry) != 3 or entry[0] == '없음':
                continue
            categories.add(entry[0])
            pairs.add((entry[0], entry[2]))
        return categories, pairs

    def _score(tp, fp, fn):
        # Guard every division so empty inputs yield 0.0 instead of raising.
        predicted, actual = tp + fp, tp + fn
        precision = tp / predicted if predicted > 0 else 0.0
        recall = tp / actual if actual > 0 else 0.0
        total = precision + recall
        f1 = 2 * precision * recall / total if total > 0 else 0.0
        return {
            'Precision': round(precision, 4),
            'Recall': round(recall, 4),
            'F1': round(f1, 4)
        }

    n_true, n_pred = len(true_data), len(pred_data)
    if n_true != n_pred:
        print(f"Warning: Length mismatch (true={n_true}, pred={n_pred})")

    acd_counts = {'tp': 0, 'fp': 0, 'fn': 0}
    absa_counts = {'tp': 0, 'fp': 0, 'fn': 0}

    for gold_sample, guess_sample in zip(true_data, pred_data):
        gold_cats, gold_pairs = _collect(gold_sample)
        guess_cats, guess_pairs = _collect(guess_sample)

        for counts, gold, guess in (
            (acd_counts, gold_cats, guess_cats),
            (absa_counts, gold_pairs, guess_pairs),
        ):
            counts['tp'] += len(gold & guess)
            counts['fp'] += len(guess - gold)
            counts['fn'] += len(gold - guess)

    return {
        'ACD result': _score(**acd_counts),
        'entire ABSA result': _score(**absa_counts)
    }

def evaluation_per_aspect(true_data, pred_data):
    """Compute ACD and ABSA precision/recall/F1 separately for every aspect.

    The aspect list is taken from the first field of every three-field
    annotation in ``true_data``, plus the pseudo-aspect '없음' ("none").
    Samples are paired positionally with ``zip``.

    For a regular aspect:
      * ACD is a per-sample binary decision ("aspect present?") scored with
        scikit-learn's precision/recall/F1.
      * ABSA counts matching (aspect, polarity) pairs.

    For '없음' the positive class is "the sample has no real aspect". ACD and
    ABSA are the same binary decision in that case, because a sample without
    aspects has no polarity to compare.

    Fixes over the previous version, both in the '없음' ABSA branch:
      * it read ``true_absa_set`` / ``pred_absa_set``, which were stale values
        left over from a previous aspect's iteration;
      * it counted "predicted none, truth has aspects" as a false negative and
        the reverse as a false positive, which swaps precision and recall.

    Returns:
        dict mapping aspect -> {'ACD_Precision', 'ACD_Recall', 'ACD_F1',
        'ABSA_Precision', 'ABSA_Recall', 'ABSA_F1', 'Support'}; 'Support' is
        the number of gold samples that are positive for that aspect.
    """
    aspect_labels = {
        anno[0]
        for sample in true_data
        for anno in sample.get("annotation", [])
        if len(anno) == 3
    }
    aspect_labels.add('없음')

    aspect_metrics = {}

    for aspect in sorted(aspect_labels):
        y_true_acd = []
        y_pred_acd = []

        tp = fp = fn = 0

        for true_item, pred_item in zip(true_data, pred_data):
            true_annos = [anno for anno in true_item.get("annotation", []) if len(anno) == 3]
            pred_annos = [anno for anno in pred_item.get("annotation", []) if len(anno) == 3]

            if aspect == '없음':
                # A sample is "none" when it carries no annotation for a real aspect.
                true_none = all(anno[0] == '없음' for anno in true_annos)
                pred_none = all(anno[0] == '없음' for anno in pred_annos)

                y_true_acd.append(1 if true_none else 0)
                y_pred_acd.append(1 if pred_none else 0)

                if true_none and pred_none:
                    tp += 1
                elif pred_none:
                    # Predicted "none" although the gold sample has aspects.
                    fp += 1
                elif true_none:
                    # Gold sample is "none" but some aspect was predicted.
                    fn += 1
                # Neither side is "none": a true negative, not counted.
            else:
                true_absa_set = {(anno[0], anno[2]) for anno in true_annos if anno[0] == aspect}
                pred_absa_set = {(anno[0], anno[2]) for anno in pred_annos if anno[0] == aspect}

                # The aspect is present exactly when at least one pair carries it.
                y_true_acd.append(1 if true_absa_set else 0)
                y_pred_acd.append(1 if pred_absa_set else 0)

                tp += len(true_absa_set & pred_absa_set)
                fp += len(pred_absa_set - true_absa_set)
                fn += len(true_absa_set - pred_absa_set)

        acd_precision = precision_score(y_true_acd, y_pred_acd, zero_division=0)
        acd_recall = recall_score(y_true_acd, y_pred_acd, zero_division=0)
        acd_f1 = f1_score(y_true_acd, y_pred_acd, zero_division=0)

        absa_precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        absa_recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        absa_f1 = (
            2 * absa_precision * absa_recall / (absa_precision + absa_recall)
            if (absa_precision + absa_recall) > 0 else 0
        )

        aspect_metrics[aspect] = {
            'ACD_Precision': round(acd_precision, 4),
            'ACD_Recall': round(acd_recall, 4),
            'ACD_F1': round(acd_f1, 4),
            'ABSA_Precision': round(absa_precision, 4),
            'ABSA_Recall': round(absa_recall, 4),
            'ABSA_F1': round(absa_f1, 4),
            'Support': sum(y_true_acd)
        }

    return aspect_metrics

테스트 데이터에 대한 평가

In [None]:
def load_model(model_class, path, model_name, label_size, tokenizer_len):
    """Instantiate ``model_class`` and restore its weights from ``path``.

    The model is built as ``model_class(model_name, label_size, tokenizer_len)``.
    The checkpoint is read with ``torch.load`` mapped onto the notebook-level
    ``device``, then the model is moved to that device and put in eval mode.

    Returns:
        The loaded model, ready for inference.
    """
    net = model_class(model_name, label_size, tokenizer_len)
    weights = torch.load(path, map_location=device)
    net.load_state_dict(weights)
    net.to(device)
    net.eval()
    return net

def pretty_print_result(result_dict, aspect_dict=None):
    """Print overall metrics and, when given, the per-aspect metrics.

    Args:
        result_dict: mapping of section name -> {metric name: number}; every
            value is printed with four decimals.
        aspect_dict: optional mapping of aspect -> {metric name: value}; the
            values are printed as-is. Skipped when None or empty.
    """
    print("\nF1 Evaluation Result:")
    for section, scores in result_dict.items():
        print(f"\n▶ {section}")
        for metric, value in scores.items():
            print(f"   {metric}: {value:.4f}")

    if not aspect_dict:
        return

    print("\nPer-Aspect Performance:")
    for label, scores in aspect_dict.items():
        print(f"\n - {label}")
        for metric, value in scores.items():
            print(f"   {metric}: {value}")

def test_sentiment_analysis(test_data, save_path=None):
    """Evaluate the saved ACD and ASC models on ``test_data`` and save predictions.

    Steps:
      1. Load both tokenizers and register the notebook's special tokens.
      2. Pass the data through ``ensure_annotation`` and build the entity and
         polarity datasets with ``get_dataset``.
      3. Load the best checkpoints and accumulate the average loss of each model.
      4. Run ``predict_from_korean_form`` on a deep copy of the data, then print
         the overall and per-aspect metrics.
      5. Write the predictions to ``<save_path>/pred_data.jsonl``.

    Args:
        test_data: list of samples as returned by ``jsonlload``.
        save_path: directory for the prediction file. ``None`` (the default)
            means ``pred_result_DIR``; an empty string disables saving.
            Previously this argument was overwritten unconditionally and the
            file was written to the working directory, while the log message
            claimed it went to ``save_path``.

    Returns:
        None. Any failure in a setup step is printed and the function returns early.
    """
    print("Starting Sentiment Analysis Test...")

    try:
        acd_tokenizer = AutoTokenizer.from_pretrained(acd_base_model)
        acd_tokenizer.add_special_tokens(special_tokens_dict)
        asc_tokenizer = AutoTokenizer.from_pretrained(asc_base_model)
        asc_tokenizer.add_special_tokens(special_tokens_dict)
    except Exception as e:
        print(f"Tokenizer load error: {e}")
        return

    try:
        test_data = ensure_annotation(test_data)
    except Exception as e:
        print(f"Failed to load test data: {e}")
        return

    try:
        # Each model has its own tokenizer, so the data is encoded twice and
        # only the matching dataset of each call is kept.
        entity_test_data, _, _, _ = get_dataset(test_data, acd_tokenizer, max_len)
        _, polarity_test_data, _, _ = get_dataset(test_data, asc_tokenizer, max_len)
    except Exception as e:
        print(f"Failed to preprocess test data: {e}")
        return

    entity_test_loader = DataLoader(entity_test_data, shuffle=False, batch_size=batch_size)
    polarity_test_loader = DataLoader(polarity_test_data, shuffle=False, batch_size=batch_size)

    try:
        acd_best_model = load_model(ABSA_Model, acd_best_model_path, acd_base_model, len(tf_id_to_name), len(acd_tokenizer))
        asc_best_model = load_model(ABSA_Model, asc_best_model_path, asc_base_model, len(polarity_id_to_name), len(asc_tokenizer))
    except Exception as e:
        print(f"Model load error: {e}")
        return

    acd_best_model.eval()
    asc_best_model.eval()

    acd_total_loss, acd_batches = 0.0, 0
    asc_total_loss, asc_batches = 0.0, 0

    with torch.no_grad():
        for input_ids, attention_mask, labels in entity_test_loader:
            input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)
            loss, _ = acd_best_model(input_ids, attention_mask, labels)
            acd_total_loss += loss.item()
            acd_batches += 1

        for input_ids, attention_mask, labels in polarity_test_loader:
            input_ids, attention_mask, labels = input_ids.to(device), attention_mask.to(device), labels.to(device)
            loss, _ = asc_best_model(input_ids, attention_mask, labels)
            asc_total_loss += loss.item()
            asc_batches += 1

    # Predict on a deep copy so the gold annotations in test_data stay intact
    # for the evaluation below.
    pred_data = predict_from_korean_form(acd_tokenizer, asc_tokenizer, acd_best_model, asc_best_model, copy.deepcopy(test_data), max_len, threshold)

    result = evaluation_f1(test_data, pred_data)
    aspect_result = evaluation_per_aspect(test_data, pred_data)

    pretty_print_result(result, aspect_result)

    print("\nAverage Loss:")
    if acd_batches > 0:
        print(f" - ACD Loss: {acd_total_loss / acd_batches:.4f}")
    if asc_batches > 0:
        print(f" - ASC Loss: {asc_total_loss / asc_batches:.4f}")

    # Honour the caller's directory; fall back to the project default.
    if save_path is None:
        save_path = pred_result_DIR
    if save_path:
        try:
            os.makedirs(save_path, exist_ok=True)
            pred_file = os.path.join(save_path, 'pred_data.jsonl')
            jsondump(pred_data, pred_file)
            print(f"Saved predictions to {pred_file}")
        except Exception as e:
            print(f"Failed to save predictions: {e}")

In [None]:
# Load the test file and evaluate the currently saved best ACD/ASC checkpoints on it.
test_data = jsonlload(test_data_path)
test_sentiment_analysis(test_data)

Starting Sentiment Analysis Test...


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Confidence 값 분포 (ACD 단계):
  - max: 0.9999
  - min: 0.5351
  - mean: 0.9952
  - median: 0.9997

Confidence 값 분포 (ASC 단계):
  - max: 0.9671
  - min: 0.3570
  - mean: 0.7467
  - median: 0.8195

F1 Evaluation Result:

▶ ACD result
   Precision: 0.9105
   Recall: 0.9124
   F1: 0.9114

▶ entire ABSA result
   Precision: 0.7078
   Recall: 0.7036
   F1: 0.7056

Per-Aspect Performance:

 - 가격
   ACD_Precision: 0.92
   ACD_Recall: 0.9583
   ACD_F1: 0.9388
   ABSA_Precision: 0.64
   ABSA_Recall: 0.6667
   ABSA_F1: 0.6531
   Support: 24

 - 거품
   ACD_Precision: 1.0
   ACD_Recall: 0.9703
   ACD_F1: 0.9849
   ABSA_Precision: 0.7551
   ABSA_Recall: 0.7327
   ABSA_F1: 0.7437
   Support: 101

 - 머릿결
   ACD_Precision: 0.75
   ACD_Recall: 0.9808
   ACD_F1: 0.85
   ABSA_Precision: 0.5294
   ABSA_Recall: 0.6923
   ABSA_F1: 0.6
   Support: 52

 - 세정
   ACD_Precision: 0.9035
   ACD_Recall: 0.8655
   ACD_F1: 0.8841
   ABSA_Precision: 0.7632
   ABSA_Recall: 0.7311
   ABSA_F1: 0.7468
   Support: 119

 - 없음
   AC

# 증강 데이터 사용

In [None]:
# Input and output locations for the augmented dataset inside DATA_DIR.
aug_all_data_path = os.path.join(DATA_DIR, 'aug_all_withko_nn.jsonl')
converted_aug_all_data_path = os.path.join(DATA_DIR, 'converted_aug_all_withko_nn.jsonl')

# Convert the augmented data with convert_to_absa_format and write it back out.
aug_data = jsonlload(aug_all_data_path)
converted_aug_data = convert_to_absa_format(aug_data)
jsonldump(converted_aug_data, converted_aug_all_data_path)
# Reload the written file as the cell's last expression so the notebook displays it.
jsonlload(converted_aug_all_data_path)

[{'sentence_form': '바오밥 신제품 나와서 사봤어요 시카라인이라서 그런지 두피세정이 잘 되는 느낌이에요',
  'annotation': [['세정', ['두피세정이 잘 되는 느낌이에요', 29, 45], 'positive']]},
 {'sentence_form': '이렇게 예쁜 샴푸는 처음이야종류도 다양하고 저는 탈모샴푸로 비컨피던트 구매해 봤는데 감을 때 시원하고 향도 시원해서 여름에 사용하기 참좋더라구요 향기 좋고 세정력 좋은 샴푸로 추천합니다',
  'annotation': [['세정', ['시원하고', 52, 56], 'positive'],
   ['향', ['향도 시원해서', 57, 64], 'positive'],
   ['향', ['향기 좋고', 81, 86], 'positive'],
   ['세정', ['세정력 좋은 샴푸', 87, 96], 'positive']]},
 {'sentence_form': '케이스부터 고급지네요지루성두피염이라 아무거나 못쓰는 편이라신중히 고르는 편이예요노모어오일이 조금 더 비싸서 좋지 않을까 했는데오일샴푸라 린언미로 겟했어요그런데 딱 원하는 제품이네요뾰루지가 잘 나는 두피인데 염증 완화에 도움을준데요합성계면 활성제, 합성 방부제 없고 그외에도 전제품 EWG 98프로 이상 유해성분 제외라고하니믿고 써봅니다향이 시원한 느낌이고 거품도 잘나며 개운한느낌입니다',
  'annotation': [['향', ['향이 시원한 느낌이고', 187, 198], 'positive'],
   ['거품', ['거품도 잘나며', 199, 206], 'positive'],
   ['세정', ['개운한느낌입니다', 207, 215], 'positive']]},
 {'sentence_form': '좋아요.리필팩도 들어있어서원플러스 원처럼 구입하게 되서 좋아요.',
  'annotation': [['가격', ['원플러스 원처럼 구입하게 되서 좋아요', 14, 34], 'positive']]},
 {'sentence_form': '샴푸를

In [None]:
# Split the converted augmented data 90/10 into train and dev sets under DATA_DIR.
# NOTE(review): presumably this writes the train.jsonl/dev.jsonl files that the
# next cell loads via train_data_path/dev_data_path; confirm in the helper.
split_jsonl_file_train_dev_only(
    jsonl_path=converted_aug_all_data_path,
    output_dir=DATA_DIR,
    train_ratio=0.9,
    dev_ratio=0.1
)

데이터 분할 완료: train=2367, dev=264


In [None]:
# Load the train/dev files and train both models on them.
train_data = jsonlload(train_data_path)
dev_data = jsonlload(dev_data_path)
train_sentiment_analysis(train_data, dev_data)

train_sentiment_analysis START


Token indices sequence length is longer than the specified maximum sequence length for this model (630 > 512). Running this sequence through the model will result in indexing errors


Entity Class Weights:
  - False (class 0): weight = 1.2977
  - True (class 1): weight = 4.3593
Polarity Class Weights:
  - positive (class 0): weight = 1.6535
  - negative (class 1): weight = 3.8054
  - neutral (class 2): weight = 7.5513


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]

[Entity] Epoch 1 | Train Loss: 0.5967
[Entity] Dev F1_macro: 0.8385
Saved best entity model
[Polarity] Epoch 1 | Train Loss: 0.8240
[Polarity] Dev F1_macro: 0.6154


Epoch:   3%|▎         | 1/30 [01:30<43:42, 90.44s/it]

Saved best polarity model
[Entity] Epoch 2 | Train Loss: 0.3177
[Entity] Dev F1_macro: 0.8771
Saved best entity model
[Polarity] Epoch 2 | Train Loss: 0.6805
[Polarity] Dev F1_macro: 0.7110


Epoch:   7%|▋         | 2/30 [03:01<42:20, 90.74s/it]

Saved best polarity model
[Entity] Epoch 3 | Train Loss: 0.2815
[Entity] Dev F1_macro: 0.8827
Saved best entity model
[Polarity] Epoch 3 | Train Loss: 0.6076
[Polarity] Dev F1_macro: 0.7236


Epoch:  10%|█         | 3/30 [04:32<40:52, 90.83s/it]

Saved best polarity model
[Entity] Epoch 4 | Train Loss: 0.2734
[Entity] Dev F1_macro: 0.8962
Saved best entity model
[Polarity] Epoch 4 | Train Loss: 0.5730
[Polarity] Dev F1_macro: 0.7390


Epoch:  13%|█▎        | 4/30 [06:03<39:22, 90.86s/it]

Saved best polarity model
[Entity] Epoch 5 | Train Loss: 0.2923
[Entity] Dev F1_macro: 0.9009
Saved best entity model
[Polarity] Epoch 5 | Train Loss: 0.5138
[Polarity] Dev F1_macro: 0.7454


Epoch:  17%|█▋        | 5/30 [07:34<37:53, 90.93s/it]

Saved best polarity model
[Entity] Epoch 6 | Train Loss: 0.3673
[Entity] Dev F1_macro: 0.9109
Saved best entity model
[Polarity] Epoch 6 | Train Loss: 0.4807
[Polarity] Dev F1_macro: 0.7721


Epoch:  20%|██        | 6/30 [09:05<36:24, 91.00s/it]

Saved best polarity model
[Entity] Epoch 7 | Train Loss: 0.4547
[Entity] Dev F1_macro: 0.9133
Saved best entity model
[Polarity] Epoch 7 | Train Loss: 0.4112
[Polarity] Dev F1_macro: 0.7952


Epoch:  23%|██▎       | 7/30 [10:36<34:57, 91.18s/it]

Saved best polarity model
[Entity] Epoch 8 | Train Loss: 0.4789
[Entity] Dev F1_macro: 0.9160
Saved best entity model
[Polarity] Epoch 8 | Train Loss: 0.3675
[Polarity] Dev F1_macro: 0.7954


Epoch:  27%|██▋       | 8/30 [12:08<33:30, 91.39s/it]

Saved best polarity model
[Entity] Epoch 9 | Train Loss: 0.4099
[Entity] Dev F1_macro: 0.9143
[Polarity] Epoch 9 | Train Loss: 0.3368
[Polarity] Dev F1_macro: 0.8225


Epoch:  30%|███       | 9/30 [13:33<31:18, 89.45s/it]

Saved best polarity model
[Entity] Epoch 10 | Train Loss: 0.3606
[Entity] Dev F1_macro: 0.9223
Saved best entity model
[Polarity] Epoch 10 | Train Loss: 0.2604
[Polarity] Dev F1_macro: 0.8250


Epoch:  33%|███▎      | 10/30 [15:05<29:58, 89.94s/it]

Saved best polarity model
[Entity] Epoch 11 | Train Loss: 0.3514
[Entity] Dev F1_macro: 0.9225
Saved best entity model
[Polarity] Epoch 11 | Train Loss: 0.2350


Epoch:  37%|███▋      | 11/30 [16:30<28:01, 88.50s/it]

[Polarity] Dev F1_macro: 0.8126
[Entity] Epoch 12 | Train Loss: 0.3508
[Entity] Dev F1_macro: 0.9216
[Polarity] Epoch 12 | Train Loss: 0.2145
[Polarity] Dev F1_macro: 0.8479


Epoch:  40%|████      | 12/30 [17:54<26:11, 87.29s/it]

Saved best polarity model
[Entity] Epoch 13 | Train Loss: 0.3395
[Entity] Dev F1_macro: 0.9205
[Polarity] Epoch 13 | Train Loss: 0.2091


Epoch:  43%|████▎     | 13/30 [19:18<24:25, 86.20s/it]

[Polarity] Dev F1_macro: 0.8326
[Entity] Epoch 14 | Train Loss: 0.3058
[Entity] Dev F1_macro: 0.9211
[Polarity] Epoch 14 | Train Loss: 0.1960
[Polarity] Dev F1_macro: 0.8639


Epoch:  47%|████▋     | 14/30 [20:42<22:50, 85.64s/it]

Saved best polarity model
[Entity] Epoch 15 | Train Loss: 0.2659
[Entity] Dev F1_macro: 0.9162
Early stopping triggered (Entity)
[Polarity] Epoch 15 | Train Loss: 0.1950


Epoch:  50%|█████     | 15/30 [22:06<21:15, 85.05s/it]

[Polarity] Dev F1_macro: 0.8620
[Entity] Epoch 16 | Train Loss: 0.2518
[Entity] Dev F1_macro: 0.9287
Saved best entity model
Early stopping triggered (Entity)
[Polarity] Epoch 16 | Train Loss: 0.1654


Epoch:  53%|█████▎    | 16/30 [23:31<19:48, 84.88s/it]

[Polarity] Dev F1_macro: 0.8622
[Entity] Epoch 17 | Train Loss: 0.2547
[Entity] Dev F1_macro: 0.9231
Early stopping triggered (Entity)
[Polarity] Epoch 17 | Train Loss: 0.1577
[Polarity] Dev F1_macro: 0.8639


Epoch:  57%|█████▋    | 17/30 [24:55<18:22, 84.83s/it]

Saved best polarity model
[Entity] Epoch 18 | Train Loss: 0.2537
[Entity] Dev F1_macro: 0.9243
Early stopping triggered (Entity)
[Polarity] Epoch 18 | Train Loss: 0.2022


Epoch:  60%|██████    | 18/30 [26:19<16:54, 84.52s/it]

[Polarity] Dev F1_macro: 0.8602
[Entity] Epoch 19 | Train Loss: 0.2256
[Entity] Dev F1_macro: 0.9255
Early stopping triggered (Entity)
[Polarity] Epoch 19 | Train Loss: 0.1803
[Polarity] Dev F1_macro: 0.8663


Epoch:  63%|██████▎   | 19/30 [27:44<15:29, 84.54s/it]

Saved best polarity model
[Entity] Epoch 20 | Train Loss: 0.2159
[Entity] Dev F1_macro: 0.9279
Early stopping triggered (Entity)
[Polarity] Epoch 20 | Train Loss: 0.2034
[Polarity] Dev F1_macro: 0.8721


Epoch:  67%|██████▋   | 20/30 [29:09<14:07, 84.72s/it]

Saved best polarity model
[Entity] Epoch 21 | Train Loss: 0.2254
[Entity] Dev F1_macro: 0.9304
Saved best entity model
Early stopping triggered (Entity)
[Polarity] Epoch 21 | Train Loss: 0.2057


Epoch:  70%|███████   | 21/30 [30:34<12:43, 84.87s/it]

[Polarity] Dev F1_macro: 0.8713
[Entity] Epoch 22 | Train Loss: 0.2145
[Entity] Dev F1_macro: 0.9266
Early stopping triggered (Entity)
[Polarity] Epoch 22 | Train Loss: 0.2119
[Polarity] Dev F1_macro: 0.8773


Epoch:  73%|███████▎  | 22/30 [31:59<11:18, 84.84s/it]

Saved best polarity model
[Entity] Epoch 23 | Train Loss: 0.2018
[Entity] Dev F1_macro: 0.9296
Early stopping triggered (Entity)
[Polarity] Epoch 23 | Train Loss: 0.1735


Epoch:  77%|███████▋  | 23/30 [33:23<09:51, 84.54s/it]

[Polarity] Dev F1_macro: 0.8725
[Entity] Epoch 24 | Train Loss: 0.2187
[Entity] Dev F1_macro: 0.9287
Early stopping triggered (Entity)
[Polarity] Epoch 24 | Train Loss: 0.1931
[Polarity] Dev F1_macro: 0.8882


Epoch:  80%|████████  | 24/30 [34:47<08:27, 84.56s/it]

Saved best polarity model
[Entity] Epoch 25 | Train Loss: 0.2070
[Entity] Dev F1_macro: 0.9282
Early stopping triggered (Entity)
[Polarity] Epoch 25 | Train Loss: 0.1725


Epoch:  83%|████████▎ | 25/30 [36:11<07:01, 84.34s/it]

[Polarity] Dev F1_macro: 0.8777
[Entity] Epoch 26 | Train Loss: 0.1862
[Entity] Dev F1_macro: 0.9307
Saved best entity model
Early stopping triggered (Entity)
[Polarity] Epoch 26 | Train Loss: 0.1681


Epoch:  87%|████████▋ | 26/30 [37:36<05:38, 84.56s/it]

[Polarity] Dev F1_macro: 0.8777
[Entity] Epoch 27 | Train Loss: 0.1955
[Entity] Dev F1_macro: 0.9264
Early stopping triggered (Entity)
[Polarity] Epoch 27 | Train Loss: 0.2020


Epoch:  90%|█████████ | 27/30 [39:00<04:13, 84.45s/it]

[Polarity] Dev F1_macro: 0.8764
[Entity] Epoch 28 | Train Loss: 0.1901
[Entity] Dev F1_macro: 0.9300
Early stopping triggered (Entity)
[Polarity] Epoch 28 | Train Loss: 0.1719


Epoch:  90%|█████████ | 27/30 [40:24<04:29, 89.78s/it]

[Polarity] Dev F1_macro: 0.8662
Early stopping triggered (Polarity)
Training complete.





In [None]:
# Re-evaluate on the test file after the training run in the previous cell.
test_data = jsonlload(test_data_path)
test_sentiment_analysis(test_data)

Starting Sentiment Analysis Test...


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Confidence 값 분포 (ACD 단계):
  - max: 1.0000
  - min: 0.7700
  - mean: 0.9999
  - median: 1.0000

Confidence 값 분포 (ASC 단계):
  - max: 1.0000
  - min: 0.5777
  - mean: 0.9967
  - median: 1.0000

F1 Evaluation Result:

▶ ACD result
   Precision: 0.9722
   Recall: 0.9741
   F1: 0.9731

▶ entire ABSA result
   Precision: 0.9443
   Recall: 0.9387
   F1: 0.9415

Per-Aspect Performance:

 - 가격
   ACD_Precision: 1.0
   ACD_Recall: 0.9583
   ACD_F1: 0.9787
   ABSA_Precision: 0.9565
   ABSA_Recall: 0.9167
   ABSA_F1: 0.9362
   Support: 24

 - 거품
   ACD_Precision: 1.0
   ACD_Recall: 0.9901
   ACD_F1: 0.995
   ABSA_Precision: 0.98
   ABSA_Recall: 0.9703
   ABSA_F1: 0.9751
   Support: 101

 - 머릿결
   ACD_Precision: 0.963
   ACD_Recall: 1.0
   ACD_F1: 0.9811
   ABSA_Precision: 0.9074
   ABSA_Recall: 0.9423
   ABSA_F1: 0.9245
   Support: 52

 - 세정
   ACD_Precision: 0.9746
   ACD_Recall: 0.9664
   ACD_F1: 0.9705
   ABSA_Precision: 0.9661
   ABSA_Recall: 0.958
   ABSA_F1: 0.962
   Support: 119

 - 없음
   ACD

# StratifiedKFold 적용

In [None]:
# Upgrade scikit-learn to the latest release. The captured output below shows
# pip reporting a conflict with sklearn-compat's `scikit-learn<1.7` requirement.
!pip install -U scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (17 kB)
Downloading scikit_learn-1.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.9/12.9 MB[0m [31m129.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.6.1
    Uninstalling scikit-learn-1.6.1:
      Successfully uninstalled scikit-learn-1.6.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
sklearn-compat 0.1.3 requires scikit-learn<1.7,>=1.2, but you have scikit-learn 1.7.0 which is incompatible.[0m[31m
[0mSuccessfully installed scikit-learn-1.7.0


In [None]:
import sklearn
from sklearn.model_selection import StratifiedKFold, KFold

In [None]:
def set_seed(seedNum, device='cpu'):
    """Seed Python, NumPy and PyTorch RNGs; on CUDA also force deterministic cuDNN.

    Args:
        seedNum: integer seed applied to ``random``, ``numpy.random`` and ``torch``.
        device: a device string ('cpu', 'cuda', 'cuda:0', ...) or a
            ``torch.device``. The previous ``device == 'cuda'`` check only
            matched the exact string, so a ``torch.device`` object or an
            indexed name such as 'cuda:0' skipped the CUDA branch.
    """
    torch.manual_seed(seedNum)
    np.random.seed(seedNum)
    random.seed(seedNum)

    # str() gives 'cuda' / 'cuda:N' for both plain strings and torch.device objects.
    wants_cuda = str(device).startswith('cuda')
    if wants_cuda and torch.cuda.is_available():
        torch.cuda.manual_seed(seedNum)
        torch.cuda.manual_seed_all(seedNum)
        # Trade cuDNN autotuning for reproducible kernels.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

def custom_stratified_KFold(file_list, n_splits, which_k):
    """Return the (train, held-out) split of fold ``which_k`` from a stratified K-fold.

    The data is loaded with ``jsonlload(file_list)``. Each sample gets one
    stratification label: the largest index in ``entity_property_pair`` among
    its annotated aspects. Samples with no annotation, or whose aspects are
    all missing from ``entity_property_pair``, get label 0. Previously the
    second case raised ValueError from ``max()`` on an empty sequence.

    The held-out fold is also written to ``DATA_DIR/<which_k>Fold.jsonl``.

    Args:
        file_list: passed through to ``jsonlload``.
        n_splits: number of folds.
        which_k: 1-based index of the fold to return.

    Returns:
        (train_data, test_data) lists for the requested fold.

    Raises:
        ValueError: if ``which_k`` is not in ``1..n_splits``. Previously the
            function fell through and returned None, which failed later when
            the caller unpacked the result.
    """
    if not 1 <= which_k <= n_splits:
        raise ValueError(f"which_k must be between 1 and {n_splits}, got {which_k}")

    data = jsonlload(file_list)
    labels = []

    for d in data:
        annotation = d.get("annotation") or []
        known_indices = [
            entity_property_pair.index(anno[0])
            for anno in annotation
            if anno[0] in entity_property_pair
        ]
        labels.append(max(known_indices, default=0))

    # Fixed random_state so every call sees the same fold assignment.
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=1)

    for n_iter, (train_idx, test_idx) in enumerate(skf.split(data, labels), 1):
        if n_iter != which_k:
            continue

        print(f'CustomStratifiedKFold - {n_iter}/{n_splits}')
        train_data = [data[i] for i in train_idx]
        test_data = [data[i] for i in test_idx]

        save_path = os.path.join(DATA_DIR, f"{n_iter}Fold.jsonl")
        jsondump(test_data, save_path)

        return train_data, test_data

# Fix all RNG seeds before building folds; `device` is the notebook-level global.
set_seed(1, device)

# input_file_list = ["train.jsonl", "dev.jsonl", "temp_aug.jsonl"]
# Files handed to custom_stratified_KFold, which forwards the list to jsonlload.
input_file_list = ["train.jsonl", "dev.jsonl"]

In [None]:
# Fold 1 of 3: the fold's held-out part is used as the dev set.
train_data, dev_data = custom_stratified_KFold(input_file_list, 3, 1)

CustomStratifiedKFold - 1/3


In [None]:
# Train both models on the fold selected in the previous cell.
train_sentiment_analysis(train_data, dev_data)

train_sentiment_analysis START


Token indices sequence length is longer than the specified maximum sequence length for this model (630 > 512). Running this sequence through the model will result in indexing errors


Entity Class Weights:
  - False (class 0): weight = 1.3015
  - True (class 1): weight = 4.3166
Polarity Class Weights:
  - positive (class 0): weight = 1.6432
  - negative (class 1): weight = 3.9215
  - neutral (class 2): weight = 7.3296


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]

[Entity] Epoch 1 | Train Loss: 0.5538
[Entity] Dev F1_macro: 0.8450
Saved best entity model
[Polarity] Epoch 1 | Train Loss: 0.8188
[Polarity] Dev F1_macro: 0.6428


Epoch:   3%|▎         | 1/30 [01:14<36:13, 74.95s/it]

Saved best polarity model
[Entity] Epoch 2 | Train Loss: 0.3282
[Entity] Dev F1_macro: 0.8642
Saved best entity model
[Polarity] Epoch 2 | Train Loss: 0.6707
[Polarity] Dev F1_macro: 0.6861


Epoch:   7%|▋         | 2/30 [02:37<36:56, 79.17s/it]

Saved best polarity model
[Entity] Epoch 3 | Train Loss: 0.2954
[Entity] Dev F1_macro: 0.8720
Saved best entity model
[Polarity] Epoch 3 | Train Loss: 0.6112
[Polarity] Dev F1_macro: 0.7061


Epoch:  10%|█         | 3/30 [03:59<36:22, 80.83s/it]

Saved best polarity model
[Entity] Epoch 4 | Train Loss: 0.2761
[Entity] Dev F1_macro: 0.8753
Saved best entity model
[Polarity] Epoch 4 | Train Loss: 0.5459
[Polarity] Dev F1_macro: 0.7170


Epoch:  13%|█▎        | 4/30 [05:22<35:21, 81.60s/it]

Saved best polarity model
[Entity] Epoch 5 | Train Loss: 0.2657
[Entity] Dev F1_macro: 0.8858
Saved best entity model
[Polarity] Epoch 5 | Train Loss: 0.5136
[Polarity] Dev F1_macro: 0.7225


Epoch:  17%|█▋        | 5/30 [06:45<34:08, 81.96s/it]

Saved best polarity model
[Entity] Epoch 6 | Train Loss: 0.2534
[Entity] Dev F1_macro: 0.8881
Saved best entity model
[Polarity] Epoch 6 | Train Loss: 0.4641
[Polarity] Dev F1_macro: 0.7324


Epoch:  20%|██        | 6/30 [08:07<32:50, 82.11s/it]

Saved best polarity model
[Entity] Epoch 7 | Train Loss: 0.2668
[Entity] Dev F1_macro: 0.8951
Saved best entity model
[Polarity] Epoch 7 | Train Loss: 0.4483
[Polarity] Dev F1_macro: 0.7349


Epoch:  23%|██▎       | 7/30 [09:30<31:32, 82.27s/it]

Saved best polarity model
[Entity] Epoch 8 | Train Loss: 0.2634
[Entity] Dev F1_macro: 0.8969
Saved best entity model
[Polarity] Epoch 8 | Train Loss: 0.4311


Epoch:  27%|██▋       | 8/30 [10:45<29:21, 80.05s/it]

[Polarity] Dev F1_macro: 0.7349
[Entity] Epoch 9 | Train Loss: 0.2885
[Entity] Dev F1_macro: 0.8985
Saved best entity model
[Polarity] Epoch 9 | Train Loss: 0.4225
[Polarity] Dev F1_macro: 0.7444


Epoch:  30%|███       | 9/30 [12:07<28:13, 80.64s/it]

Saved best polarity model
[Entity] Epoch 10 | Train Loss: 0.3410
[Entity] Dev F1_macro: 0.9009
Saved best entity model
[Polarity] Epoch 10 | Train Loss: 0.4106
[Polarity] Dev F1_macro: 0.7485


Epoch:  33%|███▎      | 10/30 [13:29<27:01, 81.09s/it]

Saved best polarity model
[Entity] Epoch 11 | Train Loss: 0.3878
[Entity] Dev F1_macro: 0.9032
Saved best entity model
[Polarity] Epoch 11 | Train Loss: 0.4037
[Polarity] Dev F1_macro: 0.7503


Epoch:  37%|███▋      | 11/30 [14:54<26:00, 82.14s/it]

Saved best polarity model
[Entity] Epoch 12 | Train Loss: 0.4496
[Entity] Dev F1_macro: 0.8999
[Polarity] Epoch 12 | Train Loss: 0.4185


Epoch:  40%|████      | 12/30 [16:07<23:52, 79.58s/it]

[Polarity] Dev F1_macro: 0.7341
[Entity] Epoch 13 | Train Loss: 0.3929
[Entity] Dev F1_macro: 0.9095
Saved best entity model
[Polarity] Epoch 13 | Train Loss: 0.3933
[Polarity] Dev F1_macro: 0.7586


Epoch:  43%|████▎     | 13/30 [17:29<22:42, 80.17s/it]

Saved best polarity model
[Entity] Epoch 14 | Train Loss: 0.3777
[Entity] Dev F1_macro: 0.9094
[Polarity] Epoch 14 | Train Loss: 0.3531
[Polarity] Dev F1_macro: 0.7754


Epoch:  47%|████▋     | 14/30 [18:44<20:56, 78.52s/it]

Saved best polarity model
[Entity] Epoch 15 | Train Loss: 0.3689
[Entity] Dev F1_macro: 0.9110
Saved best entity model
[Polarity] Epoch 15 | Train Loss: 0.3135


Epoch:  50%|█████     | 15/30 [19:59<19:21, 77.45s/it]

[Polarity] Dev F1_macro: 0.7723
[Entity] Epoch 16 | Train Loss: 0.3451
[Entity] Dev F1_macro: 0.9108
[Polarity] Epoch 16 | Train Loss: 0.3111
[Polarity] Dev F1_macro: 0.7806


Epoch:  53%|█████▎    | 16/30 [21:13<17:53, 76.67s/it]

Saved best polarity model
[Entity] Epoch 17 | Train Loss: 0.3610
[Entity] Dev F1_macro: 0.9126
Saved best entity model
[Polarity] Epoch 17 | Train Loss: 0.3158
[Polarity] Dev F1_macro: 0.7829


Epoch:  57%|█████▋    | 17/30 [22:35<16:57, 78.29s/it]

Saved best polarity model
[Entity] Epoch 18 | Train Loss: 0.3096
[Entity] Dev F1_macro: 0.9119
[Polarity] Epoch 18 | Train Loss: 0.2720
[Polarity] Dev F1_macro: 0.7840


Epoch:  60%|██████    | 18/30 [23:50<15:25, 77.11s/it]

Saved best polarity model
[Entity] Epoch 19 | Train Loss: 0.2754
[Entity] Dev F1_macro: 0.9134
Saved best entity model
[Polarity] Epoch 19 | Train Loss: 0.2297
[Polarity] Dev F1_macro: 0.8002


Epoch:  63%|██████▎   | 19/30 [25:10<14:19, 78.12s/it]

Saved best polarity model
[Entity] Epoch 20 | Train Loss: 0.2981
[Entity] Dev F1_macro: 0.9136
Saved best entity model
[Polarity] Epoch 20 | Train Loss: 0.2399
[Polarity] Dev F1_macro: 0.8107


Epoch:  67%|██████▋   | 20/30 [26:32<13:12, 79.28s/it]

Saved best polarity model
[Entity] Epoch 21 | Train Loss: 0.2636
[Entity] Dev F1_macro: 0.9129
[Polarity] Epoch 21 | Train Loss: 0.2080


Epoch:  70%|███████   | 21/30 [27:47<11:40, 77.83s/it]

[Polarity] Dev F1_macro: 0.8104
[Entity] Epoch 22 | Train Loss: 0.2629
[Entity] Dev F1_macro: 0.9137
Saved best entity model
[Polarity] Epoch 22 | Train Loss: 0.1815
[Polarity] Dev F1_macro: 0.8254


Epoch:  73%|███████▎  | 22/30 [29:08<10:31, 78.99s/it]

Saved best polarity model
[Entity] Epoch 23 | Train Loss: 0.2841
[Entity] Dev F1_macro: 0.9145
Saved best entity model
[Polarity] Epoch 23 | Train Loss: 0.2009


Epoch:  77%|███████▋  | 23/30 [30:23<09:04, 77.81s/it]

[Polarity] Dev F1_macro: 0.8236
[Entity] Epoch 24 | Train Loss: 0.2563
[Entity] Dev F1_macro: 0.9176
Saved best entity model
[Polarity] Epoch 24 | Train Loss: 0.1693
[Polarity] Dev F1_macro: 0.8259


Epoch:  80%|████████  | 24/30 [31:46<07:54, 79.12s/it]

Saved best polarity model
[Entity] Epoch 25 | Train Loss: 0.2231
[Entity] Dev F1_macro: 0.9178
Saved best entity model
[Polarity] Epoch 25 | Train Loss: 0.1391
[Polarity] Dev F1_macro: 0.8339


Epoch:  83%|████████▎ | 25/30 [33:09<06:41, 80.26s/it]

Saved best polarity model
[Entity] Epoch 26 | Train Loss: 0.2331
[Entity] Dev F1_macro: 0.9160
[Polarity] Epoch 26 | Train Loss: 0.1527


Epoch:  87%|████████▋ | 26/30 [34:23<05:13, 78.50s/it]

[Polarity] Dev F1_macro: 0.8310
[Entity] Epoch 27 | Train Loss: 0.2435
[Entity] Dev F1_macro: 0.9164
[Polarity] Epoch 27 | Train Loss: 0.1421
[Polarity] Dev F1_macro: 0.8347


Epoch:  90%|█████████ | 27/30 [35:37<03:51, 77.22s/it]

Saved best polarity model
[Entity] Epoch 28 | Train Loss: 0.2473
[Entity] Dev F1_macro: 0.9160
[Polarity] Epoch 28 | Train Loss: 0.1554


Epoch:  93%|█████████▎| 28/30 [36:51<02:32, 76.12s/it]

[Polarity] Dev F1_macro: 0.8307
[Entity] Epoch 29 | Train Loss: 0.2207
[Entity] Dev F1_macro: 0.9157
Early stopping triggered (Entity)
[Polarity] Epoch 29 | Train Loss: 0.1518


Epoch:  97%|█████████▋| 29/30 [38:04<01:15, 75.29s/it]

[Polarity] Dev F1_macro: 0.8303
[Entity] Epoch 30 | Train Loss: 0.2287
[Entity] Dev F1_macro: 0.9166
Early stopping triggered (Entity)
[Polarity] Epoch 30 | Train Loss: 0.1360


Epoch: 100%|██████████| 30/30 [39:17<00:00, 78.59s/it]

[Polarity] Dev F1_macro: 0.8303
Training complete.





In [None]:
# Evaluate the checkpoints saved by the fold training run on the test file.
test_data = jsonlload(test_data_path)
test_sentiment_analysis(test_data)

Starting Sentiment Analysis Test...


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Confidence 값 분포 (ACD 단계):
  - max: 1.0000
  - min: 0.6027
  - mean: 0.9993
  - median: 1.0000

Confidence 값 분포 (ASC 단계):
  - max: 0.9996
  - min: 0.4095
  - mean: 0.9644
  - median: 0.9988

F1 Evaluation Result:

▶ ACD result
   Precision: 0.9464
   Recall: 0.9502
   F1: 0.9483

▶ entire ABSA result
   Precision: 0.8889
   Recall: 0.8854
   F1: 0.8871

Per-Aspect Performance:

 - 가격
   ACD_Precision: 0.92
   ACD_Recall: 0.9583
   ACD_F1: 0.9388
   ABSA_Precision: 0.8
   ABSA_Recall: 0.8333
   ABSA_F1: 0.8163
   Support: 24

 - 거품
   ACD_Precision: 1.0
   ACD_Recall: 0.9901
   ACD_F1: 0.995
   ABSA_Precision: 0.96
   ABSA_Recall: 0.9505
   ABSA_F1: 0.9552
   Support: 101

 - 머릿결
   ACD_Precision: 0.8947
   ACD_Recall: 0.9808
   ACD_F1: 0.9358
   ABSA_Precision: 0.8421
   ABSA_Recall: 0.9231
   ABSA_F1: 0.8807
   Support: 52

 - 세정
   ACD_Precision: 0.9316
   ACD_Recall: 0.916
   ACD_F1: 0.9237
   ABSA_Precision: 0.8718
   ABSA_Recall: 0.8571
   ABSA_F1: 0.8644
   Support: 119

 - 없음
   

In [None]:
# Build the train/dev split for fold 2 of 3 (the call logs "CustomStratifiedKFold - 2/3").
# NOTE(review): presumably the arguments are (input files, number of folds,
# 1-based fold index) - confirm against custom_stratified_KFold.
train_data, dev_data = custom_stratified_KFold(input_file_list, 3, 2)

CustomStratifiedKFold - 2/3


In [None]:
# Train on the current train/dev split. The recorded output shows an entity model
# and a polarity model being trained over a 30-epoch progress bar, with per-epoch
# dev macro-F1, "Saved best ... model" messages and early-stopping messages.
train_sentiment_analysis(train_data, dev_data)

train_sentiment_analysis START


Token indices sequence length is longer than the specified maximum sequence length for this model (630 > 512). Running this sequence through the model will result in indexing errors


Entity Class Weights:
  - False (class 0): weight = 1.2942
  - True (class 1): weight = 4.3990
Polarity Class Weights:
  - positive (class 0): weight = 1.6674
  - negative (class 1): weight = 3.8134
  - neutral (class 2): weight = 7.2455


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]

[Entity] Epoch 1 | Train Loss: 0.5487
[Entity] Dev F1_macro: 0.8647
Saved best entity model
[Polarity] Epoch 1 | Train Loss: 0.8715
[Polarity] Dev F1_macro: 0.6527


Epoch:   3%|▎         | 1/30 [01:21<39:29, 81.70s/it]

Saved best polarity model
[Entity] Epoch 2 | Train Loss: 0.3296
[Entity] Dev F1_macro: 0.8710
Saved best entity model
[Polarity] Epoch 2 | Train Loss: 0.6959
[Polarity] Dev F1_macro: 0.6908


Epoch:   7%|▋         | 2/30 [02:44<38:28, 82.46s/it]

Saved best polarity model
[Entity] Epoch 3 | Train Loss: 0.3119
[Entity] Dev F1_macro: 0.8703
[Polarity] Epoch 3 | Train Loss: 0.6127
[Polarity] Dev F1_macro: 0.7375


Epoch:  10%|█         | 3/30 [03:59<35:27, 78.79s/it]

Saved best polarity model
[Entity] Epoch 4 | Train Loss: 0.2791
[Entity] Dev F1_macro: 0.9014
Saved best entity model
[Polarity] Epoch 4 | Train Loss: 0.5409


Epoch:  13%|█▎        | 4/30 [05:14<33:29, 77.29s/it]

[Polarity] Dev F1_macro: 0.7216
[Entity] Epoch 5 | Train Loss: 0.2709
[Entity] Dev F1_macro: 0.9045
Saved best entity model
[Polarity] Epoch 5 | Train Loss: 0.4826
[Polarity] Dev F1_macro: 0.7591


Epoch:  17%|█▋        | 5/30 [06:36<32:55, 79.02s/it]

Saved best polarity model
[Entity] Epoch 6 | Train Loss: 0.2667
[Entity] Dev F1_macro: 0.9094
Saved best entity model
[Polarity] Epoch 6 | Train Loss: 0.4223
[Polarity] Dev F1_macro: 0.7706


Epoch:  20%|██        | 6/30 [07:58<32:06, 80.27s/it]

Saved best polarity model
[Entity] Epoch 7 | Train Loss: 0.2758
[Entity] Dev F1_macro: 0.9129
Saved best entity model
[Polarity] Epoch 7 | Train Loss: 0.3945


Epoch:  23%|██▎       | 7/30 [09:13<30:05, 78.51s/it]

[Polarity] Dev F1_macro: 0.7694
[Entity] Epoch 8 | Train Loss: 0.3136
[Entity] Dev F1_macro: 0.9149
Saved best entity model
[Polarity] Epoch 8 | Train Loss: 0.3900
[Polarity] Dev F1_macro: 0.7721


Epoch:  27%|██▋       | 8/30 [10:35<29:12, 79.64s/it]

Saved best polarity model
[Entity] Epoch 9 | Train Loss: 0.4379
[Entity] Dev F1_macro: 0.9195
Saved best entity model
[Polarity] Epoch 9 | Train Loss: 0.3483
[Polarity] Dev F1_macro: 0.7997


Epoch:  30%|███       | 9/30 [11:58<28:12, 80.60s/it]

Saved best polarity model
[Entity] Epoch 10 | Train Loss: 0.4490
[Entity] Dev F1_macro: 0.9217
Saved best entity model
[Polarity] Epoch 10 | Train Loss: 0.3333
[Polarity] Dev F1_macro: 0.8293


Epoch:  33%|███▎      | 10/30 [13:20<27:02, 81.11s/it]

Saved best polarity model
[Entity] Epoch 11 | Train Loss: 0.4554
[Entity] Dev F1_macro: 0.9211
[Polarity] Epoch 11 | Train Loss: 0.3101
[Polarity] Dev F1_macro: 0.8318


Epoch:  37%|███▋      | 11/30 [14:35<25:04, 79.20s/it]

Saved best polarity model
[Entity] Epoch 12 | Train Loss: 0.3926
[Entity] Dev F1_macro: 0.9241
Saved best entity model
[Polarity] Epoch 12 | Train Loss: 0.2897
[Polarity] Dev F1_macro: 0.8462


Epoch:  40%|████      | 12/30 [15:58<24:04, 80.25s/it]

Saved best polarity model
[Entity] Epoch 13 | Train Loss: 0.3222
[Entity] Dev F1_macro: 0.9271
Saved best entity model
[Polarity] Epoch 13 | Train Loss: 0.2324
[Polarity] Dev F1_macro: 0.8644


Epoch:  43%|████▎     | 13/30 [17:20<22:56, 80.95s/it]

Saved best polarity model
[Entity] Epoch 14 | Train Loss: 0.3114
[Entity] Dev F1_macro: 0.9274
Saved best entity model
[Polarity] Epoch 14 | Train Loss: 0.1615
[Polarity] Dev F1_macro: 0.8663


Epoch:  47%|████▋     | 14/30 [18:43<21:44, 81.56s/it]

Saved best polarity model
[Entity] Epoch 15 | Train Loss: 0.2884
[Entity] Dev F1_macro: 0.9272
[Polarity] Epoch 15 | Train Loss: 0.1534
[Polarity] Dev F1_macro: 0.8705


Epoch:  50%|█████     | 15/30 [19:58<19:54, 79.62s/it]

Saved best polarity model
[Entity] Epoch 16 | Train Loss: 0.2855
[Entity] Dev F1_macro: 0.9265
[Polarity] Epoch 16 | Train Loss: 0.1382
[Polarity] Dev F1_macro: 0.8731


Epoch:  53%|█████▎    | 16/30 [21:13<18:14, 78.16s/it]

Saved best polarity model
[Entity] Epoch 17 | Train Loss: 0.2770
[Entity] Dev F1_macro: 0.9270
[Polarity] Epoch 17 | Train Loss: 0.1518
[Polarity] Dev F1_macro: 0.8733


Epoch:  57%|█████▋    | 17/30 [22:28<16:43, 77.21s/it]

Saved best polarity model
[Entity] Epoch 18 | Train Loss: 0.2794
[Entity] Dev F1_macro: 0.9286
Saved best entity model
[Polarity] Epoch 18 | Train Loss: 0.1389
[Polarity] Dev F1_macro: 0.8865


Epoch:  60%|██████    | 18/30 [23:51<15:45, 78.80s/it]

Saved best polarity model
[Entity] Epoch 19 | Train Loss: 0.2544
[Entity] Dev F1_macro: 0.9291
Saved best entity model
[Polarity] Epoch 19 | Train Loss: 0.1166


Epoch:  63%|██████▎   | 19/30 [25:06<14:14, 77.66s/it]

[Polarity] Dev F1_macro: 0.8815
[Entity] Epoch 20 | Train Loss: 0.2535
[Entity] Dev F1_macro: 0.9270
[Polarity] Epoch 20 | Train Loss: 0.1270


Epoch:  67%|██████▋   | 20/30 [26:20<12:45, 76.58s/it]

[Polarity] Dev F1_macro: 0.8813
[Entity] Epoch 21 | Train Loss: 0.2377
[Entity] Dev F1_macro: 0.9283
[Polarity] Epoch 21 | Train Loss: 0.1401
[Polarity] Dev F1_macro: 0.8872


Epoch:  70%|███████   | 21/30 [27:34<11:22, 75.85s/it]

Saved best polarity model
[Entity] Epoch 22 | Train Loss: 0.2401
[Entity] Dev F1_macro: 0.9301
Saved best entity model
[Polarity] Epoch 22 | Train Loss: 0.1226


Epoch:  73%|███████▎  | 22/30 [28:49<10:04, 75.56s/it]

[Polarity] Dev F1_macro: 0.8754
[Entity] Epoch 23 | Train Loss: 0.2341
[Entity] Dev F1_macro: 0.9291
[Polarity] Epoch 23 | Train Loss: 0.1246


Epoch:  77%|███████▋  | 23/30 [30:03<08:45, 75.02s/it]

[Polarity] Dev F1_macro: 0.8759
[Entity] Epoch 24 | Train Loss: 0.2127
[Entity] Dev F1_macro: 0.9300
[Polarity] Epoch 24 | Train Loss: 0.1501


Epoch:  80%|████████  | 24/30 [31:16<07:27, 74.53s/it]

[Polarity] Dev F1_macro: 0.8774
[Entity] Epoch 25 | Train Loss: 0.2068
[Entity] Dev F1_macro: 0.9298
[Polarity] Epoch 25 | Train Loss: 0.1420


Epoch:  83%|████████▎ | 25/30 [32:29<06:10, 74.17s/it]

[Polarity] Dev F1_macro: 0.8832
Early stopping triggered (Polarity)
[Entity] Epoch 26 | Train Loss: 0.2064


Epoch:  83%|████████▎ | 25/30 [33:30<06:42, 80.43s/it]

[Entity] Dev F1_macro: 0.9292
Early stopping triggered (Entity)
Training complete.





In [None]:
# Reload the records at test_data_path and evaluate the models saved by the
# preceding training run; the recorded output lists ACD and overall ABSA
# precision/recall/F1 plus per-aspect metrics.
test_data = jsonlload(test_data_path)
test_sentiment_analysis(test_data)

Starting Sentiment Analysis Test...


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Confidence 값 분포 (ACD 단계):
  - max: 1.0000
  - min: 0.5060
  - mean: 0.9993
  - median: 1.0000

Confidence 값 분포 (ASC 단계):
  - max: 1.0000
  - min: 0.5086
  - mean: 0.9900
  - median: 0.9999

F1 Evaluation Result:

▶ ACD result
   Precision: 0.9660
   Recall: 0.9622
   F1: 0.9641

▶ entire ABSA result
   Precision: 0.9300
   Recall: 0.9190
   F1: 0.9245

Per-Aspect Performance:

 - 가격
   ACD_Precision: 0.9583
   ACD_Recall: 0.9583
   ACD_F1: 0.9583
   ABSA_Precision: 0.9167
   ABSA_Recall: 0.9167
   ABSA_F1: 0.9167
   Support: 24

 - 거품
   ACD_Precision: 1.0
   ACD_Recall: 0.9901
   ACD_F1: 0.995
   ABSA_Precision: 0.97
   ABSA_Recall: 0.9604
   ABSA_F1: 0.9652
   Support: 101

 - 머릿결
   ACD_Precision: 0.9091
   ACD_Recall: 0.9615
   ACD_F1: 0.9346
   ABSA_Precision: 0.8727
   ABSA_Recall: 0.9231
   ABSA_F1: 0.8972
   Support: 52

 - 세정
   ACD_Precision: 0.9661
   ACD_Recall: 0.958
   ACD_F1: 0.962
   ABSA_Precision: 0.9322
   ABSA_Recall: 0.9244
   ABSA_F1: 0.9283
   Support: 119

 - 없음

In [None]:
# Build the train/dev split for fold 3 of 3 (the call logs "CustomStratifiedKFold - 3/3").
# NOTE(review): presumably the arguments are (input files, number of folds,
# 1-based fold index) - confirm against custom_stratified_KFold.
train_data, dev_data = custom_stratified_KFold(input_file_list, 3, 3)

CustomStratifiedKFold - 3/3


In [None]:
# Train on the current train/dev split; the recorded output shows the entity and
# polarity models being trained with per-epoch dev macro-F1 reporting.
# NOTE(review): in the recorded output, "[Entity] Epoch N" lines keep appearing
# after "Early stopping triggered (Entity)" until the polarity model also stops -
# confirm whether the entity model is meant to keep training after that message.
train_sentiment_analysis(train_data, dev_data)

train_sentiment_analysis START


Token indices sequence length is longer than the specified maximum sequence length for this model (630 > 512). Running this sequence through the model will result in indexing errors


Entity Class Weights:
  - False (class 0): weight = 1.2986
  - True (class 1): weight = 4.3485
Polarity Class Weights:
  - positive (class 0): weight = 1.6590
  - negative (class 1): weight = 3.7500
  - neutral (class 2): weight = 7.6603


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:   0%|          | 0/30 [00:00<?, ?it/s]

[Entity] Epoch 1 | Train Loss: 0.5300
[Entity] Dev F1_macro: 0.8494
Saved best entity model
[Polarity] Epoch 1 | Train Loss: 0.8637
[Polarity] Dev F1_macro: 0.6143


Epoch:   3%|▎         | 1/30 [01:21<39:32, 81.81s/it]

Saved best polarity model
[Entity] Epoch 2 | Train Loss: 0.3234
[Entity] Dev F1_macro: 0.8640
Saved best entity model
[Polarity] Epoch 2 | Train Loss: 0.7051
[Polarity] Dev F1_macro: 0.6407


Epoch:   7%|▋         | 2/30 [02:44<38:29, 82.48s/it]

Saved best polarity model
[Entity] Epoch 3 | Train Loss: 0.3053
[Entity] Dev F1_macro: 0.8746
Saved best entity model
[Polarity] Epoch 3 | Train Loss: 0.6304
[Polarity] Dev F1_macro: 0.6815


Epoch:  10%|█         | 3/30 [04:05<36:41, 81.54s/it]

Saved best polarity model
[Entity] Epoch 4 | Train Loss: 0.2760
[Entity] Dev F1_macro: 0.8770
Saved best entity model
[Polarity] Epoch 4 | Train Loss: 0.5950
[Polarity] Dev F1_macro: 0.6978


Epoch:  13%|█▎        | 4/30 [05:28<35:40, 82.32s/it]

Saved best polarity model
[Entity] Epoch 5 | Train Loss: 0.2737
[Entity] Dev F1_macro: 0.8840
Saved best entity model
[Polarity] Epoch 5 | Train Loss: 0.5494
[Polarity] Dev F1_macro: 0.7027


Epoch:  17%|█▋        | 5/30 [06:51<34:19, 82.40s/it]

Saved best polarity model
[Entity] Epoch 6 | Train Loss: 0.2717
[Entity] Dev F1_macro: 0.8977
Saved best entity model
[Polarity] Epoch 6 | Train Loss: 0.5275
[Polarity] Dev F1_macro: 0.7133


Epoch:  20%|██        | 6/30 [08:13<32:59, 82.48s/it]

Saved best polarity model
[Entity] Epoch 7 | Train Loss: 0.2862
[Entity] Dev F1_macro: 0.9054
Saved best entity model
[Polarity] Epoch 7 | Train Loss: 0.5086
[Polarity] Dev F1_macro: 0.7358


Epoch:  23%|██▎       | 7/30 [09:35<31:29, 82.15s/it]

Saved best polarity model
[Entity] Epoch 8 | Train Loss: 0.3188
[Entity] Dev F1_macro: 0.9082
Saved best entity model
[Polarity] Epoch 8 | Train Loss: 0.4867
[Polarity] Dev F1_macro: 0.7436


Epoch:  27%|██▋       | 8/30 [10:57<30:07, 82.14s/it]

Saved best polarity model
[Entity] Epoch 9 | Train Loss: 0.3615
[Entity] Dev F1_macro: 0.9103
Saved best entity model
[Polarity] Epoch 9 | Train Loss: 0.4082


Epoch:  30%|███       | 9/30 [12:11<27:53, 79.71s/it]

[Polarity] Dev F1_macro: 0.7412
[Entity] Epoch 10 | Train Loss: 0.4448
[Entity] Dev F1_macro: 0.9141
Saved best entity model
[Polarity] Epoch 10 | Train Loss: 0.3828
[Polarity] Dev F1_macro: 0.7552


Epoch:  33%|███▎      | 10/30 [13:34<26:50, 80.50s/it]

Saved best polarity model
[Entity] Epoch 11 | Train Loss: 0.4594
[Entity] Dev F1_macro: 0.9158
Saved best entity model
[Polarity] Epoch 11 | Train Loss: 0.3731
[Polarity] Dev F1_macro: 0.7764


Epoch:  37%|███▋      | 11/30 [14:56<25:41, 81.12s/it]

Saved best polarity model
[Entity] Epoch 12 | Train Loss: 0.4314
[Entity] Dev F1_macro: 0.9134
[Polarity] Epoch 12 | Train Loss: 0.3438


Epoch:  40%|████      | 12/30 [16:10<23:40, 78.90s/it]

[Polarity] Dev F1_macro: 0.7634
[Entity] Epoch 13 | Train Loss: 0.4155
[Entity] Dev F1_macro: 0.9177
Saved best entity model
[Polarity] Epoch 13 | Train Loss: 0.2873
[Polarity] Dev F1_macro: 0.7991


Epoch:  43%|████▎     | 13/30 [17:32<22:37, 79.83s/it]

Saved best polarity model
[Entity] Epoch 14 | Train Loss: 0.3750
[Entity] Dev F1_macro: 0.9183
Saved best entity model
[Polarity] Epoch 14 | Train Loss: 0.2922
[Polarity] Dev F1_macro: 0.8154


Epoch:  47%|████▋     | 14/30 [18:55<21:31, 80.71s/it]

Saved best polarity model
[Entity] Epoch 15 | Train Loss: 0.3368
[Entity] Dev F1_macro: 0.9178
[Polarity] Epoch 15 | Train Loss: 0.2527


Epoch:  50%|█████     | 15/30 [20:08<19:39, 78.61s/it]

[Polarity] Dev F1_macro: 0.8111
[Entity] Epoch 16 | Train Loss: 0.3028
[Entity] Dev F1_macro: 0.9188
Saved best entity model
[Polarity] Epoch 16 | Train Loss: 0.1993
[Polarity] Dev F1_macro: 0.8242


Epoch:  53%|█████▎    | 16/30 [21:30<18:34, 79.60s/it]

Saved best polarity model
[Entity] Epoch 17 | Train Loss: 0.3059
[Entity] Dev F1_macro: 0.9191
Saved best entity model
[Polarity] Epoch 17 | Train Loss: 0.2105
[Polarity] Dev F1_macro: 0.8361


Epoch:  57%|█████▋    | 17/30 [22:53<17:25, 80.43s/it]

Saved best polarity model
[Entity] Epoch 18 | Train Loss: 0.2972
[Entity] Dev F1_macro: 0.9209
Saved best entity model
[Polarity] Epoch 18 | Train Loss: 0.1949


Epoch:  60%|██████    | 18/30 [24:08<15:45, 78.82s/it]

[Polarity] Dev F1_macro: 0.8351
[Entity] Epoch 19 | Train Loss: 0.2875
[Entity] Dev F1_macro: 0.9210
Saved best entity model
[Polarity] Epoch 19 | Train Loss: 0.1755
[Polarity] Dev F1_macro: 0.8471


Epoch:  63%|██████▎   | 19/30 [25:29<14:36, 79.64s/it]

Saved best polarity model
[Entity] Epoch 20 | Train Loss: 0.2779
[Entity] Dev F1_macro: 0.9206
[Polarity] Epoch 20 | Train Loss: 0.1655
[Polarity] Dev F1_macro: 0.8495


Epoch:  67%|██████▋   | 20/30 [26:45<13:03, 78.32s/it]

Saved best polarity model
[Entity] Epoch 21 | Train Loss: 0.2712
[Entity] Dev F1_macro: 0.9201
[Polarity] Epoch 21 | Train Loss: 0.1607


Epoch:  70%|███████   | 21/30 [27:58<11:33, 77.01s/it]

[Polarity] Dev F1_macro: 0.8439
[Entity] Epoch 22 | Train Loss: 0.2577
[Entity] Dev F1_macro: 0.9176
[Polarity] Epoch 22 | Train Loss: 0.1728


Epoch:  73%|███████▎  | 22/30 [29:12<10:07, 75.93s/it]

[Polarity] Dev F1_macro: 0.8394
[Entity] Epoch 23 | Train Loss: 0.2492
[Entity] Dev F1_macro: 0.9204
Early stopping triggered (Entity)
[Polarity] Epoch 23 | Train Loss: 0.1873


Epoch:  77%|███████▋  | 23/30 [30:25<08:46, 75.16s/it]

[Polarity] Dev F1_macro: 0.8443
[Entity] Epoch 24 | Train Loss: 0.2306
[Entity] Dev F1_macro: 0.9191
Early stopping triggered (Entity)
[Polarity] Epoch 24 | Train Loss: 0.1629
[Polarity] Dev F1_macro: 0.8526


Epoch:  80%|████████  | 24/30 [31:39<07:28, 74.82s/it]

Saved best polarity model
[Entity] Epoch 25 | Train Loss: 0.2366
[Entity] Dev F1_macro: 0.9200
Early stopping triggered (Entity)
[Polarity] Epoch 25 | Train Loss: 0.2034


Epoch:  83%|████████▎ | 25/30 [32:53<06:12, 74.56s/it]

[Polarity] Dev F1_macro: 0.8498
[Entity] Epoch 26 | Train Loss: 0.2467
[Entity] Dev F1_macro: 0.9209
Early stopping triggered (Entity)
[Polarity] Epoch 26 | Train Loss: 0.1971


Epoch:  87%|████████▋ | 26/30 [34:06<04:56, 74.14s/it]

[Polarity] Dev F1_macro: 0.8448
[Entity] Epoch 27 | Train Loss: 0.2550
[Entity] Dev F1_macro: 0.9201
Early stopping triggered (Entity)
[Polarity] Epoch 27 | Train Loss: 0.1641


Epoch:  90%|█████████ | 27/30 [35:20<03:41, 73.91s/it]

[Polarity] Dev F1_macro: 0.8506
[Entity] Epoch 28 | Train Loss: 0.2382
[Entity] Dev F1_macro: 0.9202
Early stopping triggered (Entity)
[Polarity] Epoch 28 | Train Loss: 0.1917


Epoch:  90%|█████████ | 27/30 [36:33<04:03, 81.24s/it]

[Polarity] Dev F1_macro: 0.8483
Early stopping triggered (Polarity)
Training complete.





In [None]:
# Reload the records at test_data_path and evaluate the models saved by the
# preceding training run; the recorded output lists ACD and overall ABSA
# precision/recall/F1 plus per-aspect metrics.
test_data = jsonlload(test_data_path)
test_sentiment_analysis(test_data)

Starting Sentiment Analysis Test...


Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Confidence 값 분포 (ACD 단계):
  - max: 1.0000
  - min: 0.6067
  - mean: 0.9991
  - median: 1.0000

Confidence 값 분포 (ASC 단계):
  - max: 0.9999
  - min: 0.5312
  - mean: 0.9889
  - median: 0.9999

F1 Evaluation Result:

▶ ACD result
   Precision: 0.9557
   Recall: 0.9462
   F1: 0.9510

▶ entire ABSA result
   Precision: 0.9115
   Recall: 0.8953
   F1: 0.9033

Per-Aspect Performance:

 - 가격
   ACD_Precision: 1.0
   ACD_Recall: 0.9583
   ACD_F1: 0.9787
   ABSA_Precision: 0.9565
   ABSA_Recall: 0.9167
   ABSA_F1: 0.9362
   Support: 24

 - 거품
   ACD_Precision: 0.9899
   ACD_Recall: 0.9703
   ACD_F1: 0.98
   ABSA_Precision: 0.9394
   ABSA_Recall: 0.9208
   ABSA_F1: 0.93
   Support: 101

 - 머릿결
   ACD_Precision: 0.8793
   ACD_Recall: 0.9808
   ACD_F1: 0.9273
   ABSA_Precision: 0.7759
   ABSA_Recall: 0.8654
   ABSA_F1: 0.8182
   Support: 52

 - 세정
   ACD_Precision: 0.9558
   ACD_Recall: 0.9076
   ACD_F1: 0.931
   ABSA_Precision: 0.9558
   ABSA_Recall: 0.9076
   ABSA_F1: 0.931
   Support: 119

 - 없음


# 라벨링

In [None]:
def label_raw_data_with_trained_model(
    raw_data,
    acd_best_model_path,
    asc_best_model_path,
    model_class,
    acd_tokenizer_name,
    asc_tokenizer_name,
    tf_id_to_name,
    polarity_id_to_name,
    special_tokens_dict,
    entity_property_pair,
    max_len,
    threshold,
    save_path
):
    """Label raw review records with the trained ACD and ASC models.

    For every record, the text stored under the '리뷰' key is preprocessed and
    paired with each entry of ``entity_property_pair``. A pair is kept when the
    ACD model predicts the 'True' class with confidence >= ``threshold``; the
    ASC model then picks its polarity. Kept pairs are appended to the record's
    'annotation' list as ``[pair, [None, 0, 0], polarity]``. A record that was
    processed but matched no pair gets ``[["없음", [None, 0, 0], None]]``; a
    record whose review text is invalid or empty keeps an empty list.

    Args:
        raw_data: List of record dicts; it is deep-copied, not modified.
        acd_best_model_path: Checkpoint path passed to ``load_model`` for ACD.
        asc_best_model_path: Checkpoint path passed to ``load_model`` for ASC.
        model_class: Model class passed to ``load_model`` for both models.
        acd_tokenizer_name: Pretrained identifier used for the ACD tokenizer
            and passed to ``load_model`` as the ACD base model.
        asc_tokenizer_name: Same, for the ASC tokenizer and model.
        tf_id_to_name: Maps an ACD class id to its name ('True' selects a pair).
        polarity_id_to_name: Maps an ASC class id to a polarity name.
        special_tokens_dict: Passed to ``add_special_tokens`` on both tokenizers.
        entity_property_pair: Iterable of aspect strings to test per review.
        max_len: Length handed to ``truncate_left``.
        threshold: Minimum ACD confidence for a 'True' prediction to be kept.
        save_path: Falsy to skip saving. Otherwise either an output file path,
            or a directory (existing, or a path without a file extension) in
            which the file name of the global ``final_output_path`` is used.

    Returns:
        The labeled deep copy of ``raw_data``, or ``None`` when a tokenizer or
        model could not be loaded (the error is printed).
    """
    print("Starting Labeling with Raw Data...")

    # Use the identifiers supplied by the caller instead of module globals so
    # the function honours its own parameters.
    try:
        acd_tokenizer = AutoTokenizer.from_pretrained(acd_tokenizer_name)
        acd_tokenizer.add_special_tokens(special_tokens_dict)
        asc_tokenizer = AutoTokenizer.from_pretrained(asc_tokenizer_name)
        asc_tokenizer.add_special_tokens(special_tokens_dict)
    except Exception as e:
        print(f"Tokenizer load error: {e}")
        return

    try:
        acd_best_model = load_model(model_class, acd_best_model_path, acd_tokenizer_name, len(tf_id_to_name), len(acd_tokenizer))
        asc_best_model = load_model(model_class, asc_best_model_path, asc_tokenizer_name, len(polarity_id_to_name), len(asc_tokenizer))
    except Exception as e:
        print(f"Model load error: {e}")
        return

    def _predict(model, tokenizer, form, pair):
        """Return (predicted class id, confidence) for one (review, aspect) input."""
        # Tokenize without truncation; truncate_left then brings the sequence
        # to max_len (this is why the tokenizer may warn about long inputs).
        encoded = tokenizer(
            form,
            pair,
            padding=False,
            truncation=False,
            return_tensors='pt',
            add_special_tokens=True
        )
        input_ids = encoded['input_ids'][0].tolist()
        attention_mask = encoded['attention_mask'][0].tolist()
        input_ids, attention_mask = truncate_left(input_ids, attention_mask, max_len, tokenizer.pad_token_id)

        input_ids = torch.tensor([input_ids]).to(device)
        attention_mask = torch.tensor([attention_mask]).to(device)

        with torch.no_grad():
            _, logits = model(input_ids, attention_mask)

        probs = torch.softmax(logits, dim=-1)
        confidence, pred = torch.max(probs, dim=-1)
        return pred.item(), confidence.item()

    def predict_on_raw_data(data):
        """Annotate a deep copy of ``data`` and return it."""
        acd_best_model.eval()
        asc_best_model.eval()
        results = copy.deepcopy(data)

        for sentence in tqdm(results, desc="라벨링 진행"):
            form = sentence.get('리뷰', '')
            sentence['annotation'] = []

            # Reject non-string reviews before preprocessing.
            # NOTE(review): preprocessing is defined elsewhere; presumably it
            # expects a str - confirm.
            if not isinstance(form, str):
                print(f"Invalid sentence skipped: {form}")
                continue

            form = preprocessing(form)
            if not isinstance(form, str) or not form.strip():
                print(f"Invalid sentence skipped: {form}")
                continue

            for pair in entity_property_pair:
                # Run ACD: does this review mention the aspect?
                acd_pred, acd_confidence = _predict(acd_best_model, acd_tokenizer, form, pair)
                if tf_id_to_name[acd_pred] != 'True' or acd_confidence < threshold:
                    continue

                # Run ASC: which polarity does the review express for it?
                asc_pred, _ = _predict(asc_best_model, asc_tokenizer, form, pair)
                if 0 <= asc_pred < len(polarity_id_to_name):
                    polarity = polarity_id_to_name[asc_pred]
                else:
                    polarity = "UNKNOWN"

                sentence['annotation'].append([
                    pair,
                    [None, 0, 0],
                    polarity
                ])

            if not sentence['annotation']:
                sentence['annotation'] = [["없음", [None, 0, 0], None]]

        return results

    labeled_raw_data = predict_on_raw_data(raw_data)

    if save_path:
        try:
            output_path = save_path
            # Existing callers pass a directory here; keep writing the default
            # file name inside it, and create the directory when it is missing.
            if os.path.isdir(save_path) or not os.path.splitext(save_path)[1]:
                os.makedirs(save_path, exist_ok=True)
                output_path = os.path.join(save_path, os.path.basename(final_output_path))
            jsondump(labeled_raw_data, output_path)
            # Report the file that was actually written.
            print(f"Saved final outputs to {output_path}")
        except Exception as e:
            print(f"Failed to save final outputs: {e}")

    return labeled_raw_data

In [None]:
# Display the records loaded from raw_data_path; the recorded output shows a
# list of dicts holding product metadata and the review text under '리뷰'.
jsonlload(raw_data_path)

[{'제품명': '모다모다 제로그레이 블랙샴푸10 더블기획 (300g+300g)',
  '브랜드': '모다모다',
  '원가': '78,000\n원',
  '판매가': '48,280\n원',
  '리뷰수': '145',
  '평균별점': '4.6',
  '5점비율': '78%',
  '4점비율': '13%',
  '3점비율': '6%',
  '2점비율': '1%',
  '1점비율': '3%',
  '세정력/아주만족해요': '68%',
  '세정력/보통이에요': '30%',
  '세정력/다소아쉬워요': '2%',
  '자극도/자극없이순해요': '74%',
  '자극도/보통이에요': '22%',
  '자극도/자극이느껴져요': '4%',
  '거품/거품이풍성해요': '68%',
  '거품/보통이에요': '30%',
  '거품/거품이적어요': '2%',
  '향/아주만족해요': '60%',
  '향/보통이에요': '36%',
  '향/다소아쉬워요': '4%',
  '작성자': '용자입니다',
  '작성일자': '2025.05.19',
  '별점': '5점만점에 5점',
  '사용현황': '',
  '세정력': '아주 만족해요',
  '자극도': '자극없이 순해요',
  '거품': '거품이 풍성해요',
  '향': '아주 만족해요',
  '리뷰': '일주일째 쓰는 중인데 좋은 거 같기도 하고 아닌 것 같기도 하고 한 달 뒤 후기로..ㄹ'},
 {'제품명': '모다모다 제로그레이 블랙샴푸10 더블기획 (300g+300g)',
  '브랜드': '모다모다',
  '원가': '78,000\n원',
  '판매가': '48,280\n원',
  '리뷰수': '145',
  '평균별점': '4.6',
  '5점비율': '78%',
  '4점비율': '13%',
  '3점비율': '6%',
  '2점비율': '1%',
  '1점비율': '3%',
  '세정력/아주만족해요': '68%',
  '세정력/보통이에요': '30%',
  '세정력/다소아쉬워요': '2%',
  '자극도/자극없이

In [None]:
# Load the raw review records into raw_data and show how many were read
# (181908 in the recorded output).
raw_data = jsonlload(raw_data_path)
len(raw_data)

181908

In [None]:
# Label every raw review with the trained ACD/ASC models and save the result.
# raw_data (loaded in the previous cell) is reused rather than re-reading the
# file; the labeling function deep-copies its input, so raw_data is unchanged.
# save_path is the output file itself, so the "Saved final outputs to ..."
# message names the file that is written.
labeled = label_raw_data_with_trained_model(
    raw_data=raw_data,
    acd_best_model_path=acd_best_model_path,
    asc_best_model_path=asc_best_model_path,
    model_class=ABSA_Model,
    acd_tokenizer_name=acd_base_model,
    asc_tokenizer_name=asc_base_model,
    tf_id_to_name=tf_id_to_name,
    polarity_id_to_name=polarity_id_to_name,
    special_tokens_dict=special_tokens_dict,
    entity_property_pair=entity_property_pair,
    max_len=max_len,
    threshold=threshold,
    save_path=final_output_path
)

Starting Labeling with Raw Data...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/80.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/620 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/344k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/375 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/248k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/752k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/473M [00:00<?, ?B/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


model.safetensors:   0%|          | 0.00/473M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/546 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/443M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
라벨링 진행:   1%|          | 1407/181908 [02:25<5:57:29,  8.42it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (517 > 512). Running this sequence through the model will result in indexing errors
라벨링 진행:   2%|▏         | 2879/181908 [04:53<5:28:10,  9.09it/s]Token indices sequence length is longer than the specified maximum sequence length for this model (539 > 512). Running this sequence through the model will result in indexing errors
라벨링 진행: 100%|██████████| 181908/181908 [5:16:33<00:00,  9.58it/s]


Saved final outputs to /content/drive/MyDrive/ABSA/f_final_output
