In [None]:
# https://pytorch.org/ cuda 설치, pytorch 설치 
# !conda install pytorch torchvision torchaudio pytorch-cuda=11.6 -c pytorch -c nvidia

In [None]:
!pip install sklearn

In [None]:
# Install Hugging Face transformers; the import cell below takes AutoTokenizer, AutoModel,
# AdamW and get_linear_schedule_with_warmup from it.
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# 모듈 import 및 전역 변수 설정

In [None]:
# 코랩에서 실행 시 drive 마운트
# from google.colab import drive
# drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import json
import os

import torch
import torch.nn as nn
from tqdm import trange

from transformers import AutoTokenizer, AutoModel
from torch.utils.data import DataLoader, TensorDataset
from transformers import get_linear_schedule_with_warmup
from transformers import AdamW

from sklearn.metrics import f1_score
import pandas as pd
import numpy as np
import copy

import random

from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold

import pickle

# ---------------------------------------------------------------------------
# Global configuration shared by every cell below.
# ---------------------------------------------------------------------------

# Run switches. `do_eval` and `FULL_FINETUNING` are read by train_sentiment_analysis();
# the use of `for_test` is not visible in this part of the notebook.
for_test = True

# Special token ids. NOTE(review): presumably the RoBERTa <pad>/<s>/</s> ids; they are not
# referenced by the code visible in this part of the notebook -- confirm before relying on them.
PADDING_TOKEN = 1
S_OPEN_TOKEN = 0
S_CLOSE_TOKEN = 2

do_eval = True
FULL_FINETUNING = True

# All artefacts live under one Google Drive project folder.
BASE_DIR = '/content/drive/MyDrive/게임/korean_ABSA_baseline-main'

# Checkpoint path *prefixes*: the training code appends
# 'saved_model_epoch_<fold>.pt' directly to these strings (no path separator).
category_extraction_model_path = f'{BASE_DIR}/saved_model5/category_extraction'
polarity_classification_model_path = f'{BASE_DIR}/saved_model5/polarity_classification'

train_data_path = f'{BASE_DIR}/data/nikluge-sa-2022-train.jsonl'
dev_data_path = f'{BASE_DIR}/data/nikluge-sa-2022-dev.jsonl'
test_data_path = f'{BASE_DIR}/data/nikluge-sa-2022-test.jsonl'

# Tokenization / optimisation hyper-parameters.
max_len = 256
batch_size = 8
# base_model = 'xlm-roberta-base'
base_model = 'klue/roberta-large'
entity_property_learning_rate = 3e-6
polarity_learning_rate = 3e-6
eps = 1e-8
num_train_epochs = 100  # upper bound; training stops feeding batches once patience runs out
# NOTE(review): SimpleClassifier below hard-codes an input width of 1024 and does not read
# this value -- confirm whether anything else still depends on it.
classifier_hidden_size = 768
classifier_dropout_prob = 0.1

# Early-stopping patience (in epochs without validation-loss improvement) per task.
entity_property_patience_ub = 10
polarity_patience_ub = 10
# NOTE(review): presumably the number of CV folds; train_sentiment_analysis() passes a
# literal n_splits=5 instead of reading this -- confirm.
val_num = 5

# The 13 entity#property labels the category model predicts, ordered from most to least
# frequent. The commented-out labels are excluded; according to the value_counts() output
# shown later in this notebook each of them has at most 31 annotations in train+dev.
entity_property_pair = ['본품#품질',
'제품 전체#일반',
'제품 전체#품질',
'본품#일반',
'제품 전체#디자인',
'본품#편의성',
'제품 전체#편의성',
'제품 전체#인지도',
'패키지/구성품#디자인',
'브랜드#일반',
'제품 전체#가격',
'패키지/구성품#편의성',
'패키지/구성품#일반',
# '본품#다양성',
# '본품#디자인',
# '브랜드#품질',
# '패키지/구성품#품질',
# '브랜드#인지도',
# '브랜드#가격',
# '패키지/구성품#다양성',
# '본품#가격',
# '본품#인지도',
# '패키지/구성품#가격'
]

# Output-index <-> label-name mapping of the multi-label category model. It is the same
# label list as entity_property_pair, so it is derived from it instead of being repeated.
tf_id_to_name = list(entity_property_pair)
tf_name_to_id = {name: idx for idx, name in enumerate(tf_id_to_name)}

# Class-index <-> polarity-name mapping of the polarity model.
polarity_id_to_name = ['positive', 'neutral', 'negative']
polarity_name_to_id = {name: idx for idx, name in enumerate(polarity_id_to_name)}

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Placeholder tokens added to the tokenizer as additional special tokens.
special_tokens_dict = {
    'additional_special_tokens': ['&name&', '&affiliation&', '&social-security-num&', '&tel-num&', '&card-num&', '&bank-account&', '&num&', '&online-account&']
}

# json 및 jsonl 파일 read, write 함수

In [None]:
def jsonload(fname, encoding="utf-8"):
    """Parse the JSON document stored in file `fname` and return the decoded object.

    Args:
        fname: path of the JSON file to read.
        encoding: text encoding used to open the file.
    """
    with open(fname, encoding=encoding) as handle:
        return json.load(handle)


# Serialise a JSON-compatible object to the given file name.
def jsondump(j, fname):
    """Write object `j` as JSON text to file `fname`, keeping non-ASCII characters unescaped."""
    with open(fname, "w", encoding="UTF8") as out_file:
        json.dump(obj=j, fp=out_file, ensure_ascii=False)

# Read a JSON Lines file into a list (one decoded object per non-blank line).
def jsonlload(fname, encoding="utf-8"):
    """Load a JSON Lines file.

    Args:
        fname: path of the .jsonl file.
        encoding: text encoding used to open the file.

    Returns:
        A list with one decoded JSON value per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    json_list = []
    with open(fname, encoding=encoding) as f:
        # Iterate the file lazily instead of materialising every line with readlines().
        for line in f:
            line = line.strip()
            # Blank / whitespace-only lines (e.g. a trailing empty line) carry no record;
            # passing them to json.loads would raise.
            if not line:
                continue
            json_list.append(json.loads(line))
    return json_list


# jsonlload('/content/drive/MyDrive/게임/korean_ABSA_baseline-main/data/sample.jsonl')

[{'id': 'nikluge-sa-2022-train-00001',
  'sentence_form': '둘쨋날은 미친듯이 밟아봤더니 기어가 헛돌면서 틱틱 소리가 나서 경악.',
  'annotation': [['본품#품질', ['기어', 16, 18], 'negative']]},
 {'id': 'nikluge-sa-2022-train-00002',
  'sentence_form': '이거 뭐 삐꾸를 준 거 아냐 불안하고, 거금 투자한 게 왜 이래.. 싶어서 정이 확 떨어졌는데 산 곳 가져가서 확인하니 기어 텐션 문제라고 고장 아니래.',
  'annotation': [['본품#품질', ['기어 텐션', 67, 72], 'negative']]},
 {'id': 'nikluge-sa-2022-train-00003',
  'sentence_form': '간사하게도 그 이후에는 라이딩이 아주 즐거워져서 만족스럽게 탔다.',
  'annotation': [['제품 전체#일반', [None, 0, 0], 'positive']]},
 {'id': 'nikluge-sa-2022-train-00004',
  'sentence_form': '샥이 없는 모델이라 일반 도로에서 타면 노면의 진동 때문에 손목이 덜덜덜 떨리고 이가 부딪칠 지경인데 이마저도 며칠 타면서 익숙해지니 신경쓰이지 않게 됐다.',
  'annotation': [['제품 전체#일반', ['샥이 없는 모델', 0, 8], 'neutral']]},
 {'id': 'nikluge-sa-2022-train-00005',
  'sentence_form': '안장도 딱딱해서 엉덩이가 아팠는데 무시하고 타고 있다.',
  'annotation': [['본품#일반', ['안장', 0, 2], 'negative']]},
 {'id': 'nikluge-sa-2022-train-00006',
  'sentence_form': '지금 내 실력과 저질 체력으로는 이 정도 자전거도 되게 훌륭한 거라는..',
  'annotation'

In [None]:
# Flatten train+dev into one row per annotation: its entity#property label (first field)
# and its polarity (last field).
tmp_list = jsonlload(train_data_path)
tmp_list.extend(jsonlload(dev_data_path))
# Build the frame in one shot; enlarging a DataFrame cell-by-cell with .loc re-allocates
# on every new row and is needlessly slow.
tmp_df = pd.DataFrame(
    [
        {'entity_property': annotation[0], 'polarity': annotation[-1]}
        for utterance in tmp_list
        for annotation in utterance['annotation']
    ],
    columns=['entity_property', 'polarity'],  # keeps both columns even if there are no rows
)
tmp_df

Unnamed: 0,entity_property,polarity
0,본품#품질,negative
1,본품#품질,negative
2,제품 전체#일반,positive
3,제품 전체#일반,neutral
4,본품#일반,negative
...,...,...
6329,패키지/구성품#일반,positive
6330,제품 전체#가격,negative
6331,제품 전체#가격,negative
6332,제품 전체#일반,positive


In [None]:
# Number of annotations per entity#property label across train+dev.
tmp_df['entity_property'].value_counts()

본품#품질          2380
제품 전체#일반       1624
제품 전체#품질        493
본품#일반           491
제품 전체#디자인       286
본품#편의성          191
제품 전체#편의성       180
제품 전체#인지도       141
패키지/구성품#디자인     117
브랜드#일반          103
제품 전체#가격         92
패키지/구성품#편의성      65
패키지/구성품#일반       50
본품#다양성           31
본품#디자인           21
브랜드#품질           19
패키지/구성품#품질       19
브랜드#인지도          17
브랜드#가격            7
패키지/구성품#다양성       3
본품#가격             2
본품#인지도            1
패키지/구성품#가격        1
Name: entity_property, dtype: int64

In [None]:
import numpy as np
# Exploratory: weight for the positive class of every entity#property label, computed as
# (total annotations) / (label count + 100). Pure inverse frequency was judged too extreme,
# so the count is smoothed by adding 100 (note: no logarithm is applied by this code).
# The list follows the order returned by value_counts().
class_weight = list(len(tmp_df['entity_property'])/(tmp_df['entity_property'].value_counts()+100))
class_weight

[2.5540322580645163,
 3.6740139211136893,
 10.681281618887015,
 10.717428087986464,
 16.409326424870468,
 21.766323024054984,
 22.62142857142857,
 26.282157676348547,
 29.1889400921659,
 31.201970443349754,
 32.989583333333336,
 38.38787878787879,
 42.22666666666667,
 48.35114503816794,
 52.34710743801653,
 53.226890756302524,
 53.226890756302524,
 54.136752136752136,
 59.19626168224299,
 61.49514563106796,
 62.09803921568628,
 62.71287128712871,
 62.71287128712871]

In [None]:
# Positive-class weights for the labels the category model predicts, used as `pos_weight`
# of BCEWithLogitsLoss: (total annotations) / (label count + 100). The +100 smoothing keeps
# rare labels from receiving extreme weights.
# The counts are aligned explicitly to entity_property_pair (the order of the model's
# output units) rather than relying on value_counts() happening to return the labels in
# that order and slicing off the last ten entries.
label_counts = tmp_df['entity_property'].value_counts().reindex(entity_property_pair, fill_value=0)
class_weight = list(len(tmp_df) / (label_counts + 100))
class_weight

[2.5540322580645163,
 3.6740139211136893,
 10.681281618887015,
 10.717428087986464,
 16.409326424870468,
 21.766323024054984,
 22.62142857142857,
 26.282157676348547,
 29.1889400921659,
 31.201970443349754,
 32.989583333333336,
 38.38787878787879,
 42.22666666666667]

In [None]:
# Exploratory: plain inverse-frequency polarity weights, (total annotations) / (class count),
# listed in the order returned by value_counts(). The next cell reassigns
# polarity_class_weight, so this value is only displayed here.
polarity_class_weight = list(len(tmp_df['polarity'])/tmp_df['polarity'].value_counts())
polarity_class_weight

[1.0385309067060173, 42.51006711409396, 73.65116279069767]

In [None]:
# Polarity class weights used as `weight` of CrossEntropyLoss:
# sqrt((total annotations) / (class count)), the square root damping the raw inverse frequency.
# CrossEntropyLoss indexes `weight` by class id, so the counts are aligned explicitly to
# polarity_id_to_name (positive, neutral, negative); value_counts() alone returns the
# classes ordered by frequency, which is not guaranteed to be the class-id order.
polarity_counts = tmp_df['polarity'].value_counts().reindex(polarity_id_to_name)
polarity_class_weight = list(np.sqrt(len(tmp_df) / polarity_counts))
polarity_class_weight

[1.0190833659254857, 6.5199744718897445, 8.582025564556288]

# 모델 정의
roberta large 모델을 기반으로 한 classification 모델 이용

In [None]:
class SimpleClassifier(nn.Module):
    """Two-layer classification head applied to the first position of a sequence output.

    Pipeline: take position 0 of `features` -> dropout -> linear -> tanh -> dropout -> linear.

    Args:
        num_label: number of output logits.
        hidden_size: width of the incoming features (last dimension). Defaults to 1024,
            the value this head was originally hard-coded with.
        proj_size: width of the intermediate projection. Defaults to 50.
        dropout_prob: dropout probability; when None the module-level
            `classifier_dropout_prob` is used.

    The sub-module names (`dense`, `dropout`, `output`) are kept unchanged so that
    previously saved state dicts still load.
    """

    def __init__(self, num_label, hidden_size=1024, proj_size=50, dropout_prob=None):
        super().__init__()
        if dropout_prob is None:
            dropout_prob = classifier_dropout_prob
        # self.dense = nn.Linear(classifier_hidden_size, classifier_hidden_size)
        self.dense = nn.Linear(hidden_size, proj_size)
        self.dropout = nn.Dropout(dropout_prob)
        self.output = nn.Linear(proj_size, num_label)

    def forward(self, features):
        """Return logits of shape (batch, num_label) for `features` indexed as [batch, position, hidden]."""
        x = features[:, 0, :]  # keep only the first position of every sequence
        x = self.dropout(x)
        x = self.dense(x)
        x = torch.tanh(x)
        x = self.dropout(x)
        x = self.output(x)
        return x


class RoBertaBaseClassifier(nn.Module):
    """Multi-label entity#property classifier: pretrained encoder + SimpleClassifier head.

    The encoder named by `base_model` is loaded with its parameters frozen
    (requires_grad=False) and its token embeddings resized to `len_tokenizer`.

    Args:
        num_label: number of independent (sigmoid) labels.
        len_tokenizer: vocabulary size after special tokens were added to the tokenizer.
    """

    def __init__(self, num_label, len_tokenizer):
        super(RoBertaBaseClassifier, self).__init__()

        self.num_label = num_label
        # Attribute is named `xlm_roberta` regardless of the checkpoint in `base_model`;
        # the name is part of the saved state-dict keys, so it must stay.
        self.xlm_roberta = AutoModel.from_pretrained(base_model)
        for param in self.xlm_roberta.parameters():
            param.requires_grad = False

        # NOTE(review): the resize happens after freezing; whether the resized embedding
        # matrix stays frozen may depend on the transformers version -- confirm.
        self.xlm_roberta.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Run the encoder and head.

        Args:
            input_ids, attention_mask: tokenizer outputs as tensors.
            labels: optional float multi-hot targets with `num_label` columns.

        Returns:
            (loss, logits). `loss` is None when `labels` is None; otherwise it is the
            BCE-with-logits loss weighted per label by the module-level `class_weight`.
            `logits` are raw scores (no sigmoid applied).
        """
        outputs = self.xlm_roberta(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None
        )

        sequence_output = outputs[0]
        logits = self.labels_classifier(sequence_output)

        loss = None

        if labels is not None:
            # Build the weight with the logits' own dtype and device: `class_weight` is a
            # list of numpy floats (which would otherwise fix the tensor dtype), and the
            # model may live on a device other than the global `device`.
            pos_weight = torch.tensor(class_weight, dtype=logits.dtype, device=logits.device)
            loss_fct = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
            loss = loss_fct(logits.view(-1, self.num_label), labels)

        return loss, logits

class RoBertaBaseClassifier_polar(nn.Module):
    """Single-label polarity classifier: pretrained encoder + SimpleClassifier head.

    The encoder named by `base_model` is loaded with its parameters frozen
    (requires_grad=False) and its token embeddings resized to `len_tokenizer`.

    Args:
        num_label: number of polarity classes.
        len_tokenizer: vocabulary size after special tokens were added to the tokenizer.
    """

    def __init__(self, num_label, len_tokenizer):
        super(RoBertaBaseClassifier_polar, self).__init__()

        self.num_label = num_label
        # Attribute is named `xlm_roberta` regardless of the checkpoint in `base_model`;
        # the name is part of the saved state-dict keys, so it must stay.
        self.xlm_roberta = AutoModel.from_pretrained(base_model)
        for param in self.xlm_roberta.parameters():
            param.requires_grad = False

        # NOTE(review): the resize happens after freezing; whether the resized embedding
        # matrix stays frozen may depend on the transformers version -- confirm.
        self.xlm_roberta.resize_token_embeddings(len_tokenizer)

        self.labels_classifier = SimpleClassifier(self.num_label)

    def forward(self, input_ids, attention_mask, labels=None):
        """Run the encoder and head.

        Args:
            input_ids, attention_mask: tokenizer outputs as tensors.
            labels: optional integer class ids (flattened with view(-1)).

        Returns:
            (loss, logits). `loss` is None when `labels` is None; otherwise it is the
            cross-entropy loss weighted per class by the module-level `polarity_class_weight`.
        """
        outputs = self.xlm_roberta(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=None
        )

        sequence_output = outputs[0]
        logits = self.labels_classifier(sequence_output)

        loss = None

        if labels is not None:
            # CrossEntropyLoss needs `weight` in the same dtype and on the same device as
            # the logits; `polarity_class_weight` is a list of numpy floats, so both are
            # set explicitly instead of being inferred / taken from the global `device`.
            class_weights = torch.tensor(polarity_class_weight, dtype=logits.dtype, device=logits.device)
            loss_fct = nn.CrossEntropyLoss(weight=class_weights)
            loss = loss_fct(logits.view(-1, self.num_label), labels.view(-1))

        return loss, logits


# 데이터 파싱 및 tokenization 함수 정의


In [None]:

def tokenize_and_align_labels(tokenizer, form, annotations, max_len):
    """Turn one utterance into training examples for both models.

    Args:
        tokenizer: callable used as tokenizer(text, text_pair, padding=..., max_length=...,
            truncation=True) that returns a mapping with 'input_ids' and 'attention_mask'.
        form: the sentence text. A missing value (pd.isna) yields no examples at all.
        annotations: iterable of annotations; element 0 is the entity#property label and
            element 2 the polarity name.
        max_len: padded / truncated sequence length.

    Returns:
        (entity_property_data_dict, polarity_data_dict), each with the keys
        'input_ids', 'attention_mask' and 'label' holding parallel lists.
        * entity_property: exactly one example per utterance (sentence paired with the
          fixed second segment '미정') whose label is a multi-hot float list over the
          labels in `tf_name_to_id`.
        * polarity: one example per label of `entity_property_pair` that is annotated
          (sentence paired with the label text), labelled with the polarity id of the
          first matching annotation.
    """
    entity_property_data_dict = {
        'input_ids': [],
        'attention_mask': [],
        'label': []
    }
    polarity_data_dict = {
        'input_ids': [],
        'attention_mask': [],
        'label': []
    }

    # A missing sentence cannot be tokenized, so check before calling the tokenizer
    # (once, not once per label) and contribute nothing for this utterance.
    if pd.isna(form):
        return entity_property_data_dict, polarity_data_dict

    tokenized_data = tokenizer(form, '미정', padding='max_length', max_length=max_len, truncation=True)
    # One slot per model output unit instead of a hard-coded 13.
    multi_hot = [0.0] * len(tf_name_to_id)
    entity_property_data_dict['input_ids'].append(tokenized_data['input_ids'])
    entity_property_data_dict['attention_mask'].append(tokenized_data['attention_mask'])
    entity_property_data_dict['label'].append(multi_hot)

    for pair in entity_property_pair:
        for annotation in annotations:
            entity_property = annotation[0]
            polarity = annotation[2]

            # '------------' is a placeholder polarity; such annotations carry no label.
            if polarity == '------------':
                continue

            if entity_property == pair:
                multi_hot[tf_name_to_id[entity_property]] = 1.0

                tokenized_data = tokenizer(form, pair, padding='max_length', max_length=max_len, truncation=True)
                polarity_data_dict['input_ids'].append(tokenized_data['input_ids'])
                polarity_data_dict['attention_mask'].append(tokenized_data['attention_mask'])
                polarity_data_dict['label'].append(polarity_name_to_id[polarity])

                # Only the first matching annotation per label is used.
                break

    return entity_property_data_dict, polarity_data_dict


def get_dataset(raw_data, tokenizer, max_len):
    """Tokenize every utterance of `raw_data` and pack the results into two TensorDatasets.

    Each utterance is passed to tokenize_and_align_labels() via its 'sentence_form' and
    'annotation' entries.

    Returns:
        (entity_property_dataset, polarity_dataset); each holds the tensors
        (input_ids, attention_mask, labels) in that order.
    """
    columns = ('input_ids', 'attention_mask', 'label')
    entity_columns = {name: [] for name in columns}
    polarity_columns = {name: [] for name in columns}

    for utterance in raw_data:
        entity_part, polarity_part = tokenize_and_align_labels(
            tokenizer, utterance['sentence_form'], utterance['annotation'], max_len
        )
        for name in columns:
            entity_columns[name].extend(entity_part[name])
            polarity_columns[name].extend(polarity_part[name])

    entity_dataset = TensorDataset(*(torch.tensor(entity_columns[name]) for name in columns))
    polarity_dataset = TensorDataset(*(torch.tensor(polarity_columns[name]) for name in columns))
    return entity_dataset, polarity_dataset



# 모델 학습

In [None]:
# evaluation에 5-fold 정도로 dev 나눠서 시행하고, 성능 감소하면 full 학습으로 변경, 2연속 감소하면 중단. 5개 평균으로 예측하게 변경하기

In [None]:

def _build_optimizer_and_scheduler(model, learning_rate, steps_per_epoch):
    """Create the AdamW optimizer and the linear-decay LR schedule for one fold."""
    if FULL_FINETUNING:
        named_params = list(model.named_parameters())
        # Biases and LayerNorm weights are exempt from weight decay. The param-group key
        # must be 'weight_decay'; AdamW silently ignores unknown keys such as
        # 'weight_decay_rate', which would leave every group at the optimizer default.
        no_decay = ('bias', 'LayerNorm.weight')
        grouped_parameters = [
            {'params': [p for n, p in named_params if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in named_params if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0},
        ]
    else:
        # Head-only training; the head attribute on both model classes is `labels_classifier`.
        grouped_parameters = [{'params': list(model.labels_classifier.parameters())}]

    optimizer = AdamW(grouped_parameters, lr=learning_rate, eps=eps)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=num_train_epochs * steps_per_epoch,
    )
    return optimizer, scheduler


def _validation_loss(model, dataloader):
    """Return the mean of the per-batch losses of `model` over `dataloader` (no gradients)."""
    model.eval()
    batch_losses = []
    with torch.no_grad():
        for batch in dataloader:
            b_input_ids, b_input_mask, b_labels = tuple(t.to(device) for t in batch)
            loss, _ = model(b_input_ids, b_input_mask, b_labels)
            batch_losses.append(loss.item())
    return np.mean(batch_losses)


def _train_fold(model, train_dataloader, dev_dataloader, learning_rate, patience_ub,
                checkpoint_path, epoch_label, updated_message):
    """Train `model` on one fold with early stopping on validation loss.

    The encoder starts frozen. When the number of consecutive non-improving epochs reaches
    int(patience_ub * 3 / 5), the best checkpoint of this fold is reloaded and the encoder
    is unfrozen. Training stops once patience exceeds `patience_ub` or after
    `num_train_epochs` epochs. The best state dict is written to `checkpoint_path`.
    """
    optimizer, scheduler = _build_optimizer_and_scheduler(model, learning_rate, len(train_dataloader))
    max_grad_norm = 1.0
    unfreeze_at = int(patience_ub * 3 / 5)
    best_val_loss = 100
    patience = 0
    has_checkpoint = False  # only reload a checkpoint that this run has actually written

    for epoch_index in trange(num_train_epochs, desc="Epoch"):
        # Patience exhausted: stop instead of idling through the remaining epochs.
        if patience > patience_ub:
            break

        if patience == unfreeze_at:
            if has_checkpoint:
                model.load_state_dict(torch.load(checkpoint_path, map_location=device))
            model.to(device)
            for param in model.xlm_roberta.parameters():
                param.requires_grad = True

        model.train()
        total_loss = 0
        for batch in train_dataloader:
            b_input_ids, b_input_mask, b_labels = tuple(t.to(device) for t in batch)

            model.zero_grad()
            loss, _ = model(b_input_ids, b_input_mask, b_labels)
            loss.backward()
            total_loss += loss.item()

            torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=max_grad_norm)
            optimizer.step()
            scheduler.step()

        avg_train_loss = total_loss / len(train_dataloader)
        print(epoch_label, epoch_index + 1)
        print("Average train loss: {}".format(avg_train_loss))

        if do_eval:
            val_loss = _validation_loss(model, dev_dataloader)
            print('validation error: ', val_loss)
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                torch.save(model.state_dict(), checkpoint_path)
                has_checkpoint = True
                print(updated_message)
                patience = 0
            else:
                patience += 1


def _cross_validate(utterances, strata, tokenizer, dataset_index, model_class, num_label,
                    learning_rate, patience_ub, checkpoint_prefix, epoch_label, updated_message):
    """Run stratified 5-fold training of `model_class` over `utterances`.

    `strata` holds one stratification label per utterance. `dataset_index` selects which
    of the two datasets returned by get_dataset() is used (0: entity#property, 1: polarity).
    Fold k saves its best model to checkpoint_prefix + 'saved_model_epoch_<k>.pt'.
    """
    skf = StratifiedKFold(n_splits=5, random_state=0, shuffle=True)
    for val_step, (train_index, dev_index) in enumerate(skf.split(utterances, strata)):
        print('\n\n======================= {} step=========================\n\n\n\n\n\n\n\n'.format(val_step+1))
        fold_train = [utterances[i] for i in train_index]
        fold_dev = [utterances[i] for i in dev_index]

        print('tokenizing train data')
        train_dataset = get_dataset(fold_train, tokenizer, max_len)[dataset_index]
        dev_dataset = get_dataset(fold_dev, tokenizer, max_len)[dataset_index]
        train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
        # Not shuffled: the validation loss is a mean of per-batch means, so a fixed batch
        # composition keeps it comparable from epoch to epoch.
        dev_dataloader = DataLoader(dev_dataset, shuffle=False, batch_size=batch_size)

        print('loading model')
        model = model_class(num_label, len(tokenizer))
        model.to(device)
        print('end loading')

        checkpoint_path = checkpoint_prefix + 'saved_model_epoch_' + str(val_step) + '.pt'
        _train_fold(model, train_dataloader, dev_dataloader, learning_rate, patience_ub,
                    checkpoint_path, epoch_label, updated_message)


def train_sentiment_analysis():
    """Train the category-extraction and the polarity models with 5-fold cross-validation.

    Train and dev files are merged. Stage 1 trains RoBertaBaseClassifier on utterances whose
    first annotation is one of `entity_property_pair` (stratified by that label); stage 2
    trains RoBertaBaseClassifier_polar on all annotated utterances (stratified by the
    polarity of the first annotation). Utterances without any annotation are skipped because
    they offer no label to stratify on. One checkpoint per fold and stage is saved under the
    configured model path prefixes.
    """
    print('train_sentiment_analysis')
    print('category_extraction model would be saved at ', category_extraction_model_path)
    print('polarity model would be saved at ', polarity_classification_model_path)

    print('loading train data')
    train_data = jsonlload(train_data_path)
    train_data.extend(jsonlload(dev_data_path))
    annotated = [utterance for utterance in train_data if utterance['annotation']]

    # The tokenizer does not depend on the fold, so it is prepared once.
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
    print('We have added', num_added_toks, 'tokens')

    # Stage 1: entity#property (category) extraction.
    category_data = [utterance for utterance in annotated
                     if utterance['annotation'][0][0] in entity_property_pair]
    category_strata = [utterance['annotation'][0][0] for utterance in category_data]
    _cross_validate(
        category_data, category_strata, tokenizer, 0,
        RoBertaBaseClassifier, len(tf_id_to_name),
        entity_property_learning_rate, entity_property_patience_ub,
        category_extraction_model_path,
        "Entity_Property_Epoch: ", 'entity_property_model best model updated.',
    )

    # Stage 2: polarity classification.
    polarity_strata = [utterance['annotation'][0][-1] for utterance in annotated]
    _cross_validate(
        annotated, polarity_strata, tokenizer, 1,
        RoBertaBaseClassifier_polar, len(polarity_id_to_name),
        polarity_learning_rate, polarity_patience_ub,
        polarity_classification_model_path,
        "Polarity_Epoch: ", '\npolarity_model best model updated.',
    )

    print("training is done")





In [None]:
# Run both training stages (category extraction, then polarity) over 5 folds each.
# Long-running: the log below shows roughly two minutes per epoch, with up to 100 epochs per fold.
train_sentiment_analysis()

train_sentiment_analysis
category_extraction model would be saved at  /content/drive/MyDrive/게임/korean_ABSA_baseline-main/saved_model5/category_extraction
polarity model would be saved at  /content/drive/MyDrive/게임/korean_ABSA_baseline-main/saved_model5/polarity_classification
loading train data










tokenizing train data
We have added 8 tokens
loading model


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it f

end loading


Epoch:   0%|          | 0/100 [00:00<?, ?it/s]

Entity_Property_Epoch:  1
Average train loss: 1.1323650358850381
validation error:  1.1188477569526725


Epoch:   1%|          | 1/100 [02:18<3:48:44, 138.63s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  2
Average train loss: 1.1204011902239286
validation error:  1.1014910223600747


Epoch:   2%|▏         | 2/100 [04:25<3:35:13, 131.77s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  3
Average train loss: 1.109383067996724
validation error:  1.094018396380898


Epoch:   3%|▎         | 3/100 [06:32<3:29:23, 129.52s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  4
Average train loss: 1.1022170638577171
validation error:  1.0883897988946287


Epoch:   4%|▍         | 4/100 [08:39<3:25:32, 128.46s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  5
Average train loss: 1.095961675807336
validation error:  1.0843991666407018


Epoch:   5%|▌         | 5/100 [10:46<3:22:27, 127.87s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  6
Average train loss: 1.1001270004860755
validation error:  1.0817276476980089


Epoch:   6%|▌         | 6/100 [12:52<3:19:45, 127.51s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  7
Average train loss: 1.0940297203030445


Epoch:   7%|▋         | 7/100 [14:55<3:15:17, 126.00s/it]

validation error:  1.0827128274457438
Entity_Property_Epoch:  8
Average train loss: 1.0883157841979096
validation error:  1.077848943797025


Epoch:   8%|▊         | 8/100 [17:02<3:13:34, 126.25s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  9
Average train loss: 1.0889289835844391
validation error:  1.0773404444847907


Epoch:   9%|▉         | 9/100 [19:09<3:11:45, 126.44s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  10
Average train loss: 1.0899276663213706
validation error:  1.0760566233754991


Epoch:  10%|█         | 10/100 [21:16<3:09:50, 126.56s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  11
Average train loss: 1.0883588807654088
validation error:  1.0755951454589416


Epoch:  11%|█         | 11/100 [23:23<3:07:50, 126.64s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  12
Average train loss: 1.0879004188078358
validation error:  1.07501127211364


Epoch:  12%|█▏        | 12/100 [25:29<3:05:48, 126.69s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  13
Average train loss: 1.0881291128210526


Epoch:  13%|█▎        | 13/100 [27:32<3:02:03, 125.55s/it]

validation error:  1.0752698645725116
Entity_Property_Epoch:  14
Average train loss: 1.0856037694037695
validation error:  1.074503088330889


Epoch:  14%|█▍        | 14/100 [29:39<3:00:30, 125.93s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  15
Average train loss: 1.0864789456600463


Epoch:  15%|█▌        | 15/100 [31:42<2:57:07, 125.03s/it]

validation error:  1.089190568123664
Entity_Property_Epoch:  16
Average train loss: 1.086349083061587
validation error:  1.0742059208296395


Epoch:  16%|█▌        | 16/100 [33:49<2:55:46, 125.55s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  17
Average train loss: 1.0854596347926162


Epoch:  17%|█▋        | 17/100 [35:52<2:52:36, 124.78s/it]

validation error:  1.080300514097814
Entity_Property_Epoch:  18
Average train loss: 1.083685141770408


Epoch:  18%|█▊        | 18/100 [37:55<2:49:44, 124.20s/it]

validation error:  1.0846841802130212
Entity_Property_Epoch:  19
Average train loss: 1.0842392701256254


Epoch:  19%|█▉        | 19/100 [39:57<2:47:06, 123.78s/it]

validation error:  1.0801202000437917
Entity_Property_Epoch:  20
Average train loss: 1.0819417718005726


Epoch:  20%|██        | 20/100 [42:00<2:44:39, 123.49s/it]

validation error:  1.0845828202220944
Entity_Property_Epoch:  21
Average train loss: 1.0871583089258634
validation error:  1.0735212810389645


Epoch:  21%|██        | 21/100 [44:07<2:43:55, 124.50s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  22
Average train loss: 1.0863952940503407
validation error:  1.0732975714690203


Epoch:  22%|██▏       | 22/100 [46:14<2:42:45, 125.20s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  23
Average train loss: 1.0835354134361856
validation error:  1.0731049784413584


Epoch:  23%|██▎       | 23/100 [48:21<2:41:19, 125.71s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  24
Average train loss: 1.0814860463142395


Epoch:  24%|██▍       | 24/100 [50:24<2:38:10, 124.88s/it]

validation error:  1.073512325336883
Entity_Property_Epoch:  25
Average train loss: 1.0845166183942652


Epoch:  25%|██▌       | 25/100 [52:27<2:35:20, 124.27s/it]

validation error:  1.0735194307940823
Entity_Property_Epoch:  26
Average train loss: 1.0819458780263556
validation error:  1.0729315297586934


Epoch:  26%|██▌       | 26/100 [54:33<2:34:11, 125.02s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  27
Average train loss: 1.0832035206323767


Epoch:  27%|██▋       | 27/100 [56:36<2:31:20, 124.40s/it]

validation error:  1.076406798579476
Entity_Property_Epoch:  28
Average train loss: 1.0806579169782897


Epoch:  28%|██▊       | 28/100 [58:39<2:28:43, 123.94s/it]

validation error:  1.0731686487064496
Entity_Property_Epoch:  29
Average train loss: 1.0818617393136654


Epoch:  29%|██▉       | 29/100 [1:00:42<2:26:16, 123.61s/it]

validation error:  1.0730392024233624
Entity_Property_Epoch:  30
Average train loss: 1.0814809672442807


Epoch:  30%|███       | 30/100 [1:02:45<2:23:56, 123.38s/it]

validation error:  1.0826421509255897
Entity_Property_Epoch:  31
Average train loss: 1.0809814788545162


Epoch:  31%|███       | 31/100 [1:04:48<2:21:42, 123.22s/it]

validation error:  1.075984861467268
Entity_Property_Epoch:  32
Average train loss: 1.0831624042170747
validation error:  1.0721613022830936


Epoch:  32%|███▏      | 32/100 [1:06:55<2:20:50, 124.28s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  33
Average train loss: 1.0809945737540407
validation error:  1.071867004557923


Epoch:  33%|███▎      | 33/100 [1:09:01<2:19:37, 125.04s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  34
Average train loss: 1.0789419704874705
validation error:  1.071669442253513


Epoch:  34%|███▍      | 34/100 [1:11:08<2:18:08, 125.59s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  35
Average train loss: 1.0792922067516508


Epoch:  35%|███▌      | 35/100 [1:13:11<2:15:11, 124.80s/it]

validation error:  1.0747219545024258
Entity_Property_Epoch:  36
Average train loss: 1.0798963593263409
validation error:  1.0714414207251755


Epoch:  36%|███▌      | 36/100 [1:15:18<2:13:45, 125.39s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  37
Average train loss: 1.078301856304095
validation error:  1.0709044724911243


Epoch:  37%|███▋      | 37/100 [1:17:25<2:12:08, 125.84s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  38
Average train loss: 1.0796491109842994
validation error:  1.0704289049535365


Epoch:  38%|███▊      | 38/100 [1:19:32<2:10:23, 126.19s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  39
Average train loss: 1.0786215524681841


Epoch:  39%|███▉      | 39/100 [1:21:35<2:07:17, 125.21s/it]

validation error:  1.0737674553077539
Entity_Property_Epoch:  40
Average train loss: 1.0769928249617242
validation error:  1.0699181477506678


Epoch:  40%|████      | 40/100 [1:23:42<2:05:40, 125.68s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  41
Average train loss: 1.0748364487515603
validation error:  1.0693716506857973


Epoch:  41%|████      | 41/100 [1:25:48<2:03:55, 126.03s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  42
Average train loss: 1.0750437246894167


Epoch:  42%|████▏     | 42/100 [1:27:51<2:00:55, 125.09s/it]

validation error:  1.072622888154917
Entity_Property_Epoch:  43
Average train loss: 1.0786588261123282


Epoch:  43%|████▎     | 43/100 [1:29:54<1:58:11, 124.42s/it]

validation error:  1.0722990361126987
Entity_Property_Epoch:  44
Average train loss: 1.0743608341694717
validation error:  1.0688899071900162


Epoch:  44%|████▍     | 44/100 [1:32:01<1:56:47, 125.14s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  45
Average train loss: 1.076427261733003


Epoch:  45%|████▌     | 45/100 [1:34:04<1:54:06, 124.48s/it]

validation error:  1.084367082252369
Entity_Property_Epoch:  46
Average train loss: 1.0736429386482507
validation error:  1.0678073659643426


Epoch:  46%|████▌     | 46/100 [1:36:11<1:52:39, 125.17s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  47
Average train loss: 1.0756827913394922
validation error:  1.0677949765345434


Epoch:  47%|████▋     | 47/100 [1:38:18<1:51:01, 125.68s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  48
Average train loss: 1.074110587368112


Epoch:  48%|████▊     | 48/100 [1:40:21<1:48:12, 124.86s/it]

validation error:  1.0700139165758253
Entity_Property_Epoch:  49
Average train loss: 1.0703407216993912


Epoch:  49%|████▉     | 49/100 [1:42:23<1:45:37, 124.26s/it]

validation error:  1.0703900460596685
Entity_Property_Epoch:  50
Average train loss: 1.0741631303814048
validation error:  1.0668397020626734


Epoch:  50%|█████     | 50/100 [1:44:30<1:44:11, 125.03s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  51
Average train loss: 1.0720010375725155


Epoch:  51%|█████     | 51/100 [1:46:33<1:41:35, 124.39s/it]

validation error:  1.0820890477487257
Entity_Property_Epoch:  52
Average train loss: 1.0693192905198081
validation error:  1.0662170173404935


Epoch:  52%|█████▏    | 52/100 [1:48:40<1:40:04, 125.10s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  53
Average train loss: 1.068823645843982


Epoch:  53%|█████▎    | 53/100 [1:50:43<1:37:29, 124.45s/it]

validation error:  1.0753225713343053
Entity_Property_Epoch:  54
Average train loss: 1.071362985144181
validation error:  1.0654603063643395


Epoch:  54%|█████▍    | 54/100 [1:52:50<1:35:57, 125.16s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  55
Average train loss: 1.0689446177130635
validation error:  1.0650928587346644


Epoch:  55%|█████▌    | 55/100 [1:54:56<1:34:15, 125.67s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  56
Average train loss: 1.0679815270150692


Epoch:  56%|█████▌    | 56/100 [1:56:59<1:31:34, 124.87s/it]

validation error:  1.0674469883625324
Entity_Property_Epoch:  57
Average train loss: 1.0687951564998326


Epoch:  57%|█████▋    | 57/100 [1:59:02<1:29:04, 124.28s/it]

validation error:  1.074086625676055
Entity_Property_Epoch:  58
Average train loss: 1.069547587727411
validation error:  1.0634507120072425


Epoch:  58%|█████▊    | 58/100 [2:01:09<1:27:32, 125.06s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  59
Average train loss: 1.0674127236610975


Epoch:  59%|█████▉    | 59/100 [2:03:12<1:25:01, 124.42s/it]

validation error:  1.0763957141996263
Entity_Property_Epoch:  60
Average train loss: 1.0672816180596243


Epoch:  60%|██████    | 60/100 [2:05:15<1:22:38, 123.96s/it]

validation error:  1.0692908963957033
Entity_Property_Epoch:  61
Average train loss: 1.064021434955731
validation error:  1.0631093403676173


Epoch:  61%|██████    | 61/100 [2:07:22<1:21:07, 124.81s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  62
Average train loss: 1.0665627306291214
validation error:  1.0623760798594335


Epoch:  62%|██████▏   | 62/100 [2:09:29<1:19:26, 125.43s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  63
Average train loss: 1.065415181154107


Epoch:  63%|██████▎   | 63/100 [2:11:32<1:16:53, 124.70s/it]

validation error:  1.0625197641499393
Entity_Property_Epoch:  64
Average train loss: 1.0669806509005373


Epoch:  64%|██████▍   | 64/100 [2:13:35<1:14:28, 124.13s/it]

validation error:  1.0715463103114309
Entity_Property_Epoch:  65
Average train loss: 1.0640245074756536


Epoch:  65%|██████▌   | 65/100 [2:15:37<1:12:10, 123.74s/it]

validation error:  1.0708717455813934
Entity_Property_Epoch:  66
Average train loss: 1.0650500064365473


Epoch:  66%|██████▌   | 66/100 [2:17:40<1:09:57, 123.46s/it]

validation error:  1.081332173797634
Entity_Property_Epoch:  67
Average train loss: 1.062059581070248
validation error:  1.0611985555061927


Epoch:  67%|██████▋   | 67/100 [2:19:47<1:08:26, 124.45s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  68
Average train loss: 1.064131695168299
validation error:  1.0608260098037186


Epoch:  68%|██████▊   | 68/100 [2:21:54<1:06:45, 125.16s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  69
Average train loss: 1.0645999311353285
validation error:  1.0606800813774961


Epoch:  69%|██████▉   | 69/100 [2:24:01<1:04:56, 125.68s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  70
Average train loss: 1.0651626007628148


Epoch:  70%|███████   | 70/100 [2:26:04<1:02:25, 124.85s/it]

validation error:  1.063210499453378
Entity_Property_Epoch:  71
Average train loss: 1.060055409824911
validation error:  1.0593247978420524


Epoch:  71%|███████   | 71/100 [2:28:10<1:00:37, 125.42s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  72
Average train loss: 1.061829576697626
validation error:  1.0590664235861984


Epoch:  72%|███████▏  | 72/100 [2:30:17<58:44, 125.88s/it]  

entity_property_model best model updated.
Entity_Property_Epoch:  73
Average train loss: 1.0612978101405193


Epoch:  73%|███████▎  | 73/100 [2:32:20<56:14, 125.00s/it]

validation error:  1.0623950091275303
Entity_Property_Epoch:  74
Average train loss: 1.058471953931718


Epoch:  74%|███████▍  | 74/100 [2:34:23<53:52, 124.34s/it]

validation error:  1.075985446259692
Entity_Property_Epoch:  75
Average train loss: 1.0610782679543553
validation error:  1.0585847597022158


Epoch:  75%|███████▌  | 75/100 [2:36:30<52:06, 125.06s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  76
Average train loss: 1.0584647154242377
validation error:  1.058582010802689


Epoch:  76%|███████▌  | 76/100 [2:38:36<50:13, 125.57s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  77
Average train loss: 1.064663722351691
validation error:  1.0577976282659944


Epoch:  77%|███████▋  | 77/100 [2:40:43<48:16, 125.95s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  78
Average train loss: 1.0568902067224673
validation error:  1.0574203906776187


Epoch:  78%|███████▊  | 78/100 [2:42:50<46:16, 126.20s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  79
Average train loss: 1.0615227545082884


Epoch:  79%|███████▉  | 79/100 [2:44:53<43:49, 125.20s/it]

validation error:  1.07283590973674
Entity_Property_Epoch:  80
Average train loss: 1.056689121182648
validation error:  1.0568596641500514


Epoch:  80%|████████  | 80/100 [2:47:00<41:53, 125.65s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  81
Average train loss: 1.0590180080050116


Epoch:  81%|████████  | 81/100 [2:49:03<39:31, 124.81s/it]

validation error:  1.0666604713126495
Entity_Property_Epoch:  82
Average train loss: 1.0592193504523728


Epoch:  82%|████████▏ | 82/100 [2:51:05<37:15, 124.20s/it]

validation error:  1.0626101985677971
Entity_Property_Epoch:  83
Average train loss: 1.0593218718031798


Epoch:  83%|████████▎ | 83/100 [2:53:08<35:04, 123.78s/it]

validation error:  1.059601224385775
Entity_Property_Epoch:  84
Average train loss: 1.056503489692098
validation error:  1.0567107913377403


Epoch:  84%|████████▍ | 84/100 [2:55:15<33:14, 124.67s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  85
Average train loss: 1.0594519191341367
validation error:  1.0564897427192101


Epoch:  85%|████████▌ | 85/100 [2:57:22<31:19, 125.33s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  86
Average train loss: 1.0561161793388676
validation error:  1.055935291351972


Epoch:  86%|████████▌ | 86/100 [2:59:29<29:20, 125.77s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  87
Average train loss: 1.0571806714698175


Epoch:  87%|████████▋ | 87/100 [3:01:32<27:04, 124.93s/it]

validation error:  1.0686005979151159
Entity_Property_Epoch:  88
Average train loss: 1.0565522665089169


Epoch:  88%|████████▊ | 88/100 [3:03:34<24:51, 124.29s/it]

validation error:  1.059675794172954
Entity_Property_Epoch:  89
Average train loss: 1.0583787357241492


Epoch:  89%|████████▉ | 89/100 [3:05:37<22:42, 123.84s/it]

validation error:  1.1065327074977902
Entity_Property_Epoch:  90
Average train loss: 1.0570484559020594


Epoch:  90%|█████████ | 90/100 [3:07:40<20:35, 123.53s/it]

validation error:  1.0585890391489843
Entity_Property_Epoch:  91
Average train loss: 1.0577308643471797
validation error:  1.0556842905658108


Epoch:  91%|█████████ | 91/100 [3:09:47<18:40, 124.49s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  92
Average train loss: 1.0576394416640638
validation error:  1.0554433252427962


Epoch:  92%|█████████▏| 92/100 [3:11:53<16:41, 125.17s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  93
Average train loss: 1.05793150290454


Epoch:  93%|█████████▎| 93/100 [3:13:56<14:31, 124.49s/it]

validation error:  1.0709309571689658
Entity_Property_Epoch:  94
Average train loss: 1.0594354041431406


Epoch:  94%|█████████▍| 94/100 [3:15:59<12:23, 123.98s/it]

validation error:  1.0585481157669654
Entity_Property_Epoch:  95
Average train loss: 1.0565814506819042
validation error:  1.055362899403472


Epoch:  95%|█████████▌| 95/100 [3:18:06<10:23, 124.79s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  96
Average train loss: 1.053921439316654


Epoch:  96%|█████████▌| 96/100 [3:20:09<08:16, 124.21s/it]

validation error:  1.0753753918867845
Entity_Property_Epoch:  97
Average train loss: 1.0544525026646565
validation error:  1.0545661393162253


Epoch:  97%|█████████▋| 97/100 [3:22:15<06:14, 124.97s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  98
Average train loss: 1.0544499433312977


Epoch:  98%|█████████▊| 98/100 [3:24:18<04:08, 124.35s/it]

validation error:  1.0551888252888526
Entity_Property_Epoch:  99
Average train loss: 1.0567161236580311


Epoch:  99%|█████████▉| 99/100 [3:26:21<02:03, 123.88s/it]

validation error:  1.0653274780386812
Entity_Property_Epoch:  100
Average train loss: 1.0558372834445302


Epoch: 100%|██████████| 100/100 [3:28:24<00:00, 125.04s/it]

validation error:  1.0755620011082896










tokenizing train data
We have added 8 tokens





loading model


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it f

end loading


Epoch:   0%|          | 0/100 [00:00<?, ?it/s]

Entity_Property_Epoch:  1
Average train loss: 1.1343479137638541
validation error:  1.122476376436807


Epoch:   1%|          | 1/100 [02:17<3:46:40, 137.38s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  2
Average train loss: 1.1193792054229965
validation error:  1.110019501272615


Epoch:   2%|▏         | 2/100 [04:26<3:36:23, 132.48s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  3
Average train loss: 1.1094325192154815


Epoch:   3%|▎         | 3/100 [06:29<3:27:09, 128.13s/it]

validation error:  1.114823713586047
Entity_Property_Epoch:  4
Average train loss: 1.1030904645450506
validation error:  1.0958867552397134


Epoch:   4%|▍         | 4/100 [08:35<3:23:53, 127.43s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  5
Average train loss: 1.0991997396170778
validation error:  1.0916101903348536


Epoch:   5%|▌         | 5/100 [10:42<3:21:17, 127.13s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  6
Average train loss: 1.093714967029585
validation error:  1.088456865374025


Epoch:   6%|▌         | 6/100 [12:48<3:18:50, 126.92s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  7
Average train loss: 1.0893809188857857


Epoch:   7%|▋         | 7/100 [14:51<3:14:43, 125.63s/it]

validation error:  1.0891306754592416
Entity_Property_Epoch:  8
Average train loss: 1.0894829010712033
validation error:  1.0846393750264094


Epoch:   8%|▊         | 8/100 [16:58<3:13:01, 125.89s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  9
Average train loss: 1.0866687354387843


Epoch:   9%|▉         | 9/100 [19:01<3:09:34, 125.00s/it]

validation error:  1.086733269524741
Entity_Property_Epoch:  10
Average train loss: 1.0872615676353603
validation error:  1.0830599394711582


Epoch:  10%|█         | 10/100 [21:07<3:08:16, 125.52s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  11
Average train loss: 1.0848805808015365
validation error:  1.083029872887618


Epoch:  11%|█         | 11/100 [23:14<3:06:45, 125.91s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  12
Average train loss: 1.0860100030270528
validation error:  1.0829005849944962


Epoch:  12%|█▏        | 12/100 [25:21<3:05:10, 126.25s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  13
Average train loss: 1.08395863186915
validation error:  1.082432087901589


Epoch:  13%|█▎        | 13/100 [27:28<3:03:18, 126.42s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  14
Average train loss: 1.0840118334666287


Epoch:  14%|█▍        | 14/100 [29:31<2:59:50, 125.47s/it]

validation error:  1.0926052015144507
Entity_Property_Epoch:  15
Average train loss: 1.083887600940644


Epoch:  15%|█▌        | 15/100 [31:35<2:56:51, 124.84s/it]

validation error:  1.0828821013023804
Entity_Property_Epoch:  16
Average train loss: 1.0837651162239705


Epoch:  16%|█▌        | 16/100 [33:38<2:54:16, 124.49s/it]

validation error:  1.092026834304516
Entity_Property_Epoch:  17
Average train loss: 1.0827083468227687


Epoch:  17%|█▋        | 17/100 [35:42<2:51:41, 124.12s/it]

validation error:  1.0830714148241323
Entity_Property_Epoch:  18
Average train loss: 1.0835507261732131


Epoch:  18%|█▊        | 18/100 [37:45<2:49:15, 123.85s/it]

validation error:  1.0828788814011154
Entity_Property_Epoch:  19
Average train loss: 1.08226478445509


Epoch:  19%|█▉        | 19/100 [39:48<2:46:56, 123.67s/it]

validation error:  1.0863651479040826
Entity_Property_Epoch:  20
Average train loss: 0.9300084826921118
validation error:  0.7876463988861004


Epoch:  20%|██        | 20/100 [43:04<3:13:35, 145.19s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  21
Average train loss: 0.7360747483786464
validation error:  0.7194262303255655


Epoch:  21%|██        | 21/100 [46:18<3:30:32, 159.91s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  22
Average train loss: 0.6733465575480503
validation error:  0.6959092892133273


Epoch:  22%|██▏       | 22/100 [49:32<3:41:15, 170.20s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  23
Average train loss: 0.6249351975997429
validation error:  0.6844921553885186


Epoch:  23%|██▎       | 23/100 [52:46<3:47:39, 177.39s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  24
Average train loss: 0.5901751895885267
validation error:  0.6837832127417718


Epoch:  24%|██▍       | 24/100 [56:00<3:51:03, 182.41s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  25
Average train loss: 0.5597502732214065


Epoch:  25%|██▌       | 25/100 [59:11<3:51:04, 184.86s/it]

validation error:  0.687601525258351
Entity_Property_Epoch:  26
Average train loss: 0.5365600753019062


Epoch:  26%|██▌       | 26/100 [1:02:21<3:50:03, 186.53s/it]

validation error:  0.6943708460230927
Entity_Property_Epoch:  27
Average train loss: 0.5114008904236691


Epoch:  27%|██▋       | 27/100 [1:05:32<3:48:21, 187.69s/it]

validation error:  0.703131767836484
Entity_Property_Epoch:  28
Average train loss: 0.49448346463154824


Epoch:  28%|██▊       | 28/100 [1:08:42<3:46:15, 188.55s/it]

validation error:  0.6997049432117622
Entity_Property_Epoch:  29
Average train loss: 0.48242588186724955


Epoch:  29%|██▉       | 29/100 [1:11:53<3:43:44, 189.08s/it]

validation error:  0.695468176703353
Entity_Property_Epoch:  30
Average train loss: 0.4609930716939467


Epoch:  30%|███       | 30/100 [1:15:03<3:41:03, 189.48s/it]

validation error:  0.6958809667950744
Entity_Property_Epoch:  31
Average train loss: 0.5607223377705458


Epoch:  31%|███       | 31/100 [1:18:15<3:38:41, 190.17s/it]

validation error:  0.6994886277438878
Entity_Property_Epoch:  32
Average train loss: 0.5341322681815847


Epoch:  32%|███▏      | 32/100 [1:21:26<3:35:44, 190.36s/it]

validation error:  0.6983817878302995
Entity_Property_Epoch:  33
Average train loss: 0.5175721442133764


Epoch:  33%|███▎      | 33/100 [1:24:36<3:32:41, 190.47s/it]

validation error:  0.6969142662478494
Entity_Property_Epoch:  34
Average train loss: 0.49824678798342004


Epoch:  34%|███▍      | 34/100 [1:27:47<3:29:37, 190.57s/it]

validation error:  0.7063396454274238
Entity_Property_Epoch:  35
Average train loss: 0.4810904138746077


Epoch: 100%|██████████| 100/100 [1:30:58<00:00, 54.58s/it]

validation error:  0.7133623305317405










tokenizing train data





We have added 8 tokens
loading model


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it f

end loading


Epoch:   0%|          | 0/100 [00:00<?, ?it/s]

Entity_Property_Epoch:  1
Average train loss: 1.1325880643144015
validation error:  1.1147292681507297


Epoch:   1%|          | 1/100 [02:16<3:45:56, 136.94s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  2
Average train loss: 1.1174682176594994
validation error:  1.104253034491639


Epoch:   2%|▏         | 2/100 [04:24<3:34:56, 131.60s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  3
Average train loss: 1.1048726088552359
validation error:  1.0933308159554755


Epoch:   3%|▎         | 3/100 [06:32<3:29:39, 129.69s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  4
Average train loss: 1.099871934507769
validation error:  1.0879552518571174


Epoch:   4%|▍         | 4/100 [08:39<3:26:00, 128.75s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  5
Average train loss: 1.0956807251764307
validation error:  1.0841755775304942


Epoch:   5%|▌         | 5/100 [10:46<3:23:04, 128.26s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  6
Average train loss: 1.0908897824991357


Epoch:   6%|▌         | 6/100 [12:50<3:18:31, 126.72s/it]

validation error:  1.0927309323024084
Entity_Property_Epoch:  7
Average train loss: 1.0875131191394452
validation error:  1.0804080029467602


Epoch:   7%|▋         | 7/100 [14:57<3:16:39, 126.88s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  8
Average train loss: 1.0880333543452312


Epoch:   8%|▊         | 8/100 [17:01<3:13:00, 125.88s/it]

validation error:  1.0913605168982818
Entity_Property_Epoch:  9
Average train loss: 1.0886613943455299
validation error:  1.0791341670743235


Epoch:   9%|▉         | 9/100 [19:08<3:11:16, 126.11s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  10
Average train loss: 1.083844800929822


Epoch:  10%|█         | 10/100 [21:11<3:07:46, 125.18s/it]

validation error:  1.0951798328986535
Entity_Property_Epoch:  11
Average train loss: 1.0862699827745634


Epoch:  11%|█         | 11/100 [23:14<3:04:42, 124.52s/it]

validation error:  1.0818685872571452
Entity_Property_Epoch:  12
Average train loss: 1.0864088213716114


Epoch:  12%|█▏        | 12/100 [25:17<3:01:57, 124.06s/it]

validation error:  1.0917559691242404
Entity_Property_Epoch:  13
Average train loss: 1.0846033083743076
validation error:  1.078555739009297


Epoch:  13%|█▎        | 13/100 [27:23<3:00:59, 124.82s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  14
Average train loss: 1.0838345843375463


Epoch:  14%|█▍        | 14/100 [29:27<2:58:12, 124.33s/it]

validation error:  1.0787218871650162
Entity_Property_Epoch:  15
Average train loss: 1.0845169790599802


Epoch:  15%|█▌        | 15/100 [31:30<2:55:37, 123.98s/it]

validation error:  1.079094070654649
Entity_Property_Epoch:  16
Average train loss: 1.085738123615513


Epoch:  16%|█▌        | 16/100 [33:33<2:53:11, 123.71s/it]

validation error:  1.082036807403698
Entity_Property_Epoch:  17
Average train loss: 1.0830755979608357


Epoch:  17%|█▋        | 17/100 [35:36<2:50:53, 123.54s/it]

validation error:  1.0789924630751977
Entity_Property_Epoch:  18
Average train loss: 1.0831333263687175


Epoch:  18%|█▊        | 18/100 [37:39<2:48:39, 123.41s/it]

validation error:  1.0795027038434168
Entity_Property_Epoch:  19
Average train loss: 1.0855710435206944


Epoch:  19%|█▉        | 19/100 [39:42<2:46:29, 123.33s/it]

validation error:  1.0796669454841348
Entity_Property_Epoch:  20
Average train loss: 0.911279445163814
validation error:  0.7459760041503639


Epoch:  20%|██        | 20/100 [42:57<3:13:10, 144.88s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  21
Average train loss: 0.7182133866634436
validation error:  0.7041132635586745


Epoch:  21%|██        | 21/100 [46:11<3:30:03, 159.54s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  22
Average train loss: 0.6550780204771273
validation error:  0.6629489812400792


Epoch:  22%|██▏       | 22/100 [49:25<3:40:47, 169.84s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  23
Average train loss: 0.6134618487215628
validation error:  0.6623996975538614


Epoch:  23%|██▎       | 23/100 [52:39<3:47:13, 177.06s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  24
Average train loss: 0.5804897863647012
validation error:  0.6454698574709725


Epoch:  24%|██▍       | 24/100 [55:52<3:50:32, 182.01s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  25
Average train loss: 0.5482303965280262


Epoch:  25%|██▌       | 25/100 [59:02<3:50:33, 184.44s/it]

validation error:  0.6570150998088863
Entity_Property_Epoch:  26
Average train loss: 0.5258747877157007


Epoch:  26%|██▌       | 26/100 [1:02:12<3:49:30, 186.09s/it]

validation error:  0.6542258360585966
Entity_Property_Epoch:  27
Average train loss: 0.5009596196964042


Epoch:  27%|██▋       | 27/100 [1:05:22<3:47:48, 187.24s/it]

validation error:  0.6529170372686186
Entity_Property_Epoch:  28
Average train loss: 0.4835216787975995


Epoch:  28%|██▊       | 28/100 [1:08:32<3:45:40, 188.06s/it]

validation error:  0.6628538457663743
Entity_Property_Epoch:  29
Average train loss: 0.4705002337746545


Epoch:  29%|██▉       | 29/100 [1:11:42<3:43:12, 188.63s/it]

validation error:  0.6667766514774802
Entity_Property_Epoch:  30
Average train loss: 0.457090236809635


Epoch:  30%|███       | 30/100 [1:14:52<3:40:31, 189.03s/it]

validation error:  0.6766072153211473
Entity_Property_Epoch:  31
Average train loss: 0.5483946170140653


Epoch:  31%|███       | 31/100 [1:18:04<3:38:10, 189.71s/it]

validation error:  0.6504510899523755
Entity_Property_Epoch:  32
Average train loss: 0.5267461693349748


Epoch:  32%|███▏      | 32/100 [1:21:13<3:35:05, 189.78s/it]

validation error:  0.6464978627391629
Entity_Property_Epoch:  33
Average train loss: 0.5054708469940614


Epoch:  33%|███▎      | 33/100 [1:24:23<3:31:58, 189.83s/it]

validation error:  0.6506806907953916
Entity_Property_Epoch:  34
Average train loss: 0.48632157498378115


Epoch:  34%|███▍      | 34/100 [1:27:33<3:28:50, 189.85s/it]

validation error:  0.6656874889260406
Entity_Property_Epoch:  35
Average train loss: 0.475324149291


Epoch: 100%|██████████| 100/100 [1:30:43<00:00, 54.44s/it]

validation error:  0.6525627310042614










tokenizing train data





We have added 8 tokens
loading model


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it f

end loading


Epoch:   0%|          | 0/100 [00:00<?, ?it/s]

Entity_Property_Epoch:  1
Average train loss: 1.1225010638496788
validation error:  1.1209292636884676


Epoch:   1%|          | 1/100 [02:20<3:52:31, 140.92s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  2
Average train loss: 1.1117721940595362
validation error:  1.104248561225571


Epoch:   2%|▏         | 2/100 [04:27<3:36:29, 132.55s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  3
Average train loss: 1.1058201931272953
validation error:  1.0973480064552148


Epoch:   3%|▎         | 3/100 [06:34<3:30:05, 129.96s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  4
Average train loss: 1.0984174879447857


Epoch:   4%|▍         | 4/100 [08:37<3:23:36, 127.26s/it]

validation error:  1.1042598423424301
Entity_Property_Epoch:  5
Average train loss: 1.0963570164460918
validation error:  1.0868099040084787


Epoch:   5%|▌         | 5/100 [10:44<3:21:08, 127.03s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  6
Average train loss: 1.0920674989428796


Epoch:   6%|▌         | 6/100 [12:47<3:16:55, 125.69s/it]

validation error:  1.0916521695110348
Entity_Property_Epoch:  7
Average train loss: 1.0907132972313356
validation error:  1.0823215504626293


Epoch:   7%|▋         | 7/100 [14:53<3:15:15, 125.98s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  8
Average train loss: 1.0875485890359995
validation error:  1.0813911244585797


Epoch:   8%|▊         | 8/100 [17:00<3:13:31, 126.21s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  9
Average train loss: 1.0874035230629893
validation error:  1.0811343234735769


Epoch:   9%|▉         | 9/100 [19:07<3:11:38, 126.36s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  10
Average train loss: 1.0875125130366776
validation error:  1.0800153058725637


Epoch:  10%|█         | 10/100 [21:14<3:09:46, 126.51s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  11
Average train loss: 1.0875049691208636
validation error:  1.0796802289836056


Epoch:  11%|█         | 11/100 [23:20<3:07:44, 126.57s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  12
Average train loss: 1.0877353519672668


Epoch:  12%|█▏        | 12/100 [25:24<3:04:08, 125.56s/it]

validation error:  1.0826625757284098
Entity_Property_Epoch:  13
Average train loss: 1.0850617380678549


Epoch:  13%|█▎        | 13/100 [27:27<3:01:00, 124.83s/it]

validation error:  1.0799841918311752
Entity_Property_Epoch:  14
Average train loss: 1.0851557906356972


Epoch:  14%|█▍        | 14/100 [29:30<2:58:25, 124.48s/it]

validation error:  1.079862782171556
Entity_Property_Epoch:  15
Average train loss: 1.08672875626854


Epoch:  15%|█▌        | 15/100 [31:34<2:56:00, 124.24s/it]

validation error:  1.0797050065927571
Entity_Property_Epoch:  16
Average train loss: 1.0878449227981701
validation error:  1.0796169150959363


Epoch:  16%|█▌        | 16/100 [33:42<2:55:17, 125.20s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  17
Average train loss: 1.0858706481008413
validation error:  1.079236594947068


Epoch:  17%|█▋        | 17/100 [35:49<2:54:06, 125.86s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  18
Average train loss: 1.0835594020535113


Epoch:  18%|█▊        | 18/100 [37:53<2:51:10, 125.25s/it]

validation error:  1.0997571361648453
Entity_Property_Epoch:  19
Average train loss: 1.0862845300790087


Epoch:  19%|█▉        | 19/100 [39:56<2:48:26, 124.77s/it]

validation error:  1.0954443520599313
Entity_Property_Epoch:  20
Average train loss: 1.0841589766981732


Epoch:  20%|██        | 20/100 [42:00<2:45:44, 124.31s/it]

validation error:  1.0793086882237788
Entity_Property_Epoch:  21
Average train loss: 1.086895669281797


Epoch:  21%|██        | 21/100 [44:03<2:43:13, 123.96s/it]

validation error:  1.0798370112905968
Entity_Property_Epoch:  22
Average train loss: 1.0885110197880146


Epoch:  22%|██▏       | 22/100 [46:06<2:40:54, 123.78s/it]

validation error:  1.0986621596596458
Entity_Property_Epoch:  23
Average train loss: 1.0849009280883575


Epoch:  23%|██▎       | 23/100 [48:09<2:38:36, 123.59s/it]

validation error:  1.0829283194942074
Entity_Property_Epoch:  24
Average train loss: 0.9128858417220191
validation error:  0.7681672539744344


Epoch:  24%|██▍       | 24/100 [51:25<3:03:54, 145.20s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  25
Average train loss: 0.7329574012588742
validation error:  0.7067772129198888


Epoch:  25%|██▌       | 25/100 [54:39<3:19:49, 159.87s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  26
Average train loss: 0.665814691352509
validation error:  0.6881230340137349


Epoch:  26%|██▌       | 26/100 [57:53<3:29:49, 170.13s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  27
Average train loss: 0.6234112130527127
validation error:  0.6840156985746397


Epoch:  27%|██▋       | 27/100 [1:01:07<3:35:42, 177.30s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  28
Average train loss: 0.585578840782437


Epoch:  28%|██▊       | 28/100 [1:04:18<3:37:31, 181.27s/it]

validation error:  0.6908615369063157
Entity_Property_Epoch:  29
Average train loss: 0.5540627407168671
validation error:  0.6777966520586214


Epoch:  29%|██▉       | 29/100 [1:07:31<3:38:57, 185.04s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  30
Average train loss: 0.5311654826040217


Epoch:  30%|███       | 30/100 [1:10:42<3:37:41, 186.60s/it]

validation error:  0.69298632995232
Entity_Property_Epoch:  31
Average train loss: 0.5064868382808404


Epoch:  31%|███       | 31/100 [1:13:52<3:35:47, 187.65s/it]

validation error:  0.7133224785744727
Entity_Property_Epoch:  32
Average train loss: 0.4909033475735066


Epoch:  32%|███▏      | 32/100 [1:17:02<3:33:30, 188.38s/it]

validation error:  0.696211850934929
Entity_Property_Epoch:  33
Average train loss: 0.47375644452752047


Epoch:  33%|███▎      | 33/100 [1:20:13<3:31:14, 189.17s/it]

validation error:  0.6924653861906145
Entity_Property_Epoch:  34
Average train loss: 0.46208283556576146


Epoch:  34%|███▍      | 34/100 [1:23:23<3:28:31, 189.56s/it]

validation error:  0.7000185615949698
Entity_Property_Epoch:  35
Average train loss: 0.4531214755218352


Epoch:  35%|███▌      | 35/100 [1:26:34<3:25:46, 189.95s/it]

validation error:  0.6895571957518171
Entity_Property_Epoch:  36
Average train loss: 0.5290410755912532


Epoch:  36%|███▌      | 36/100 [1:29:47<3:23:25, 190.71s/it]

validation error:  0.7042928306789665
Entity_Property_Epoch:  37
Average train loss: 0.511628276853444


Epoch:  37%|███▋      | 37/100 [1:32:58<3:20:20, 190.81s/it]

validation error:  0.6877719355629874
Entity_Property_Epoch:  38
Average train loss: 0.49572025598247776


Epoch:  38%|███▊      | 38/100 [1:36:09<3:17:18, 190.94s/it]

validation error:  0.6934648775137388
Entity_Property_Epoch:  39
Average train loss: 0.47684386665875544


Epoch:  39%|███▉      | 39/100 [1:39:20<3:14:14, 191.05s/it]

validation error:  0.7003159110362713
Entity_Property_Epoch:  40
Average train loss: 0.46589831837660817


Epoch: 100%|██████████| 100/100 [1:42:31<00:00, 61.52s/it]  

validation error:  0.6969140663847223










tokenizing train data





We have added 8 tokens
loading model


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it f

end loading


Epoch:   0%|          | 0/100 [00:00<?, ?it/s]

Entity_Property_Epoch:  1
Average train loss: 1.1190290469905404
validation error:  1.113040051409896


Epoch:   1%|          | 1/100 [02:16<3:44:43, 136.19s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  2
Average train loss: 1.1048363187401073
validation error:  1.102893207694443


Epoch:   2%|▏         | 2/100 [04:22<3:33:21, 130.63s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  3
Average train loss: 1.0945165805741228
validation error:  1.0966842304652846


Epoch:   3%|▎         | 3/100 [06:29<3:28:17, 128.84s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  4
Average train loss: 1.0907867366484054
validation error:  1.0934317141351566


Epoch:   4%|▍         | 4/100 [08:36<3:24:45, 127.98s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  5
Average train loss: 1.0883196260262877
validation error:  1.092033608278758


Epoch:   5%|▌         | 5/100 [10:42<3:21:52, 127.50s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  6
Average train loss: 1.0849712974995847
validation error:  1.0917046674540345


Epoch:   6%|▌         | 6/100 [12:49<3:19:20, 127.24s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  7
Average train loss: 1.0833094027842705


Epoch:   7%|▋         | 7/100 [14:52<3:15:07, 125.89s/it]

validation error:  1.091884733085901
Entity_Property_Epoch:  8
Average train loss: 1.0838664185602762


Epoch:   8%|▊         | 8/100 [16:55<3:11:39, 124.99s/it]

validation error:  1.092608175647091
Entity_Property_Epoch:  9
Average train loss: 1.0854210204106014


Epoch:   9%|▉         | 9/100 [18:58<3:08:39, 124.39s/it]

validation error:  1.0934294978497734
Entity_Property_Epoch:  10
Average train loss: 1.0836497886738166


Epoch:  10%|█         | 10/100 [21:02<3:05:58, 123.99s/it]

validation error:  1.0943819607647371
Entity_Property_Epoch:  11
Average train loss: 1.0831296163200284


Epoch:  11%|█         | 11/100 [23:05<3:03:28, 123.70s/it]

validation error:  1.0955108987613462
Entity_Property_Epoch:  12
Average train loss: 1.0859112480822264


Epoch:  12%|█▏        | 12/100 [25:08<3:01:08, 123.50s/it]

validation error:  1.0965170364984325
Entity_Property_Epoch:  13
Average train loss: 0.9251914990702497
validation error:  0.7643417105288572


Epoch:  13%|█▎        | 13/100 [28:23<3:30:27, 145.14s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  14
Average train loss: 0.7171108243440283
validation error:  0.6951885538201936


Epoch:  14%|█▍        | 14/100 [31:37<3:49:11, 159.90s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  15
Average train loss: 0.6528953184980919
validation error:  0.6777387414599808


Epoch:  15%|█▌        | 15/100 [34:51<4:01:07, 170.21s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  16
Average train loss: 0.607243977238298
validation error:  0.659698507315676


Epoch:  16%|█▌        | 16/100 [38:06<4:08:50, 177.75s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  17
Average train loss: 0.5689301956412244
validation error:  0.6422648887399217


Epoch:  17%|█▋        | 17/100 [41:21<4:13:11, 183.03s/it]

entity_property_model best model updated.
Entity_Property_Epoch:  18
Average train loss: 0.5378307186865848


Epoch:  18%|█▊        | 18/100 [44:32<4:13:23, 185.41s/it]

validation error:  0.6543290927796297
Entity_Property_Epoch:  19
Average train loss: 0.5156879650268488


Epoch:  19%|█▉        | 19/100 [47:43<4:12:24, 186.96s/it]

validation error:  0.665290085572592
Entity_Property_Epoch:  20
Average train loss: 0.4870288593593088


Epoch:  20%|██        | 20/100 [50:54<4:10:49, 188.12s/it]

validation error:  0.642719256416173
Entity_Property_Epoch:  21
Average train loss: 0.4696145384508612


Epoch:  21%|██        | 21/100 [54:04<4:08:44, 188.92s/it]

validation error:  0.6575226175113463
Entity_Property_Epoch:  22
Average train loss: 0.4562486190158998


Epoch:  22%|██▏       | 22/100 [57:15<4:06:20, 189.49s/it]

validation error:  0.668445906588729
Entity_Property_Epoch:  23
Average train loss: 0.44363747801638237


Epoch:  23%|██▎       | 23/100 [1:00:26<4:03:43, 189.92s/it]

validation error:  0.6757594785639938
Entity_Property_Epoch:  24
Average train loss: 0.540531726262691


Epoch:  24%|██▍       | 24/100 [1:03:38<4:01:25, 190.60s/it]

validation error:  0.6485210541688221
Entity_Property_Epoch:  25
Average train loss: 0.5108788402093106


Epoch:  25%|██▌       | 25/100 [1:06:49<3:58:21, 190.69s/it]

validation error:  0.656988359043296
Entity_Property_Epoch:  26
Average train loss: 0.4910312307006655


Epoch:  26%|██▌       | 26/100 [1:10:00<3:55:11, 190.69s/it]

validation error:  0.6639436984985647
Entity_Property_Epoch:  27
Average train loss: 0.47042080702387806


Epoch:  27%|██▋       | 27/100 [1:13:11<3:51:59, 190.68s/it]

validation error:  0.6637053114007896
Entity_Property_Epoch:  28
Average train loss: 0.456926733622023


Epoch: 100%|██████████| 100/100 [1:16:21<00:00, 45.82s/it]

validation error:  0.652910052470758















tokenizing train data
We have added 8 tokens
loading model


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it f

end loading


Epoch:   0%|          | 0/100 [00:00<?, ?it/s]

Polarity_Epoch:  1
Average train loss: 0.9907542188179317
validation error:  0.7357645783769456


Epoch:   1%|          | 1/100 [02:15<3:43:40, 135.56s/it]


polarity_model best model updated.
Polarity_Epoch:  2
Average train loss: 0.6324937645535555
validation error:  0.5352380379642311


Epoch:   2%|▏         | 2/100 [04:30<3:41:17, 135.48s/it]


polarity_model best model updated.
Polarity_Epoch:  3
Average train loss: 0.5139433752335547
validation error:  0.508272660994216


Epoch:   3%|▎         | 3/100 [06:46<3:39:01, 135.48s/it]


polarity_model best model updated.
Polarity_Epoch:  4
Average train loss: 0.49003090944343014


Epoch:   4%|▍         | 4/100 [08:58<3:34:27, 134.04s/it]

validation error:  0.5230857530412706
Polarity_Epoch:  5
Average train loss: 0.5044252638428278


Epoch:   5%|▌         | 5/100 [11:10<3:30:55, 133.22s/it]

validation error:  0.5433271010896485
Polarity_Epoch:  6
Average train loss: 0.5204798458648726


Epoch:   6%|▌         | 6/100 [13:21<3:27:54, 132.71s/it]

validation error:  0.5318022573023642
Polarity_Epoch:  7
Average train loss: 0.5391946638244616


Epoch:   7%|▋         | 7/100 [15:33<3:25:12, 132.39s/it]

validation error:  0.54819515068084
Polarity_Epoch:  8
Average train loss: 0.5646777076062506


Epoch:   8%|▊         | 8/100 [17:45<3:22:43, 132.21s/it]

validation error:  0.5558472867859038
Polarity_Epoch:  9
Average train loss: 0.5596691710363664


Epoch:   9%|▉         | 9/100 [19:57<3:20:17, 132.06s/it]

validation error:  0.563520134059026
Polarity_Epoch:  10
Average train loss: 0.5002446473287098
validation error:  0.47918625957773703


Epoch:  10%|█         | 10/100 [23:25<3:53:36, 155.74s/it]


polarity_model best model updated.
Polarity_Epoch:  11
Average train loss: 0.37940254009807445
validation error:  0.3761837209917997


Epoch:  11%|█         | 11/100 [26:53<4:14:31, 171.58s/it]


polarity_model best model updated.
Polarity_Epoch:  12
Average train loss: 0.3265704108747747
validation error:  0.3555761579497668


Epoch:  12%|█▏        | 12/100 [30:20<4:27:41, 182.52s/it]


polarity_model best model updated.
Polarity_Epoch:  13
Average train loss: 0.257348923339877


Epoch:  13%|█▎        | 13/100 [33:44<4:34:05, 189.03s/it]

validation error:  0.3687439024154293
Polarity_Epoch:  14
Average train loss: 0.2095595109941704


Epoch:  14%|█▍        | 14/100 [37:08<4:37:21, 193.50s/it]

validation error:  0.4004293169556676
Polarity_Epoch:  15
Average train loss: 0.19258173510175328


Epoch:  15%|█▌        | 15/100 [40:32<4:38:37, 196.68s/it]

validation error:  0.38481760194132986
Polarity_Epoch:  16
Average train loss: 0.15058875193112883
validation error:  0.34519593057369713


Epoch:  16%|█▌        | 16/100 [44:00<4:39:59, 199.99s/it]


polarity_model best model updated.
Polarity_Epoch:  17
Average train loss: 0.14728585120186527


Epoch:  17%|█▋        | 17/100 [47:25<4:38:35, 201.40s/it]

validation error:  0.35899570179907114
Polarity_Epoch:  18
Average train loss: 0.11828573354426619


Epoch:  18%|█▊        | 18/100 [50:49<4:36:18, 202.17s/it]

validation error:  0.35029165769044895
Polarity_Epoch:  19
Average train loss: 0.1007555251549593


Epoch:  19%|█▉        | 19/100 [54:13<4:33:40, 202.73s/it]

validation error:  0.42374561723983406
Polarity_Epoch:  20
Average train loss: 0.07950983778994121


Epoch:  20%|██        | 20/100 [57:37<4:30:51, 203.14s/it]

validation error:  0.4015358472426765
Polarity_Epoch:  21
Average train loss: 0.07279028962037491


Epoch:  21%|██        | 21/100 [1:01:01<4:27:50, 203.43s/it]

validation error:  0.35551386215650527
Polarity_Epoch:  22
Average train loss: 0.06286987850873782


Epoch:  22%|██▏       | 22/100 [1:04:25<4:24:41, 203.61s/it]

validation error:  0.4475003449172762
Polarity_Epoch:  23
Average train loss: 0.1485086619557296


Epoch:  23%|██▎       | 23/100 [1:07:50<4:21:56, 204.10s/it]

validation error:  0.41518853648908827
Polarity_Epoch:  24
Average train loss: 0.11404731378956333


Epoch:  24%|██▍       | 24/100 [1:11:14<4:18:31, 204.09s/it]

validation error:  0.38844282096742017
Polarity_Epoch:  25
Average train loss: 0.09483871753793974


Epoch:  25%|██▌       | 25/100 [1:14:38<4:15:07, 204.10s/it]

validation error:  0.4049409168502806
Polarity_Epoch:  26
Average train loss: 0.10274170679063192


Epoch:  26%|██▌       | 26/100 [1:18:02<4:11:43, 204.10s/it]

validation error:  0.36732671827715085
Polarity_Epoch:  27
Average train loss: 0.07219458993200109


Epoch: 100%|██████████| 100/100 [1:21:27<00:00, 48.87s/it]

validation error:  0.3865009087297183
training is done










tokenizing train data





We have added 8 tokens
loading model


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it f

end loading


Epoch:   0%|          | 0/100 [00:00<?, ?it/s]

Polarity_Epoch:  1
Average train loss: 0.705254014434391
validation error:  0.56711261393198


Epoch:   1%|          | 1/100 [02:15<3:43:41, 135.57s/it]


polarity_model best model updated.
Polarity_Epoch:  2
Average train loss: 0.5400702167841557
validation error:  0.5045091768884971


Epoch:   2%|▏         | 2/100 [04:31<3:41:26, 135.58s/it]


polarity_model best model updated.
Polarity_Epoch:  3
Average train loss: 0.5045217230523887
validation error:  0.48574885725975037


Epoch:   3%|▎         | 3/100 [06:46<3:39:14, 135.62s/it]


polarity_model best model updated.
Polarity_Epoch:  4
Average train loss: 0.5205619056451771


Epoch:   4%|▍         | 4/100 [08:58<3:34:42, 134.19s/it]

validation error:  0.5435447139677658
Polarity_Epoch:  5
Average train loss: 0.5356328650561505


Epoch:   5%|▌         | 5/100 [11:10<3:31:09, 133.37s/it]

validation error:  0.5720363986842772
Polarity_Epoch:  6
Average train loss: 0.5403735485283265


Epoch:   6%|▌         | 6/100 [13:22<3:28:10, 132.87s/it]

validation error:  0.5590858695407709
Polarity_Epoch:  7
Average train loss: 0.5496111981092805


Epoch:   7%|▋         | 7/100 [15:34<3:25:27, 132.55s/it]

validation error:  0.5736360319591816
Polarity_Epoch:  8
Average train loss: 0.5682101904010204


Epoch:   8%|▊         | 8/100 [17:46<3:22:55, 132.35s/it]

validation error:  0.5335363539702752
Polarity_Epoch:  9
Average train loss: 0.5889366861083545


Epoch:   9%|▉         | 9/100 [19:58<3:20:31, 132.21s/it]

validation error:  0.599289071301814
Polarity_Epoch:  10
Average train loss: 0.5190896195861953


Epoch:  10%|█         | 10/100 [23:23<3:52:04, 154.72s/it]

validation error:  0.5394094891456607
Polarity_Epoch:  11
Average train loss: 0.3675596683108444
validation error:  0.39069358552836514


Epoch:  11%|█         | 11/100 [26:50<4:13:24, 170.83s/it]


polarity_model best model updated.
Polarity_Epoch:  12
Average train loss: 0.3256046462078015


Epoch:  12%|█▏        | 12/100 [30:14<4:25:20, 180.92s/it]

validation error:  0.43048074387403484
Polarity_Epoch:  13
Average train loss: 0.23665449134454034


Epoch:  13%|█▎        | 13/100 [33:38<4:32:20, 187.82s/it]

validation error:  0.415349729042622
Polarity_Epoch:  14
Average train loss: 0.19330925257772355


Epoch:  14%|█▍        | 14/100 [37:02<4:36:06, 192.64s/it]

validation error:  0.4159028208236289
Polarity_Epoch:  15
Average train loss: 0.11972157952473744
validation error:  0.36800798896115783


Epoch:  15%|█▌        | 15/100 [40:29<4:39:15, 197.12s/it]


polarity_model best model updated.
Polarity_Epoch:  16
Average train loss: 0.0899299651347591


Epoch:  16%|█▌        | 16/100 [43:53<4:38:48, 199.15s/it]

validation error:  0.41615045128475414
Polarity_Epoch:  17
Average train loss: 0.08117951393145822


Epoch:  17%|█▋        | 17/100 [47:17<4:37:18, 200.47s/it]

validation error:  0.4842506951707251
Polarity_Epoch:  18
Average train loss: 0.059375750939484294


Epoch:  18%|█▊        | 18/100 [50:40<4:35:16, 201.43s/it]

validation error:  0.3766105324699501
Polarity_Epoch:  19
Average train loss: 0.057787946597703636


Epoch:  19%|█▉        | 19/100 [54:04<4:32:49, 202.09s/it]

validation error:  0.4046936769622798
Polarity_Epoch:  20
Average train loss: 0.061354877144606836


Epoch:  20%|██        | 20/100 [57:28<4:30:08, 202.61s/it]

validation error:  0.4235430741865261
Polarity_Epoch:  21
Average train loss: 0.053866972926985716


Epoch:  21%|██        | 21/100 [1:00:52<4:27:30, 203.17s/it]

validation error:  0.4180878859828987
Polarity_Epoch:  22
Average train loss: 0.12166739324732732


Epoch:  22%|██▏       | 22/100 [1:04:18<4:25:06, 203.93s/it]

validation error:  0.49883882160864623
Polarity_Epoch:  23
Average train loss: 0.08082402429672104


Epoch:  23%|██▎       | 23/100 [1:07:42<4:21:51, 204.05s/it]

validation error:  0.39594139548907287
Polarity_Epoch:  24
Average train loss: 0.06897371068541695


Epoch:  24%|██▍       | 24/100 [1:11:06<4:18:26, 204.04s/it]

validation error:  0.3988894072233462
Polarity_Epoch:  25
Average train loss: 0.047686518472466174


Epoch:  25%|██▌       | 25/100 [1:14:30<4:14:54, 203.92s/it]

validation error:  0.4383861425886649
Polarity_Epoch:  26
Average train loss: 0.04960882494694823


Epoch: 100%|██████████| 100/100 [1:17:54<00:00, 46.74s/it]

validation error:  0.42482226623794417
training is done










tokenizing train data





We have added 8 tokens
loading model


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it f

end loading


Epoch:   0%|          | 0/100 [00:00<?, ?it/s]

Polarity_Epoch:  1
Average train loss: 0.8022383072560555
validation error:  0.6282214481456607


Epoch:   1%|          | 1/100 [02:15<3:43:29, 135.45s/it]


polarity_model best model updated.
Polarity_Epoch:  2
Average train loss: 0.5724967819099364
validation error:  0.5221570585093467


Epoch:   2%|▏         | 2/100 [04:31<3:41:29, 135.61s/it]


polarity_model best model updated.
Polarity_Epoch:  3
Average train loss: 0.505646513774991
validation error:  0.5195538406960325


Epoch:   3%|▎         | 3/100 [06:46<3:39:17, 135.65s/it]


polarity_model best model updated.
Polarity_Epoch:  4
Average train loss: 0.48993772760915916


Epoch:   4%|▍         | 4/100 [08:58<3:34:44, 134.21s/it]

validation error:  0.5328560572336701
Polarity_Epoch:  5
Average train loss: 0.5040946425563705


Epoch:   5%|▌         | 5/100 [11:10<3:31:11, 133.39s/it]

validation error:  0.5842351586697927
Polarity_Epoch:  6
Average train loss: 0.5319921893839675


Epoch:   6%|▌         | 6/100 [13:22<3:28:17, 132.95s/it]

validation error:  0.585120439529419
Polarity_Epoch:  7
Average train loss: 0.5495276403870728


Epoch:   7%|▋         | 7/100 [15:35<3:25:45, 132.75s/it]

validation error:  0.5969528948364694
Polarity_Epoch:  8
Average train loss: 0.5574035956156685


Epoch:   8%|▊         | 8/100 [17:47<3:23:22, 132.64s/it]

validation error:  0.5995630112543605
Polarity_Epoch:  9
Average train loss: 0.561950633504526


Epoch:   9%|▉         | 9/100 [19:59<3:20:58, 132.51s/it]

validation error:  0.6070058613869489
Polarity_Epoch:  10
Average train loss: 0.5179180839638177
validation error:  0.31869406512408865


Epoch:  10%|█         | 10/100 [23:29<3:54:27, 156.31s/it]


polarity_model best model updated.
Polarity_Epoch:  11
Average train loss: 0.3548719120751086


Epoch:  11%|█         | 11/100 [26:53<4:13:41, 171.03s/it]

validation error:  0.5342171455130857
Polarity_Epoch:  12
Average train loss: 0.32179343954071793


Epoch:  12%|█▏        | 12/100 [30:18<4:25:39, 181.13s/it]

validation error:  0.45664106291152684
Polarity_Epoch:  13
Average train loss: 0.24624374205258823


Epoch:  13%|█▎        | 13/100 [33:42<4:32:46, 188.12s/it]

validation error:  0.46248461493577053
Polarity_Epoch:  14
Average train loss: 0.1926066029731086


Epoch:  14%|█▍        | 14/100 [37:06<4:36:38, 193.00s/it]

validation error:  0.4953252122147021
Polarity_Epoch:  15
Average train loss: 0.18313883922957375


Epoch:  15%|█▌        | 15/100 [40:30<4:38:07, 196.33s/it]

validation error:  0.5343942520592142
Polarity_Epoch:  16
Average train loss: 0.15932054486965122


Epoch:  16%|█▌        | 16/100 [43:54<4:38:10, 198.69s/it]

validation error:  0.5641715752776542
Polarity_Epoch:  17
Average train loss: 0.36485916155231135


Epoch:  17%|█▋        | 17/100 [47:20<4:37:35, 200.67s/it]

validation error:  0.4468808336536479
Polarity_Epoch:  18
Average train loss: 0.29328946374957204


Epoch:  18%|█▊        | 18/100 [50:44<4:35:40, 201.72s/it]

validation error:  0.49637313469562655
Polarity_Epoch:  19
Average train loss: 0.2179611914356141


Epoch:  19%|█▉        | 19/100 [54:08<4:33:15, 202.42s/it]

validation error:  0.4514030011042076
Polarity_Epoch:  20
Average train loss: 0.19284055831437735


Epoch:  20%|██        | 20/100 [57:32<4:30:32, 202.91s/it]

validation error:  0.41055926396907155
Polarity_Epoch:  21
Average train loss: 0.1744301340184297


Epoch: 100%|██████████| 100/100 [1:00:56<00:00, 36.57s/it]

validation error:  0.4755300216961141
training is done










tokenizing train data





We have added 8 tokens
loading model


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it f

end loading


Epoch:   0%|          | 0/100 [00:00<?, ?it/s]

Polarity_Epoch:  1
Average train loss: 0.8978376717403017
validation error:  0.694733618513534


Epoch:   1%|          | 1/100 [02:32<4:10:57, 152.09s/it]


polarity_model best model updated.
Polarity_Epoch:  2
Average train loss: 0.622471236781338
validation error:  0.5417558770430716


Epoch:   2%|▏         | 2/100 [04:48<3:52:55, 142.60s/it]


polarity_model best model updated.
Polarity_Epoch:  3
Average train loss: 0.5209484867421277
validation error:  0.4814353936204785


Epoch:   3%|▎         | 3/100 [07:04<3:45:38, 139.57s/it]


polarity_model best model updated.
Polarity_Epoch:  4
Average train loss: 0.49534554662082


Epoch:   4%|▍         | 4/100 [09:16<3:38:44, 136.71s/it]

validation error:  0.49755231129299654
Polarity_Epoch:  5
Average train loss: 0.5082517621919439


Epoch:   5%|▌         | 5/100 [11:28<3:33:55, 135.11s/it]

validation error:  0.49771278567220034
Polarity_Epoch:  6
Average train loss: 0.511224285612259


Epoch:   6%|▌         | 6/100 [13:40<3:30:07, 134.13s/it]

validation error:  0.5437173299295338
Polarity_Epoch:  7
Average train loss: 0.5326988193679032


Epoch:   7%|▋         | 7/100 [15:52<3:26:50, 133.45s/it]

validation error:  0.5174884620416713
Polarity_Epoch:  8
Average train loss: 0.5447402906853381


Epoch:   8%|▊         | 8/100 [18:05<3:24:06, 133.11s/it]

validation error:  0.5772219710297098
Polarity_Epoch:  9
Average train loss: 0.5398546192544239


Epoch:   9%|▉         | 9/100 [20:17<3:21:27, 132.83s/it]

validation error:  0.5910337612179941
Polarity_Epoch:  10
Average train loss: 0.507069246069649


Epoch:  10%|█         | 10/100 [23:43<3:53:05, 155.39s/it]

validation error:  0.49253790418764476
Polarity_Epoch:  11
Average train loss: 0.375158903814473
validation error:  0.35284005228037896


Epoch:  11%|█         | 11/100 [27:11<4:14:26, 171.53s/it]


polarity_model best model updated.
Polarity_Epoch:  12
Average train loss: 0.3012118612481847


Epoch:  12%|█▏        | 12/100 [30:35<4:26:13, 181.51s/it]

validation error:  0.35715613445561184
Polarity_Epoch:  13
Average train loss: 0.2596545544726704


Epoch:  13%|█▎        | 13/100 [34:00<4:33:19, 188.50s/it]

validation error:  0.3717337376560624
Polarity_Epoch:  14
Average train loss: 0.21593741610744122


Epoch:  14%|█▍        | 14/100 [37:24<4:37:07, 193.35s/it]

validation error:  0.35834312260052875
Polarity_Epoch:  15
Average train loss: 0.18380821972461464
validation error:  0.34744597966537666


Epoch:  15%|█▌        | 15/100 [40:53<4:40:12, 197.80s/it]


polarity_model best model updated.
Polarity_Epoch:  16
Average train loss: 0.15359851120617882
validation error:  0.33172134414168175


Epoch:  16%|█▌        | 16/100 [44:21<4:41:17, 200.93s/it]


polarity_model best model updated.
Polarity_Epoch:  17
Average train loss: 0.1210835783395225


Epoch:  17%|█▋        | 17/100 [47:45<4:39:29, 202.04s/it]

validation error:  0.3767457217419226
Polarity_Epoch:  18
Average train loss: 0.0982665423738154
validation error:  0.3020051104433246


Epoch:  18%|█▊        | 18/100 [51:14<4:38:38, 203.88s/it]


polarity_model best model updated.
Polarity_Epoch:  19
Average train loss: 0.0889429051952042


Epoch:  19%|█▉        | 19/100 [54:38<4:35:30, 204.08s/it]

validation error:  0.3808323226403445
Polarity_Epoch:  20
Average train loss: 0.07536955507493567


Epoch:  20%|██        | 20/100 [58:02<4:32:12, 204.16s/it]

validation error:  0.38402812565235717
Polarity_Epoch:  21
Average train loss: 0.07778379704026361


Epoch:  21%|██        | 21/100 [1:01:27<4:28:56, 204.26s/it]

validation error:  0.33660978091096405
Polarity_Epoch:  22
Average train loss: 0.077325690750259


Epoch:  22%|██▏       | 22/100 [1:04:51<4:25:35, 204.31s/it]

validation error:  0.35586694816715625
Polarity_Epoch:  23
Average train loss: 0.06921050739314022


Epoch:  23%|██▎       | 23/100 [1:08:16<4:22:12, 204.31s/it]

validation error:  0.379753180993966
Polarity_Epoch:  24
Average train loss: 0.0639195171565625


Epoch:  24%|██▍       | 24/100 [1:11:40<4:18:48, 204.32s/it]

validation error:  0.44125643424616245
Polarity_Epoch:  25
Average train loss: 0.07552099996055239


Epoch:  25%|██▌       | 25/100 [1:15:06<4:15:55, 204.74s/it]

validation error:  0.40197617075357
Polarity_Epoch:  26
Average train loss: 0.08511021708356972


Epoch:  26%|██▌       | 26/100 [1:18:30<4:12:22, 204.63s/it]

validation error:  0.3775589319411665
Polarity_Epoch:  27
Average train loss: 0.08460629154424901


Epoch:  27%|██▋       | 27/100 [1:21:54<4:08:52, 204.55s/it]

validation error:  0.388064651946096
Polarity_Epoch:  28
Average train loss: 0.06858402087807362


Epoch:  28%|██▊       | 28/100 [1:25:19<4:05:20, 204.45s/it]

validation error:  0.3958458552921289
Polarity_Epoch:  29
Average train loss: 0.0646784193630289


Epoch: 100%|██████████| 100/100 [1:28:43<00:00, 53.24s/it]

validation error:  0.4474722026712506
training is done










tokenizing train data





We have added 8 tokens
loading model


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it f

end loading


Epoch:   0%|          | 0/100 [00:00<?, ?it/s]

Polarity_Epoch:  1
Average train loss: 0.7127576670525305
validation error:  0.5606522967940882


Epoch:   1%|          | 1/100 [02:27<4:03:07, 147.35s/it]


polarity_model best model updated.
Polarity_Epoch:  2
Average train loss: 0.5435290687238837
validation error:  0.4883261105339778


Epoch:   2%|▏         | 2/100 [04:43<3:50:03, 140.85s/it]


polarity_model best model updated.
Polarity_Epoch:  3
Average train loss: 0.49885312339355203


Epoch:   3%|▎         | 3/100 [06:55<3:41:22, 136.93s/it]

validation error:  0.5028986748504011
Polarity_Epoch:  4
Average train loss: 0.5007252002254202


Epoch:   4%|▍         | 4/100 [09:08<3:36:12, 135.13s/it]

validation error:  0.500642974248254
Polarity_Epoch:  5
Average train loss: 0.5481062334991246


Epoch:   5%|▌         | 5/100 [11:20<3:32:18, 134.08s/it]

validation error:  0.5422924330065909
Polarity_Epoch:  6
Average train loss: 0.5456132454910791


Epoch:   6%|▌         | 6/100 [13:32<3:29:07, 133.48s/it]

validation error:  0.5341603216764174
Polarity_Epoch:  7
Average train loss: 0.5506767272912516


Epoch:   7%|▋         | 7/100 [15:45<3:26:17, 133.09s/it]

validation error:  0.5470484323848627
Polarity_Epoch:  8
Average train loss: 0.5756044729394083


Epoch:   8%|▊         | 8/100 [17:57<3:23:43, 132.86s/it]

validation error:  0.5603292939302168
Polarity_Epoch:  9
Average train loss: 0.5197966459447332
validation error:  0.3453649135772139


Epoch:   9%|▉         | 9/100 [21:27<3:58:00, 156.93s/it]


polarity_model best model updated.
Polarity_Epoch:  10
Average train loss: 0.3846232137613745
validation error:  0.3425487197911073


Epoch:  10%|█         | 10/100 [24:55<4:19:12, 172.81s/it]


polarity_model best model updated.
Polarity_Epoch:  11
Average train loss: 0.3388797466984181


Epoch:  11%|█         | 11/100 [28:20<4:30:40, 182.48s/it]

validation error:  0.4136222470551729
Polarity_Epoch:  12
Average train loss: 0.28143112614371607
validation error:  0.33164646975254936


Epoch:  12%|█▏        | 12/100 [31:48<4:39:09, 190.34s/it]


polarity_model best model updated.
Polarity_Epoch:  13
Average train loss: 0.21896494275849002
validation error:  0.3185590969423126


Epoch:  13%|█▎        | 13/100 [35:16<4:43:52, 195.78s/it]


polarity_model best model updated.
Polarity_Epoch:  14
Average train loss: 0.16123321826595732


Epoch:  14%|█▍        | 14/100 [38:41<4:44:28, 198.47s/it]

validation error:  0.40116105280130315
Polarity_Epoch:  15
Average train loss: 0.13699971131784675


Epoch:  15%|█▌        | 15/100 [42:05<4:43:43, 200.27s/it]

validation error:  0.4485059753611782
Polarity_Epoch:  16
Average train loss: 0.11361622864652689


Epoch:  16%|█▌        | 16/100 [45:30<4:42:07, 201.51s/it]

validation error:  0.4033175094334997
Polarity_Epoch:  17
Average train loss: 0.09892031459416124


Epoch:  17%|█▋        | 17/100 [48:54<4:40:00, 202.42s/it]

validation error:  0.4253676134333211
Polarity_Epoch:  18
Average train loss: 0.08843673999942778


Epoch:  18%|█▊        | 18/100 [52:19<4:37:30, 203.05s/it]

validation error:  0.3791522452129835
Polarity_Epoch:  19
Average train loss: 0.07910626019199787


Epoch:  19%|█▉        | 19/100 [55:43<4:34:43, 203.51s/it]

validation error:  0.3944019245621013
Polarity_Epoch:  20
Average train loss: 0.201480397763425


Epoch:  20%|██        | 20/100 [59:09<4:32:18, 204.23s/it]

validation error:  0.42897726108908263
Polarity_Epoch:  21
Average train loss: 0.14620550129752244


Epoch:  21%|██        | 21/100 [1:02:34<4:29:04, 204.36s/it]

validation error:  0.48549459141213447
Polarity_Epoch:  22
Average train loss: 0.09305345638974742


Epoch:  22%|██▏       | 22/100 [1:05:58<4:25:43, 204.41s/it]

validation error:  0.41004821943045644
Polarity_Epoch:  23
Average train loss: 0.09492749050840711


Epoch:  23%|██▎       | 23/100 [1:09:23<4:22:20, 204.42s/it]

validation error:  0.4113460367447452
Polarity_Epoch:  24
Average train loss: 0.07238054179933613


Epoch: 100%|██████████| 100/100 [1:12:48<00:00, 43.68s/it]

validation error:  0.3929034569361982
training is done



