In [1]:
import pandas as pd
import numpy as np
import torch
import os

from transformers import AutoTokenizer, AutoConfig, TrainingArguments, Trainer
from mkdataset import TimeDataset, TestDataset
from datasets import load_metric, load_dataset
from classifier import RobertaForSequenceClassification
from sklearn.model_selection import StratifiedKFold
from utils import set_allseed
import warnings

In [2]:
# --- Run configuration ------------------------------------------------------
seed = 777
batch_size = 32
# Interval, in optimizer steps, reused for checkpoint saving, logging and
# evaluation in the TrainingArguments cell.
save_steps = 103

# Pin the process to GPU 0 (devices ordered by PCI bus id). These variables are
# only honoured if they are set before CUDA is initialised, so export them
# before calling any helper that might touch the GPU.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Seed the random number generators through the project helper
# (which libraries get seeded is defined in utils.set_allseed).
set_allseed(seed)

gpu = torch.device("cuda")
cpu = torch.device("cpu")

# NOTE(review): this silences every warning category for the whole session,
# including deprecation warnings from the libraries used below.
warnings.filterwarnings(action="ignore")

In [3]:
def compute_metrics(pred):
    """Compute the micro-averaged F1 score for one evaluation pass.

    For single-label multiclass predictions every misclassified example
    counts as exactly one false positive and one false negative, so
    micro-averaged F1 is identical to plain accuracy. Computing it directly
    avoids re-loading the metric script on every evaluation call.

    Args:
        pred: Object exposing ``predictions`` (a 2-D array of per-class
            scores, one row per example) and ``label_ids`` (the reference
            class ids).

    Returns:
        dict: ``{"f1": value}`` with ``value`` a Python float in ``[0, 1]``.
        An empty evaluation set yields ``0.0`` instead of NaN.
    """
    references = np.asarray(pred.label_ids)
    predictions = np.asarray(pred.predictions).argmax(axis=1)
    if references.size == 0:
        # Avoid the NaN (and RuntimeWarning) that a mean over nothing produces.
        return {"f1": 0.0}
    return {"f1": float((predictions == references).mean())}

In [4]:
# Read the training split and the split to predict on from CSV files in the
# current working directory.
train_df, test_df = (
    pd.read_csv(csv_path) for csv_path in ("train.csv", "test.csv")
)

In [5]:
# Keep only the two columns this notebook uses: the sentence text ("문장")
# and its tense label ("시제").
train_df = train_df.loc[:, ["문장", "시제"]]

In [6]:
# Tokenizer and model configuration both come from the same pretrained
# checkpoint.
tokenizer = AutoTokenizer.from_pretrained("klue/roberta-base")

# Size the classification head to the number of distinct tense labels that
# occur in the training data.
type_config = AutoConfig.from_pretrained("klue/roberta-base")
type_config.num_labels = train_df["시제"].nunique()

# Shuffled, seeded 5-fold splitter that preserves the label distribution in
# every fold.
kfold_function = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)

In [7]:
# Hyper-parameters shared by the Trainer of every fold.
training_args = TrainingArguments(
    # Reproducibility and output locations.
    seed=seed,
    output_dir="./output_type",
    logging_dir="./logs",
    # Optimisation.
    num_train_epochs=5,
    learning_rate=1e-4,
    weight_decay=1e-4,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=4,
    # Log, evaluate and checkpoint on the same step interval.
    logging_steps=save_steps,
    evaluation_strategy="steps",
    eval_steps=save_steps,
    save_steps=save_steps,
    save_total_limit=2,
    # When training ends, reload the checkpoint selected by validation F1.
    metric_for_best_model="eval_f1",
    load_best_model_at_end=True,
)

# Test-set wrapper, built once and reused for prediction after every fold.
test_dataset = TestDataset(data=test_df, tokenizer=tokenizer)

In [8]:
# Cross-validated training: fine-tune one fresh model per stratified fold and
# accumulate the mean of the folds' test-set predictions in `logit`.
#
# NOTE(review): every fold writes checkpoints into the same
# training_args.output_dir, so checkpoints left behind by earlier folds or
# runs take part in the save_total_limit rotation (the captured log shows
# unrelated checkpoint-360 / checkpoint-450 being deleted). Consider a
# per-fold output directory.

# Take the fold count from the splitter so the averaging below cannot drift
# out of sync with n_splits.
n_folds = kfold_function.get_n_splits()

logit = 0
for i, (train_index, valid_index) in enumerate(
    kfold_function.split(train_df["문장"], train_df["시제"])
):
    # Start every fold from the pretrained checkpoint so folds share no weights.
    model = RobertaForSequenceClassification.from_pretrained(
        "klue/roberta-base", config=type_config
    )

    # StratifiedKFold yields *positional* indices, so rows must be selected
    # with .iloc; label-based lookup is only correct while the frame still has
    # its default 0..n-1 index.
    fold_train = train_df.iloc[train_index][["문장", "시제"]]
    fold_valid = train_df.iloc[valid_index][["문장", "시제"]]
    train_dataset = TimeDataset(data=fold_train, tokenizer=tokenizer)
    valid_dataset = TimeDataset(data=fold_valid, tokenizer=tokenizer)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=valid_dataset,
        compute_metrics=compute_metrics,
    )

    trainer.train()

    # Each fold contributes an equal share to the ensemble average.
    logit += trainer.predict(test_dataset).predictions / n_folds

Some weights of the model checkpoint at klue/roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['lstm.weight_ih_l1', 'lstm.weight_ih_l0_reverse', 'lstm.bias_hh

Step,Training Loss,Validation Loss,F1
103,0.4307,0.345722,0.890601
206,0.258,0.305835,0.89725
309,0.186,0.343698,0.890299
412,0.1155,0.426975,0.888486
515,0.067,0.496145,0.889997


***** Running Evaluation *****
  Num examples = 3309
  Batch size = 32
Saving model checkpoint to ./output_type/checkpoint-103
Configuration saved in ./output_type/checkpoint-103/config.json
Model weights saved in ./output_type/checkpoint-103/pytorch_model.bin
Deleting older checkpoint [output_type/checkpoint-360] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 3309
  Batch size = 32
Saving model checkpoint to ./output_type/checkpoint-206
Configuration saved in ./output_type/checkpoint-206/config.json
Model weights saved in ./output_type/checkpoint-206/pytorch_model.bin
Deleting older checkpoint [output_type/checkpoint-450] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 3309
  Batch size = 32
Saving model checkpoint to ./output_type/checkpoint-309
Configuration saved in ./output_type/checkpoint-309/config.json
Model weights saved in ./output_type/checkpoint-309/pytorch_model.bin
Deleting older checkpoint [output_type/checkpoint

loading weights file https://huggingface.co/klue/roberta-base/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/b204e0dc0a3b8fd45b35e7fcefd97c5f839b86c14aea510f1eb38fb8469e23d8.57d3cd0dfa80e5a249a776870dc87b6da993900685a271086750174009115320
Some weights of the model checkpoint at klue/roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenc

Step,Training Loss,Validation Loss,F1
103,0.3794,0.274545,0.896917
206,0.2454,0.263278,0.905683
309,0.1647,0.301611,0.900242
412,0.0923,0.373659,0.893591
515,0.0404,0.428433,0.89873


***** Running Evaluation *****
  Num examples = 3308
  Batch size = 32
Saving model checkpoint to ./output_type/checkpoint-103
Configuration saved in ./output_type/checkpoint-103/config.json
Model weights saved in ./output_type/checkpoint-103/pytorch_model.bin
Deleting older checkpoint [output_type/checkpoint-206] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 3308
  Batch size = 32
Saving model checkpoint to ./output_type/checkpoint-206
Configuration saved in ./output_type/checkpoint-206/config.json
Model weights saved in ./output_type/checkpoint-206/pytorch_model.bin
Deleting older checkpoint [output_type/checkpoint-515] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 3308
  Batch size = 32
Saving model checkpoint to ./output_type/checkpoint-309
Configuration saved in ./output_type/checkpoint-309/config.json
Model weights saved in ./output_type/checkpoint-309/pytorch_model.bin
Deleting older checkpoint [output_type/checkpoint

loading weights file https://huggingface.co/klue/roberta-base/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/b204e0dc0a3b8fd45b35e7fcefd97c5f839b86c14aea510f1eb38fb8469e23d8.57d3cd0dfa80e5a249a776870dc87b6da993900685a271086750174009115320
Some weights of the model checkpoint at klue/roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenc

Step,Training Loss,Validation Loss,F1
103,0.3865,0.255958,0.908102
206,0.2444,0.286251,0.909915
309,0.1711,0.303809,0.903265
412,0.0923,0.377117,0.904474
515,0.0466,0.419592,0.902056


***** Running Evaluation *****
  Num examples = 3308
  Batch size = 32
Saving model checkpoint to ./output_type/checkpoint-103
Configuration saved in ./output_type/checkpoint-103/config.json
Model weights saved in ./output_type/checkpoint-103/pytorch_model.bin
Deleting older checkpoint [output_type/checkpoint-206] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 3308
  Batch size = 32
Saving model checkpoint to ./output_type/checkpoint-206
Configuration saved in ./output_type/checkpoint-206/config.json
Model weights saved in ./output_type/checkpoint-206/pytorch_model.bin
Deleting older checkpoint [output_type/checkpoint-515] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 3308
  Batch size = 32
Saving model checkpoint to ./output_type/checkpoint-309
Configuration saved in ./output_type/checkpoint-309/config.json
Model weights saved in ./output_type/checkpoint-309/pytorch_model.bin
Deleting older checkpoint [output_type/checkpoint

loading weights file https://huggingface.co/klue/roberta-base/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/b204e0dc0a3b8fd45b35e7fcefd97c5f839b86c14aea510f1eb38fb8469e23d8.57d3cd0dfa80e5a249a776870dc87b6da993900685a271086750174009115320
Some weights of the model checkpoint at klue/roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenc

Step,Training Loss,Validation Loss,F1
103,0.4148,0.327043,0.880593
206,0.2619,0.294758,0.895405
309,0.1827,0.298811,0.893894
412,0.1096,0.36834,0.891475
515,0.0542,0.447383,0.888755


***** Running Evaluation *****
  Num examples = 3308
  Batch size = 32
Saving model checkpoint to ./output_type/checkpoint-103
Configuration saved in ./output_type/checkpoint-103/config.json
Model weights saved in ./output_type/checkpoint-103/pytorch_model.bin
Deleting older checkpoint [output_type/checkpoint-206] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 3308
  Batch size = 32
Saving model checkpoint to ./output_type/checkpoint-206
Configuration saved in ./output_type/checkpoint-206/config.json
Model weights saved in ./output_type/checkpoint-206/pytorch_model.bin
Deleting older checkpoint [output_type/checkpoint-515] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 3308
  Batch size = 32
Saving model checkpoint to ./output_type/checkpoint-309
Configuration saved in ./output_type/checkpoint-309/config.json
Model weights saved in ./output_type/checkpoint-309/pytorch_model.bin
Deleting older checkpoint [output_type/checkpoint

loading weights file https://huggingface.co/klue/roberta-base/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/b204e0dc0a3b8fd45b35e7fcefd97c5f839b86c14aea510f1eb38fb8469e23d8.57d3cd0dfa80e5a249a776870dc87b6da993900685a271086750174009115320
Some weights of the model checkpoint at klue/roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenc

Step,Training Loss,Validation Loss,F1
103,0.3677,0.286223,0.898126
206,0.2447,0.263231,0.909311
309,0.1652,0.303199,0.904172
412,0.0888,0.369992,0.90266
515,0.045,0.426284,0.900846


***** Running Evaluation *****
  Num examples = 3308
  Batch size = 32
Saving model checkpoint to ./output_type/checkpoint-103
Configuration saved in ./output_type/checkpoint-103/config.json
Model weights saved in ./output_type/checkpoint-103/pytorch_model.bin
Deleting older checkpoint [output_type/checkpoint-206] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 3308
  Batch size = 32
Saving model checkpoint to ./output_type/checkpoint-206
Configuration saved in ./output_type/checkpoint-206/config.json
Model weights saved in ./output_type/checkpoint-206/pytorch_model.bin
Deleting older checkpoint [output_type/checkpoint-515] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 3308
  Batch size = 32
Saving model checkpoint to ./output_type/checkpoint-309
Configuration saved in ./output_type/checkpoint-309/config.json
Model weights saved in ./output_type/checkpoint-309/pytorch_model.bin
Deleting older checkpoint [output_type/checkpoint

In [9]:
# Turn the fold-averaged scores into one predicted class id per test row.
# NOTE(review): these are the raw argmax ids; how they map back to the
# original "시제" label strings depends on TimeDataset's encoding — confirm.
result = pd.DataFrame(logit.argmax(axis=1).tolist(), columns=["type"])

# Join the predictions to the test rows column-wise. Do not pass
# ignore_index=True here: with axis=1 it relabels the *columns* 0..n-1, which
# would discard both the test column names and the "type" header.
# Resetting the test index guarantees the rows line up by position.
test_df_type = pd.concat([test_df.reset_index(drop=True), result], axis=1)
test_df_type.to_csv("result_time.csv")