In [1]:
import pandas as pd
import numpy as np
import torch
import os

from transformers import AutoTokenizer, AutoConfig, TrainingArguments, Trainer, AutoModelForSequenceClassification
from mkdataset import TimeDataset, TestDataset
from datasets import load_metric, load_dataset
from sklearn.model_selection import StratifiedKFold
from utils import set_allseed
import warnings
import pickle

In [2]:
seed = 777
batch_size = 8
save_steps = 103
set_allseed(seed)
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]= "0"
gpu = torch.device("cuda")
cpu = torch.device("cpu")
warnings.filterwarnings(action='ignore') 
model_name = ""

In [3]:
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

In [4]:
kfold_function = StratifiedKFold(n_splits=5, random_state=seed, shuffle=True)
tokenizer = AutoTokenizer.from_pretrained(model_name, max_length=512)
config = AutoConfig.from_pretrained(model_name)
config.num_labels = len(train_df.시제.value_counts())

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [5]:
def compute_metrics(pred):
    f1 = load_metric("f1")
    references = pred.label_ids
    predictions = pred.predictions.argmax(axis=1)
    metric = f1.compute(predictions=predictions, references=references, average="micro")
    return metric

In [6]:
training_args = TrainingArguments(
    output_dir="./output_time",
    seed=seed,
    save_total_limit=2,
    save_steps = save_steps,
    num_train_epochs = 5,
    learning_rate= 1e-4,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=16,
    weight_decay=1e-4,
    logging_dir="./logs",
    logging_steps = save_steps,
    evaluation_strategy = "steps",
    eval_steps = save_steps,
    load_best_model_at_end=True,
)
test_dataset = TestDataset(data=test_df, tokenizer=tokenizer)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [7]:
logit = 0
for i, (train_index, test_index) in enumerate(kfold_function.split(train_df["문장"],train_df["시제"])):
    model = AutoModelForSequenceClassification.from_pretrained(model_name, config=config)
    train_corpus, valid_corpus = train_df["문장"][train_index], train_df["문장"][test_index]
    train_label, valid_label = train_df["시제"][train_index], train_df["시제"][test_index]
    fold_train = pd.concat([train_corpus, train_label], axis =1)
    fold_valid = pd.concat([valid_corpus, valid_label], axis =1)
    train_dataset = TimeDataset(data=fold_train, tokenizer=tokenizer)
    valid_dataset = TimeDataset(data=fold_valid, tokenizer=tokenizer)

    trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    compute_metrics=compute_metrics,
    )

    trainer.train()

    logit += trainer.predict(test_dataset).predictions / 5

Some weights of the model checkpoint at lighthouse/mdeberta-v3-base-kor-further were not used when initializing DebertaV2ForSequenceClassification: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.dense.weight']
- This IS expected if you are initializing DebertaV2ForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2ForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at lighthouse/mdeberta-v3-base-kor-further and are newly in

Step,Training Loss,Validation Loss,F1
103,0.4187,0.307095,0.896646
206,0.2396,0.295873,0.89997
309,0.1664,0.323701,0.895437
412,0.0983,0.37571,0.894832
515,0.0524,0.443988,0.890601


***** Running Evaluation *****
  Num examples = 3309
  Batch size = 8
Saving model checkpoint to ./output_time/checkpoint-103
Configuration saved in ./output_time/checkpoint-103/config.json
Model weights saved in ./output_time/checkpoint-103/pytorch_model.bin
Deleting older checkpoint [output_time/checkpoint-206] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 3309
  Batch size = 8
Saving model checkpoint to ./output_time/checkpoint-206
Configuration saved in ./output_time/checkpoint-206/config.json
Model weights saved in ./output_time/checkpoint-206/pytorch_model.bin
Deleting older checkpoint [output_time/checkpoint-515] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 3309
  Batch size = 8
Saving model checkpoint to ./output_time/checkpoint-309
Configuration saved in ./output_time/checkpoint-309/config.json
Model weights saved in ./output_time/checkpoint-309/pytorch_model.bin
Deleting older checkpoint [output_time/checkpoint-10

loading weights file https://huggingface.co/lighthouse/mdeberta-v3-base-kor-further/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/bde5a73d669f751bac22c948d08a6feee4c3d647ddc7b449edec12361db93800.79c014a771591b09f61daa0cf7bb9a02aa0b4a738335812b42e75a2692ce7919
Some weights of the model checkpoint at lighthouse/mdeberta-v3-base-kor-further were not used when initializing DebertaV2ForSequenceClassification: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.dense.weight']
- This IS expected if you are initializing DebertaV2ForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2ForSequenceClassification from the checkpoint of a mod

Step,Training Loss,Validation Loss,F1
103,0.3992,0.294358,0.890871
206,0.2475,0.279197,0.903869
309,0.1727,0.290087,0.901451
412,0.1022,0.365171,0.900242
515,0.0579,0.394688,0.900846


***** Running Evaluation *****
  Num examples = 3308
  Batch size = 8
Saving model checkpoint to ./output_time/checkpoint-103
Configuration saved in ./output_time/checkpoint-103/config.json
Model weights saved in ./output_time/checkpoint-103/pytorch_model.bin
Deleting older checkpoint [output_time/checkpoint-206] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 3308
  Batch size = 8
Saving model checkpoint to ./output_time/checkpoint-206
Configuration saved in ./output_time/checkpoint-206/config.json
Model weights saved in ./output_time/checkpoint-206/pytorch_model.bin
Deleting older checkpoint [output_time/checkpoint-515] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 3308
  Batch size = 8
Saving model checkpoint to ./output_time/checkpoint-309
Configuration saved in ./output_time/checkpoint-309/config.json
Model weights saved in ./output_time/checkpoint-309/pytorch_model.bin
Deleting older checkpoint [output_time/checkpoint-10

loading weights file https://huggingface.co/lighthouse/mdeberta-v3-base-kor-further/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/bde5a73d669f751bac22c948d08a6feee4c3d647ddc7b449edec12361db93800.79c014a771591b09f61daa0cf7bb9a02aa0b4a738335812b42e75a2692ce7919
Some weights of the model checkpoint at lighthouse/mdeberta-v3-base-kor-further were not used when initializing DebertaV2ForSequenceClassification: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.dense.weight']
- This IS expected if you are initializing DebertaV2ForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2ForSequenceClassification from the checkpoint of a mod

Step,Training Loss,Validation Loss,F1
103,0.4216,0.283256,0.905683
206,0.2569,0.246897,0.91052
309,0.1862,0.272413,0.914148
412,0.107,0.343206,0.902963
515,0.0587,0.38009,0.903869


***** Running Evaluation *****
  Num examples = 3308
  Batch size = 8
Saving model checkpoint to ./output_time/checkpoint-103
Configuration saved in ./output_time/checkpoint-103/config.json
Model weights saved in ./output_time/checkpoint-103/pytorch_model.bin
Deleting older checkpoint [output_time/checkpoint-206] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 3308
  Batch size = 8
Saving model checkpoint to ./output_time/checkpoint-206
Configuration saved in ./output_time/checkpoint-206/config.json
Model weights saved in ./output_time/checkpoint-206/pytorch_model.bin
Deleting older checkpoint [output_time/checkpoint-515] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 3308
  Batch size = 8
Saving model checkpoint to ./output_time/checkpoint-309
Configuration saved in ./output_time/checkpoint-309/config.json
Model weights saved in ./output_time/checkpoint-309/pytorch_model.bin
Deleting older checkpoint [output_time/checkpoint-10

loading weights file https://huggingface.co/lighthouse/mdeberta-v3-base-kor-further/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/bde5a73d669f751bac22c948d08a6feee4c3d647ddc7b449edec12361db93800.79c014a771591b09f61daa0cf7bb9a02aa0b4a738335812b42e75a2692ce7919
Some weights of the model checkpoint at lighthouse/mdeberta-v3-base-kor-further were not used when initializing DebertaV2ForSequenceClassification: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.dense.weight']
- This IS expected if you are initializing DebertaV2ForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2ForSequenceClassification from the checkpoint of a mod

Step,Training Loss,Validation Loss,F1
103,0.4141,0.331967,0.875453
206,0.2531,0.288311,0.89208
309,0.1798,0.321282,0.889964
412,0.1143,0.389436,0.887848
515,0.0674,0.422813,0.891475


***** Running Evaluation *****
  Num examples = 3308
  Batch size = 8
Saving model checkpoint to ./output_time/checkpoint-103
Configuration saved in ./output_time/checkpoint-103/config.json
Model weights saved in ./output_time/checkpoint-103/pytorch_model.bin
Deleting older checkpoint [output_time/checkpoint-206] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 3308
  Batch size = 8
Saving model checkpoint to ./output_time/checkpoint-206
Configuration saved in ./output_time/checkpoint-206/config.json
Model weights saved in ./output_time/checkpoint-206/pytorch_model.bin
Deleting older checkpoint [output_time/checkpoint-515] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 3308
  Batch size = 8
Saving model checkpoint to ./output_time/checkpoint-309
Configuration saved in ./output_time/checkpoint-309/config.json
Model weights saved in ./output_time/checkpoint-309/pytorch_model.bin
Deleting older checkpoint [output_time/checkpoint-10

loading weights file https://huggingface.co/lighthouse/mdeberta-v3-base-kor-further/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/bde5a73d669f751bac22c948d08a6feee4c3d647ddc7b449edec12361db93800.79c014a771591b09f61daa0cf7bb9a02aa0b4a738335812b42e75a2692ce7919
Some weights of the model checkpoint at lighthouse/mdeberta-v3-base-kor-further were not used when initializing DebertaV2ForSequenceClassification: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.dense.weight']
- This IS expected if you are initializing DebertaV2ForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2ForSequenceClassification from the checkpoint of a mod

Step,Training Loss,Validation Loss,F1
103,0.3905,0.280245,0.897219
206,0.2372,0.250303,0.911427
309,0.1631,0.2954,0.902358
412,0.0938,0.33612,0.904172
515,0.0522,0.376516,0.903265


***** Running Evaluation *****
  Num examples = 3308
  Batch size = 8
Saving model checkpoint to ./output_time/checkpoint-103
Configuration saved in ./output_time/checkpoint-103/config.json
Model weights saved in ./output_time/checkpoint-103/pytorch_model.bin
Deleting older checkpoint [output_time/checkpoint-206] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 3308
  Batch size = 8
Saving model checkpoint to ./output_time/checkpoint-206
Configuration saved in ./output_time/checkpoint-206/config.json
Model weights saved in ./output_time/checkpoint-206/pytorch_model.bin
Deleting older checkpoint [output_time/checkpoint-515] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 3308
  Batch size = 8
Saving model checkpoint to ./output_time/checkpoint-309
Configuration saved in ./output_time/checkpoint-309/config.json
Model weights saved in ./output_time/checkpoint-309/pytorch_model.bin
Deleting older checkpoint [output_time/checkpoint-10

In [8]:
with open("time_logit_de.pickle","wb") as f:
    pickle.dump(logit, f)

In [12]:
with open("time_logit_de.pickle","rb") as f:
    logit = pickle.load(f)

with open("time_logit_e.pickle","rb") as f:
    logit2 = pickle.load(f)
    
logit += logit2
logit /= 2
    
result = pd.DataFrame(logit.argmax(axis=1).tolist(), columns=["type"])
test_df_result = pd.concat([test_df,result],axis=1, ignore_index=True)
test_df_result.to_csv("result_time.csv")