In [None]:
!pip3 install ray[tune] optuna

In [None]:
from datasets import load_dataset
from transformers import AutoModelForTokenClassification, AutoTokenizer

def preprocess_data(example, tokenizer):
    """Tokenize one example and attach one label per produced token.

    The entries of ``example["tokens"]`` are concatenated without a separator
    and every non-breaking space (``"\\xa0"``) is replaced by a regular space
    before the text is handed to ``tokenizer``. The tokenizer is asked for
    offset mappings, with no special tokens, no padding and no truncation.

    Labelling rule, per offset pair ``(start, end)``:
      * ``start == end`` (zero-width span) -> ``-100``
      * otherwise -> ``example["ner_tags"][start]``, i.e. the tag found at the
        token's first character position.

    Assumes ``example["ner_tags"]`` is indexed by character position of the
    joined text (one tag per character) -- TODO confirm against the dataset.

    Args:
        example: Mapping with ``"tokens"`` and ``"ner_tags"`` entries.
        tokenizer: Callable returning an object that exposes
            ``offset_mapping`` and supports item assignment.

    Returns:
        The tokenizer output with an added ``"labels"`` entry.
    """
    text = "".join(example["tokens"]).replace("\xa0", " ")
    encoded = tokenizer(
        text,
        return_offsets_mapping=True,
        add_special_tokens=False,
        padding=False,
        truncation=False,
    )

    # One label per token: zero-width spans get -100, every other token takes
    # the tag stored at its starting character index.
    encoded["labels"] = [
        -100 if start == end else example["ner_tags"][start]
        for start, end in encoded.offset_mapping
    ]
    return encoded

# Load the "ner" configuration of the "klue" dataset and read the tag names
# from the feature definition of the "ner_tags" column of the train split.
dataset = load_dataset("klue", "ner")
labels = dataset["train"].features["ner_tags"].feature.names

# Checkpoint that supplies both the tokenizer and the initial model weights.
model_name = "Leo97/KoELECTRA-small-v3-modu-ner"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# The classification head is sized to this dataset's label count;
# ignore_mismatched_sizes=True is passed so loading can proceed when the
# checkpoint's head has a different size.
# NOTE(review): the Trainer configured later in this notebook builds its models
# through `model_init`, so this instance does not appear to be used by the
# search cells -- confirm before removing.
model = AutoModelForTokenClassification.from_pretrained(
    model_name,
    num_labels=len(labels),
    ignore_mismatched_sizes=True
)

# Run preprocess_data on every example, one at a time (batched=False), and drop
# the columns of the train split so only the tokenizer output (plus "labels")
# remains.
processed_dataset = dataset.map(
    lambda example: preprocess_data(example, tokenizer),
    batched=False,
    remove_columns=dataset["train"].column_names
)

In [None]:
from ray import tune
from functools import partial
from transformers import Trainer, TrainingArguments
from transformers.data.data_collator import DataCollatorForTokenClassification

def model_init(model_name, labels):
    """Load a token-classification model for the given checkpoint.

    Args:
        model_name: Checkpoint identifier forwarded to ``from_pretrained``.
        labels: Sized collection of label names; only its length is used, as
            the ``num_labels`` of the classification head.

    Returns:
        The object returned by
        ``AutoModelForTokenClassification.from_pretrained``.
    """
    label_count = len(labels)
    # ignore_mismatched_sizes=True is forwarded so a checkpoint whose head size
    # differs from `label_count` can still be loaded.
    fresh_model = AutoModelForTokenClassification.from_pretrained(
        model_name,
        num_labels=label_count,
        ignore_mismatched_sizes=True,
    )
    return fresh_model

def hp_space(trial):
    """Describe the hyperparameter search space with Ray Tune samplers.

    Args:
        trial: Accepted to match the callback signature; not read here.

    Returns:
        A dict with three entries:
          * ``"learning_rate"``: ``tune.loguniform(1e-5, 1e-4)``
          * ``"weight_decay"``: ``tune.loguniform(1e-5, 1e-1)``
          * ``"num_train_epochs"``: ``tune.choice([1, 2, 3])``
    """
    learning_rate = tune.loguniform(1e-5, 1e-4)
    weight_decay = tune.loguniform(1e-5, 1e-1)
    epoch_options = tune.choice([1, 2, 3])

    search_space = {
        "learning_rate": learning_rate,
        "weight_decay": weight_decay,
        "num_train_epochs": epoch_options,
    }
    return search_space

def compute_objective(metrics):
    """Pick the value the hyperparameter search should optimize.

    Args:
        metrics: Mapping of metric names to values; must contain
            ``"eval_loss"``.

    Returns:
        The value stored under ``"eval_loss"``.

    Raises:
        KeyError: If ``"eval_loss"`` is missing from ``metrics``.
    """
    objective = metrics["eval_loss"]
    return objective

# Base training configuration shared by every trial: evaluation strategy
# "epoch", batch size 32 per device for both training and evaluation, and a
# fixed seed.
training_args = TrainingArguments(
    output_dir="token-classification-hyperparameter-search",
    evaluation_strategy="epoch",
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    # The three settings below are left disabled here; hp_space defines
    # entries with the same names for the search to sample.
    # learning_rate=1e-4,
    # weight_decay=0.01,
    # num_train_epochs=5,
    seed=42
)

# The Trainer is given a model factory (model_init) rather than a model
# instance; partial() binds the checkpoint name and label list so the factory
# can be called without arguments.
trainer = Trainer(
    model_init=partial(model_init, model_name=model_name, labels=labels),
    args=training_args,
    train_dataset=processed_dataset["train"],
    eval_dataset=processed_dataset["validation"],
    data_collator=DataCollatorForTokenClassification(tokenizer=tokenizer, padding=True)
)

# Search with the Ray backend: 5 trials drawn from hp_space, minimizing the
# value returned by compute_objective (the "eval_loss" metric). Each trial
# requests 2 CPUs and 1 GPU.
# NOTE(review): trial_dirname_creator names each trial directory str(trial);
# presumably this overrides a longer default directory name -- confirm.
best_run = trainer.hyperparameter_search(
    backend="ray",
    n_trials=5,
    direction="minimize",
    hp_space=hp_space,
    compute_objective=compute_objective,
    resources_per_trial={"cpu": 2, "gpu": 1},
    trial_dirname_creator=lambda trial: str(trial)
)
# Show the hyperparameters of the run returned by the search.
print(best_run.hyperparameters)

In [None]:
from ray.tune.schedulers import ASHAScheduler
from ray.tune.search.optuna import OptunaSearch

# Second search on the same trainer and hp_space, this time passing an explicit
# search algorithm (OptunaSearch) and trial scheduler (ASHAScheduler). Both are
# configured on the "eval_loss" metric in "min" mode.
# NOTE(review): unlike the earlier search call, `direction` and
# `compute_objective` are not passed here, so hyperparameter_search falls back
# to its own defaults for them -- confirm that matches the intent.
best_run = trainer.hyperparameter_search(
    backend="ray",
    n_trials=5,
    hp_space=hp_space,
    resources_per_trial={"cpu": 2, "gpu": 1},
    trial_dirname_creator=lambda trial: str(trial),
    search_alg=OptunaSearch(
        metric="eval_loss",
        mode="min"
    ),
    # ASHA settings: max_t=1000, grace_period=1, reduction_factor=2.
    scheduler=ASHAScheduler(
        metric="eval_loss",
        mode="min",
        max_t=1000,
        grace_period=1,
        reduction_factor=2
    )
)
# Show the hyperparameters of the run returned by this search; note that this
# rebinds `best_run` from the previous search cell.
print(best_run.hyperparameters)