In [1]:
######## Customize Train Strategy ########

num_cpus = 16
num_gpus = 1

seed = 1234

model_name = "xlm-roberta-base" # bert-base-multilingual-cased ; klue/roberta-base ; bert-base-cased ...

train_proportion = 1.0 # 0.2, 0.4 ...

n_trials = 5
std=0.2
patience=1

# Prerequisites

- Host OS: Ubuntu 20.04 lts
- Using Docker Image 'mltooling/ml-workspace-gpu' (docker pull mltooling/ml-workspace-gpu)
- Single Nvidia GPU (RTX 3080)

# 0. GPU check

In [2]:
import torch

if torch.cuda.is_available():
    device = torch.device("cuda:0")
    device_count = torch.cuda.device_count()
    print("device_count: {}".format(device_count))
    for device_num in range(device_count):
        print("device {} capability {}".format(
            device_num,
            torch.cuda.get_device_capability(device_num)))
        print("device {} name {}".format(
            device_num, 
            torch.cuda.get_device_name(device_num)))
else:
    device = torch.device("cpu")
    print("no cuda device")

device_count: 1
device 0 capability (8, 6)
device 0 name NVIDIA GeForce RTX 3080


# 1. Import packages

In [3]:
## Need to check if packages are compatible ##

# !pip install accelerate nvidia-ml-py3
# !pip install datasets==2.4.0
# !pip install huggingface_hub==0.9.1
# !pip install transformers==4.22.1 
# !pip install pyarrow==9.0.0
# !pip install -q ray

In [4]:
import transformers
import datasets
import huggingface_hub
import pyarrow

print(transformers.__version__)
print(datasets.__version__)
print(huggingface_hub.__version__)
print(pyarrow.__version__)

# 4.22.1
# 2.4.0
# 0.9.1
# 9.0.0

4.22.1
2.4.0
0.9.1
9.0.0


In [5]:
import os
import re
import math
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 'You can use tf32' if you are acessing Ampere hardware
import torch
torch.backends.cuda.matmul.allow_tf32 = True

from datasets import load_dataset, load_metric, ClassLabel
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, precision_score, recall_score, f1_score

from functools import partial

import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.examples.pbt_transformers.utils import (
    download_data,
    build_compute_metrics_fn,
)
from ray.tune.schedulers import PopulationBasedTraining, ASHAScheduler
from transformers import (
    glue_tasks_num_labels,
    AdamW,
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    GlueDataset,
    GlueDataTrainingArguments,
    TrainingArguments,
    EarlyStoppingCallback
)

# 2. Import Data

In [6]:
data_name = "dummy_data" 

dataset = load_dataset('csv', data_files={'train': f'../data_splited/{data_name}_train.csv',
                                          'test': f'../data_splited/{data_name}_test.csv'})
dataset

Using custom data configuration default-eb5690d969448b40
Reusing dataset csv (/root/.cache/huggingface/datasets/csv/default-eb5690d969448b40/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a)


  0%|          | 0/2 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'past_history', 'treatment_effect', 'examination', 'label'],
        num_rows: 3756
    })
    test: Dataset({
        features: ['id', 'past_history', 'treatment_effect', 'examination', 'label'],
        num_rows: 940
    })
})

# 3. Data Preprocessing

In [7]:
# dataset = dataset.remove_columns(['id', 'treatment_effect', 'examination'])
# dataset = dataset.rename_column("past_history", "text")

dataset = dataset.remove_columns(['id', 'examination', 'past_history'])
dataset = dataset.rename_column("treatment_effect", "text")

# dataset = dataset.remove_columns(['id', 'treatment_effect', 'past_history'])
# dataset = dataset.rename_column("examination", "text")
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 3756
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 940
    })
})

In [8]:
# remove NA rows

dataset = dataset.filter(lambda row: pd.notnull(row["text"]))

## remove specal characters

def remove_sp(example):
    example["text"]=re.sub(r'[^a-z|A-Z|0-9|ㄱ-ㅎ|ㅏ-ㅣ|가-힣| ]+', '', str(example["text"]))
    return example

dataset = dataset.map(remove_sp)

dataset

Loading cached processed dataset at /root/.cache/huggingface/datasets/csv/default-eb5690d969448b40/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-2286abe26c3e708d.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/csv/default-eb5690d969448b40/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-931dab6ac37501d3.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/csv/default-eb5690d969448b40/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-7f9b68d4305ec8e5.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/csv/default-eb5690d969448b40/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-6b3d8562d6df9dae.arrow


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 3756
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 940
    })
})

# 4.Modeling

In [9]:
######## Initialize Ray ########

ray.shutdown()
ray.init(log_to_driver=False, ignore_reinit_error=True, num_cpus=num_cpus, num_gpus=num_gpus, include_dashboard=False)

######## Tokenizing ########

tokenizer = AutoTokenizer.from_pretrained(model_name, truncation_side = 'left') # truncation_side = 'left' option remains last 512 tokens

def tokenize_function(examples):
    tokenized_batch = tokenizer(examples["text"], padding="max_length", truncation=True) # padding : ['longest', 'max_length', 'do_not_pad']
    return tokenized_batch

tokenized_datasets = dataset.map(tokenize_function, batched=True)

######## Train-Evalulation-Test Split ########

train_dataset = tokenized_datasets["train"].shuffle(seed=seed).select(range(0,math.floor(len(tokenized_datasets["train"])*0.7*train_proportion)))
eval_dataset = tokenized_datasets["train"].shuffle(seed=seed).select(range(math.floor(len(tokenized_datasets["train"])*0.7), len(tokenized_datasets["train"])))
test_dataset = tokenized_datasets["test"]

######## Applying Class Weights ########

def class_weight(train_dataset) :
    
    train_labels = np.array(train_dataset["label"])
    class_weights = compute_class_weight(class_weight = 'balanced', classes = np.unique(train_labels), y = train_labels)
    
    weights = torch.tensor(class_weights, dtype = torch.float)
    
    return weights

weights = class_weight(train_dataset)
print(f"Class Weights: {weights}")

2022-11-02 01:25:09,362	INFO worker.py:1518 -- Started a local Ray instance.
Loading cached processed dataset at /root/.cache/huggingface/datasets/csv/default-eb5690d969448b40/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-fe59fed5bbfaa74f.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/csv/default-eb5690d969448b40/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-8b597b77835b0174.arrow
Loading cached shuffled indices for dataset at /root/.cache/huggingface/datasets/csv/default-eb5690d969448b40/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-754c204a36be6102.arrow
Loading cached shuffled indices for dataset at /root/.cache/huggingface/datasets/csv/default-eb5690d969448b40/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-754c204a36be6102.arrow


Class Weights: tensor([0.6868, 1.8385])


In [10]:
######## Modeling ########

def model_init():
    return AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=2
        )

def compute_metrics(eval_pred):
    metric1 = load_metric("accuracy")
    metric2 = load_metric("f1")
    
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    accuracy = metric1.compute(predictions=predictions, references=labels)["accuracy"]
    f1 = metric2.compute(predictions=predictions, references=labels)["f1"]
    return {"accuracy": accuracy, "f1": f1, "objective": accuracy+f1}

training_args = TrainingArguments(
    output_dir=".",
    learning_rate=2e-5, # config
    do_train=True,
    do_eval=True,
    no_cuda=num_gpus <= 0,
    evaluation_strategy="steps",
    save_strategy="steps",
    eval_steps = 50,
    metric_for_best_model="objective", 
    greater_is_better=True,
    load_best_model_at_end=True,
    num_train_epochs=2,  # config
    max_steps=-1,
    per_device_train_batch_size=8,  # config
    per_device_eval_batch_size=8,
    warmup_steps=0,
    warmup_ratio=0.1,  # config
    weight_decay=0.1,  # config
    logging_dir="./logs",
    skip_memory_metrics=True,
    report_to="none",
    fp16=True,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    seed=seed  # config
    )

class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # compute custom loss
        weight = weights.to(device)
        loss_fct = torch.nn.CrossEntropyLoss(weight=weight)
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss
    
trainer = CustomTrainer(
    model_init=model_init,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    )

tune_config = {
    "per_device_eval_batch_size": 8,
    "per_device_train_batch_size": 8,
    "max_steps": -1
}

scheduler = PopulationBasedTraining(
    time_attr="training_iteration",
    metric="objective",
    mode="max",
    perturbation_interval=1,
    hyperparam_mutations={
        "num_train_epochs": tune.randint(2, 20),
#         "per_device_train_batch_size": tune.choice([4, 8]),
        "weight_decay": tune.uniform(0.0, 0.3),
        "learning_rate": tune.uniform(1e-5, 5e-5),
        "warmup_ratio": tune.uniform(0.0, 0.3),
        "adam_beta1": tune.loguniform(1e-2, 1),
        "adam_beta2": tune.loguniform(1e-3, 1),
        "adam_epsilon": tune.loguniform(1e-8, 1e-5),
    }, 
)

reporter = CLIReporter(
    parameter_columns={
        "weight_decay": "w_decay",
        "learning_rate": "lr",
        "per_device_train_batch_size": "train_bs/gpu",
        "num_train_epochs": "num_epochs",
    },
    metric_columns=["eval_f1", "eval_accuracy", "eval_objective", "eval_loss", "epoch", "training_iteration"]
)

model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                           num_labels = 2,
                                                           output_attentions = False,
                                                           output_hidden_states = False)

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/f6d161e8f5f6f2ed433fb4023d6cb34146506b3f/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/f6d

In [11]:
stopper = tune.stopper.ExperimentPlateauStopper(metric="objective", 
                                                std=std,
                                                top=n_trials,
                                                mode="max",
                                                patience=patience
                                                )

result = trainer.hyperparameter_search(
    hp_space = lambda _: tune_config,
    direction = "maximize",
    backend="ray",
    reuse_actors = True,
    n_trials=n_trials,
    resources_per_trial={"cpu": num_cpus, "gpu": num_gpus},
    scheduler=scheduler,
    keep_checkpoints_num=1,
    checkpoint_score_attr="training_iteration",
    stop=stopper,
    progress_reporter=reporter,
    local_dir="./test-results",
    name="tune_transformer_pbt",
    log_to_file=True,
)


from ray.air import session

def train(config):
    # ...
    session.report({"metric": metric}, checkpoint=checkpoint)

For more information please see https://docs.ray.io/en/master/ray-air/key-concepts.html#session



== Status ==
Current time: 2022-11-02 01:25:25 (running for 00:00:00.17)
Memory usage on this node: 10.7/31.1 GiB
PopulationBasedTraining: 0 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/16.88 GiB heap, 0.0/8.44 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/BERT_classification_binary/test-results/tune_transformer_pbt
Number of trials: 5/5 (4 PENDING, 1 RUNNING)
+------------------------+----------+-------------------+-----------+-------------+----------------+--------------+
| Trial name             | status   | loc               |   w_decay |          lr |   train_bs/gpu |   num_epochs |
|------------------------+----------+-------------------+-----------+-------------+----------------+--------------|
| _objective_3a387_00000 | RUNNING  | 172.17.0.3:735922 | 0.149299  | 4.27135e-05 |              8 |           17 |
| _objective_3a387_00001 | PENDING  |                   | 0.244549  | 1.63526e-05 |              8 |           13 |
| 

2022-11-02 01:35:08,676	INFO pbt.py:618 -- [exploit] transferring weights from trial _objective_3a387_00002 (score 3.2284485700634766) -> _objective_3a387_00003 (score 2.0470236319323627)
2022-11-02 01:35:08,677	INFO pbt.py:636 -- [explore] perturbed config from {'num_train_epochs': 17, 'weight_decay': 0.016798034689761297, 'learning_rate': 2.7706505958368913e-05, 'warmup_ratio': 0.006643174080908176, 'adam_beta1': 0.038146711647204984, 'adam_beta2': 0.005485084592226852, 'adam_epsilon': 1.6400621429618284e-06} -> {'num_train_epochs': 14, 'weight_decay': 0.21137439245687176, 'learning_rate': 3.3247807150042694e-05, 'warmup_ratio': 0.06563763170222657, 'adam_beta1': 0.030517369317763988, 'adam_beta2': 0.004388067673781482, 'adam_epsilon': 1.3120497143694629e-06}


Result for _objective_3a387_00003:
  date: 2022-11-02_01-35-08
  done: false
  episodes_total: 0
  epoch: 1.22
  eval_accuracy: 0.7178349600709849
  eval_f1: 0.30567685589519655
  eval_loss: 0.6620995998382568
  eval_objective: 1.0235118159661813
  eval_runtime: 7.5423
  eval_samples_per_second: 149.424
  eval_steps_per_second: 18.695
  experiment_id: 5cf3e9a67ae145ab887bd698148955e6
  hostname: 3481a8a2ae33
  iterations_since_restore: 2
  node_ip: 172.17.0.3
  objective: 2.0470236319323627
  pid: 735922
  time_since_restore: 85.61292338371277
  time_this_iter_s: 41.357298135757446
  time_total_s: 129.55094981193542
  timestamp: 1667352908
  timesteps_since_restore: 0
  timesteps_total: 0
  training_iteration: 2
  trial_id: 3a387_00003
  warmup_time: 0.003602743148803711
  
== Status ==
Current time: 2022-11-02 01:35:13 (running for 00:09:48.34)
Memory usage on this node: 15.4/31.1 GiB
PopulationBasedTraining: 6 checkpoints, 1 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0

2022-11-02 01:38:44,100	INFO tune.py:758 -- Total run time: 798.77 seconds (798.46 seconds for the tuning loop).


In [12]:
result

BestRun(run_id='3a387_00000', objective=3.3184140162510274, hyperparameters={'per_device_eval_batch_size': 8, 'per_device_train_batch_size': 8, 'max_steps': -1, 'num_train_epochs': 17, 'weight_decay': 0.14929909994179477, 'learning_rate': 4.271353764466278e-05, 'warmup_ratio': 0.18363356840318124, 'adam_beta1': 0.34891515857331573, 'adam_beta2': 0.3819524623277516, 'adam_epsilon': 2.8308111453227584e-08})

In [13]:
for n, v in result.hyperparameters.items():
    setattr(trainer.args, n, v)

In [14]:
trainer.args

TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.34891515857331573,
adam_beta2=0.3819524623277516,
adam_epsilon=2.8308111453227584e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=True,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=50,
evaluation_strategy=steps,
fp16=True,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=4,
gradient_checkpointing=True,
greater_is_better=True,
group_by_length=False,
half_precision_backend=cuda_amp,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=every_save,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
include_inputs_fo

In [16]:
trainer.train()

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/f6d161e8f5f6f2ed433fb4023d6cb34146506b3f/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/f6d

Step,Training Loss,Validation Loss,Accuracy,F1,Objective
50,No log,0.686256,0.72937,0.0,0.72937
100,No log,0.666651,0.548358,0.520264,1.068622
150,No log,0.402793,0.881988,0.777219,1.659207
200,No log,0.391419,0.846495,0.759388,1.605883
250,No log,0.393108,0.847382,0.741742,1.589124
300,No log,0.36498,0.897072,0.815287,1.712358
350,No log,0.334696,0.892635,0.813559,1.706195
400,No log,0.375227,0.873114,0.77551,1.648625
450,No log,0.395726,0.899734,0.8192,1.718934
500,0.424800,0.352348,0.913043,0.84345,1.756494


The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1127
  Batch size = 8
  nn.utils.clip_grad_norm_(
The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1127
  Batch size = 8
  nn.utils.clip_grad_norm_(
The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this mes

TrainOutput(global_step=1394, training_loss=0.23201608999900872, metrics={'train_runtime': 1170.7029, 'train_samples_per_second': 38.176, 'train_steps_per_second': 1.191, 'total_flos': 1.175790684192768e+16, 'train_loss': 0.23201608999900872, 'epoch': 17.0})

In [17]:
trainer.evaluate()

The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1127
  Batch size = 8


{'eval_loss': 0.5296093821525574,
 'eval_accuracy': 0.9254658385093167,
 'eval_f1': 0.8662420382165604,
 'eval_objective': 1.7917078767258772,
 'eval_runtime': 8.6017,
 'eval_samples_per_second': 131.02,
 'eval_steps_per_second': 16.392,
 'epoch': 17.0}

In [18]:
pred = trainer.predict(test_dataset=test_dataset)
pred

The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 940
  Batch size = 8


PredictionOutput(predictions=array([[-3.553,  3.705],
       [ 4.07 , -4.164],
       [ 4.035, -4.086],
       ...,
       [ 3.875, -3.883],
       [-3.5  ,  3.658],
       [-3.602,  3.77 ]], dtype=float16), label_ids=array([1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
       1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1,
       0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,
       1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 

In [19]:
label_test = list(pred.label_ids)
pred_test = list(map(lambda x: x.index(max(x)), pred.predictions.tolist()))

In [20]:
print(confusion_matrix(label_test, pred_test))

[[632  31]
 [ 21 256]]


In [21]:
accuracy = accuracy_score(label_test, pred_test)
f1 = f1_score(label_test, pred_test)
recall = recall_score(label_test, pred_test)
precision = precision_score(label_test, pred_test)

print(accuracy)
print(f1)
print(recall)
print(precision)

0.9446808510638298
0.9078014184397164
0.924187725631769
0.89198606271777


In [24]:
# model_path = f"sev_past_{train_proportion}"
# trainer.model.save_pretrained(model_path)
# tokenizer.save_pretrained(model_path)

# Reference

https://bo-10000.tistory.com/154  
https://huggingface.co/blog/ray-tune  
https://docs.ray.io/en/latest/tune/examples/pbt_transformers.html  
https://wood-b.github.io/post/a-novices-guide-to-hyperparameter-optimization-at-scale/#schedulers-vs-search-algorithms  
https://docs.ray.io/en/latest/tune/api_docs/search_space.html  
https://docs.ray.io/en/latest/tune/tutorials/tune-advanced-tutorial.html  
https://keras.io/examples/keras_recipes/sample_size_estimate/  
https://www.topbots.com/fine-tune-transformers-in-pytorch/  
https://docs.ray.io/en/latest/tune/api_docs/schedulers.html  
https://blog.ml.cmu.edu/2018/12/12/massively-parallel-hyperparameter-optimization/  
https://docs.ray.io/en/latest/tune/faq.html  
https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#population-based-training-tune-schedulers-populationbasedtraining  
https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer.hyperparameter_search  
https://docs.ray.io/en/latest/tune/api_docs/suggestion.html#optuna-tune-search-optuna-optunasearch  
https://kyunghyunlim.github.io/nlp/ml_ai/2021/09/22/hugging_face_5.html  

# Future Challenges
 - step이 늘어나면서 성능이 어떻게 좋아지는지, hp조합에 따라 어떻게 좋아지는지 시각화 추가