In [1]:
######## Customize Train Strategy ########

num_cpus = 12
num_gpus = 1
seed = 1234

model_name = "xlm-roberta-base" # bert-base-multilingual-cased ; klue/roberta-base ; bert-base-cased ...

train_proportion = 1.0

gpus_per_trial = 1
cpus_per_trial = 12
n_trials = 5

# Prerequisites

- Host OS: Ubuntu 20.04 lts
- Using Docker Image 'mltooling/ml-workspace-gpu' (docker pull mltooling/ml-workspace-gpu)
- Single Nvidia GPU (RTX 3080)

# 0. GPU check

In [2]:
import torch

if torch.cuda.is_available():
    device = torch.device("cuda:0")
    device_count = torch.cuda.device_count()
    print("device_count: {}".format(device_count))
    for device_num in range(device_count):
        print("device {} capability {}".format(
            device_num,
            torch.cuda.get_device_capability(device_num)))
        print("device {} name {}".format(
            device_num, 
            torch.cuda.get_device_name(device_num)))
else:
    device = torch.device("cpu")
    print("no cuda device")

device_count: 1
device 0 capability (6, 0)
device 0 name Tesla P100-PCIE-16GB


# 1. Import packages

In [3]:
## Need to check if packages are compatible ##

!pip install -q accelerate nvidia-ml-py3
!pip install -q datasets==2.4.0
!pip install -q huggingface_hub==0.9.1
!pip install -q transformers==4.22.1 
!pip install -q pyarrow==9.0.0
!pip install -q ray

[?25l[K     |██▏                             | 10 kB 18.2 MB/s eta 0:00:01[K     |████▍                           | 20 kB 5.7 MB/s eta 0:00:01[K     |██████▋                         | 30 kB 8.1 MB/s eta 0:00:01[K     |████████▉                       | 40 kB 4.4 MB/s eta 0:00:01[K     |███████████                     | 51 kB 4.3 MB/s eta 0:00:01[K     |█████████████▏                  | 61 kB 5.0 MB/s eta 0:00:01[K     |███████████████▍                | 71 kB 5.4 MB/s eta 0:00:01[K     |█████████████████▋              | 81 kB 6.0 MB/s eta 0:00:01[K     |███████████████████▉            | 92 kB 6.3 MB/s eta 0:00:01[K     |██████████████████████          | 102 kB 5.2 MB/s eta 0:00:01[K     |████████████████████████▎       | 112 kB 5.2 MB/s eta 0:00:01[K     |██████████████████████████▍     | 122 kB 5.2 MB/s eta 0:00:01[K     |████████████████████████████▋   | 133 kB 5.2 MB/s eta 0:00:01[K     |██████████████████████████████▉ | 143 kB 5.2 MB/s eta 0:00:01[K    

In [4]:
import transformers
import datasets
import huggingface_hub
import pyarrow

print(transformers.__version__)
print(datasets.__version__)
print(huggingface_hub.__version__)
print(pyarrow.__version__)

# 4.22.1
# 2.4.0
# 0.9.1
# 9.0.0

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


Moving 0 files to the new cache system


0it [00:00, ?it/s]

4.22.1
2.4.0
0.9.1
9.0.0


In [5]:
import os
import re
import math
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 'You can use tf32' if you are acessing Ampere hardware
import torch
torch.backends.cuda.matmul.allow_tf32 = True

from datasets import load_dataset, load_metric, ClassLabel
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, precision_score, recall_score, f1_score

from functools import partial

import ray
from ray import tune
from ray import air
from ray.tune import CLIReporter
from ray.tune.examples.pbt_transformers.utils import (
    download_data,
    build_compute_metrics_fn,
)
from ray.tune.schedulers import PopulationBasedTraining, ASHAScheduler
from transformers import (
    glue_tasks_num_labels,
    AdamW,
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    GlueDataset,
    GlueDataTrainingArguments,
    TrainingArguments,
    EarlyStoppingCallback
)

# 2. Import Data

In [6]:
from google.colab import drive
drive.mount('/content/gdrive')

data_name = "dummy_dataset"

dataset = load_dataset('csv', data_files={'train': f'/content/gdrive/MyDrive/bert/data_split/{data_name}_train.csv',
                                          'test': f'/content/gdrive/MyDrive/bert/data_split/{data_name}_test.csv'})
dataset

Mounted at /content/gdrive




Downloading and preparing dataset csv/default to /root/.cache/huggingface/datasets/csv/default-b5e03989c24757c6/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a...


Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

0 tables [00:00, ? tables/s]

0 tables [00:00, ? tables/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/csv/default-b5e03989c24757c6/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a. Subsequent calls will reuse this data.


  0%|          | 0/2 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'past_history', 'treatment_effect', 'examination', 'label'],
        num_rows: 3756
    })
    test: Dataset({
        features: ['id', 'past_history', 'treatment_effect', 'examination', 'label'],
        num_rows: 940
    })
})

# 3. Data Preprocessing

In [7]:
dataset = dataset.remove_columns(['id', 'treatment_effect', 'examination'])
dataset = dataset.rename_column("past_history", "text")

# dataset = dataset.remove_columns(['id', 'examination', 'past_history'])
# dataset = dataset.rename_column("treatment_effect", "text")

# dataset = dataset.remove_columns(['id', 'treatment_effect', 'past_history'])
# dataset = dataset.rename_column("examination", "text")
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 3756
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 940
    })
})

In [8]:
## remove NA rows

dataset = dataset.filter(lambda row: pd.notnull(row["text"]))

## remove specal characters

def remove_sp(example):
    example["text"]=re.sub(r'[^a-z|A-Z|0-9|ㄱ-ㅎ|ㅏ-ㅣ|가-힣| ]+', '', str(example["text"]))
    return example

dataset = dataset.map(remove_sp)

dataset



  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/3756 [00:00<?, ?ex/s]

  0%|          | 0/940 [00:00<?, ?ex/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 3756
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 940
    })
})

In [9]:
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 3756
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 940
    })
})

# 4.Modeling

In [10]:
######## Initialize Ray ########

ray.shutdown()
ray.init(log_to_driver=False, ignore_reinit_error=True, num_cpus=num_cpus, num_gpus=num_gpus, include_dashboard=False)

######## Tokenizing ########

tokenizer = AutoTokenizer.from_pretrained(model_name, truncation_side = 'left') # truncation_side = 'left' option remains last 512 tokens

def tokenize_function(examples):
    tokenized_batch = tokenizer(examples["text"], padding="max_length", truncation=True) # padding : ['longest', 'max_length', 'do_not_pad']
    return tokenized_batch

tokenized_datasets = dataset.map(tokenize_function, batched=True)

######## Train-Evalulation-Test Split ########

train_dataset = tokenized_datasets["train"].shuffle(seed=seed).select(range(0,math.floor(len(tokenized_datasets["train"])*0.7*train_proportion)))
eval_dataset = tokenized_datasets["train"].shuffle(seed=seed).select(range(math.floor(len(tokenized_datasets["train"])*0.7), len(tokenized_datasets["train"])))
test_dataset = tokenized_datasets["test"]

# data for test
train_dataset = train_dataset.shuffle(seed=1919).select(range(1000))
eval_dataset = eval_dataset.shuffle(seed=1919).select(range(500))
test_dataset = test_dataset.shuffle(seed=1919).select(range(200))

######## Applying Class Weights ########

def class_weight(train_dataset) :
    
    train_labels = np.array(train_dataset["label"])
    class_weights = compute_class_weight(class_weight = 'balanced', classes = np.unique(train_labels), y = train_labels)
    
    weights = torch.tensor(class_weights, dtype = torch.float)
    
    return weights

weights = class_weight(train_dataset)
print(f"Class Weights: {weights}")

######## Modeling ########

def model_init():
    return AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=2
        )

def compute_metrics(eval_preds):
    metric = load_metric("glue", "mrpc") # for using both of acc and f1 
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

training_args = TrainingArguments(
    output_dir=".",
    learning_rate=2e-5, # config
    do_train=True,
    do_eval=True,
    no_cuda=gpus_per_trial <= 0,
    evaluation_strategy="steps",
    save_strategy="steps",
    eval_steps = 50,
    metric_for_best_model="objective", 
    greater_is_better=True,
    load_best_model_at_end=True,
    num_train_epochs=2,  # config
    max_steps=-1,
    per_device_train_batch_size=8,  # config
    per_device_eval_batch_size=8,
    warmup_steps=0,
    warmup_ratio=0.1,  # config
    weight_decay=0.1,  # config
    logging_dir="./logs",
    skip_memory_metrics=True,
    report_to="none",
    fp16=True,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    seed=seed  # config
    )

class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # compute custom loss
        weight = weights.to(device)
        loss_fct = torch.nn.CrossEntropyLoss(weight=weight)
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss
    
trainer = CustomTrainer(
    model_init=model_init,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    )

tune_config = {
    "per_device_eval_batch_size": 8,
    "per_device_train_batch_size": 8,
    "max_steps": -1
}

scheduler = PopulationBasedTraining(
    time_attr="training_iteration",
    metric="objective",
    mode="max",
    perturbation_interval=1,
    hyperparam_mutations={
#         "num_train_epochs": tune.randint(1, 20),
        "num_train_epochs": tune.randint(10, 15),
#         "per_device_train_batch_size": tune.choice([4, 8]),
        "weight_decay": tune.uniform(0.0, 0.3),
        "learning_rate": tune.uniform(1e-5, 5e-5),
        "warmup_ratio": tune.uniform(0.0, 0.3),
        "adam_beta1": tune.loguniform(1e-2, 1),
        "adam_beta2": tune.loguniform(1e-3, 1),
        "adam_epsilon": tune.loguniform(1e-8, 1e-5),
    },
)

reporter = CLIReporter(
    parameter_columns={
        "weight_decay": "w_decay",
        "learning_rate": "lr",
        "per_device_train_batch_size": "train_bs/gpu",
        "num_train_epochs": "num_epochs",
    },
    metric_columns=["eval_f1", "eval_accuracy", "eval_loss", "epoch", "training_iteration"]
)

model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                           num_labels = 2,
                                                           output_attentions = False,
                                                           output_hidden_states = False)

2022-11-01 06:53:47,641	INFO worker.py:1518 -- Started a local Ray instance.


Downloading:   0%|          | 0.00/615 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/f6d161e8f5f6f2ed433fb4023d6cb34146506b3f/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}



Class Weights: tensor([0.6757, 1.9231])


Downloading:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/f6d161e8f5f6f2ed433fb4023d6cb34146506b3f/pytorch_model.bin
Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassificat

In [11]:
stopper = tune.stopper.ExperimentPlateauStopper(metric="objective", 
                                                std=0.3,
                                                top=3,
                                                mode="max",
                                                patience=1
                                                )

result = trainer.hyperparameter_search(
    hp_space = lambda _: tune_config,
    direction = "maximize",
    backend="ray",
    reuse_actors = True,
    n_trials=n_trials,
    resources_per_trial={"cpu": cpus_per_trial, "gpu": gpus_per_trial},
    scheduler=scheduler,
    keep_checkpoints_num=1,
    checkpoint_score_attr="training_iteration",
    stop = stopper,
    progress_reporter=reporter,
    local_dir="./test-results",
    name="tune_transformer_pbt",
    log_to_file=True,
)


from ray.air import session

def train(config):
    # ...
    session.report({"metric": metric}, checkpoint=checkpoint)

For more information please see https://docs.ray.io/en/master/ray-air/key-concepts.html#session

2022-11-01 06:54:39,323	INFO tensorboardx.py:170 -- pip install "ray[tune]" to see TensorBoard files.


== Status ==
Current time: 2022-11-01 06:54:39 (running for 00:00:00.21)
Memory usage on this node: 4.8/51.0 GiB
PopulationBasedTraining: 0 checkpoints, 0 perturbs
Resources requested: 12.0/12 CPUs, 1.0/1 GPUs, 0.0/30.3 GiB heap, 0.0/15.15 GiB objects (0.0/1.0 accelerator_type:P100)
Result logdir: /content/test-results/tune_transformer_pbt
Number of trials: 5/5 (4 PENDING, 1 RUNNING)
+------------------------+----------+----------------+-----------+-------------+----------------+--------------+
| Trial name             | status   | loc            |   w_decay |          lr |   train_bs/gpu |   num_epochs |
|------------------------+----------+----------------+-----------+-------------+----------------+--------------|
| _objective_0e0e6_00000 | RUNNING  | 172.28.0.2:809 |  0.186633 | 2.75091e-05 |              8 |           13 |
| _objective_0e0e6_00001 | PENDING  |                |  0.244549 | 1.63526e-05 |              8 |           13 |
| _objective_0e0e6_00002 | PENDING  |           

2022-11-01 07:01:46,214	INFO tune.py:759 -- Total run time: 427.09 seconds (426.66 seconds for the tuning loop).


In [12]:
result

BestRun(run_id='0e0e6_00001', objective=1.341669616519174, hyperparameters={'per_device_eval_batch_size': 8, 'per_device_train_batch_size': 8, 'max_steps': -1, 'num_train_epochs': 13, 'weight_decay': 0.24454888021530063, 'learning_rate': 1.635261412737746e-05, 'warmup_ratio': 0.03484134828556462, 'adam_beta1': 0.010612435463932554, 'adam_beta2': 0.028873559738745436, 'adam_epsilon': 9.841159819173958e-08})

In [13]:
for n, v in result.hyperparameters.items():
    setattr(trainer.args, n, v)

In [14]:
trainer.args

TrainingArguments(
_n_gpu=1,
adafactor=False,
adam_beta1=0.010612435463932554,
adam_beta2=0.028873559738745436,
adam_epsilon=9.841159819173958e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_pin_memory=True,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=True,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=50,
evaluation_strategy=steps,
fp16=True,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=4,
gradient_checkpointing=True,
greater_is_better=True,
group_by_length=False,
half_precision_backend=cuda_amp,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=every_save,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
include_inputs_

In [16]:
trainer.train()

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/f6d161e8f5f6f2ed433fb4023d6cb34146506b3f/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/f6d

Step,Training Loss,Validation Loss,Accuracy,F1
50,No log,0.587835,0.734,0.60767
100,No log,0.42893,0.854,0.732601
150,No log,0.355903,0.894,0.796935
200,No log,0.356198,0.89,0.795539
250,No log,0.381756,0.894,0.798479
300,No log,0.341327,0.908,0.835714
350,No log,0.396761,0.886,0.781609
400,No log,0.410022,0.898,0.810409


The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 500
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 500
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 500


TrainOutput(global_step=403, training_loss=0.3947397615418541, metrics={'train_runtime': 1071.7748, 'train_samples_per_second': 12.129, 'train_steps_per_second': 0.376, 'total_flos': 3418338831237120.0, 'train_loss': 0.3947397615418541, 'epoch': 12.99})

In [17]:
trainer.evaluate()

The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 500
  Batch size = 8


{'eval_loss': 0.42017561197280884,
 'eval_accuracy': 0.904,
 'eval_f1': 0.8195488721804511,
 'eval_runtime': 10.16,
 'eval_samples_per_second': 49.213,
 'eval_steps_per_second': 6.201,
 'epoch': 12.99}

In [18]:
pred = trainer.predict(test_dataset=test_dataset)
pred

The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 200
  Batch size = 8


PredictionOutput(predictions=array([[ 3.152  , -2.871  ],
       [-1.799  ,  2.06   ],
       [ 2.701  , -2.314  ],
       [ 3.127  , -2.852  ],
       [-2.467  ,  2.775  ],
       [-1.266  ,  1.661  ],
       [-2.404  ,  2.727  ],
       [ 0.1787 ,  0.06824],
       [ 3.217  , -2.982  ],
       [-2.234  ,  2.53   ],
       [-1.271  ,  1.727  ],
       [ 3.082  , -2.762  ],
       [ 3.043  , -2.834  ],
       [-1.042  ,  1.384  ],
       [ 2.598  , -2.121  ],
       [-1.304  ,  1.586  ],
       [ 3.21   , -2.879  ],
       [-2.541  ,  2.91   ],
       [ 2.719  , -2.361  ],
       [ 3.04   , -2.852  ],
       [-1.987  ,  2.266  ],
       [ 3.271  , -3.025  ],
       [ 3.2    , -3.018  ],
       [ 3.299  , -3.076  ],
       [ 2.453  , -2.209  ],
       [ 3.201  , -2.994  ],
       [-2.51   ,  2.838  ],
       [-0.876  ,  1.3    ],
       [-2.303  ,  2.574  ],
       [ 2.836  , -2.453  ],
       [-1.996  ,  2.373  ],
       [ 3.113  , -2.807  ],
       [-2.107  ,  2.43   ],
       [-2.248

In [19]:
label_test = list(pred.label_ids)
pred_test = list(map(lambda x: x.index(max(x)), pred.predictions.tolist()))

In [20]:
print(confusion_matrix(label_test, pred_test))

[[130  13]
 [ 11  46]]


In [21]:
accuracy = accuracy_score(label_test, pred_test)
f1 = f1_score(label_test, pred_test)
recall = recall_score(label_test, pred_test)
precision = precision_score(label_test, pred_test)

print(accuracy)
print(f1)
print(recall)
print(precision)

0.88
0.7931034482758621
0.8070175438596491
0.7796610169491526


In [None]:
model_path = f"sev_past_{train_proportion}"
trainer.model.save_pretrained(model_path)
tokenizer.save_pretrained(model_path)

# Reference

https://bo-10000.tistory.com/154  
https://huggingface.co/blog/ray-tune  
https://docs.ray.io/en/latest/tune/examples/pbt_transformers.html  
https://wood-b.github.io/post/a-novices-guide-to-hyperparameter-optimization-at-scale/#schedulers-vs-search-algorithms  
https://docs.ray.io/en/latest/tune/api_docs/search_space.html  
https://docs.ray.io/en/latest/tune/tutorials/tune-advanced-tutorial.html  
https://docs.ray.io/en/latest/tune/api_docs/schedulers.html  
https://blog.ml.cmu.edu/2018/12/12/massively-parallel-hyperparameter-optimization/  
https://docs.ray.io/en/latest/tune/faq.html  
https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#population-based-training-tune-schedulers-populationbasedtraining  
https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer.hyperparameter_search  
https://docs.ray.io/en/latest/tune/api_docs/suggestion.html#optuna-tune-search-optuna-optunasearch  
https://kyunghyunlim.github.io/nlp/ml_ai/2021/09/22/hugging_face_5.html  
https://docs.ray.io/en/latest/tune/api_docs/stoppers.html  
https://docs.ray.io/en/latest/tune/tutorials/tune-stopping.html#stopping-with-a-function  
https://docs.ray.io/en/latest/ray-air/package-ref.html  
https://huggingface.co/docs/transformers/v4.22.2/en/main_classes/callback#transformers.TrainerControl  
https://github.com/huggingface/transformers/blob/v4.22.2/src/transformers/trainer_callback.py#L524  
https://github.com/jg051623/BertOnSocratex/blob/5c3755bbff288c64ead46920f0b01ccf186c049d/FunctionsForBert.py#L147  
https://docs.ray.io/en/latest/_modules/ray/tune/stopper/experiment_plateau.html#ExperimentPlateauStopper  