# Prerequisites

- Host OS: Ubuntu 20.04 lts
- Using Docker Image 'mltooling/ml-workspace-gpu' (docker pull mltooling/ml-workspace-gpu)
- Single Nvidia GPU (RTX 3080)

# Check computing resource

In [1]:
#### The number of CPU cores
!grep -c processor /proc/cpuinfo

20


In [2]:
#### GPU information
!nvidia-smi

Thu Nov 17 06:47:41 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.65.01    Driver Version: 515.65.01    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0 Off |                  N/A |
| 58%   46C    P8    73W / 370W |    242MiB / 12288MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

In [3]:
import torch

if torch.cuda.is_available():
    device = torch.device("cuda:0")
    device_count = torch.cuda.device_count()
    print("device_count: {}".format(device_count))
    for device_num in range(device_count):
        print("device {} capability {}".format(
            device_num,
            torch.cuda.get_device_capability(device_num)))
        print("device {} name {}".format(
            device_num, 
            torch.cuda.get_device_name(device_num)))
else:
    device = torch.device("cpu")
    print("no cuda device")

device_count: 1
device 0 capability (8, 6)
device 0 name NVIDIA GeForce RTX 3080


# 0. Customize Train Strategy

In [9]:
num_cpus = 16
num_gpus = 1
seed = 1234
model_name = "xlm-roberta-base" # bert-base-multilingual-cased, klue/roberta-base, bert-base-cased, etc.
train_proportion = 0.8 # train set : eval set = 8 : 2
do_hpo = False

# If you want to search best hyperparameters using ray tune, parameters below should be set
n_trials = 5
std = 0.1
patience = 5

# 1. Import packages

In [None]:
## Need to check if packages are compatible ##

# !pip install accelerate nvidia-ml-py3
# !pip install datasets==2.4.0
# !pip install huggingface_hub==0.9.1
# !pip install transformers==4.22.1 
# !pip install pyarrow==9.0.0
# !pip install -q ray

In [4]:
import transformers
import datasets
import huggingface_hub
import pyarrow

print(transformers.__version__)
print(datasets.__version__)
print(huggingface_hub.__version__)
print(pyarrow.__version__)

# 4.22.1
# 2.4.0
# 0.9.1
# 9.0.0

4.22.1
2.4.0
0.9.1
9.0.0


In [5]:
import os
import re
import math
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 'You can use tf32' if you are acessing Ampere hardware
import torch
torch.backends.cuda.matmul.allow_tf32 = True

from datasets import load_dataset, load_metric, ClassLabel
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, precision_score, recall_score, f1_score

import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.examples.pbt_transformers.utils import (
    download_data,
    build_compute_metrics_fn,
)
from ray.tune.schedulers import PopulationBasedTraining
from transformers import (
    AdamW,
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)

from typing import TypeVar

# 2. Import Data

2 files are needed (`{data_name}_train.csv` and `{data_name}_test.csv`) in your data directory (in this case, `data_splited/`).

In [11]:
data_name = "cardiovascular_sev_dataset" 

dataset = load_dataset('csv', data_files={'train': f'../data_split/{data_name}_train.csv',
                                          'test': f'../data_split/{data_name}_test.csv'})
dataset

Using custom data configuration default-9aa18915b5f32f1a
Reusing dataset csv (/root/.cache/huggingface/datasets/csv/default-9aa18915b5f32f1a/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a)


  0%|          | 0/2 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'past_history', 'treatment_effect', 'examination', 'label'],
        num_rows: 3756
    })
    test: Dataset({
        features: ['id', 'past_history', 'treatment_effect', 'examination', 'label'],
        num_rows: 940
    })
})

# 3. Data Preprocessing

In [12]:
def preprocessing(dataset,
                  text_column: str,
                  label_column: str,
                  model_name: str,
                  train_proportion: float,
                  seed: int) -> tuple:
    from datasets.dataset_dict import DatasetDict
    if not isinstance(dataset, DatasetDict):
        raise TypeError(f"Values in `dataset` should be of type `DatasetDict` but got type '{type(dataset)}'")
    
    # Select columns to use
    cols_to_remove = list(dataset['train'].features.keys())
    cols_to_remove.remove(text_column)
    cols_to_remove.remove(label_column)
    dataset = dataset.remove_columns(cols_to_remove)
    if 'text' not in dataset['train'].features.keys():
        dataset = dataset.rename_column(text_column, "text")
    if label_column not in dataset['train'].features.keys():
        dataset = dataset.rename_column(label_column, "label")
     
    # Remove NA rows
    print("Removing rows with missing value...")
    dataset = dataset.filter(lambda row: pd.notnull(row["text"]))
    print("Done. (1/4)")
    
    # Remove specal characters
    def remove_sp_fn(dataset):
        dataset["text"]=re.sub(r'[^a-z|A-Z|0-9|ㄱ-ㅎ|ㅏ-ㅣ|가-힣| ]+', '', str(dataset["text"]))
        return dataset
    
    print("Removing special characters...")
    dataset = dataset.map(remove_sp_fn)
    print("Done. (2/4)")
    
    # Tokenize
    tokenizer = AutoTokenizer.from_pretrained(model_name, truncation_side = 'left')
    def tokenize_fn(dataset):
        tokenized_batch = tokenizer(dataset["text"], padding="max_length", truncation=True)
        return tokenized_batch
    
    print("Tokenining the text column...")
    dataset = dataset.map(tokenize_fn, batched=True)
    print("Done. (3/4)")
    
    # train-evaluation-test split
    print("Spliting train-evaluation-test set...")
    train_dataset = dataset["train"].shuffle(seed=seed).select(range(0,math.floor(len(dataset["train"])*train_proportion)))
    eval_dataset = dataset["train"].shuffle(seed=seed).select(range(math.floor(len(dataset["train"])*train_proportion), len(dataset["train"])))
    test_dataset = dataset["test"]
    print("Done. (4/4)")

    return train_dataset, eval_dataset, test_dataset

In [13]:
train_dataset, eval_dataset, test_dataset = preprocessing(dataset = dataset,
                                                          text_column = 'past_history', 
                                                          label_column = 'label',
                                                          model_name = 'xlm-roberta-base',
                                                          train_proportion = 0.8,
                                                          seed = seed)

Loading cached processed dataset at /root/.cache/huggingface/datasets/csv/default-9aa18915b5f32f1a/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-a35c0666d739865a.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/csv/default-9aa18915b5f32f1a/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-1f517c0ccfa238fd.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/csv/default-9aa18915b5f32f1a/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-85b089be2512d97f.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/csv/default-9aa18915b5f32f1a/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-67f6e0a09eb432a6.arrow


Removing rows with missing value...
Done. (1/4)
Removing special characters...
Done. (2/4)


Loading cached processed dataset at /root/.cache/huggingface/datasets/csv/default-9aa18915b5f32f1a/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-c1ad0d2680f7d6d6.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/csv/default-9aa18915b5f32f1a/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-f07883bd96c8ceef.arrow
Loading cached shuffled indices for dataset at /root/.cache/huggingface/datasets/csv/default-9aa18915b5f32f1a/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-1697abb056f6b3f0.arrow
Loading cached shuffled indices for dataset at /root/.cache/huggingface/datasets/csv/default-9aa18915b5f32f1a/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-1697abb056f6b3f0.arrow


Tokenining the text column...
Done. (3/4)
Spliting train-evaluation-test set...
Done. (4/4)


In [14]:
train_dataset

Dataset({
    features: ['text', 'label', 'input_ids', 'attention_mask'],
    num_rows: 3004
})

# 4. Set model configuration

In [15]:
def modeling(train_dataset,
             eval_dataset,
             model_name: str,
             num_gpus: int,
             num_cpus: int,
             seed: int,
             output_dir: str = './output',
             logging_dir: str = "./logs",
             do_hpo: bool = False,
             std: float = 0.1,
             n_trials: int = 5,
             patience: int = 3,
             hpo_result_dir: str = "./hpo-results",
             hpo_result_dir_subfolder_name: str = 'tune_transformer_pbt'
             ):
    from datasets.arrow_dataset import Dataset
    if not isinstance(train_dataset, Dataset):
        raise TypeError(f"Values in `train_dataset` should be of type `Dataset` but got type '{type(train_dataset)}'")
    elif not isinstance(eval_dataset, Dataset):
        raise TypeError(f"Values in `eval_dataset` should be of type `Dataset` but got type '{type(eval_dataset)}'")
        
    # Load the model 
    def _model_init():
        return AutoModelForSequenceClassification.from_pretrained(
            model_name,
            num_labels=2,
            output_attentions = False,
            output_hidden_states = False
            )

    # Define metrics to use for evaluation
    def _compute_metrics(eval_pred):
        metric1 = load_metric("accuracy")
        metric2 = load_metric("f1")
        logits, labels = eval_pred
        predictions = np.argmax(logits, axis=-1)
        accuracy = metric1.compute(predictions=predictions, references=labels)["accuracy"]
        f1 = metric2.compute(predictions=predictions, references=labels)["f1"]
        return {"accuracy": accuracy, "f1": f1, "objective": accuracy+f1}

    # Default: batch size = 32, evaluate every 50 steps
    training_args = TrainingArguments(
        output_dir=output_dir,
        do_train=True,
        do_eval=True,
        evaluation_strategy="steps",
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        gradient_accumulation_steps=4,
        learning_rate=2e-5, # config
        weight_decay=0.1, # config
        adam_beta1=0.1, # config
        adam_beta2=0.1, # config
        adam_epsilon=1.5e-06, # config
        num_train_epochs=15, # config
        max_steps=-1,
        lr_scheduler_type="linear",
        warmup_ratio=0.1,  # config
        warmup_steps=0,
        logging_dir=logging_dir,
        save_strategy="steps",
        no_cuda=num_gpus <= 0, 
        seed=seed,  # config
        bf16=False, # Need torch>=1.10, Ampere GPU with cuda>=11.0
        fp16=True,
        tf32=True, 
        eval_steps = 50,
        load_best_model_at_end=True,
        greater_is_better=True,
        metric_for_best_model="objective", # f1 + acc
        report_to="none",
        skip_memory_metrics=True,
        gradient_checkpointing=True
        )
    
    # Calculate class weights
    train_labels = np.array(train_dataset["label"])
    class_weights = compute_class_weight(class_weight = 'balanced', classes = np.unique(train_labels), y = train_labels)
    weights = torch.tensor(class_weights, dtype = torch.float)
    
    # Define device
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else: 
        device = torch.device("cpu")

    #### Customize trainer class to apply class weights
    class CustomTrainer(Trainer):
        def compute_loss(self, model, inputs, return_outputs=False):
            labels = inputs.get("labels")
            # forward pass
            outputs = model(**inputs)
            logits = outputs.get("logits")
            # compute custom loss
            weight = weights.to(device)
            loss_fct = torch.nn.CrossEntropyLoss(weight=weight)
            loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
            return (loss, outputs) if return_outputs else loss

    trainer = CustomTrainer(
        model_init=_model_init,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=_compute_metrics,
        )
    
    if do_hpo == True:
    
        # Initialize Ray
        ray.shutdown()
        ray.init(log_to_driver=False, ignore_reinit_error=True, num_cpus=num_cpus, num_gpus=num_gpus, include_dashboard=False)

        # Fix batch_size in each trial
        tune_config = {
            "per_device_eval_batch_size": 8,
            "per_device_train_batch_size": 8,
            "max_steps": -1
        }

        # PBT schduler
        scheduler = PopulationBasedTraining(
            time_attr="training_iteration",
            metric="objective",
            mode="max",
            perturbation_interval=1,
            hyperparam_mutations={
                "num_train_epochs": tune.randint(2, 20),
                "seed": tune.randint(1, 9999),
                "weight_decay": tune.uniform(0.0, 0.3),
                "learning_rate": tune.uniform(1e-5, 5e-5),
                "warmup_ratio": tune.uniform(0.0, 0.3),
                "adam_beta1": tune.loguniform(1e-2, 1),
                "adam_beta2": tune.loguniform(1e-3, 1),
                "adam_epsilon": tune.loguniform(1e-8, 1e-5),
            }, 
        )

        # Define columns to report
        reporter = CLIReporter(
            parameter_columns={
                "weight_decay": "w_decay",
                "learning_rate": "lr",
                "per_device_train_batch_size": "train_bs/gpu",
                "num_train_epochs": "num_epochs",
            },
            metric_columns=["eval_f1", "eval_accuracy", "eval_objective", "eval_loss", "epoch", "training_iteration"]
        )

        # Early stopping
        stopper = tune.stopper.ExperimentPlateauStopper(metric="objective", 
                                                        std=std,
                                                        top=n_trials,
                                                        mode="max",
                                                        patience=patience
                                                        )

        # HPO
        hpo_result = trainer.hyperparameter_search(
            hp_space = lambda _: tune_config,
            direction = "maximize",
            backend="ray",
            reuse_actors = True,
            n_trials=n_trials,
            resources_per_trial={"cpu": num_cpus, "gpu": num_gpus},
            scheduler=scheduler,
            keep_checkpoints_num=1,
            checkpoint_score_attr="training_iteration",
            stop=stopper,
            progress_reporter=reporter,
            local_dir=hpo_result_dir,
            name=hpo_result_dir_subfolder_name,
            log_to_file=True,
        )
        for n, v in hpo_result.hyperparameters.items():
            setattr(trainer.args, n, v)
    else:
        pass
    
    return trainer

In [None]:
trainer = modeling(train_dataset=train_dataset,
                   eval_dataset=eval_dataset,
                   model_name='xlm-roberta-base',
                   num_gpus=num_gpus,
                   num_cpus=num_cpus,
                   seed=seed,
                   output_dir='./output',
                   logging_dir="./logs",
                   do_hpo=True,
                   std = std,
                   n_trials = n_trials,
                   patience = patience,
                   hpo_result_dir = "./hpo-results",
                   hpo_result_dir_subfolder_name = 'tune_transformer_pbt')

PyTorch: setting up devices
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/42f548f32366559214515ec137cdd16002968bf6/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--xl

== Status ==
Current time: 2022-11-17 06:50:37 (running for 00:00:00.17)
Memory usage on this node: 11.5/31.1 GiB
PopulationBasedTraining: 0 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.32 GiB heap, 0.0/6.66 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/BERT_classification_binary/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PENDING, 1 RUNNING)
+------------------------+----------+--------------------+-----------+-------------+----------------+--------------+
| Trial name             | status   | loc                |   w_decay |          lr |   train_bs/gpu |   num_epochs |
|------------------------+----------+--------------------+-----------+-------------+----------------+--------------|
| _objective_24a60_00000 | RUNNING  | 172.17.0.3:3201523 |  0.186633 | 2.75091e-05 |              8 |           17 |
| _objective_24a60_00001 | PENDING  |                    |  0.287442 | 4.50373e-05 |              8 |           13 

2022-11-17 06:52:06,848	INFO pbt.py:552 -- [pbt]: no checkpoint for trial. Skip exploit for Trial _objective_24a60_00001


Result for _objective_24a60_00001:
  date: 2022-11-17_06-52-06
  done: false
  epoch: 0.53
  eval_accuracy: 0.7127659574468085
  eval_f1: 0.0
  eval_loss: 0.7396878004074097
  eval_objective: 0.7127659574468085
  eval_runtime: 5.6374
  eval_samples_per_second: 133.395
  eval_steps_per_second: 16.674
  experiment_id: 16c207df115c455ab2fbe981c224c4ad
  hostname: 3481a8a2ae33
  iterations_since_restore: 1
  node_ip: 172.17.0.3
  objective: 1.425531914893617
  pid: 3201523
  time_since_restore: 41.84851908683777
  time_this_iter_s: 41.84851908683777
  time_total_s: 41.84851908683777
  timestamp: 1668667926
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 24a60_00001
  warmup_time: 0.0032570362091064453
  
== Status ==
Current time: 2022-11-17 06:52:11 (running for 00:01:34.23)
Memory usage on this node: 16.1/31.1 GiB
PopulationBasedTraining: 0 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.32 GiB heap, 0.0/6.66 GiB objects (0.0/1.0 accelerator

2022-11-17 06:58:21,842	INFO pbt.py:618 -- [exploit] transferring weights from trial _objective_24a60_00004 (score 2.444416678566329) -> _objective_24a60_00002 (score 1.428191489361702)
2022-11-17 06:58:21,843	INFO pbt.py:636 -- [explore] perturbed config from {'num_train_epochs': 12, 'seed': 6764, 'weight_decay': 0.1308520271687038, 'learning_rate': 4.2085905683206364e-05, 'warmup_ratio': 0.04313004735436937, 'adam_beta1': 0.2561662685923485, 'adam_beta2': 0.12994031779026446, 'adam_epsilon': 4.53290186619553e-08} -> {'num_train_epochs': 9, 'seed': 8116, 'weight_decay': 0.2774602885846695, 'learning_rate': 3.366872454656509e-05, 'warmup_ratio': 0.05175605682524324, 'adam_beta1': 0.3073995223108182, 'adam_beta2': 0.10395225423221158, 'adam_epsilon': 3.6263214929564245e-08}


Result for _objective_24a60_00002:
  date: 2022-11-17_06-58-21
  done: false
  episodes_total: 0
  epoch: 1.06
  eval_accuracy: 0.714095744680851
  eval_f1: 0.0
  eval_loss: 0.6814857721328735
  eval_objective: 0.714095744680851
  eval_runtime: 6.2148
  eval_samples_per_second: 121.001
  eval_steps_per_second: 15.125
  experiment_id: 16c207df115c455ab2fbe981c224c4ad
  hostname: 3481a8a2ae33
  iterations_since_restore: 2
  node_ip: 172.17.0.3
  objective: 1.428191489361702
  pid: 3201523
  time_since_restore: 82.74339604377747
  time_this_iter_s: 39.671080350875854
  time_total_s: 124.60622596740723
  timestamp: 1668668301
  timesteps_since_restore: 0
  timesteps_total: 0
  training_iteration: 2
  trial_id: 24a60_00002
  warmup_time: 0.0032570362091064453
  
== Status ==
Current time: 2022-11-17 06:58:26 (running for 00:07:49.22)
Memory usage on this node: 16.1/31.1 GiB
PopulationBasedTraining: 3 checkpoints, 1 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.32 GiB heap, 

2022-11-17 06:59:44,551	INFO pbt.py:618 -- [exploit] transferring weights from trial _objective_24a60_00004 (score 2.444416678566329) -> _objective_24a60_00002 (score 1.425531914893617)
2022-11-17 06:59:44,551	INFO pbt.py:636 -- [explore] perturbed config from {'num_train_epochs': 12, 'seed': 6764, 'weight_decay': 0.1308520271687038, 'learning_rate': 4.2085905683206364e-05, 'warmup_ratio': 0.04313004735436937, 'adam_beta1': 0.2561662685923485, 'adam_beta2': 0.12994031779026446, 'adam_epsilon': 4.53290186619553e-08} -> {'num_train_epochs': 9, 'seed': 8116, 'weight_decay': 0.15702243260244456, 'learning_rate': 3.366872454656509e-05, 'warmup_ratio': 0.132642226621253, 'adam_beta1': 0.6586154555178979, 'adam_beta2': 0.10395225423221158, 'adam_epsilon': 5.439482239434636e-08}


Result for _objective_24a60_00002:
  date: 2022-11-17_06-59-44
  done: false
  episodes_total: 0
  epoch: 1.06
  eval_accuracy: 0.7127659574468085
  eval_f1: 0.0
  eval_loss: 0.697701632976532
  eval_objective: 0.7127659574468085
  eval_runtime: 6.5472
  eval_samples_per_second: 114.858
  eval_steps_per_second: 14.357
  experiment_id: 16c207df115c455ab2fbe981c224c4ad
  hostname: 3481a8a2ae33
  iterations_since_restore: 2
  node_ip: 172.17.0.3
  objective: 1.425531914893617
  pid: 3201523
  time_since_restore: 82.6147825717926
  time_this_iter_s: 39.97831654548645
  time_total_s: 124.7731921672821
  timestamp: 1668668384
  timesteps_since_restore: 0
  timesteps_total: 0
  training_iteration: 2
  trial_id: 24a60_00002
  warmup_time: 0.0032570362091064453
  
== Status ==
Current time: 2022-11-17 06:59:44 (running for 00:09:06.91)
Memory usage on this node: 16.1/31.1 GiB
PopulationBasedTraining: 3 checkpoints, 2 perturbs
Resources requested: 0/16 CPUs, 0/1 GPUs, 0.0/13.32 GiB heap, 0.0/6.6

2022-11-17 07:05:59,902	INFO pbt.py:618 -- [exploit] transferring weights from trial _objective_24a60_00004 (score 3.2661145296182346) -> _objective_24a60_00000 (score 2.614221724524076)
2022-11-17 07:05:59,903	INFO pbt.py:636 -- [explore] perturbed config from {'num_train_epochs': 12, 'seed': 6764, 'weight_decay': 0.1308520271687038, 'learning_rate': 4.2085905683206364e-05, 'warmup_ratio': 0.04313004735436937, 'adam_beta1': 0.2561662685923485, 'adam_beta2': 0.12994031779026446, 'adam_epsilon': 4.53290186619553e-08} -> {'num_train_epochs': 14, 'seed': 5411, 'weight_decay': 0.01794276683395557, 'learning_rate': 5.050308681984764e-05, 'warmup_ratio': 0.05175605682524324, 'adam_beta1': 0.3073995223108182, 'adam_beta2': 0.10395225423221158, 'adam_epsilon': 5.439482239434636e-08}


Result for _objective_24a60_00000:
  date: 2022-11-17_07-05-59
  done: false
  episodes_total: 0
  epoch: 1.6
  eval_accuracy: 0.800531914893617
  eval_f1: 0.506578947368421
  eval_loss: 0.6219388842582703
  eval_objective: 1.307110862262038
  eval_runtime: 5.6364
  eval_samples_per_second: 133.419
  eval_steps_per_second: 16.677
  experiment_id: 16c207df115c455ab2fbe981c224c4ad
  hostname: 3481a8a2ae33
  iterations_since_restore: 3
  node_ip: 172.17.0.3
  objective: 2.614221724524076
  pid: 3201523
  time_since_restore: 121.42531704902649
  time_this_iter_s: 39.20459008216858
  time_total_s: 248.6718671321869
  timestamp: 1668668759
  timesteps_since_restore: 0
  timesteps_total: 0
  training_iteration: 3
  trial_id: 24a60_00000
  warmup_time: 0.0032570362091064453
  
== Status ==
Current time: 2022-11-17 07:06:04 (running for 00:15:27.27)
Memory usage on this node: 16.1/31.1 GiB
PopulationBasedTraining: 5 checkpoints, 3 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.3

In [None]:
trainer.train()

In [None]:
trainer.evaluate()

In [None]:
pred = trainer.predict(test_dataset=test_dataset)

In [None]:
label_test = list(pred.label_ids)
pred_test = list(map(lambda x: x.index(max(x)), pred.predictions.tolist()))

In [None]:
print(confusion_matrix(label_test, pred_test))

In [None]:
accuracy = accuracy_score(label_test, pred_test)
f1 = f1_score(label_test, pred_test)
recall = recall_score(label_test, pred_test)
precision = precision_score(label_test, pred_test)

print(accuracy)
print(f1)
print(recall)
print(precision)

In [None]:
test_results = pd.concat([pd.DataFrame(test_dataset['text'], columns=['text']), 
                          pd.DataFrame(label_test, columns=['label']),
                          pd.DataFrame(pred_test, columns=['pred'])],
                         axis=1
                        )
test_results

In [None]:
# test_results.to_csv('./ph_result.csv')

# 7. Save the model

In [None]:
# model_path = f"sev_exam_1.0"
# trainer.model.save_pretrained(model_path)
# tokenizer.save_pretrained(model_path)

In [None]:
# load model / pred

# load_model = AutoModelForSequenceClassification.from_pretrained("sev_exam_1.0/")
# load_tokenizer = AutoTokenizer.from_pretrained("sev_exam_1.0/")

# Reference

https://bo-10000.tistory.com/154  
https://huggingface.co/blog/ray-tune  
https://docs.ray.io/en/latest/tune/examples/pbt_transformers.html  
https://wood-b.github.io/post/a-novices-guide-to-hyperparameter-optimization-at-scale/#schedulers-vs-search-algorithms  
https://docs.ray.io/en/latest/tune/api_docs/search_space.html  
https://docs.ray.io/en/latest/tune/tutorials/tune-advanced-tutorial.html  
https://keras.io/examples/keras_recipes/sample_size_estimate/  
https://www.topbots.com/fine-tune-transformers-in-pytorch/  
https://docs.ray.io/en/latest/tune/api_docs/schedulers.html  
https://blog.ml.cmu.edu/2018/12/12/massively-parallel-hyperparameter-optimization/  
https://docs.ray.io/en/latest/tune/faq.html  
https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#population-based-training-tune-schedulers-populationbasedtraining  
https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer.hyperparameter_search  
https://docs.ray.io/en/latest/tune/api_docs/suggestion.html#optuna-tune-search-optuna-optunasearch  
https://kyunghyunlim.github.io/nlp/ml_ai/2021/09/22/hugging_face_5.html  

In [None]:
# gpu 비울 수 있는 방법?


# 버트 모델 돌면 각 모델 성능 확인하고
# pred 등에 id 붙이고 df 만든다음에
# merge
# 다시 merge된 df로 hpo하고 머신러닝