# Prerequisites

- Host OS: Ubuntu 20.04 LTS
- Docker image: `mltooling/ml-workspace-gpu` (`docker pull mltooling/ml-workspace-gpu`)
- Single Nvidia GPU (RTX 3080)

# 0. Import libraries

In [1]:
import bert_ensemble_functions
from datasets import load_dataset

import transformers
import datasets
import huggingface_hub
import pyarrow
import torch
# Record the versions of the key libraries so the run can be reproduced.
# One version per line, in the order: transformers, datasets, huggingface_hub, pyarrow, torch.
print(
    *(lib.__version__ for lib in (transformers, datasets, huggingface_hub, pyarrow, torch)),
    sep="\n",
)

4.22.1
2.4.0
0.9.1
9.0.0
1.9.0+cu111


In [2]:
# Select the compute device: the first CUDA GPU when one is available, otherwise the CPU.
if not torch.cuda.is_available():
    device = torch.device("cpu")
    print("no cuda device")
else:
    device = torch.device("cuda:0")
    device_count = torch.cuda.device_count()
    print(f"device_count: {device_count}")
    # Report the compute capability and the name of every visible GPU.
    for device_num in range(device_count):
        print(f"device {device_num} capability {torch.cuda.get_device_capability(device_num)}")
        print(f"device {device_num} name {torch.cuda.get_device_name(device_num)}")

device_count: 1
device 0 capability (8, 6)
device 0 name NVIDIA GeForce RTX 3080


In [3]:
#### The number of CPU cores
# Counts the lines matching "processor" in /proc/cpuinfo (this file exists on Linux hosts).
!grep -c processor /proc/cpuinfo

20


In [4]:
#### GPU information
# Prints the driver version, CUDA version, and current memory/utilization of each GPU.
!nvidia-smi

Tue Apr 11 11:13:13 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.105.01   Driver Version: 515.105.01   CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0 Off |                  N/A |
| 60%   44C    P3   101W / 370W |    339MiB / 12288MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

# 1. Customize Train Strategy

In [5]:
# --- Compute resources and reproducibility ---
num_cpus = 16
num_gpus = 1
seed = 1234

# Pretrained checkpoint to use; alternatives include
# bert-base-multilingual-cased, klue/roberta-base, bert-base-cased, etc.
model_name = "xlm-roberta-base"

# --- Dataset columns and output naming ---
text_column = "past_history"
label_column = "label"
id_column = "id"
custom_dir = f"sev_{text_column}_ensemble"

# --- Data split and hyperparameter optimization switch ---
train_proportion = 0.5  # train set : eval set = 5 : 5
do_hpo = True

# The parameters below must be set when searching for the best
# hyperparameters with Ray Tune.
n_trials = 5
std = 0.1
patience = 5

# 2. Import Data

Two files are needed in your data directory (in this case, `../data_split/`): `{data_name}_train.csv` and `{data_name}_test.csv`.

In [6]:
# Prefix shared by the train and test CSV file names.
data_name = "data_MI"

# Load both CSV files into a single DatasetDict with 'train' and 'test' splits.
dataset = load_dataset(
    'csv',
    data_files=dict(
        train=f'../data_split/{data_name}_train.csv',
        test=f'../data_split/{data_name}_test.csv',
    ),
)
dataset

Using custom data configuration default-2397cb3812416ba0
Reusing dataset csv (/root/.cache/huggingface/datasets/csv/default-2397cb3812416ba0/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a)


  0%|          | 0/2 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'treatment_effect', 'past_history', 'examination', 'label'],
        num_rows: 808
    })
    test: Dataset({
        features: ['id', 'treatment_effect', 'past_history', 'examination', 'label'],
        num_rows: 203
    })
})

# 3. Data Preprocessing

In [7]:
# Run the project's preprocessing pipeline; it returns the train, evaluation and test datasets.
train_dataset, eval_dataset, test_dataset = bert_ensemble_functions.preprocessing(
    dataset=dataset,
    text_column=text_column,
    label_column=label_column,
    id_column=id_column,
    model_name=model_name,
    train_proportion=train_proportion,
    seed=seed,
    custom_tokenizer_dir=custom_dir,
)

Loading cached processed dataset at /root/.cache/huggingface/datasets/csv/default-2397cb3812416ba0/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-3b1533dc78920f85.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/csv/default-2397cb3812416ba0/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-1e0a74c9d99544f2.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/csv/default-2397cb3812416ba0/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-e4596ac9b5980a20.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/csv/default-2397cb3812416ba0/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-75de8d9d80ccb5fd.arrow


Removing rows with missing value...
Done. (1/5)
Removing special characters...
Done. (2/5)
Tokenining the text column...


Loading cached processed dataset at /root/.cache/huggingface/datasets/csv/default-2397cb3812416ba0/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-36019dad0459bfd0.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/csv/default-2397cb3812416ba0/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-252dd90ad7fee2da.arrow
Loading cached shuffled indices for dataset at /root/.cache/huggingface/datasets/csv/default-2397cb3812416ba0/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-1945ca50d3a8baac.arrow
Loading cached shuffled indices for dataset at /root/.cache/huggingface/datasets/csv/default-2397cb3812416ba0/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-1945ca50d3a8baac.arrow


Done. (3/5)
Spliting train-evaluation-test set...
Done. (4/5)
Applying oversampling to balance the dataset...
Done. (5/5)


In [8]:
# Inspect the preprocessed training split (features and row count).
# NOTE(review): the row count presumably reflects the oversampling step reported by preprocessing — confirm.
train_dataset

Dataset({
    features: ['id', 'text', 'label', 'input_ids', 'attention_mask'],
    num_rows: 746
})

# 4. Modeling

In [9]:
# Build the trainer (and run the hyperparameter search when do_hpo is True).
# model_name comes from the configuration cell so that the model always matches
# the checkpoint used for tokenization in the preprocessing step; it was
# previously hard-coded here, which would silently diverge if the config changed.
trainer = bert_ensemble_functions.modeling(
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    model_name=model_name,
    num_gpus=num_gpus,
    num_cpus=num_cpus,
    seed=seed,
    output_dir='./output',
    logging_dir="./logs",
    do_hpo=do_hpo,
    std=std,
    n_trials=n_trials,
    patience=patience,
    hpo_result_dir="./hpo-results",
    hpo_result_dir_subfolder_name='tune_transformer_pbt',
)



loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/77de1f7a7e5e737aead1cd880979d4f1b3af6668/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/77d

== Status ==
Current time: 2023-04-11 11:13:27 (running for 00:00:00.08)
Memory usage on this node: 9.3/31.1 GiB 
PopulationBasedTraining: 0 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------|
| _objective_e23c6_00000 | RUNNING  | 172.17.0.3:31473 |  0.186633 | 2.75091e-05 |             16 |           17 |
| _objective_e23c6_00001 | PENDING  |                  |  0.287442 | 4.50373e-05 |             16 |           13 |
| _objective_e23c6_0000

== Status ==
Current time: 2023-04-11 11:14:00 (running for 00:00:32.24)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 0 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------|
| _objective_e23c6_00000 | RUNNING  | 172.17.0.3:31473 |  0.186633 | 2.75091e-05 |             16 |           17 |
| _objective_e23c6_00001 | PENDING  |                  |  0.287442 | 4.50373e-05 |             16 |           13 |
| _objective_e23c6_000

== Status ==
Current time: 2023-04-11 11:14:30 (running for 00:01:02.25)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 0 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------|
| _objective_e23c6_00000 | RUNNING  | 172.17.0.3:31473 |  0.186633 | 2.75091e-05 |             16 |           17 |
| _objective_e23c6_00001 | PENDING  |                  |  0.287442 | 4.50373e-05 |             16 |           13 |
| _objective_e23c6_000

Trial name,date,done,episodes_total,epoch,eval_accuracy,eval_f1,eval_loss,eval_objective,eval_runtime,eval_samples_per_second,eval_steps_per_second,experiment_id,hostname,iterations_since_restore,node_ip,objective,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
_objective_e23c6_00000,2023-04-11_11-21-52,False,0.0,9.09,0.905941,0.950131,0.574832,1.85607,5.2083,77.568,4.992,6f3feffab21a440596e9f4852461e046,d898612130f5,2,172.17.0.3,3.71214,31473,141.937,70.3984,216.017,1681212112,0,0.0,2,e23c6_00000,0.00202274
_objective_e23c6_00001,2023-04-11_11-24-13,False,0.0,9.09,0.89604,0.944882,0.851785,1.84092,4.0864,98.864,6.363,6f3feffab21a440596e9f4852461e046,d898612130f5,2,172.17.0.3,3.68184,31473,140.881,69.5855,213.012,1681212253,0,0.0,2,e23c6_00001,0.00202274
_objective_e23c6_00002,2023-04-11_11-25-24,True,0.0,4.51,0.905941,0.950649,0.475898,1.85659,4.1233,97.98,6.306,6f3feffab21a440596e9f4852461e046,d898612130f5,1,172.17.0.3,3.71318,31473,70.7828,70.7828,142.914,1681212324,0,0.0,1,e23c6_00002,0.00202274
_objective_e23c6_00003,2023-04-11_11-18-19,False,,4.51,0.79703,0.882857,0.503701,1.67989,4.2061,96.052,6.182,6f3feffab21a440596e9f4852461e046,d898612130f5,1,172.17.0.3,3.35977,31473,70.922,70.922,70.922,1681211899,0,,1,e23c6_00003,0.00202274
_objective_e23c6_00004,2023-04-11_11-19-30,False,,4.51,0.903465,0.949153,0.473938,1.85262,4.1225,97.999,6.307,6f3feffab21a440596e9f4852461e046,d898612130f5,1,172.17.0.3,3.70524,31473,70.7574,70.7574,70.7574,1681211970,0,,1,e23c6_00004,0.00202274


== Status ==
Current time: 2023-04-11 11:14:49 (running for 00:01:21.40)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 0 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (1 PAUSED, 3 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

== Status ==
Current time: 2023-04-11 11:15:09 (running for 00:01:41.41)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 0 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (1 PAUSED, 3 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

== Status ==
Current time: 2023-04-11 11:15:29 (running for 00:02:01.42)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 0 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (1 PAUSED, 3 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

== Status ==
Current time: 2023-04-11 11:15:49 (running for 00:02:21.43)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 0 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (1 PAUSED, 3 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

== Status ==
Current time: 2023-04-11 11:16:11 (running for 00:02:43.73)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 1 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (2 PAUSED, 2 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

== Status ==
Current time: 2023-04-11 11:16:31 (running for 00:03:03.74)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 1 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (2 PAUSED, 2 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

== Status ==
Current time: 2023-04-11 11:16:51 (running for 00:03:23.75)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 1 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (2 PAUSED, 2 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

2023-04-11 11:17:08,193	INFO pbt.py:804 -- 

[PopulationBasedTraining] [Exploit] Cloning trial e23c6_00001 (score = 3.681843) into trial e23c6_00002 (score = 3.433845)

2023-04-11 11:17:08,194	INFO pbt.py:831 -- 

[PopulationBasedTraining] [Explore] Perturbed the hyperparameter config of triale23c6_00002:
num_train_epochs : 13 --- (* 1.2) --> 15
seed : 1183 --- (* 0.8) --> 946
weight_decay : 0.28744180610511155 --- (resample) --> 0.2774602885846695
learning_rate : 4.503730538968379e-05 --- (* 1.2) --> 5.404476646762055e-05
warmup_ratio : 0.10734518098736 --- (resample) --> 0.132642226621253
adam_beta1 : 0.10045932391231586 --- (resample) --> 0.6586154555178979
adam_beta2 : 0.11230233998349251 --- (* 1.2) --> 0.134762807980191
adam_epsilon : 1.3743776400634128e-06 --- (* 0.8) --> 1.0995021120507303e-06



== Status ==
Current time: 2023-04-11 11:17:13 (running for 00:03:45.50)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 1 checkpoints, 1 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (3 PAUSED, 1 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

== Status ==
Current time: 2023-04-11 11:17:33 (running for 00:04:05.51)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 1 checkpoints, 1 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (3 PAUSED, 1 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

== Status ==
Current time: 2023-04-11 11:17:53 (running for 00:04:25.52)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 1 checkpoints, 1 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (3 PAUSED, 1 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

== Status ==
Current time: 2023-04-11 11:18:13 (running for 00:04:45.53)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 1 checkpoints, 1 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (3 PAUSED, 1 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

2023-04-11 11:18:19,204	INFO pbt.py:804 -- 

[PopulationBasedTraining] [Exploit] Cloning trial e23c6_00001 (score = 3.681843) into trial e23c6_00003 (score = 3.359774)

2023-04-11 11:18:19,204	INFO pbt.py:831 -- 

[PopulationBasedTraining] [Explore] Perturbed the hyperparameter config of triale23c6_00003:
num_train_epochs : 13 --- (* 0.8) --> 10
seed : 1183 --- (* 0.8) --> 946
weight_decay : 0.28744180610511155 --- (* 0.8) --> 0.22995344488408925
learning_rate : 4.503730538968379e-05 --- (* 1.2) --> 5.404476646762055e-05
warmup_ratio : 0.10734518098736 --- (* 0.8) --> 0.085876144789888
adam_beta1 : 0.10045932391231586 --- (* 1.2) --> 0.12055118869477903
adam_beta2 : 0.11230233998349251 --- (* 0.8) --> 0.08984187198679401
adam_epsilon : 1.3743776400634128e-06 --- (* 1.2) --> 1.6492531680760953e-06



== Status ==
Current time: 2023-04-11 11:18:24 (running for 00:04:56.51)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 1 checkpoints, 2 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:18:44 (running for 00:05:16.52)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 1 checkpoints, 2 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:19:04 (running for 00:05:36.53)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 1 checkpoints, 2 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:19:24 (running for 00:05:56.54)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 1 checkpoints, 2 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:19:45 (running for 00:06:17.45)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 2 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:20:05 (running for 00:06:37.46)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 2 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:20:25 (running for 00:06:57.47)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 2 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:20:46 (running for 00:07:18.98)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 2 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:21:06 (running for 00:07:38.99)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 2 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:21:26 (running for 00:07:59.00)
Memory usage on this node: 14.1/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 2 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:21:46 (running for 00:08:19.01)
Memory usage on this node: 14.1/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 2 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:22:07 (running for 00:08:39.55)
Memory usage on this node: 14.1/31.1 GiB 
PopulationBasedTraining: 3 checkpoints, 2 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:22:27 (running for 00:08:59.56)
Memory usage on this node: 14.1/31.1 GiB 
PopulationBasedTraining: 3 checkpoints, 2 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:22:47 (running for 00:09:19.57)
Memory usage on this node: 14.1/31.1 GiB 
PopulationBasedTraining: 3 checkpoints, 2 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:23:08 (running for 00:09:40.84)
Memory usage on this node: 14.1/31.1 GiB 
PopulationBasedTraining: 3 checkpoints, 2 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:23:28 (running for 00:10:00.96)
Memory usage on this node: 14.1/31.1 GiB 
PopulationBasedTraining: 3 checkpoints, 2 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:23:48 (running for 00:10:20.97)
Memory usage on this node: 14.1/31.1 GiB 
PopulationBasedTraining: 3 checkpoints, 2 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:24:08 (running for 00:10:40.98)
Memory usage on this node: 14.1/31.1 GiB 
PopulationBasedTraining: 3 checkpoints, 2 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:24:33 (running for 00:11:05.52)
Memory usage on this node: 14.1/31.1 GiB 
PopulationBasedTraining: 3 checkpoints, 2 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:24:53 (running for 00:11:25.53)
Memory usage on this node: 14.1/31.1 GiB 
PopulationBasedTraining: 3 checkpoints, 2 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:25:13 (running for 00:11:45.54)
Memory usage on this node: 14.1/31.1 GiB 
PopulationBasedTraining: 3 checkpoints, 2 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

2023-04-11 11:25:24,099	INFO tune.py:798 -- Total run time: 716.39 seconds (716.30 seconds for the tuning loop).


== Status ==
Current time: 2023-04-11 11:25:24 (running for 00:11:56.31)
Memory usage on this node: 14.1/31.1 GiB 
PopulationBasedTraining: 3 checkpoints, 2 perturbs
Resources requested: 0/16 CPUs, 0/1 GPUs, 0.0/13.23 GiB heap, 0.0/6.62 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (5 TERMINATED)
+------------------------+------------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status     | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+------------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+-----

In [10]:
# Fine-tune the sequence-classification model with `trainer` (built in an
# earlier cell). The call is the cell's last expression, so the returned
# TrainOutput (global step, training loss, runtime metrics) is displayed.
trainer.train()

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/77de1f7a7e5e737aead1cd880979d4f1b3af6668/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/77d

Step,Training Loss,Validation Loss,Accuracy,F1,Objective
50,No log,0.475898,0.905941,0.950649,1.85659
100,No log,0.643907,0.888614,0.939597,1.828211
150,No log,0.845003,0.910891,0.953247,1.864138


  nn.utils.clip_grad_norm_(
The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 404
  Batch size = 16
The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 404
  Batch size = 16
The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluati

TrainOutput(global_step=165, training_loss=0.24085993911280776, metrics={'train_runtime': 226.9369, 'train_samples_per_second': 49.309, 'train_steps_per_second': 0.727, 'total_flos': 2933162045153280.0, 'train_loss': 0.24085993911280776, 'epoch': 14.94})

In [11]:
# Save the fine-tuned model to `custom_dir`; per the log below this writes
# config.json and pytorch_model.bin into that directory.
# NOTE(review): only the model is saved here — confirm whether the tokenizer
# also needs to be saved alongside it for later reloading.
trainer.model.save_pretrained(custom_dir)

Configuration saved in sev_past_history_ensemble/config.json
Model weights saved in sev_past_history_ensemble/pytorch_model.bin


In [12]:
# Run prediction on the evaluation split through the project helper and keep
# the per-example results (logits, label, predicted class) in `df`.
# NOTE(review): the displayed frame includes ids and raw text — review or
# clear this output before sharing the notebook.
df = bert_ensemble_functions.evaluation(
    trainer=trainer,
    eval_dataset=eval_dataset,
    text_column_name=text_column,
)
df

The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 404
  Batch size = 16


Unnamed: 0,id,text,past_history_pred_0,past_history_pred_1,label,pred
0,59194424,상기 환자 HTN 으로 po med 하고 있는 분으로 2008년 1월 cheat p...,-5.429688,4.882812,1,1
1,22235170,상기 환자 HTN외 특이 과거력 없는 분으로 23일전부터 both foot edem...,-5.359375,4.792969,1,1
2,131182223,Name 서동석 M59UNo 3970770 ...,-4.875000,4.269531,1,1
3,9453533,상기 52세 남환으로 HTN10yrspo medication DM2주전 제주한라병원...,-5.089844,4.500000,1,1
4,58102385,상기 52세 남환은 20pyr 의 smoker 로 HTN 있다는 말 들었으나 ...,-5.375000,4.820312,1,1
...,...,...,...,...,...,...
399,58439794,상기 46세 남환 과음 후 샤워한 뒤 갑자기 발생한 chest pain으로 타병원 ...,-5.351562,4.792969,0,1
400,63772231,상기 남환 HTNno medication rectal cancer sp op2014...,-4.660156,4.074219,1,1
401,55819857,상기 59세 남환 DM HTN으로 medication 중인 자로 최근 일주일간 간혹...,-5.191406,4.648438,1,1
402,111502342,상기 78세 남환 10년 전 unstable angia로 본원 입원하여 시술 예정이...,-4.800781,4.246094,1,1


In [13]:
# Score this XLM-RoBERTa model's predictions (`df.pred`) against the gold
# labels (`df.label`).
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
from sklearn.metrics import precision_score, recall_score

# In the printed matrix, rows are true labels and columns are predictions.
print(
    "Confusion Matrix",
    confusion_matrix(df["label"], df["pred"]),
    "-------------------------",
    sep="\n",
)

# Evaluated in order: accuracy, F1, recall, precision. Each result is bound to
# its own module-level name so later cells can reuse the values.
accuracy, f1, recall, precision = (
    metric(df["label"], df["pred"])
    for metric in (accuracy_score, f1_score, recall_score, precision_score)
)
print(
    f"Accuracy: {accuracy}",
    f"F1 score: {f1}",
    f"Recall: {recall}",
    f"Precision: {precision}",
    sep="\n",
)

Confusion Matrix
[[  2  34]
 [ 14 354]]
-------------------------
Accuracy: 0.8811881188118812
F1 score: 0.9365079365079365
Recall: 0.9619565217391305
Precision: 0.9123711340206185


In [14]:
# Persist the per-example predictions as a CSV in the working directory, named
# after `text_column`; index=False leaves the DataFrame's row index out.
# NOTE(review): if `df` still carries ids/raw text, treat this file as sensitive.
df.to_csv(f'./{text_column}_bert_result_df.csv', index=False)