# Prerequisites

- Host OS: Ubuntu 20.04 LTS
- Docker image: `mltooling/ml-workspace-gpu` (`docker pull mltooling/ml-workspace-gpu`)
- Single NVIDIA GPU (RTX 3080)

# 0. Import libraries

In [1]:
import bert_ensemble_functions
from datasets import load_dataset

import transformers
import datasets
import huggingface_hub
import pyarrow
import torch
# Record the version of each key dependency so the run can be reproduced later.
for library in (transformers, datasets, huggingface_hub, pyarrow, torch):
    print(library.__version__)

4.22.1
2.4.0
0.9.1
9.0.0
1.9.0+cu111


In [2]:
# TF32 matmul can be enabled if you are accessing Ampere hardware; uncomment to opt in.
# torch.backends.cuda.matmul.allow_tf32 = True

# Pick the compute device; when CUDA is usable, also describe every visible GPU.
if not torch.cuda.is_available():
    device = torch.device("cpu")
    print("no cuda device")
else:
    device = torch.device("cuda:0")
    device_count = torch.cuda.device_count()
    print("device_count: {}".format(device_count))
    for device_num in range(device_count):
        gpu_capability = torch.cuda.get_device_capability(device_num)
        print("device {} capability {}".format(device_num, gpu_capability))
        gpu_name = torch.cuda.get_device_name(device_num)
        print("device {} name {}".format(device_num, gpu_name))

device_count: 1
device 0 capability (8, 6)
device 0 name NVIDIA GeForce RTX 3080


In [3]:
#### The number of CPU cores
#### (counts the lines in /proc/cpuinfo that contain "processor"; Linux only)
!grep -c processor /proc/cpuinfo

20


In [4]:
#### GPU information
#### (nvidia-smi report: driver / CUDA version, GPU name, memory usage and utilisation)
!nvidia-smi

Tue Apr 11 11:30:17 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.105.01   Driver Version: 515.105.01   CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0 Off |                  N/A |
| 58%   41C    P8    25W / 370W |    339MiB / 12288MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

# 1. Customize Train Strategy

In [5]:
# --- Compute resources and reproducibility ---------------------------------
num_cpus, num_gpus = 16, 1
seed = 1234

# --- Pretrained checkpoint --------------------------------------------------
# Alternatives: bert-base-multilingual-cased, klue/roberta-base, bert-base-cased, etc.
model_name = "xlm-roberta-base"

# --- Dataset columns --------------------------------------------------------
text_column, label_column, id_column = 'treatment_effect', 'label', 'id'
# Directory name derived from the text column; passed on as the custom tokenizer dir.
custom_dir = f"sev_{text_column}_ensemble"

# --- Train / eval split -----------------------------------------------------
train_proportion = 0.5  # train set : eval set = 5 : 5

# --- Hyperparameter optimisation (HPO) --------------------------------------
do_hpo = True
# The values below only matter when searching for the best hyperparameters with
# Ray Tune (do_hpo = True); they are forwarded to bert_ensemble_functions.modeling.
n_trials = 5
std = 0.1      # NOTE(review): exact meaning is defined inside modeling() -- confirm there
patience = 5   # NOTE(review): presumably an early-stopping patience -- confirm

# 2. Import Data

Two files are needed in your data directory (in this case, `../data_split/`): `{data_name}_train.csv` and `{data_name}_test.csv`.

In [6]:
# Base name shared by the train and test CSV files.
data_name = "data_MI"

# Map each split to its CSV file, then load both splits in a single call.
csv_files = {
    'train': f'../data_split/{data_name}_train.csv',
    'test': f'../data_split/{data_name}_test.csv',
}
dataset = load_dataset('csv', data_files=csv_files)
dataset

Using custom data configuration default-2397cb3812416ba0
Reusing dataset csv (/root/.cache/huggingface/datasets/csv/default-2397cb3812416ba0/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a)


  0%|          | 0/2 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'treatment_effect', 'past_history', 'examination', 'label'],
        num_rows: 808
    })
    test: Dataset({
        features: ['id', 'treatment_effect', 'past_history', 'examination', 'label'],
        num_rows: 203
    })
})

# 3. Data Preprocessing

In [7]:
# Run the project preprocessing pipeline with the settings from the config cell.
# It returns the train, evaluation and test datasets, in that order.
preprocessing_kwargs = dict(
    dataset=dataset,
    text_column=text_column,
    label_column=label_column,
    id_column=id_column,
    model_name=model_name,
    train_proportion=train_proportion,
    seed=seed,
    custom_tokenizer_dir=custom_dir,
)
train_dataset, eval_dataset, test_dataset = bert_ensemble_functions.preprocessing(
    **preprocessing_kwargs
)

Removing rows with missing value...


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Done. (1/5)
Removing special characters...


  0%|          | 0/808 [00:00<?, ?ex/s]

  0%|          | 0/203 [00:00<?, ?ex/s]

Done. (2/5)
Tokenining the text column...


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Loading cached shuffled indices for dataset at /root/.cache/huggingface/datasets/csv/default-2397cb3812416ba0/0.0.0/652c3096f041ee27b04d2232d41f10547a8fecda3e284a79a0ec4053c916ef7a/cache-a37a752d6218aa02.arrow


Done. (3/5)
Spliting train-evaluation-test set...
Done. (4/5)
Applying oversampling to balance the dataset...
Done. (5/5)


# 4. Modeling

In [8]:
# Build the trainer (running hyperparameter search first when `do_hpo` is True).
# All tunable values come from the configuration cell so the notebook has a single
# source of truth.
trainer = bert_ensemble_functions.modeling(
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    # Use the configured checkpoint rather than a hard-coded name: preprocessing
    # was given `model_name`, so the model must be loaded from the same checkpoint.
    model_name=model_name,
    num_gpus=num_gpus,
    num_cpus=num_cpus,
    seed=seed,
    output_dir='./output',
    logging_dir="./logs",
    do_hpo=do_hpo,
    # HPO settings (see the configuration cell); relevant when do_hpo is True.
    std=std,
    n_trials=n_trials,
    patience=patience,
    # HPO results are written under <hpo_result_dir>/<hpo_result_dir_subfolder_name>.
    hpo_result_dir="./hpo-results",
    hpo_result_dir_subfolder_name='tune_transformer_pbt',
)

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/77de1f7a7e5e737aead1cd880979d4f1b3af6668/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/77d

== Status ==
Current time: 2023-04-11 11:30:32 (running for 00:00:00.07)
Memory usage on this node: 9.4/31.1 GiB 
PopulationBasedTraining: 0 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------|
| _objective_44bc1_00000 | RUNNING  | 172.17.0.3:34034 |  0.186633 | 2.75091e-05 |             16 |           17 |
| _objective_44bc1_00001 | PENDING  |                  |  0.287442 | 4.50373e-05 |             16 |           13 |
| _objective_44bc1_0000

== Status ==
Current time: 2023-04-11 11:31:04 (running for 00:00:32.23)
Memory usage on this node: 14.0/31.1 GiB 
PopulationBasedTraining: 0 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------|
| _objective_44bc1_00000 | RUNNING  | 172.17.0.3:34034 |  0.186633 | 2.75091e-05 |             16 |           17 |
| _objective_44bc1_00001 | PENDING  |                  |  0.287442 | 4.50373e-05 |             16 |           13 |
| _objective_44bc1_000

== Status ==
Current time: 2023-04-11 11:31:34 (running for 00:01:02.25)
Memory usage on this node: 14.1/31.1 GiB 
PopulationBasedTraining: 0 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------|
| _objective_44bc1_00000 | RUNNING  | 172.17.0.3:34034 |  0.186633 | 2.75091e-05 |             16 |           17 |
| _objective_44bc1_00001 | PENDING  |                  |  0.287442 | 4.50373e-05 |             16 |           13 |
| _objective_44bc1_000

Trial name,date,done,episodes_total,epoch,eval_accuracy,eval_f1,eval_loss,eval_objective,eval_runtime,eval_samples_per_second,eval_steps_per_second,experiment_id,hostname,iterations_since_restore,node_ip,objective,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
_objective_44bc1_00000,2023-04-11_11-38-55,False,0.0,9.09,0.893564,0.943495,0.667068,1.83706,5.1846,77.923,5.015,feb3a196f6614d05bf3828668a625017,d898612130f5,2,172.17.0.3,3.67412,34034,142.357,70.6781,216.32,1681213135,0,0.0,2,44bc1_00000,0.00199485
_objective_44bc1_00001,2023-04-11_11-41-17,False,0.0,9.09,0.868812,0.928859,0.985904,1.79767,4.2364,95.363,6.137,feb3a196f6614d05bf3828668a625017,d898612130f5,2,172.17.0.3,3.59534,34034,141.59,69.7465,212.607,1681213277,0,0.0,2,44bc1_00001,0.00199485
_objective_44bc1_00002,2023-04-11_11-43-38,False,0.0,9.09,0.886139,0.939314,0.42597,1.82545,4.8806,82.776,5.327,feb3a196f6614d05bf3828668a625017,d898612130f5,2,172.17.0.3,3.65091,34034,141.383,70.3999,212.747,1681213418,0,0.0,2,44bc1_00002,0.00199485
_objective_44bc1_00003,2023-04-11_11-46-00,True,0.0,9.09,0.863861,0.925575,0.598555,1.78944,4.2039,96.102,6.185,feb3a196f6614d05bf3828668a625017,d898612130f5,2,172.17.0.3,3.57887,34034,141.126,69.7283,212.265,1681213560,0,0.0,2,44bc1_00003,0.00199485
_objective_44bc1_00004,2023-04-11_11-36-33,False,,4.51,0.888614,0.940867,0.508262,1.82948,4.1063,98.385,6.332,feb3a196f6614d05bf3828668a625017,d898612130f5,1,172.17.0.3,3.65896,34034,70.965,70.965,70.965,1681212993,0,,1,44bc1_00004,0.00199485


== Status ==
Current time: 2023-04-11 11:31:53 (running for 00:01:21.28)
Memory usage on this node: 14.1/31.1 GiB 
PopulationBasedTraining: 0 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (1 PAUSED, 3 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

== Status ==
Current time: 2023-04-11 11:32:13 (running for 00:01:41.29)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 0 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (1 PAUSED, 3 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

== Status ==
Current time: 2023-04-11 11:32:33 (running for 00:02:01.31)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 0 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (1 PAUSED, 3 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

== Status ==
Current time: 2023-04-11 11:32:53 (running for 00:02:21.32)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 0 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (1 PAUSED, 3 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

== Status ==
Current time: 2023-04-11 11:33:14 (running for 00:02:42.49)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 1 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (2 PAUSED, 2 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

== Status ==
Current time: 2023-04-11 11:33:34 (running for 00:03:02.50)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 1 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (2 PAUSED, 2 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

== Status ==
Current time: 2023-04-11 11:33:54 (running for 00:03:22.51)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 1 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (2 PAUSED, 2 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

== Status ==
Current time: 2023-04-11 11:34:16 (running for 00:03:43.99)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (3 PAUSED, 1 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

== Status ==
Current time: 2023-04-11 11:34:36 (running for 00:04:04.01)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (3 PAUSED, 1 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

== Status ==
Current time: 2023-04-11 11:34:56 (running for 00:04:24.01)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (3 PAUSED, 1 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

== Status ==
Current time: 2023-04-11 11:35:16 (running for 00:04:44.02)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (3 PAUSED, 1 PENDING, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+--

== Status ==
Current time: 2023-04-11 11:35:37 (running for 00:05:05.23)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:35:57 (running for 00:05:25.24)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:36:17 (running for 00:05:45.25)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:36:38 (running for 00:06:06.28)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:36:58 (running for 00:06:26.29)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:37:18 (running for 00:06:46.30)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:37:38 (running for 00:07:06.31)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:38:00 (running for 00:07:27.98)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:38:20 (running for 00:07:47.99)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:38:40 (running for 00:08:08.01)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:39:00 (running for 00:08:28.72)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:39:20 (running for 00:08:48.74)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:39:40 (running for 00:09:08.75)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:40:00 (running for 00:09:28.76)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:40:22 (running for 00:09:50.58)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:40:42 (running for 00:10:10.66)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:41:02 (running for 00:10:30.67)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

2023-04-11 11:41:17,343	INFO pbt.py:646 -- [pbt]: no checkpoint for trial. Skip exploit for Trial _objective_44bc1_00001


== Status ==
Current time: 2023-04-11 11:41:22 (running for 00:10:50.42)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:41:42 (running for 00:11:10.43)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:42:02 (running for 00:11:30.44)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:42:22 (running for 00:11:50.45)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:42:43 (running for 00:12:11.42)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:43:03 (running for 00:12:31.43)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:43:23 (running for 00:12:51.44)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

2023-04-11 11:43:38,825	INFO pbt.py:646 -- [pbt]: no checkpoint for trial. Skip exploit for Trial _objective_44bc1_00002


== Status ==
Current time: 2023-04-11 11:43:43 (running for 00:13:11.89)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:44:03 (running for 00:13:31.90)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:44:23 (running for 00:13:51.91)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:44:43 (running for 00:14:11.92)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:45:05 (running for 00:14:33.30)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:45:25 (running for 00:14:53.31)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

== Status ==
Current time: 2023-04-11 11:45:45 (running for 00:15:13.32)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 16.0/16 CPUs, 1.0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (4 PAUSED, 1 RUNNING)
+------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status   | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+----------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------

2023-04-11 11:46:00,062	INFO tune.py:798 -- Total run time: 928.11 seconds (928.02 seconds for the tuning loop).


== Status ==
Current time: 2023-04-11 11:46:00 (running for 00:15:28.02)
Memory usage on this node: 14.2/31.1 GiB 
PopulationBasedTraining: 2 checkpoints, 0 perturbs
Resources requested: 0/16 CPUs, 0/1 GPUs, 0.0/13.16 GiB heap, 0.0/6.58 GiB objects (0.0/1.0 accelerator_type:G)
Result logdir: /workspace/syc/MI_ensemble/hpo-results/tune_transformer_pbt
Number of trials: 5/5 (5 TERMINATED)
+------------------------+------------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+---------+----------------------+
| Trial name             | status     | loc              |   w_decay |          lr |   train_bs/gpu |   num_epochs |   eval_f1 |   eval_accuracy |   eval_objective |   eval_loss |   epoch |   training_iteration |
|------------------------+------------+------------------+-----------+-------------+----------------+--------------+-----------+-----------------+------------------+-------------+-----

In [9]:
# Fine-tune the model with the configured Trainer. The returned summary is
# bound to a name so it can be inspected later, and echoed as the cell result.
train_output = trainer.train()
train_output

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/77de1f7a7e5e737aead1cd880979d4f1b3af6668/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "xlm-roberta-base",
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.22.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}

loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/77d

Step,Training Loss,Validation Loss,Accuracy,F1,Objective
50,No log,0.80727,0.623762,0.759494,1.383256
100,No log,0.667068,0.893564,0.943495,1.83706
150,No log,0.861477,0.90099,0.947917,1.848907


  nn.utils.clip_grad_norm_(
The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 404
  Batch size = 16
The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 404
  Batch size = 16
The following columns in the evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluati

TrainOutput(global_step=187, training_loss=0.3107929127739075, metrics={'train_runtime': 255.7864, 'train_samples_per_second': 49.58, 'train_steps_per_second': 0.731, 'total_flos': 3325723739750400.0, 'train_loss': 0.3107929127739075, 'epoch': 16.94})

In [10]:
# Persist the fine-tuned model held by the trainer into `custom_dir`.
# NOTE(review): the recorded output lists only config.json and
# pytorch_model.bin; confirm the tokenizer is saved elsewhere if it is needed
# to reload this checkpoint.
trainer.model.save_pretrained(save_directory=custom_dir)

Configuration saved in sev_treatment_effect_ensemble/config.json
Model weights saved in sev_treatment_effect_ensemble/pytorch_model.bin


In [11]:
# Run the project's evaluation helper on the evaluation split and keep the
# resulting per-example table (the recorded output shows logit columns plus
# `label` and `pred`).
# NOTE(review): the rendered table includes raw `id` and `text` values; clear
# this cell's output before sharing if those are sensitive.
df = bert_ensemble_functions.evaluation(
    trainer=trainer,
    eval_dataset=eval_dataset,
    text_column_name=text_column,
)
df

The following columns in the test set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 404
  Batch size = 16


Unnamed: 0,id,text,treatment_effect_pred_0,treatment_effect_pred_1,label,pred
0,59194424,상환 STEMI 로 Direct PCI시행하여 CAD 1vd로 PTCA with...,-4.988281,4.980469,1,1
1,22235170,Recent MI Ischemic HF EF 44 EE 20 HTN Brief hx...,-5.035156,5.003906,1,1
2,131182223,Hospital course1 Cognition전입 당시 평가한 MMSE 30점이...,-4.914062,4.929688,1,1
3,9453533,NSTEMICAOD 3vd sp 5OPCAB LIMA to LAD Aortainte...,-5.125000,5.039062,1,1
4,58102385,Direct PCI1 STEMI로 cangio 시행 mRCA의 total occlu...,-5.054688,4.937500,1,1
...,...,...,...,...,...,...
399,58439794,succssful PCI at mLADAbsorb GT1 3518 시행 후 특이 합...,-5.035156,5.003906,0,1
400,63772231,NSTEMI succssful PCI at LCx c orsiro 2530 201...,-5.082031,5.042969,1,1
401,55819857,상환 NSTEMI로 PTCA with stent insertion at LM 후 급...,-5.093750,5.011719,1,1
402,111502342,Direct PCI postROSC cardiogenic shock상수역에서 갑자기...,-5.031250,5.023438,1,1


In [12]:
# Score this XLM-RoBERTa model's predictions against the gold labels.
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)

# Bind the two columns once so every metric is computed on the same inputs.
labels_true, labels_pred = df.label, df.pred

# Confusion matrix: rows are true labels, columns are predicted labels.
print(
    "Confusion Matrix",
    confusion_matrix(labels_true, labels_pred),
    "-------------------------",
    sep="\n",
)

# NOTE(review): F1 / recall / precision use sklearn's binary defaults, i.e.
# they describe the positive class (label 1) only; read the confusion matrix
# above for how label 0 is handled.
accuracy = accuracy_score(labels_true, labels_pred)
f1 = f1_score(labels_true, labels_pred)
recall = recall_score(labels_true, labels_pred)
precision = precision_score(labels_true, labels_pred)

print(f"Accuracy: {accuracy}")
print(f"F1 score: {f1}")
print(f"Recall: {recall}")
print(f"Precision: {precision}")

Confusion Matrix
[[  0  36]
 [  5 363]]
-------------------------
Accuracy: 0.8985148514851485
F1 score: 0.9465449804432855
Recall: 0.9864130434782609
Precision: 0.9097744360902256


In [13]:
# Export the per-example predictions to a CSV named after the text column,
# written to the current working directory without the row index.
df.to_csv(
    f'./{text_column}_bert_result_df.csv',
    index=False,
)