In [75]:
# !export CUDA_LAUNCH_BLOCKING=1
# !export TORCH_USE_CUDA_DSA=true
# !export TOKENIZERS_PARALLELISM=false

In [1]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [2]:
# <your imports>
import numpy as np
import torch
from torch.optim import Adam
import torch.nn as nn
from tqdm import tqdm
from omegaconf import OmegaConf
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, classification_report
from transformers import TrainingArguments, Trainer
from transformers import BertForSequenceClassification, XLMRobertaXLConfig
from transformers import EarlyStoppingCallback

import sys
sys.path.append('../src')
from preprocessing import Preprocessing
from matplotlib import pyplot as plt

# models
from models import BertClassifier

# ignore fucking warnings
import warnings
warnings.filterwarnings("ignore")

In [12]:
def custom_f1(p):
    pred, labels = p
    pred = np.argmax(pred, axis=1)

    accuracy = accuracy_score(y_true=labels, y_pred=pred)
    f1_scores = f1_score(y_true=labels, y_pred=pred, average="weighted")
    
    # f1_scores = f1_score(y_true=labels, y_pred=pred, average=None)
    # final_score = (f1_scores[0] + f1_scores[2]) / 2
    return {
        "accuracy": accuracy,
        "f1_score": f1_scores
        # "f1_score": final_score
    }

def compute_metrics(p):
    pred, labels = p
    # print(pred, labels)
    # print(np.unique(labels, return_counts=True), labels.shape)
    pred = np.argmax(pred, axis=1)

    accuracy = accuracy_score(y_true=labels, y_pred=pred)
    recall = recall_score(y_true=labels, y_pred=pred, average='weighted')
    precision = precision_score(y_true=labels, y_pred=pred, average='weighted')
    f1 = f1_score(y_true=labels, y_pred=pred, average='weighted')

    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}

In [13]:
preprocessor = Preprocessing()

# result = preprocessor.get_dataloaders()
result = preprocessor.get_datasets()

train_set = result["train_set"]
val_set = result["val_set"]
test_set = result["test_set"]
# plt.hist(val_set.labels, bins=3)

In [14]:
conf = OmegaConf.load("../config.yaml").general
for i in conf.keys():
    print(f'{i}: {conf[i]}')

dataset_name: semEval
batch_size: 16
device: cpu
dataloader_shuffle: True
pretrained_model: bert-base-uncased
num_classes: 3
num_workers: 1
lr: 1e-06
epochs: 2
eps: 1e-08
optimizer: AdamW
eval_steps: 10
logging_steps: 10


In [15]:
# help(AutoModelForSequenceClassification)

In [16]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

model = AutoModelForSequenceClassification.from_pretrained(
    # "cardiffnlp/twitter-roberta-base-sentiment",
    "coderSounak/finetuned_twitter_sentiment_LSTM", 
    # 'cardiffnlp/twitter-xlm-roberta-base-sentiment',
    num_labels=3,
)

In [17]:
model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 300, padding_idx=0)
      (position_embeddings): Embedding(512, 300)
      (token_type_embeddings): Embedding(2, 300)
      (LayerNorm): LayerNorm((300,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=300, out_features=300, bias=True)
              (key): Linear(in_features=300, out_features=300, bias=True)
              (value): Linear(in_features=300, out_features=300, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=300, out_features=300, bias=True)
              (LayerNorm): LayerNorm((300,), eps=1e-12, element

In [18]:
args = TrainingArguments(
    output_dir="output",
    evaluation_strategy="steps",
    eval_steps=conf.eval_steps,
    per_device_train_batch_size=conf.batch_size,
    per_device_eval_batch_size=conf.batch_size,
    num_train_epochs=conf.epochs,
    seed=69,
    logging_steps=conf.logging_steps,
    # learning_rate=conf.lr,
    learning_rate=3e-7,
    load_best_model_at_end=True,
    # no_cuda=True
)

In [19]:
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_set,
    eval_dataset=val_set,
    # compute_metrics=compute_metrics,
    compute_metrics=custom_f1,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
    # num_workers=1,
    
)

## Train loop

In [20]:
trainer.train()

Step,Training Loss,Validation Loss,Accuracy,F1 Score
10,0.8639,0.82148,0.678571,0.691268
20,0.8534,0.812303,0.715608,0.715645
30,0.8299,0.803113,0.740741,0.725263
40,0.8186,0.794256,0.753968,0.727009
50,0.8185,0.78591,0.760582,0.716777
60,0.791,0.778009,0.76455,0.705855
70,0.8048,0.7704,0.772487,0.704453
80,0.7975,0.763294,0.77381,0.698738
90,0.8134,0.756717,0.781746,0.702758
100,0.7973,0.750545,0.783069,0.703425


TrainOutput(global_step=378, training_loss=0.7560744790173082, metrics={'train_runtime': 26.5277, 'train_samples_per_second': 227.838, 'train_steps_per_second': 14.249, 'total_flos': 21359437977600.0, 'train_loss': 0.7560744790173082, 'epoch': 2.0})

## Custom trainer

In [13]:
from trainer import CustomTrainer

In [14]:
preprocessor = Preprocessing()

result = preprocessor.get_dataloaders()

train_dl = result["train_dl"]
val_dl = result["val_dl"]

In [19]:
for i in val_dl:
    # print(i)
    i['input_ids'].double().to("cuda:0")
    break

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


RuntimeError: CUDA error: device-side assert triggered
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [16]:
trainer = CustomTrainer(model)

Initialized training config with params: {'dataset_name': 'semEval', 'batch_size': 16, 'device': 'cpu', 'dataloader_shuffle': True, 'pretrained_model': 'bert-base-uncased', 'num_classes': 3, 'num_workers': 1, 'lr': 1e-06, 'epochs': 5, 'eps': 1e-08, 'optimizer': 'AdamW', 'eval_steps': 10, 'logging_steps': 10}


In [17]:
trainer.train(train_dl, val_dl)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/48 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument index in method wrapper_CUDA__index_select)

## Evaluation/Weighting metric

In [21]:
import optuna

In [22]:
def make_weighted_metric(p):
    print(p)
    global weights
    pred, labels = p
    if weights is None:
        weights = np.ones(pred.shape[1])
    pred = np.argmax(pred*weights, axis=1)

    f1_scores = f1_score(y_true=labels, y_pred=pred, average="weighted")
    # final_score = np.mean(f1_scores[1:])
    # return {"custom F1 score": final_score}
    # return {
    #     "F1 for class 0": f1_scores[0],
    #     "F1 for class 1": f1_scores[1],
    #     "F1 for class 2": f1_scores[2]
    # }
    # print(f1_scores)
    return {"f1_score": f1_scores}

In [23]:
def target_func(weight_1, weight_2, weight_3):
    global weights
    weights = np.array([weight_1, weight_2, weight_3])
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_set,
        eval_dataset=val_set,
        # compute_metrics=compute_metrics,
        compute_metrics=make_weighted_metric,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
    )
    result = trainer.evaluate()
    # final_result = (result['eval_F1 for class 0'] + result['eval_F1 for class 2']) / 3
    # return final_result
    print(result)
    return result['eval_f1_score']

In [24]:
def objective(trial):
    weight_1 = trial.suggest_float("weight_1", -2, 2)
    weight_2 = trial.suggest_float("weight_2", -2, 2)
    weight_3 = trial.suggest_float("weight_3", -2, 2)
    
    result = target_func(weight_1, weight_2, weight_3)
    
    return result

In [25]:
study = optuna.create_study(directions=["maximize"])
study.optimize(objective, n_trials=300, timeout=300)

[32m[I 2023-05-22 23:35:21,945][0m A new study created in memory with name: no-name-7ed99cec-daf0-43bd-9ec9-85cd7bb21467[0m


[32m[I 2023-05-22 23:35:22,434][0m Trial 0 finished with value: 0.0 and parameters: {'weight_1': 1.3332790238691183, 'weight_2': -0.43085895060962853, 'weight_3': -1.1843442488109819}. Best is trial 0 with value: 0.0.[0m


<transformers.trainer_utils.EvalPrediction object at 0x7f0d51c768c0>
{'eval_loss': 0.6745257377624512, 'eval_f1_score': 0.0, 'eval_runtime': 0.4835, 'eval_samples_per_second': 1563.64, 'eval_steps_per_second': 99.279}


[32m[I 2023-05-22 23:35:22,918][0m Trial 1 finished with value: 0.3107805566222905 and parameters: {'weight_1': 1.8613082258706313, 'weight_2': 0.10583447379739175, 'weight_3': 0.5769839657009905}. Best is trial 1 with value: 0.3107805566222905.[0m


<transformers.trainer_utils.EvalPrediction object at 0x7f0d31c83fa0>
{'eval_loss': 0.6745257377624512, 'eval_f1_score': 0.3107805566222905, 'eval_runtime': 0.4798, 'eval_samples_per_second': 1575.779, 'eval_steps_per_second': 100.049}


[32m[I 2023-05-22 23:35:23,404][0m Trial 2 finished with value: 0.0 and parameters: {'weight_1': -0.697911708750504, 'weight_2': -0.33468014586429273, 'weight_3': -1.8617785756071141}. Best is trial 1 with value: 0.3107805566222905.[0m


<transformers.trainer_utils.EvalPrediction object at 0x7f0d31c93850>
{'eval_loss': 0.6745257377624512, 'eval_f1_score': 0.0, 'eval_runtime': 0.4818, 'eval_samples_per_second': 1569.263, 'eval_steps_per_second': 99.636}


[32m[I 2023-05-22 23:35:23,887][0m Trial 3 finished with value: 0.0 and parameters: {'weight_1': 0.2597598668012062, 'weight_2': -0.6867499964001209, 'weight_3': -0.831965616161793}. Best is trial 1 with value: 0.3107805566222905.[0m


<transformers.trainer_utils.EvalPrediction object at 0x7f0d31cabd00>
{'eval_loss': 0.6745257377624512, 'eval_f1_score': 0.0, 'eval_runtime': 0.4794, 'eval_samples_per_second': 1576.998, 'eval_steps_per_second': 100.127}


[32m[I 2023-05-22 23:35:24,371][0m Trial 4 finished with value: 0.08226994501504305 and parameters: {'weight_1': 1.9792499140473772, 'weight_2': -1.8516733713333942, 'weight_3': 0.8582572556904209}. Best is trial 1 with value: 0.3107805566222905.[0m


<transformers.trainer_utils.EvalPrediction object at 0x7f0d31cb3730>
{'eval_loss': 0.6745257377624512, 'eval_f1_score': 0.08226994501504305, 'eval_runtime': 0.4793, 'eval_samples_per_second': 1577.261, 'eval_steps_per_second': 100.144}


[32m[I 2023-05-22 23:35:24,854][0m Trial 5 finished with value: 0.0 and parameters: {'weight_1': 0.22565402539843982, 'weight_2': 1.0078813325208733, 'weight_3': -1.404818808237219}. Best is trial 1 with value: 0.3107805566222905.[0m


<transformers.trainer_utils.EvalPrediction object at 0x7f0d31cb3ca0>
{'eval_loss': 0.6745257377624512, 'eval_f1_score': 0.0, 'eval_runtime': 0.4791, 'eval_samples_per_second': 1577.845, 'eval_steps_per_second': 100.181}


[32m[I 2023-05-22 23:35:25,340][0m Trial 6 finished with value: 0.0025282968320943003 and parameters: {'weight_1': -1.137944531639456, 'weight_2': -0.4902950438576137, 'weight_3': -0.2512995771492923}. Best is trial 1 with value: 0.3107805566222905.[0m


<transformers.trainer_utils.EvalPrediction object at 0x7f0d31cbfc70>
{'eval_loss': 0.6745257377624512, 'eval_f1_score': 0.0025282968320943003, 'eval_runtime': 0.4809, 'eval_samples_per_second': 1571.892, 'eval_steps_per_second': 99.803}


[32m[I 2023-05-22 23:35:25,824][0m Trial 7 finished with value: 0.07154878654354667 and parameters: {'weight_1': 0.07955612110239718, 'weight_2': -1.4619264568060433, 'weight_3': 0.17663265986927001}. Best is trial 1 with value: 0.3107805566222905.[0m


<transformers.trainer_utils.EvalPrediction object at 0x7f0d31cc2d70>
{'eval_loss': 0.6745257377624512, 'eval_f1_score': 0.07154878654354667, 'eval_runtime': 0.4799, 'eval_samples_per_second': 1575.353, 'eval_steps_per_second': 100.022}


[32m[I 2023-05-22 23:35:26,310][0m Trial 8 finished with value: 0.0 and parameters: {'weight_1': 0.18545349805057443, 'weight_2': 1.2462448650618363, 'weight_3': -1.8660634647471501}. Best is trial 1 with value: 0.3107805566222905.[0m


<transformers.trainer_utils.EvalPrediction object at 0x7f0d31cb3d90>
{'eval_loss': 0.6745257377624512, 'eval_f1_score': 0.0, 'eval_runtime': 0.4819, 'eval_samples_per_second': 1568.714, 'eval_steps_per_second': 99.601}


[32m[I 2023-05-22 23:35:26,797][0m Trial 9 finished with value: 0.7031327576900974 and parameters: {'weight_1': -1.9835993448903229, 'weight_2': 1.0433311791440198, 'weight_3': -0.3681120630012873}. Best is trial 9 with value: 0.7031327576900974.[0m


<transformers.trainer_utils.EvalPrediction object at 0x7f0d31cc25c0>
{'eval_loss': 0.6745257377624512, 'eval_f1_score': 0.7031327576900974, 'eval_runtime': 0.4823, 'eval_samples_per_second': 1567.354, 'eval_steps_per_second': 99.515}


[32m[I 2023-05-22 23:35:27,293][0m Trial 10 finished with value: 0.7112657735615876 and parameters: {'weight_1': -1.846564443643111, 'weight_2': 1.9230337055905755, 'weight_3': 1.8292081833734604}. Best is trial 10 with value: 0.7112657735615876.[0m


<transformers.trainer_utils.EvalPrediction object at 0x7f0d31cd33a0>
{'eval_loss': 0.6745257377624512, 'eval_f1_score': 0.7112657735615876, 'eval_runtime': 0.4818, 'eval_samples_per_second': 1569.144, 'eval_steps_per_second': 99.628}


[32m[I 2023-05-22 23:35:27,795][0m Trial 11 finished with value: 0.7112657735615876 and parameters: {'weight_1': -1.9402836918431239, 'weight_2': 1.92839092992309, 'weight_3': 1.951534115202912}. Best is trial 10 with value: 0.7112657735615876.[0m


<transformers.trainer_utils.EvalPrediction object at 0x7f0d3031ce80>
{'eval_loss': 0.6745257377624512, 'eval_f1_score': 0.7112657735615876, 'eval_runtime': 0.4884, 'eval_samples_per_second': 1547.927, 'eval_steps_per_second': 98.281}


[32m[I 2023-05-22 23:35:28,295][0m Trial 12 finished with value: 0.7105910229893553 and parameters: {'weight_1': -1.9716541580566331, 'weight_2': 1.7919655997339354, 'weight_3': 1.9429171974228476}. Best is trial 10 with value: 0.7112657735615876.[0m


<transformers.trainer_utils.EvalPrediction object at 0x7f0d30326f50>
{'eval_loss': 0.6745257377624512, 'eval_f1_score': 0.7105910229893553, 'eval_runtime': 0.485, 'eval_samples_per_second': 1558.716, 'eval_steps_per_second': 98.966}


[32m[I 2023-05-22 23:35:28,793][0m Trial 13 finished with value: 0.7112657735615876 and parameters: {'weight_1': -1.2952802366356542, 'weight_2': 1.9860675963163248, 'weight_3': 1.958116036257068}. Best is trial 10 with value: 0.7112657735615876.[0m


<transformers.trainer_utils.EvalPrediction object at 0x7f0d303277c0>
{'eval_loss': 0.6745257377624512, 'eval_f1_score': 0.7112657735615876, 'eval_runtime': 0.4832, 'eval_samples_per_second': 1564.516, 'eval_steps_per_second': 99.334}


[33m[W 2023-05-22 23:35:29,234][0m Trial 14 failed with parameters: {'weight_1': -1.5099217799201714, 'weight_2': 1.5826869665385668, 'weight_3': 1.364792646013467} because of the following error: KeyboardInterrupt().[0m
Traceback (most recent call last):
  File "/home/lazarev/disk/lazarev/miniconda3/envs/study/lib/python3.10/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_22501/1701860310.py", line 6, in objective
    result = target_func(weight_1, weight_2, weight_3)
  File "/tmp/ipykernel_22501/683430318.py", line 13, in target_func
    result = trainer.evaluate()
  File "/home/lazarev/disk/lazarev/miniconda3/envs/study/lib/python3.10/site-packages/transformers/trainer.py", line 2993, in evaluate
    output = eval_loop(
  File "/home/lazarev/disk/lazarev/miniconda3/envs/study/lib/python3.10/site-packages/transformers/trainer.py", line 3164, in evaluation_loop
    for step, inputs in enumerate(dataloader):


KeyboardInterrupt: 