<a href="https://colab.research.google.com/github/tzhsu211/CVS/blob/main/Finetune0205.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install datasets
!pip install optuna

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m30.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

In [2]:
import torch
import pandas as pd
import numpy as np
import json
import os
import warnings
import random
from accelerate import Accelerator
from transformers import BertTokenizerFast, EarlyStoppingCallback, AutoModelForSequenceClassification, Trainer, TrainingArguments
from matplotlib import pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from datasets import Dataset
import optuna


In [3]:
torch.cuda.is_available()

True

In [4]:
warnings.filterwarnings('ignore')

In [5]:
df = pd.read_csv('./cvs_products.csv')
df1 = df.drop(['product', 'store', 'link', 'CVS'], axis = 1)
scaler = StandardScaler()
df1['rating_standard'] = scaler.fit_transform(df['rating'].values.reshape(-1, 1))
X_train, X_test, y_train, y_test = train_test_split(df1['review'], df1['rating_standard'], test_size=0.1, random_state=42)
train_df = pd.DataFrame({'text': X_train, 'label': y_train.astype(float)})
test_df = pd.DataFrame({'text': X_test, 'label': y_test.astype(float)})

train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)

In [6]:
def preprocess_function(examples, tokenizer):
    return tokenizer(examples['text'], padding='max_length', truncation=True, max_length=256)

google_bert_tokenizer = BertTokenizerFast.from_pretrained('google-bert/bert-base-chinese')
google_bert_train_dataset = train_dataset.map(lambda x: preprocess_function(x, google_bert_tokenizer), batched=True)
google_bert_test_dataset = test_dataset.map(lambda x: preprocess_function(x, google_bert_tokenizer), batched=True)


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/110k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/269k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/624 [00:00<?, ?B/s]

Map:   0%|          | 0/8656 [00:00<?, ? examples/s]

Map:   0%|          | 0/962 [00:00<?, ? examples/s]

In [7]:
def compute_metrics(p):
    preds, labels = p
    preds = preds.squeeze()
    mse = mean_squared_error(labels, preds)
    r2 = r2_score(labels, preds)
    return {"mse": mse, "r2": r2}

In [8]:
checkpoint_path = './google_bert'
google_bert_model = AutoModelForSequenceClassification.from_pretrained(checkpoint_path, config=f'{checkpoint_path}/config.json')


In [9]:
def objective(trial, model, model_name, train_dataset, eval_dataset):

    # Define hyperparameters to tune
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-3)
    dropout = trial.suggest_uniform('dropout', 0.1, 0.5)
    batch_size = trial.suggest_categorical('batch_size', [32, 64])
    weight_decay = trial.suggest_loguniform('weight_decay', 0.01, 0.1)

    # Set dropout in model config (dropout is not set in training_args)
    model.config.attention_probs_dropout_prob = dropout
    model.config.hidden_dropout_prob = dropout

    # Set training_args based on fine-tune hyperparameters
    training_args = TrainingArguments(
        output_dir=f'./results/{model_name}_trial_{trial.number}',
        logging_dir=f'./logs/{model_name}_trial_{trial.number}',
        logging_steps=100,
        save_strategy='epoch',
        evaluation_strategy='epoch',
        warmup_steps=300,
        num_train_epochs=3,
        learning_rate=learning_rate,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        weight_decay=weight_decay,
        load_best_model_at_end=True,
        fp16=True,
        seed=42,
    )

    accelerator = Accelerator(mixed_precision="fp16")

    # Initialize Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
        # callbacks=[EarlyStoppingCallback(early_stopping_patience=2)],
    )

    trainer.train()

    # Evaluation
    eval_result = trainer.evaluate()

    return eval_result['eval_loss']


In [10]:
study = optuna.create_study(
    study_name="study0205",
    direction='minimize',
    storage='sqlite:////content/optuna_db/study.db',
    load_if_exists=True
)

study.optimize(
    lambda trial: objective(
        trial,
        google_bert_model,
        "google-bert-base_optuna",
        google_bert_train_dataset,
        google_bert_test_dataset
    ),
    n_trials=10
)

[I 2025-02-05 07:44:44,231] A new study created in RDB with name: study0205


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mttzuhsu[0m ([33mttzuhsu-none[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Epoch,Training Loss,Validation Loss,Mse,R2
1,0.2104,0.474196,0.474196,0.515574
2,0.1738,0.425345,0.425345,0.565479
3,0.1236,0.401035,0.401035,0.590313


[I 2025-02-05 07:51:20,057] Trial 0 finished with value: 0.40103545784950256 and parameters: {'learning_rate': 4.8640483052755485e-05, 'dropout': 0.16892734772481646, 'batch_size': 32, 'weight_decay': 0.03068760271236806}. Best is trial 0 with value: 0.40103545784950256.


Epoch,Training Loss,Validation Loss,Mse,R2
1,0.1574,0.517342,0.517342,0.471497
2,0.3777,0.510313,0.510313,0.478678
3,0.1863,0.453165,0.453165,0.537058


[I 2025-02-05 07:57:13,284] Trial 1 finished with value: 0.4531651735305786 and parameters: {'learning_rate': 0.00014515455816385876, 'dropout': 0.18264516830092262, 'batch_size': 32, 'weight_decay': 0.053120852070534345}. Best is trial 0 with value: 0.40103545784950256.


Epoch,Training Loss,Validation Loss,Mse,R2
1,0.135,0.485826,0.485826,0.503693
2,0.1374,0.490336,0.490336,0.499085
3,0.0941,0.42888,0.42888,0.561867


[I 2025-02-05 08:03:01,179] Trial 2 finished with value: 0.42888039350509644 and parameters: {'learning_rate': 7.348590114714911e-05, 'dropout': 0.2581720212322267, 'batch_size': 32, 'weight_decay': 0.025390044596121965}. Best is trial 0 with value: 0.40103545784950256.


Epoch,Training Loss,Validation Loss,Mse,R2
1,0.084,0.43056,0.43056,0.560151
2,0.0748,0.495935,0.495935,0.493365
3,0.0749,0.454899,0.454899,0.535287


[I 2025-02-05 08:09:20,963] Trial 3 finished with value: 0.4305596649646759 and parameters: {'learning_rate': 6.532192802264272e-05, 'dropout': 0.24747800175688678, 'batch_size': 64, 'weight_decay': 0.04558766627001453}. Best is trial 0 with value: 0.40103545784950256.


Epoch,Training Loss,Validation Loss,Mse,R2
1,0.0532,0.431281,0.431281,0.559415
2,0.0684,0.475355,0.475355,0.51439
3,0.0472,0.426706,0.426706,0.564088


[I 2025-02-05 08:16:26,764] Trial 4 finished with value: 0.4267061650753021 and parameters: {'learning_rate': 4.088754286650167e-05, 'dropout': 0.2259343393037393, 'batch_size': 32, 'weight_decay': 0.018632751644231563}. Best is trial 0 with value: 0.40103545784950256.


Epoch,Training Loss,Validation Loss,Mse,R2
1,0.0287,0.421922,0.421922,0.568975
2,0.037,0.422618,0.422618,0.568264
3,0.0265,0.431961,0.431961,0.55872


[I 2025-02-05 08:23:58,730] Trial 5 finished with value: 0.4219222664833069 and parameters: {'learning_rate': 2.066688805604789e-05, 'dropout': 0.21981233907657002, 'batch_size': 32, 'weight_decay': 0.04920605659586631}. Best is trial 0 with value: 0.40103545784950256.


Epoch,Training Loss,Validation Loss,Mse,R2
1,0.0139,0.440215,0.440215,0.550288
2,0.0534,0.443735,0.443735,0.546692
3,0.0418,0.436182,0.436182,0.554408


[I 2025-02-05 08:31:00,651] Trial 6 finished with value: 0.436181902885437 and parameters: {'learning_rate': 4.7213605898642805e-05, 'dropout': 0.11843898810491744, 'batch_size': 32, 'weight_decay': 0.018615062441723042}. Best is trial 0 with value: 0.40103545784950256.


Epoch,Training Loss,Validation Loss,Mse,R2
1,0.0504,0.463146,0.463146,0.526862
2,0.1198,1.424842,1.424842,-0.455582
3,0.4836,0.499042,0.499042,0.490192


[I 2025-02-05 08:38:51,384] Trial 7 finished with value: 0.4631456732749939 and parameters: {'learning_rate': 0.0002799091545763998, 'dropout': 0.3476373045475534, 'batch_size': 64, 'weight_decay': 0.019870359058480377}. Best is trial 0 with value: 0.40103545784950256.


Epoch,Training Loss,Validation Loss,Mse,R2
1,0.0973,0.609962,0.609962,0.376879
2,0.4796,1.001441,1.001441,-0.023046
3,1.0925,0.978949,0.978949,-6.9e-05


[I 2025-02-05 08:46:05,532] Trial 8 finished with value: 0.6099615097045898 and parameters: {'learning_rate': 0.0006227600324999825, 'dropout': 0.11189933826384837, 'batch_size': 64, 'weight_decay': 0.012148360811198815}. Best is trial 0 with value: 0.40103545784950256.


Epoch,Training Loss,Validation Loss,Mse,R2
1,0.179,1.08387,1.08387,-0.107254
2,0.883,0.777609,0.777609,0.205615
3,0.5855,0.64933,0.64933,0.336661


[I 2025-02-05 08:53:05,210] Trial 9 finished with value: 0.6493299603462219 and parameters: {'learning_rate': 0.00031823888380612815, 'dropout': 0.1448191125732774, 'batch_size': 32, 'weight_decay': 0.03184021249848639}. Best is trial 0 with value: 0.40103545784950256.


In [11]:
study_load = optuna.load_study(
    study_name="study0205",
    storage="sqlite:////content/optuna_db/study.db"
)

print(f"Study name: {study.study_name}")
print(f"Study direction: {study.direction}")
print(f"Number of trials: {len(study.trials)}")

Study name: study0205
Study direction: 1
Number of trials: 10


In [13]:
for trial in study_load.trials:
    print(f"Trial {trial.number}:")
    print(f"  Value: {trial.value}")
    print(f"  Params: {trial.params}")
    print(f"  State: {trial.state}")
    print()

Trial 0:
  Value: 0.40103545784950256
  Params: {'learning_rate': 4.8640483052755485e-05, 'dropout': 0.16892734772481646, 'batch_size': 32, 'weight_decay': 0.03068760271236806}
  State: 1

Trial 1:
  Value: 0.4531651735305786
  Params: {'learning_rate': 0.00014515455816385876, 'dropout': 0.18264516830092262, 'batch_size': 32, 'weight_decay': 0.053120852070534345}
  State: 1

Trial 2:
  Value: 0.42888039350509644
  Params: {'learning_rate': 7.348590114714911e-05, 'dropout': 0.2581720212322267, 'batch_size': 32, 'weight_decay': 0.025390044596121965}
  State: 1

Trial 3:
  Value: 0.4305596649646759
  Params: {'learning_rate': 6.532192802264272e-05, 'dropout': 0.24747800175688678, 'batch_size': 64, 'weight_decay': 0.04558766627001453}
  State: 1

Trial 4:
  Value: 0.4267061650753021
  Params: {'learning_rate': 4.088754286650167e-05, 'dropout': 0.2259343393037393, 'batch_size': 32, 'weight_decay': 0.018632751644231563}
  State: 1

Trial 5:
  Value: 0.4219222664833069
  Params: {'learning_ra