<a href="https://colab.research.google.com/github/yc-chen10/113-1-finance/blob/main/week10%E4%BD%9C%E6%A5%AD_%E9%87%91%E8%9E%8D%E6%96%87%E6%9C%AC%E6%83%85%E7%B7%92%E8%BE%A8%E8%AD%98Hugging_Face.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os

# Turn off Weights & Biases reporting for this session. The recorded Trainer
# output notes that transformers treats this variable as deprecated in favour
# of the `report_to` setting.
os.environ.update({"WANDB_DISABLED": "true"})

In [7]:
# 必要套件
from datasets import load_dataset, DatasetDict
from transformers import AutoTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, classification_report
import torch

In [8]:

# 1. Load the Financial PhraseBank dataset, using the "sentences_allagree"
#    configuration.
dataset = load_dataset(
    path="takala/financial_phrasebank",
    name="sentences_allagree",
)

In [9]:
# 2. Initialise the tokenizer
model_name = "bert-base-uncased"  # checkpoint id; reused when the model is built
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)

# 3. Tokenisation function
def tokenize_function(example, max_length=128):
    """Tokenise the ``"sentence"`` field of ``example`` with the notebook tokenizer.

    Args:
        example: Mapping with a ``"sentence"`` entry. The value is handed to
            ``tokenizer`` unchanged, so it may be anything the tokenizer
            accepts (a list of sentences when called through
            ``map(..., batched=True)``).
        max_length: Length every sequence is padded or truncated to.
            Defaults to 128, the value that was previously hard-coded.

    Returns:
        Whatever ``tokenizer`` returns for the call.
    """
    return tokenizer(
        example["sentence"],
        padding="max_length",  # pad every sequence up to ``max_length``
        truncation=True,       # cut sequences longer than ``max_length``
        max_length=max_length,
    )

# 4. Tokenise every split, rename the target column to "labels", and expose
#    the model inputs as PyTorch tensors.
tokenized_datasets = (
    dataset
    .map(tokenize_function, batched=True)
    .rename_column("label", "labels")
)
tokenized_datasets.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "labels"],
)


In [10]:
# 5. Split into training and test sets (80% train, 20% test).
# The result is stored back under `tokenized_datasets`, so only split while no
# "test" split exists yet: otherwise re-running this cell would carve a new
# test set out of the already-reduced training split. To re-split with
# different settings, re-run the tokenisation cell first.
if "test" not in tokenized_datasets:
    train_test_split = tokenized_datasets["train"].train_test_split(test_size=0.2, seed=42)
    tokenized_datasets = DatasetDict({
        "train": train_test_split["train"],
        "test": train_test_split["test"]
    })

# 6. Inspect one example from each split
print(f"Training sample: {tokenized_datasets['train'][0]}")
print(f"Test sample: {tokenized_datasets['test'][0]}")

Training sample: {'labels': tensor(1), 'input_ids': tensor([  101, 12849,  4246,  1005,  1055,  3006,  3745,  1997,  1996,  3872,
         1997,  1996,  3006,  2001,  2603,  1012,  1018,  1003,  1010, 10556,
        25032,  2226,  1005,  1055,  2538,  1012,  1018,  1003,  1012,   102,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            

In [11]:
# 7. Initialise the BERT model (3 classes: Negative, Neutral, Positive).
# The classification head is not part of the checkpoint and is freshly
# initialised, so seed PyTorch first to make that initialisation repeatable
# across runs (42 matches the seed used for the train/test split).
torch.manual_seed(42)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=3)



Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
# 8. Evaluation function
def compute_metrics(eval_pred):
    """Compute accuracy and print a per-class report for one evaluation pass.

    Args:
        eval_pred: Pair ``(logits, labels)``. ``logits`` holds one score per
            class along its last axis; if it is a tuple, its first element is
            used as the logits. ``labels`` holds the integer class ids.

    Returns:
        dict: ``{"accuracy": <value returned by accuracy_score>}``.

    Side effects:
        Prints the ``classification_report`` text, where class id ``i`` is
        listed under the ``i``-th name of Negative / Neutral / Positive.
    """
    logits, labels = eval_pred
    if isinstance(logits, tuple):
        # Some models return extra outputs next to the logits; keep the logits.
        logits = logits[0]

    # as_tensor avoids the extra copy torch.tensor() would make.
    predictions = torch.as_tensor(logits).argmax(dim=-1).numpy()

    class_names = ["Negative", "Neutral", "Positive"]
    acc = accuracy_score(labels, predictions)
    print(
        classification_report(
            labels,
            predictions,
            # Pin the label ids so the report still works (and keeps its row
            # order) when a class is absent from both labels and predictions.
            labels=list(range(len(class_names))),
            target_names=class_names,
            zero_division=0,
        )
    )
    return {"accuracy": acc}

In [14]:
# 9. Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    # NOTE(review): newer transformers releases spell this `eval_strategy`;
    # confirm against the installed version before renaming.
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    save_strategy="epoch",
    logging_dir="./logs",
    logging_steps=10,
    save_total_limit=2,
    run_name="financial_sentiment_analysis",
    # Disable external logging integrations explicitly, as the recorded
    # deprecation warning for the WANDB_DISABLED environment variable asks.
    report_to="none",
)

# 10. Build the Trainer from the model, the training arguments, both dataset
#     splits, the tokenizer and the metric callback.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    eval_dataset=tokenized_datasets["test"],
    train_dataset=tokenized_datasets["train"],
)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  trainer = Trainer(


In [15]:
# 11. Train the model. The call is left as the cell's final expression so its
# return value is displayed as the cell output.
trainer.train()


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2377,0.194457,0.951435
2,0.097,0.149327,0.966887
3,0.0169,0.149302,0.966887


              precision    recall  f1-score   support

    Negative       0.93      0.90      0.92        73
     Neutral       0.98      0.98      0.98       280
    Positive       0.89      0.91      0.90       100

    accuracy                           0.95       453
   macro avg       0.93      0.93      0.93       453
weighted avg       0.95      0.95      0.95       453

              precision    recall  f1-score   support

    Negative       0.94      0.93      0.94        73
     Neutral       0.99      0.98      0.99       280
    Positive       0.92      0.95      0.94       100

    accuracy                           0.97       453
   macro avg       0.95      0.95      0.95       453
weighted avg       0.97      0.97      0.97       453

              precision    recall  f1-score   support

    Negative       0.94      0.93      0.94        73
     Neutral       0.99      0.98      0.99       280
    Positive       0.92      0.95      0.94       100

    accuracy        

TrainOutput(global_step=342, training_loss=0.21215787525043675, metrics={'train_runtime': 7532.5294, 'train_samples_per_second': 0.721, 'train_steps_per_second': 0.045, 'total_flos': 357373799629056.0, 'train_loss': 0.21215787525043675, 'epoch': 3.0})

In [16]:
# 12. Evaluate the model and print the metrics dictionary that
# `trainer.evaluate()` returns.
results = trainer.evaluate()
print(results)


              precision    recall  f1-score   support

    Negative       0.94      0.93      0.94        73
     Neutral       0.99      0.98      0.99       280
    Positive       0.92      0.95      0.94       100

    accuracy                           0.97       453
   macro avg       0.95      0.95      0.95       453
weighted avg       0.97      0.97      0.97       453

{'eval_loss': 0.14930245280265808, 'eval_accuracy': 0.9668874172185431, 'eval_runtime': 167.4421, 'eval_samples_per_second': 2.705, 'eval_steps_per_second': 0.173, 'epoch': 3.0}
