In [5]:
%pip install transformers datasets torch accelerate evaluate scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.7.2-cp310-cp310-win_amd64.whl.metadata (11 kB)
Collecting scipy>=1.8.0 (from scikit-learn)
  Using cached scipy-1.15.3-cp310-cp310-win_amd64.whl.metadata (60 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.5.2-py3-none-any.whl.metadata (5.6 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.7.2-cp310-cp310-win_amd64.whl (8.9 MB)
   ---------------------------------------- 0.0/8.9 MB ? eta -:--:--
   ---- ----------------------------------- 1.0/8.9 MB 8.4 MB/s eta 0:00:01
   -------------- ------------------------- 3.1/8.9 MB 9.2 MB/s eta 0:00:01
   ----------------------- ---------------- 5.2/8.9 MB 9.4 MB/s eta 0:00:01
   ---------------------------------- ----- 7.6/8.9 MB 9.6 MB/s eta 0:00:01
   ---------------------------------------- 8.9/8.9 MB 9.5 MB/s  0:00:00
Downloading joblib-1.5.2-py3-none-any.whl (30

In [11]:
# 匯入必要的庫
from datasets import load_dataset  # 用來載入資料集
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments  # BERT 相關工具
import torch  # PyTorch 框架
import evaluate  # 用來評估模型效能

# Step 1: 載入 IMDb 資料集（情感分析：正面/負面評論）
dataset = load_dataset("imdb")
train_dataset = dataset["train"].shuffle(seed=42).select(range(500))
eval_dataset = dataset["test"].shuffle(seed=42).select(range(100))

# Step 2: 載入 BERT Tokenizer（用來將文字轉換成模型輸入）
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# 定義 Tokenization 函數
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)  # 限制長度為 128 tokens

# 應用 Tokenization 到資料集
tokenized_train = train_dataset.map(tokenize_function, batched=True)
tokenized_eval = eval_dataset.map(tokenize_function, batched=True)

# Step 3: 載入 BERT 模型（用於序列分類，num_labels=2 表示二元分類）
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)


################################-TO DO-#################################################
# Step 4: 設定訓練參數
training_args = TrainingArguments(
    output_dir="./results",  # 輸出目錄
    num_train_epochs=8,  # 訓練 epoch 數
    per_device_train_batch_size=24,  # 每個裝置的 batch size
    per_device_eval_batch_size=64,  # 評估時的 batch size
    warmup_steps=24,  # 學習率 warmup
    weight_decay=0.005,  # 權重衰減
    logging_dir="./logs",  # 日誌目錄
    eval_strategy="epoch",  # 每個 epoch 評估一次
    save_strategy="epoch",  # 每個 epoch 保存模型
    load_best_model_at_end=True,  # 訓練結束載入最佳模型
    report_to="none",
)
#########################################################################################


# Step 5: 定義評估指標（使用 accuracy）
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = torch.argmax(torch.tensor(logits), dim=-1)
    return metric.compute(predictions=predictions, references=labels)

# Step 6: 使用 Trainer API 進行訓練
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    compute_metrics=compute_metrics,
)

# 開始訓練
trainer.train()

# Step 7: 評估模型
results = trainer.evaluate()
print("評估結果:", results)

# Step 8: 儲存模型（可選）
trainer.save_model("./fine_tuned_bert")
tokenizer.save_pretrained("./fine_tuned_bert")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.647554,0.58
2,No log,0.441512,0.8
3,No log,0.482431,0.82
4,No log,0.458393,0.89
5,No log,0.50335,0.85
6,No log,0.540378,0.85
7,No log,0.643764,0.85
8,No log,0.700268,0.84




評估結果: {'eval_loss': 0.44151151180267334, 'eval_accuracy': 0.8, 'eval_runtime': 5.0401, 'eval_samples_per_second': 19.841, 'eval_steps_per_second': 0.397, 'epoch': 8.0}


('./fine_tuned_bert\\tokenizer_config.json',
 './fine_tuned_bert\\special_tokens_map.json',
 './fine_tuned_bert\\vocab.txt',
 './fine_tuned_bert\\added_tokens.json')

In [14]:
# Step 9: 使用 fine-tuned 模型預測 CSV 資料
import pandas as pd
from transformers import pipeline

# 載入 fine-tuned 模型
classifier = pipeline("sentiment-analysis", model="./fine_tuned_bert")

# 讀取 CSV 檔案
df = pd.read_csv("data.csv")
predictions = []

for idx, row in df.iterrows():
    text = row['data']
    result = classifier(text)
    predictions.append({
        'text': text,
        'label': result[0]['label'],
        'confidence': result[0]['score']
    })
    
# 將預測結果轉換為 DataFrame
results_df = pd.DataFrame(predictions)
print(results_df)


Device set to use cpu


                                                text    label  confidence
0  This movie is sooooo nice!! I should watch it ...  LABEL_1    0.892257
1  Absolutely loved the soundtrack, it fit perfec...  LABEL_1    0.921078
2         One of the best films I’ve seen this year!  LABEL_1    0.913377
3   Amazing visuals, the cinematography is stunning.  LABEL_1    0.930297
4         A masterpiece, truly unforgettable cinema.  LABEL_1    0.832912
5   I wouldn’t recommend it, felt too long and slow.  LABEL_0    0.837048
6        Characters were shallow and underdeveloped.  LABEL_0    0.805675
7         Not worth the hype, kind of disappointing.  LABEL_0    0.854372
8     The story didn’t make sense, left me confused.  LABEL_0    0.842527
9  It was painful to sit through, very disappoint...  LABEL_0    0.839201


### 更多測試~~

用自己的資料好像不太行qq (but是財經新聞，跟訓練的電影評論資料有點差距:3)

In [None]:
from transformers import pipeline

# 載入 fine-tuned 模型
classifier = pipeline(
    "sentiment-analysis",
    model="./fine_tuned_bert",
    device=0  # 強制用 GPU
)

# 要分析的句子(應該是正面LABEL_1。)
text = "高盛證券於舊金山舉行論壇，邀請台積電高層進行深度對談，聚焦四大主題：智慧機與高效能運算（HPC）帶動先進製程需求。"

# 預測
result = classifier(text)
print(result)

  from .autonotebook import tqdm as notebook_tqdm
Device set to use cuda:0


[{'label': 'LABEL_0', 'score': 0.6631180047988892}]


In [None]:
from transformers import pipeline

# 載入 fine-tuned 模型
classifier = pipeline("sentiment-analysis", model="./fine_tuned_bert")

# 要分析的句子(應該是正面LABEL_1。)
text = "Goldman Sachs held a forum in San Francisco, inviting TSMC executives for an in-depth discussion, focusing on four key themes: Smartphones and high-performance computing (HPC) drive demand for advanced processes."

# 預測
result = classifier(text)
print(result)

Device set to use cpu


[{'label': 'LABEL_1', 'score': 0.9028972387313843}]


In [None]:
# 要分析的句子(應該是負面LABEL_0。)
text = "America's jobs market is in a 'two-tier growth spurt."

# 預測
result = classifier(text)
print(result)

[{'label': 'LABEL_1', 'score': 0.8271191716194153}]


In [None]:
# 要分析的句子(應該是負面LABEL_0。)
text = """It’s signaling that we’re in a two-tier growth spurt,
 Richardson said on the CNN Business digital live show Markets Now.
The first tier is that the nation is still filling back the jobs lost during the worst of the pandemic. 
The second is that the economy is actually changing, Richardson said."""

# 預測
result = classifier(text)
print(result)

[{'label': 'LABEL_1', 'score': 0.8971593976020813}]


### 試試看增強版的(更多訓練資料，更長的token。)

分析英文電影評論貌似步錯，但其餘表現似乎持續尷尬，用電影評論預測財經新聞，或是分析中文評論好似真不太適合。

In [1]:
# 匯入必要的庫
from datasets import load_dataset  # 用來載入資料集
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments  # BERT 相關工具
import torch  # PyTorch 框架
import evaluate  # 用來評估模型效能

# 檢查 CUDA 是否可用
print("CUDA available:", torch.cuda.is_available())

# Step 1: 載入 IMDb 資料集（情感分析：正面/負面評論）
dataset = load_dataset("imdb")
train_dataset = dataset["train"].shuffle(seed=42).select(range(8000))
eval_dataset = dataset["test"].shuffle(seed=42).select(range(1500))

# Step 2: 載入 BERT Tokenizer（用來將文字轉換成模型輸入）
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# 定義 Tokenization 函數
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=200)  # 限制長度為 200 tokens

# 應用 Tokenization 到資料集
tokenized_train = train_dataset.map(tokenize_function, batched=True)
tokenized_eval = eval_dataset.map(tokenize_function, batched=True)

# Step 3: 載入 BERT 模型（用於序列分類，num_labels=2 表示二元分類）
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)


################################-TO DO-#################################################
# Step 4: 設定訓練參數
training_args = TrainingArguments(
    output_dir="./Upg.results",
    num_train_epochs=8,
    per_device_train_batch_size=48,  # 依 GPU 記憶體調整
    per_device_eval_batch_size=128,  # 評估可設大
    warmup_steps=1500,                # 約 5~10% 總步驟
    weight_decay=0.005,               # 可微調，0.005~0.01
    logging_dir="./logs",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    report_to="none",
)
#########################################################################################


# Step 5: 定義評估指標（使用 accuracy）
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = torch.argmax(torch.tensor(logits), dim=-1)
    return metric.compute(predictions=predictions, references=labels)

# Step 6: 使用 Trainer API 進行訓練
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    compute_metrics=compute_metrics,
)

# 開始訓練
trainer.train()

# Step 7: 評估模型
results = trainer.evaluate()
print("評估結果:", results)

# Step 8: 儲存模型
trainer.save_model("./fine_tuned_bert_max")
tokenizer.save_pretrained("./fine_tuned_bert_max")

  from .autonotebook import tqdm as notebook_tqdm


CUDA available: True


Map: 100%|██████████| 8000/8000 [00:42<00:00, 187.57 examples/s]
Map: 100%|██████████| 1500/1500 [00:07<00:00, 200.34 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.396433,0.832667
2,No log,0.285562,0.875333
3,0.400300,0.265365,0.899333
4,0.400300,0.363845,0.867333
5,0.400300,0.426155,0.878
6,0.118400,0.459131,0.888667
7,0.118400,0.464007,0.890667
8,0.118400,0.625987,0.855333


評估結果: {'eval_loss': 0.26536479592323303, 'eval_accuracy': 0.8993333333333333, 'eval_runtime': 42.269, 'eval_samples_per_second': 35.487, 'eval_steps_per_second': 0.284, 'epoch': 8.0}


('./fine_tuned_bert_max\\tokenizer_config.json',
 './fine_tuned_bert_max\\special_tokens_map.json',
 './fine_tuned_bert_max\\vocab.txt',
 './fine_tuned_bert_max\\added_tokens.json')

#### 這邊更強的模型，表現的confidence更好了~~

In [3]:
# Step 9: 使用更強的 fine-tuned 模型預測 CSV 資料
import pandas as pd
from transformers import pipeline

# 載入 fine-tuned 模型
classifier = pipeline("sentiment-analysis", model="./fine_tuned_bert_max")

# 讀取 CSV 檔案
df = pd.read_csv("data.csv")
predictions = []

for idx, row in df.iterrows():
    text = row['data']
    result = classifier(text)
    predictions.append({
        'text': text,
        'label': result[0]['label'],
        'confidence': result[0]['score']
    })
    
# 將預測結果轉換為 DataFrame
results_df = pd.DataFrame(predictions)
print(results_df)


Device set to use cuda:0


                                                text    label  confidence
0  This movie is sooooo nice!! I should watch it ...  LABEL_1    0.917468
1  Absolutely loved the soundtrack, it fit perfec...  LABEL_1    0.984024
2         One of the best films I’ve seen this year!  LABEL_1    0.972332
3   Amazing visuals, the cinematography is stunning.  LABEL_1    0.985718
4         A masterpiece, truly unforgettable cinema.  LABEL_1    0.956977
5   I wouldn’t recommend it, felt too long and slow.  LABEL_0    0.895157
6        Characters were shallow and underdeveloped.  LABEL_0    0.936893
7         Not worth the hype, kind of disappointing.  LABEL_0    0.934566
8     The story didn’t make sense, left me confused.  LABEL_0    0.916409
9  It was painful to sit through, very disappoint...  LABEL_0    0.942754


#### 電影評論進階題
進一步比較中英文的差距，英文表現幾乎正確，中文錯誤偏多。

In [26]:
# 10 個很難辨識的電影評論（中英文），並附上正確答案
reviews = [
    # 1. 難辨識的正面
    {"zh": "這部電影雖然節奏緩慢，但最後的情感爆發讓人感動。", "en": "Although the movie is slow-paced, the emotional climax at the end is touching.", "label": "POSITIVE"},
    # 2. 難辨識的負面
    {"zh": "演員表現不錯，可惜劇情讓人失望。", "en": "The actors did well, but the plot was disappointing.", "label": "NEGATIVE"},
    # 3. 難辨識的正面
    {"zh": "一開始覺得無聊，沒想到後面越來越精彩。", "en": "It felt boring at first, but it got more exciting later on.", "label": "POSITIVE"},
    # 4. 難辨識的負面
    {"zh": "特效很棒，但故事完全沒吸引力。", "en": "The special effects were great, but the story was not engaging at all.", "label": "NEGATIVE"},
    # 5. 難辨識的正面
    {"zh": "雖然有些地方拖戲，但整體來說還算不錯。", "en": "Although some parts dragged, overall it was pretty good.", "label": "POSITIVE"},
    # 6. 難辨識的負面
    {"zh": "配樂很出色，可惜角色塑造太薄弱。", "en": "The soundtrack was excellent, but the character development was too weak.", "label": "NEGATIVE"},
    # 7. 難辨識的正面
    {"zh": "不是我喜歡的類型，但結局讓我改觀。", "en": "Not my favorite genre, but the ending changed my mind.", "label": "POSITIVE"},
    # 8. 難辨識的負面
    {"zh": "畫面很美，可惜劇情漏洞太多。", "en": "The visuals were beautiful, but there were too many plot holes.", "label": "NEGATIVE"},
    # 9. 難辨識的正面
    {"zh": "雖然有些老套，但還是讓人看得很開心。", "en": "Although a bit clichéd, it was still enjoyable to watch.", "label": "POSITIVE"},
    # 10. 難辨識的負面
    {"zh": "導演很有想法，但整體表現不如預期。", "en": "The director had ideas, but the overall execution was below expectations.", "label": "NEGATIVE"}
]


zh_texts = [r["zh"] for r in reviews]
zh_labels = [r["label"] for r in reviews]
zh_results = classifier(zh_texts)
for i, (text, result, label) in enumerate(zip(zh_texts, zh_results, zh_labels), 1):
    print(f"{i}. {text}\n預測: {result['label']} (信心: {result['score']:.2f})，正確答案: {label}\n")


en_texts = [r["en"] for r in reviews]
en_labels = [r["label"] for r in reviews]
en_results = classifier(en_texts)
for i, (text, result, label) in enumerate(zip(en_texts, en_results, en_labels), 1):
    print(f"{i}. {text}\nPrediction: {result['label']} (Confidence: {result['score']:.2f}), Correct: {label}\n")

1. 這部電影雖然節奏緩慢，但最後的情感爆發讓人感動。
預測: LABEL_0 (信心: 0.55)，正確答案: POSITIVE

2. 演員表現不錯，可惜劇情讓人失望。
預測: LABEL_0 (信心: 0.66)，正確答案: NEGATIVE

3. 一開始覺得無聊，沒想到後面越來越精彩。
預測: LABEL_0 (信心: 0.58)，正確答案: POSITIVE

4. 特效很棒，但故事完全沒吸引力。
預測: LABEL_0 (信心: 0.60)，正確答案: NEGATIVE

5. 雖然有些地方拖戲，但整體來說還算不錯。
預測: LABEL_0 (信心: 0.65)，正確答案: POSITIVE

6. 配樂很出色，可惜角色塑造太薄弱。
預測: LABEL_0 (信心: 0.64)，正確答案: NEGATIVE

7. 不是我喜歡的類型，但結局讓我改觀。
預測: LABEL_0 (信心: 0.63)，正確答案: POSITIVE

8. 畫面很美，可惜劇情漏洞太多。
預測: LABEL_0 (信心: 0.61)，正確答案: NEGATIVE

9. 雖然有些老套，但還是讓人看得很開心。
預測: LABEL_0 (信心: 0.63)，正確答案: POSITIVE

10. 導演很有想法，但整體表現不如預期。
預測: LABEL_0 (信心: 0.64)，正確答案: NEGATIVE

1. Although the movie is slow-paced, the emotional climax at the end is touching.
Prediction: LABEL_1 (Confidence: 0.99), Correct: POSITIVE

2. The actors did well, but the plot was disappointing.
Prediction: LABEL_0 (Confidence: 0.96), Correct: NEGATIVE

3. It felt boring at first, but it got more exciting later on.
Prediction: LABEL_1 (Confidence: 0.70), Correct: POSITIVE

4. The special e

#### 底下都是財經新聞的情緒分析...目前不太好

In [4]:
from transformers import pipeline

# 載入 fine-tuned 模型
classifier = pipeline("sentiment-analysis", model="./fine_tuned_bert_max")

# 要分析的句子(應該是正面LABEL_1。)
text = """Goldman Sachs held a forum in San Francisco, inviting TSMC executives for an in-depth discussion,
 focusing on four key themes: Smartphones and high-performance computing (HPC) 
 drive demand for advanced processes."""

# 預測
result = classifier(text)
print(result)

Device set to use cuda:0


[{'label': 'LABEL_1', 'score': 0.9328424334526062}]


In [None]:
# 要分析的句子(應該是正面LABEL_1。)
text = "高盛證券於舊金山舉行論壇，邀請台積電高層進行深度對談，聚焦四大主題：智慧機與高效能運算（HPC）帶動先進製程需求。"
# 預測
result = classifier(text)
print(result)

[{'label': 'LABEL_1', 'score': 0.6301445364952087}]


In [13]:
# 要分析的句子(應該是正面LABEL_1。)
text = """台積電米玉傑等5人成為工研院新院士，這屆新任院士展現台灣在先進製程、工程永續與醫療健康上的厚實能量，
不僅強化科技實力，更提升國際影響力。"""
# 預測
result = classifier(text)
print(result)

[{'label': 'LABEL_1', 'score': 0.6281268000602722}]


In [11]:
# 要分析的句子(應該是正面LABEL_1。)
text = """沒必過度擔心明年台股 前外資天后投資教戰。"""
# 預測
result = classifier(text)
print(result)

[{'label': 'LABEL_0', 'score': 0.5948212742805481}]


In [5]:
# 要分析的句子(應該是正面LABEL_1。)
text = """There's no need to worry too much about the Taiwan stock market next year. 
A former foreign investment queen offers investment advice."""
# 預測
result = classifier(text)
print(result)

[{'label': 'LABEL_1', 'score': 0.7858993411064148}]


In [6]:
# 要分析的句子(應該是負面LABEL_0。)
text = "America's jobs market is in a 'two-tier growth spurt."
# 預測
result = classifier(text)
print(result)

[{'label': 'LABEL_1', 'score': 0.7364367246627808}]


In [7]:
# 要分析的句子(應該是負面LABEL_0。)
text = """It’s signaling that we’re in a two-tier growth spurt, Richardson said on the CNN Business digital
live show Markets Now. The first tier is that the nation is still filling back the jobs lost during the worst
of the pandemic. The second is that the economy is actually changing, Richardson said."""
# 預測
result = classifier(text)
print(result)

[{'label': 'LABEL_1', 'score': 0.9200599789619446}]


In [8]:
# 要分析的句子(應該是負面LABEL_0。)
text = """It’s lunchtime and the market looks red.After a mixed morning, where the Dow eked out some time
 in positive territory, all three major stock indexes are now trading lower."""
# 預測
result = classifier(text)
print(result)

[{'label': 'LABEL_0', 'score': 0.7618705630302429}]


In [None]:
# 要分析的句子(應該是負面LABEL_0。)
text = """“S&P 500 [earnings per share] is up 50% from the Covid trough, 
and the S&P 500 has doubled,” the bank’s analysts wrote in a note this morning. 
But what good news is left?Not very much… the economy is recovering,
 though the pace of improvements is taking a hit amid the spread of the Delta variant.
   Meanwhile, the Federal Reserve is preparing to normalize its ultra-easy money policies."""

# 預測
result = classifier(text)
print(result)

[{'label': 'LABEL_0', 'score': 0.835030198097229}]


新聞標題表現不太優...

In [24]:
# 新增一個 cell，將20個新聞標題丟入 fine-tuned BERT 模型，批次判斷正面/負面
from transformers import pipeline
# 載入 fine-tuned 模型
classifier = pipeline("sentiment-analysis", model="./fine_tuned_bert_max")
# 20則新聞標題
titles = [
    "廣運營收續報喜 好轉已成事實",
    "地表最強投顧來襲！ 川普喊話「大幅降息」",
    "智慧醫療熱度不減 安勤鎖定關鍵成長",
    "台灣無人機聯盟啟航！ 哪些個股最值得關注？",
    "力成擺脫大牛股印象 成為法人愛股",
    "英偉達遭中國反壟斷調查 市場該如何看待？",
    "台灣10家公司入列！《時代》揭全球最佳企業榜單：輝達、微軟⋯以AI為貴，蘋果、台積電卻落榜？",
    "iPhone 17台灣預購已開！電信三雄資費方案攻略",
    "出席「大專生洄游農村競賽」...蕭美琴談五大青年照顧政策，勉勵學子將台灣農村精神帶向國際",
    "台新證1.1264股換元富證1股合併！市佔躍升第4大、期貨合併成第5大…賴昭吟：資源整合發揮綜效",
    "AI液冷大爆發！雙鴻卡位液冷先機",
    "8月EPS年增近6成！ 光寶科卡位電源核心",
    "興達電廠火災將大限電？台電闢謠，盤點近年「大停電」意外一次看",
    "第三代晶圓連日狂飆 SiC成關鍵解方",
    "想要一整天大腦清醒？起床第一件事：絕對不要看手機！",
    "當主管好難！改變快被說假、慢被說不積極？怎麼做才對",
    "赴韓參訪非軍事區、38度線！謝金河回頭看韓戰：到底南韓還是台灣安全？用1差別看地表最危險國家",
    "彎著腰前進南韓「非軍事區」坑道！內部畫面曝光…謝金河評比：金門坑道更壯觀，但DMZ有1大特色",
    "第一名店董座王義郎辭世享壽86歲…從工友到銀行員、航空業總座再創業：我愛工作創新「一生都要學習」",
    "中信金、富邦金…女星存金融股「無痛買iPhone 17」，60張兆豐金底氣超足：我是存錢的概念，不是賺價差"
]
# 批次預測
results = classifier(titles)
for result in results:
    print(f"{result['label']} (信心: {result['score']:.2f})")


Device set to use cuda:0


LABEL_0 (信心: 0.62)
LABEL_0 (信心: 0.53)
LABEL_0 (信心: 0.67)
LABEL_0 (信心: 0.61)
LABEL_0 (信心: 0.66)
LABEL_0 (信心: 0.58)
LABEL_1 (信心: 0.51)
LABEL_0 (信心: 0.57)
LABEL_1 (信心: 0.59)
LABEL_1 (信心: 0.55)
LABEL_0 (信心: 0.63)
LABEL_0 (信心: 0.58)
LABEL_0 (信心: 0.53)
LABEL_0 (信心: 0.66)
LABEL_1 (信心: 0.54)
LABEL_0 (信心: 0.56)
LABEL_0 (信心: 0.54)
LABEL_0 (信心: 0.53)
LABEL_0 (信心: 0.53)
LABEL_0 (信心: 0.51)


英文新聞標題表現也不太優...但比中文好一丁點

In [23]:
# 新增一個 cell，將20個新聞標題丟入 fine-tuned BERT 模型，批次判斷正面/負面
from transformers import pipeline
# 載入 fine-tuned 模型
classifier = pipeline("sentiment-analysis", model="./fine_tuned_bert_max")
# 20則新聞標題
titles = [
"Guangzhou's operations continue to report positive earnings, and the recovery is a reality.",

"The world's most powerful investment advisor arrives! Trump calls for a substantial rate cut.",

"Smart healthcare remains popular, and Avalue is targeting key growth opportunities.",

"The Taiwan Drone Alliance launches! Which stocks are worth watching?",

"Powertech sheds its image as a bull stock and becomes a favorite among institutional investors.",

"Nvidia faces antitrust investigation in China: How should the market view it?",

"Ten Taiwanese companies make the list! Time reveals its list of the world's best companies: Nvidia and Microsoft... prioritize AI, while Apple and TSMC are left out?",

"iPhone 17 pre-orders are now open in Taiwan! A guide to the tariff plans of the three major telecom operators.",
"Attending the 'College Student Rural Migration Competition'... Hsiao Mei-ching discusses five key youth support policies, encouraging students to bring Taiwan's rural spirit to the world.",

"1.1264 Taishin Securities shares exchanged for 1 Yuanfu Securities share! Merger! Market share jumps to 4th place, futures merger becomes 5th... Lai Zhaoyin: Resource integration unlocks synergy.",
"AI liquid cooling explodes! Shuanghong seizes the lead in liquid cooling.",
"August EPS increases nearly 60% year-over-year! Lite-On Technology secures a position at the heart of power supply.",
"Xingda Power Plant fire will lead to major power cuts? Taipower denies rumors, reviewing recent major blackouts.",
"Third-generation wafers surge, SiC becomes the key solution.",
"Want a clear mind all day? First thing in the morning: Never check your phone!",
"Being a manager is tough! Quick changes are called fake, slow changes are called inactive. What's the right approach?",
"Visiting the Demilitarized Zone and the 38th Parallel in South Korea! Xie Jinhe looks back at the Korean War: Is South Korea or Taiwan safer? One difference reveals the most dangerous country on Earth.",
"Bending over in the tunnels of South Korea's Demilitarized Zone! Internal footage leaked... Xie Jinhe's assessment: The Kinmen tunnels are more spectacular, but the DMZ has one major feature.",
"Wang Yilang, chairman of No. 1 Store, passed away at the age of 86... From worker to banker, then airline CEO, and then entrepreneur again: I love work and innovation, and I'm a lifelong learner.",
"Chinatrust Financial Holdings, Fubon Financial Holdings... Actresses invest in financial stocks, making it a painless purchase of the iPhone 17. 60 Mega Financial Holdings shares offer confidence: I'm saving money, not profiting from price differences."
]


# 批次預測
results = classifier(titles)
for result in results:
    print(f"{result['label']} (信心: {result['score']:.2f})")


Device set to use cuda:0


LABEL_1 (信心: 0.88)
LABEL_1 (信心: 0.61)
LABEL_1 (信心: 0.72)
LABEL_0 (信心: 0.66)
LABEL_0 (信心: 0.68)
LABEL_0 (信心: 0.88)
LABEL_1 (信心: 0.85)
LABEL_1 (信心: 0.94)
LABEL_1 (信心: 0.96)
LABEL_1 (信心: 0.95)
LABEL_0 (信心: 0.81)
LABEL_1 (信心: 0.89)
LABEL_0 (信心: 0.76)
LABEL_0 (信心: 0.72)
LABEL_0 (信心: 0.75)
LABEL_0 (信心: 0.84)
LABEL_1 (信心: 0.73)
LABEL_0 (信心: 0.68)
LABEL_1 (信心: 0.98)
LABEL_0 (信心: 0.80)
