In [None]:


# 安裝必要套件
!pip install -q transformers datasets

# 載入資料集
from datasets import load_dataset

dataset = load_dataset("pubmed_qa", "pqa_labeled")
print("✅ 成功載入 PubMedQA 資料集")

# 載入模型（這裡使用 PubMedBERT）
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

model_name = "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

print("✅ 成功載入 PubMedBERT 模型")

# 設定推理函數
def predict_answer(question, context):
    prompt = f"Question: {question}\nContext: {context}\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)

    with torch.no_grad():
        logits = model(**inputs).logits

    predicted_class_id = logits.argmax().item()
    labels = ["yes", "no", "maybe"]  # 注意：這個需要根據模型實際分類數調整
    return labels[predicted_class_id]

# 測試一題
sample = dataset["train"][0]
print("--- 測試一題 ---")
print("問題:", sample["question"])
print("模型回答:", predict_answer(sample["question"], sample["context"]))
print("正確答案:", sample["final_decision"])

#批量測試前100題，計算準確率
print("\n--- 批量測試 100 題 ---")
correct = 0
for i in range(100):
    sample = dataset["train"][i]
    pred = predict_answer(sample["question"], sample["context"])
    if pred == sample["final_decision"]:
        correct += 1

print(f"100題中的正確率：{correct}%")

#完成！
print("\n✅ 全部執行完畢！")


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.9/183.9 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.8/194.8 kB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gcsfs 2025.3.2 requires fsspec==2025.3.2, but you have fsspec 2024.12.0 which is incompatible.
torch 2.6.0+cu124 requires nvidia-cublas-cu12==12.4.5.8; platform_system == "Linux" and platform_machine == "x86_64",

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/5.19k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/1.08M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1000 [00:00<?, ? examples/s]

✅ 成功載入 PubMedQA 資料集


tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ 成功載入 PubMedBERT 模型
--- 測試一題 ---
問題: Do mitochondria play a role in remodelling lace plant leaves during programmed cell death?
模型回答: yes
正確答案: yes

--- 批量測試 100 題 ---
100題中的正確率：56%

✅ 全部執行完畢！


In [None]:




# --- 🔵 新增互動版：自己輸入問題和摘要！ ---

while True:
    print("\n🔵 手動測試區 (按下Enter直接跳出) 🔵")
    question = input("請輸入問題 (例如: Does aspirin reduce heart attack risk?)：")
    if question.strip() == "":
        print("✅ 結束互動模式。")
        break
    context = input("請輸入摘要 (例如: Studies show aspirin significantly reduces heart attack risk...)：")
    if context.strip() == "":
        print("❗摘要不可空白，請重新輸入。")
        continue
    pred = predict_answer(question, context)
    print(f"\n模型回答：{pred}\n")



🔵 手動測試區 (按下Enter直接跳出) 🔵
請輸入問題 (例如: Does aspirin reduce heart attack risk?)：Does aspirin reduce heart attack risk?
請輸入摘要 (例如: Studies show aspirin significantly reduces heart attack risk...)：Studies show aspirin significantly reduces heart attack risk...

模型回答：no


🔵 手動測試區 (按下Enter直接跳出) 🔵
請輸入問題 (例如: Does aspirin reduce heart attack risk?)：Does taking vitamin D supplements reduce the risk of COVID-19?
請輸入摘要 (例如: Studies show aspirin significantly reduces heart attack risk...)：Recent studies have shown that vitamin D supplementation has a moderate effect on reducing the severity of COVID-19 symptoms.

模型回答：no


🔵 手動測試區 (按下Enter直接跳出) 🔵
請輸入問題 (例如: Does aspirin reduce heart attack risk?)：
✅ 結束互動模式。


In [None]:
while True:
    print("\n🔵 單題提問模式 (按Enter直接退出) 🔵")
    question = input("請輸入你的問題（例如：Is exercise good for heart health?）：")
    if question.strip() == "":
        print("✅ 結束提問模式。")
        break
    # 因為沒有摘要，我們直接把問題當成prompt
    inputs = tokenizer(question, return_tensors="pt", truncation=True, max_length=512)

    with torch.no_grad():
        logits = model(**inputs).logits

    predicted_class_id = logits.argmax().item()
    labels = ["yes", "no", "maybe"]
    pred = labels[predicted_class_id]

    print(f"\n模型回答：{pred}\n")



🔵 單題提問模式 (按Enter直接退出) 🔵
請輸入你的問題（例如：Is exercise good for heart health?）：Does taking vitamin D supplements reduce the risk of COVID-19?

模型回答：no


🔵 單題提問模式 (按Enter直接退出) 🔵
請輸入你的問題（例如：Is exercise good for heart health?）：
✅ 結束提問模式。


In [5]:
!pip install --upgrade transformers datasets evaluate
import transformers
print(transformers.__version__)


4.51.3


In [17]:
# 安裝必要套件
!pip install -q transformers datasets evaluate

from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
import torch
from sklearn.metrics import accuracy_score
import evaluate
import numpy as np

# Step 1：載入資料集並分割
from datasets import load_dataset

# 載入整個資料集
dataset = load_dataset("pubmed_qa", "pqa_labeled")["train"]

# 先切出 20% 作為 test set
dataset = dataset.train_test_split(test_size=0.2, seed=42)
train_val_dataset = dataset["train"]
test_dataset = dataset["test"]

# 再從 train_val 中切出 12.5% 作為 validation（約整體 10%）
train_val_split = train_val_dataset.train_test_split(test_size=0.125, seed=42)
train_dataset = train_val_split["train"]   # 約 70%
val_dataset = train_val_split["test"]      # 約 10%


# Step 2：定義 tokenizer 與模型
model_name = "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# PubMedQA 的 label 有三種
label2id = {"yes": 0, "no": 1, "maybe": 2}
id2label = {v: k for k, v in label2id.items()}

# ✅ 前處理函數（支援 batched）
def preprocess(examples):
    prompts = [f"Question: {q}\nContext: {c}" for q, c in zip(examples["question"], examples["context"])]
    encodings = tokenizer(prompts, truncation=True, padding="max_length", max_length=512)
    encodings["labels"] = [label2id[label] for label in examples["final_decision"]]
    return encodings

# ✅ 對 train / val / test 做預處理（並移除原始欄位）
train_dataset = train_dataset.map(preprocess, batched=True, remove_columns=train_dataset.column_names)
val_dataset = val_dataset.map(preprocess, batched=True, remove_columns=val_dataset.column_names)
test_dataset = test_dataset.map(preprocess, batched=True, remove_columns=test_dataset.column_names)

# ✅ 載入模型（分類頭會自動建立）
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,
    id2label=id2label,
    label2id=label2id,
)

# ✅ 評估指標
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}

# ✅ 訓練參數設定
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    load_best_model_at_end=True,
    report_to="none",  # 不用 wandb
)

# ✅ 初始化 Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# ✅ 訓練模型
trainer.train()

# ✅ 驗證集結果
val_result = trainer.evaluate()
print(f"\nValidation Accuracy: {val_result['eval_accuracy']:.2%}")

# ✅ 測試集準確率
test_result = trainer.evaluate(eval_dataset=test_dataset)
print(f"\nTest Accuracy: {test_result['eval_accuracy']:.2%}")

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.349994,0.85
2,No log,0.380382,0.88
3,No log,0.389212,0.9



Validation Accuracy: 85.00%

Test Accuracy: 80.00%
