In [None]:
# Importing the libraries needed
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
import seaborn as sns
import transformers
import json
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from transformers import RobertaModel, RobertaTokenizer, AutoTokenizer
import logging
logging.basicConfig(level=logging.ERROR)

In [None]:
pip install datasets



In [None]:
# Choose the compute device: 'cuda' when torch reports a usable GPU, 'cpu' otherwise

from torch import cuda

if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
from datasets import load_dataset

# Load the BoolQ 'train' and 'validation' splits, in that order
train_boolq_dataset, val_boolq_dataset = (
    load_dataset("boolq", split=split_name)
    for split_name in ('train', 'validation')
)


In [None]:
def _with_question_mark(batch):
    """Append " ?" to every question in a batched example dict.

    A question that already ends in " ?" is returned unchanged, so running
    this cell more than once does not stack extra question marks onto the
    already-processed dataset.

    Args:
        batch: mapping whose "question" entry is a list of strings
            (the shape `Dataset.map(..., batched=True)` passes in).

    Returns:
        dict with a single "question" key holding the updated list.
    """
    suffix = " ?"
    return {
        "question": [
            q if q.endswith(suffix) else q + suffix
            for q in batch["question"]
        ]
    }


# Add a question mark to the end of each question in the train dataset
train_boolq_dataset = train_boolq_dataset.map(_with_question_mark, batched=True)

# Add a question mark to the end of each question in the validation dataset
val_boolq_dataset = val_boolq_dataset.map(_with_question_mark, batched=True)
print(train_boolq_dataset[0])

{'question': 'do iran and afghanistan speak the same language ?', 'answer': True, 'passage': 'Persian (/ˈpɜːrʒən, -ʃən/), also known by its endonym Farsi (فارسی fārsi (fɒːɾˈsiː) ( listen)), is one of the Western Iranian languages within the Indo-Iranian branch of the Indo-European language family. It is primarily spoken in Iran, Afghanistan (officially known as Dari since 1958), and Tajikistan (officially known as Tajiki since the Soviet era), and some other regions which historically were Persianate societies and considered part of Greater Iran. It is written in the Persian alphabet, a modified variant of the Arabic script, which itself evolved from the Aramaic alphabet.'}


In [None]:
# Define a function to convert True/False to 1/0
def convert_bool_to_int(example):
    """Add an integer `label` field derived from the example's `answer`.

    The example is modified in place: `label` becomes 1 when `answer` is
    truthy and 0 otherwise.

    Args:
        example: mapping with an `answer` entry.

    Returns:
        The same `example` object, now carrying the `label` entry.
    """
    example['label'] = int(bool(example['answer']))
    return example

# Attach the integer `label` column to the train split, then to the validation split
train_boolq_dataset = train_boolq_dataset.map(function=convert_bool_to_int)
val_boolq_dataset = val_boolq_dataset.map(function=convert_bool_to_int)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
# Tokenizer for the uncased BERT checkpoint. `truncation` is an option of the
# tokenizer *call*, not of its construction, so it is not passed here; it is
# supplied where the tokenizer is applied to the question/passage pairs.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
# Sequence classifier with two output labels (the 0/1 `label` column).
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Encode every (question, passage) pair of the train split as one input,
# truncated and padded to exactly 512 tokens.
train_boolq_dataset = train_boolq_dataset.map(
    lambda batch: tokenizer(
        batch['question'],
        batch['passage'],
        max_length=512,
        padding='max_length',
        truncation=True,
    ),
    batched=True,
)

In [None]:
# Encode every (question, passage) pair of the validation split as one input,
# truncated and padded to exactly 512 tokens.
val_boolq_dataset = val_boolq_dataset.map(
    lambda batch: tokenizer(
        batch['question'],
        batch['passage'],
        max_length=512,
        padding='max_length',
        truncation=True,
    ),
    batched=True,
)

In [None]:
# Switch both splits to torch format over the three encoder inputs and the label
for _split in (train_boolq_dataset, val_boolq_dataset):
    _split.set_format(
        type='torch',
        columns=['input_ids', 'token_type_ids', 'attention_mask', 'label'],
    )

In [None]:
pip install accelerate -U



In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

import torch

# compute the metrics
def compute_metrics(pred):
    """Score one evaluation pass of the classifier.

    Args:
        pred: object exposing `label_ids` (reference labels) and
            `predictions` (per-class scores); the arg-max over the last
            axis of `predictions` is taken as the predicted class.

    Returns:
        dict with the keys `accuracy`, `f1`, `precision` and `recall`.
        Precision, recall and F1 are computed with `average='binary'`.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)

    # Result tuple is (precision, recall, f1, support); support is not reported.
    prf = precision_recall_fscore_support(y_true, y_pred, average='binary')

    scores = {'accuracy': accuracy_score(y_true, y_pred)}
    scores.update(f1=prf[2], precision=prf[0], recall=prf[1])
    return scores

# Configuration of the fine-tuning run: output/log directories, epoch count,
# batch sizes, warm-up, weight decay, checkpoint retention and the
# evaluation/logging cadence.
# NOTE(review): in the recorded run the validation loss rises after epoch 2
# while the training loss keeps falling; consider fewer epochs or keeping the
# best checkpoint — confirm the intended stopping criterion.
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=30,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=4,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    save_total_limit=1,
    evaluation_strategy='epoch',
    # Log the training loss once per epoch, matching the evaluation cadence.
    # The previous `logging_steps=len(train_boolq_dataset) // 8` was derived
    # from the number of examples although the setting counts optimizer steps;
    # the recorded run shows "No log" for epoch 1 and the same training loss
    # repeated on consecutive epochs as a result.
    logging_strategy='epoch',
)

# Wire the model, the run configuration, both dataset splits and the metric
# function into one Trainer instance.
trainer = Trainer(
    compute_metrics=compute_metrics,
    eval_dataset=val_boolq_dataset,
    train_dataset=train_boolq_dataset,
    args=training_args,
    model=model,
)

# Run the fine-tuning loop
trainer.train()

# Write the model's state dict to 'answer_classification.pt'
torch.save(obj=model.state_dict(), f='answer_classification.pt')

# Evaluate with the trainer and print the returned metrics
eval_result = trainer.evaluate()
print(eval_result)

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.625147,0.662385,0.733719,0.71983,0.748155
2,0.611100,0.56569,0.698777,0.742887,0.791435,0.699951
3,0.611100,0.603424,0.732722,0.792103,0.766928,0.818987
4,0.297200,0.822853,0.745566,0.80181,0.777367,0.827841
5,0.297200,1.030543,0.740367,0.787909,0.800508,0.775701


KeyboardInterrupt: ignored

In [None]:
# Write the model's current state dict to 'answer_classification.pt'
torch.save(obj=model.state_dict(), f='answer_classification.pt')

# Evaluate with the trainer and print the returned metrics
eval_result = trainer.evaluate()
print(eval_result)

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.625147,0.662385,0.733719,0.71983,0.748155
2,0.611100,0.56569,0.698777,0.742887,0.791435,0.699951
3,0.611100,0.603424,0.732722,0.792103,0.766928,0.818987
4,0.297200,0.822853,0.745566,0.80181,0.777367,0.827841
5,0.297200,1.323804,0.749235,0.799707,0.794275,0.805214


{'eval_loss': 1.3238035440444946, 'eval_accuracy': 0.7492354740061162, 'eval_f1': 0.7997068881289693, 'eval_precision': 0.7942746239689471, 'eval_recall': 0.8052139695031972}


In [None]:
# Write the model's current state dict to 'answer_classification.pt'
torch.save(obj=model.state_dict(), f='answer_classification.pt')

# Evaluate with the trainer and print the returned metrics
eval_result = trainer.evaluate()
print(eval_result)

In [None]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification

# Rebuild the two-label classifier and restore the fine-tuned weights.
# map_location='cpu' lets a checkpoint that was written on a GPU load on a
# CPU-only machine; the inputs below are created on the CPU as well, so the
# model and its inputs stay on the same device.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
model.load_state_dict(torch.load('answer_classification.pt', map_location='cpu'))

# Same tokenizer checkpoint as used for training
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# The model was fine-tuned on (question, passage) pairs, so inference supplies
# both the question and a passage to answer it from.
question_text = "Beijing is the capital of China, yes or no? Answer:"
passage_text = "Beijing is the capital of the People's Republic of China."

# Encode the pair; truncation keeps the input within the 512-token limit that
# was used when the training data was encoded.
inputs = tokenizer(
    question_text,
    passage_text,
    truncation=True,
    max_length=512,
    return_tensors='pt',
)

# Put the model in evaluation mode
model.eval()

# Predict without tracking gradients
with torch.no_grad():
    outputs = model(**inputs)

# Index of the highest-scoring class
logits = outputs.logits
predicted_class = torch.argmax(logits, dim=1).item()

# Training labels encode the answer: 1 for a True answer, 0 for a False one.
# The prediction is therefore a yes/no answer, not a question type.
predicted_answer = "Yes" if predicted_class == 1 else "No"

print("Predicted Answer:", predicted_answer)

In [None]:
pip list