In [2]:
%pip install transformers
%pip install datasets
%pip install torch

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import torch
from datasets import load_dataset
from transformers import BertForSequenceClassification, BertTokenizer
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset, RandomSampler
from transformers import AdamW, get_linear_schedule_with_warmup
from sklearn.metrics import classification_report
from tqdm.notebook import tqdm
import random
import numpy as np
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Enable tqdm in pandas
tqdm.pandas()

# Set to True to use the GPU (if there is one available)
use_gpu = True

# Select device
device = torch.device('cuda' if use_gpu and torch.cuda.is_available() else 'cpu')
print(f'device: {device.type}')

# Random seed
seed = 1234

# Set random seed
if seed is not None:
    print(f'random seed: {seed}')
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if device.type == 'cuda':
        torch.cuda.manual_seed_all(seed)

device: cuda
random seed: 1234


# Step 1. Load the Dataset

In [5]:
# Function to load and preprocess dataset
def load_and_preprocess_data(file_path, label_map):
    dataset = pd.read_csv(file_path, sep='\t')
    filtered_dataset = dataset[dataset['goldstandard2'].isin(label_map.keys())]
    filtered_dataset['goldstandard2'] = filtered_dataset['goldstandard2'].map(label_map)
    return filtered_dataset

# Step 2. Encoding the Dataset

In [6]:
# Function to encode dataset
def encode_dataset(data, tokenizer, text_columns):
    texts = data[text_columns[0]].tolist()
    if len(text_columns) > 1:
        second_texts = data[text_columns[1]].tolist()
        encoded_data = tokenizer(texts, second_texts, truncation=True, padding=True)
    else:
        encoded_data = tokenizer(texts, truncation=True, padding=True)
    encoded_data["labels"] = data["goldstandard2"].tolist()
    return encoded_data

# Step 3. Creating DataLoader

In [7]:
# Function to create DataLoader
def create_dataloader(encoded_data, batch_size=16):
    inputs = torch.tensor(encoded_data["input_ids"])
    if "token_type_ids" in encoded_data:
        token_types = torch.tensor(encoded_data["token_type_ids"])
    else:
        token_types = torch.zeros_like(inputs)  # Placeholder if token_type_ids not available
    masks = torch.tensor(encoded_data["attention_mask"])
    labels = torch.tensor(encoded_data["labels"])
    dataset = TensorDataset(inputs, token_types, masks, labels)
    sampler = RandomSampler(dataset)
    dataloader = DataLoader(dataset, sampler=sampler, batch_size=batch_size)
    return dataloader

# Step 4. Model Training

In [8]:
# Function to train the model
def train_model(model, dataloader, optimizer, scheduler, criterion, num_epochs=3):
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for batch in dataloader:
            batch = tuple(t.to(device) for t in batch)
            inputs = {"input_ids": batch[0], "token_type_ids": batch[1], "attention_mask": batch[2]}
            targets = batch[3]
            optimizer.zero_grad()
            outputs = model(**inputs)
            loss = criterion(outputs.logits, targets)
            loss.backward()
            optimizer.step()
            scheduler.step()
            total_loss += loss.item()
        avg_train_loss = total_loss / len(dataloader)
        print(f"Epoch {epoch + 1}/{num_epochs}, Average Training Loss: {avg_train_loss:.4f}")
    print("Training complete.")

# Step 5. Model Evaluation

In [9]:
# Function to evaluate the model
def evaluate_model(model, dataloader, device):
    model.eval()
    predictions, true_labels = [], []
    for batch in dataloader:
        batch = tuple(t.to(device) for t in batch)
        inputs = {"input_ids": batch[0], "token_type_ids": batch[1], "attention_mask": batch[2]}
        targets = batch[3]
        with torch.no_grad():
            outputs = model(**inputs)
        logits = outputs.logits
        predictions.extend(torch.argmax(logits, dim=1).tolist())
        true_labels.extend(targets.tolist())
    return classification_report(true_labels, predictions, output_dict=True)

# Step 6. Extracting F-1 Score

In [10]:
# Function to extract F1-scores from the report
def extract_f1_scores(report):
    score = {}
    for label in ['0', '1', '2', '3']:
        if label in report:
            score[label] = report[label]['f1-score'] * 100
    return score

# Step 7. Model Preparation

In [11]:
# Function to prepare model and tokenizer
def prepare_model_and_tokenizer(model_name, num_classes):
    model = BertForSequenceClassification.from_pretrained(model_name, num_labels=num_classes)
    tokenizer = BertTokenizer.from_pretrained(model_name)
    model.to(device)
    return model, tokenizer

# Step 8. Data Spliting

In [12]:
# Model evaluation and report preparation
def model_evaluation_and_report(model_name, dataset, tokenizer, text_columns, label_map, model_label):
    # Prepare model and tokenizer
    model, tokenizer = prepare_model_and_tokenizer(model_name, len(label_map))
    
    # Data Splitting
    train_data, temp_data = train_test_split(dataset, test_size=0.4, random_state=42)
    dev_data, test_data = train_test_split(temp_data, test_size=0.5, random_state=42)

    # Encode datasets
    train_encoded_data = encode_dataset(train_data, tokenizer, text_columns)
    test_encoded_data = encode_dataset(test_data, tokenizer, text_columns)

    # Create DataLoader
    train_dataloader = create_dataloader(train_encoded_data, batch_size=16)
    test_dataloader = create_dataloader(test_encoded_data, batch_size=16)

    # Model Preparation
    model, _ = prepare_model_and_tokenizer(model_name, len(label_map))

    # Define hyperparameters
    learning_rate = 2e-5
    num_epochs = 3
    optimizer = AdamW(model.parameters(), lr=learning_rate)
    total_steps = len(train_dataloader) * num_epochs
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)
    criterion = torch.nn.CrossEntropyLoss()

    # Train the model
    train_model(model, train_dataloader, optimizer, scheduler, criterion, num_epochs)

    # Evaluate the model
    report = evaluate_model(model, test_dataloader, device)

    # Extract F1-scores and accuracy
    score = extract_f1_scores(report)
    accuracy = report["accuracy"] * 100

    # Create a dataframe for the report
    data = {
        'Model': [model_label],
        'Accuracy': [f"{accuracy:.1f} %"],
        'F1-Score 0': [f"{score.get('0', 0):.1f}"],
        'F1-Score 1': [f"{score.get('1', 0):.1f}"],
        'F1-Score 2': [f"{score.get('3', 0):.1f}"],
        'F1-Score 3': [f"{score.get('2', 0):.1f}"]
    }
    return pd.DataFrame(data)

# Step 9. Model Function & Report

In [13]:

# Common label map for all models
label_map = {
    'Yes': 0,
    'No': 1,
    'In the middle, neither yes nor no': 2,
    'Yes, subject to some conditions': 3
}

# Load and preprocess dataset
file_path = 'circa-data.tsv'
dataset = load_and_preprocess_data(file_path, label_map)

# Prepare model and tokenizer
model_name = "bert-base-uncased"
_, tokenizer = prepare_model_and_tokenizer(model_name, len(label_map))

# BERT_YN
df_bert_yn = model_evaluation_and_report(
    model_name="bert-base-uncased",
    dataset=dataset,
    tokenizer=tokenizer,
    text_columns=["question-X", "answer-Y"],
    label_map=label_map,
    model_label="BERT_YN"
)

# BERT_YN_QUES
df_bert_yn_ques = model_evaluation_and_report(
    model_name="bert-base-uncased",
    dataset=dataset,
    tokenizer=tokenizer,
    text_columns=["question-X"],
    label_map=label_map,
    model_label="BERT_YN_QUES"
)

# BERT_YN_ANS
df_bert_yn_ans = model_evaluation_and_report(
    model_name="bert-base-uncased",
    dataset=dataset,
    tokenizer=tokenizer,
    text_columns=["answer-Y"],
    label_map=label_map,
    model_label="BERT_YN_ANS"
)

# Combine all results into a single dataframe
combined_results = pd.concat([df_bert_yn, df_bert_yn_ques, df_bert_yn_ans], ignore_index=True)

# Print the combined results
print(combined_results.to_string(index=False))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_dataset['goldstandard2'] = filtered_dataset['goldstandard2'].map(label_map)
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initializ

Epoch 1/3, Average Training Loss: 0.5405
Epoch 2/3, Average Training Loss: 0.2970
Epoch 3/3, Average Training Loss: 0.1723
Training complete.


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3, Average Training Loss: 0.9755
Epoch 2/3, Average Training Loss: 0.9059
Epoch 3/3, Average Training Loss: 0.8550
Training complete.


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3, Average Training Loss: 0.5762
Epoch 2/3, Average Training Loss: 0.4076
Epoch 3/3, Average Training Loss: 0.2908
Training complete.
       Model Accuracy F1-Score 0 F1-Score 1 F1-Score 2 F1-Score 3
     BERT_YN   87.5 %       89.5       87.8       88.5       33.0
BERT_YN_QUES   55.9 %       64.7       50.7        2.5        5.5
 BERT_YN_ANS   81.7 %       84.0       80.4       89.0       23.5
