In [None]:
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset, load_metric
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np
import pandas as pd
import torch

# Read the three CSV splits into Hugging Face datasets.
def _load_csv_split(csv_path):
    """Load one CSV file with the `csv` builder and return its 'train' split."""
    return load_dataset('csv', data_files=csv_path)['train']


train_dataset = _load_csv_split('bert_tabsa/apa-train.csv')
dev_dataset = _load_csv_split('bert_tabsa/apa-dev.csv')
test_dataset = _load_csv_split('bert_tabsa/apa-test.csv')

In [None]:
# Add an integer `label` column, derived from the `sentiment` column, to the
# training CSV and write the file back in place.
#
# NOTE(review): an earlier cell already loads `train_dataset` from this same
# file. If the CSV on disk did not yet contain `label`, that load has to be
# re-run after this cell -- confirm the intended execution order.
train_csv_path = 'bert_tabsa/apa-train.csv'
label_mapping = {'Positive': 0, 'Negative': 1, 'Neutral': 2}

df = pd.read_csv(train_csv_path)

# Fail loudly (KeyError, as a plain dict lookup would) on any sentiment value
# that has no integer id, instead of silently writing NaN labels.
unmapped = [value for value in df['sentiment'].unique() if value not in label_mapping]
if unmapped:
    raise KeyError(f"sentiment values without a label id: {unmapped!r}")

df['label'] = df['sentiment'].map(label_mapping)

# index=False keeps the file's column layout stable: without it the DataFrame
# index is written as an extra unnamed column, and every re-run of this cell
# would add another one to the CSV.
df.to_csv(train_csv_path, index=False)

In [None]:
# Tokenizer and classifier are both initialised from the same pretrained checkpoint.
pretrained_checkpoint = 'bert-base-uncased'

tokenizer = BertTokenizer.from_pretrained(pretrained_checkpoint)

# The classification head is sized for three output classes.
model = BertForSequenceClassification.from_pretrained(
    pretrained_checkpoint,
    num_labels=3,
)

# Tokenization step applied to every split
def preprocess_function(examples):
    """Tokenize the `input_text` field of a batch of examples.

    Every sequence is padded to the tokenizer's maximum length and longer
    sequences are truncated. The tokenizer's output is returned as-is.
    """
    texts = examples['input_text']
    encoded = tokenizer(texts, padding='max_length', truncation=True)
    return encoded

def _tokenize_split(split):
    """Tokenize one dataset split and expose only the model inputs as torch tensors."""
    tokenized = split.map(preprocess_function, batched=True)
    tokenized.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
    return tokenized


# Each split goes through exactly the same tokenization + formatting steps.
train_dataset = _tokenize_split(train_dataset)
dev_dataset = _tokenize_split(dev_dataset)
test_dataset = _tokenize_split(test_dataset)


# Training arguments are defined once, in the training cell below, right next
# to the Trainer that consumes them. The TrainingArguments block that used to
# live here (3 epochs, batch size 8) was rebound by that later definition
# before anything read it, so it was dead configuration and has been removed
# to avoid two conflicting sets of hyper-parameters in the notebook.

# Metric function for evaluation
def compute_metrics(p):
    """Compute accuracy and support-weighted precision/recall/F1 for one evaluation pass.

    Args:
        p: object exposing `predictions` (2-D array of per-class scores, one
            row per example) and `label_ids` (integer class ids).

    Returns:
        dict with float values under the keys "accuracy", "f1", "precision"
        and "recall". Per-class scores are averaged with weights equal to each
        class's share of the true labels (the "weighted" average); a per-class
        score whose denominator is zero counts as 0. All values are 0.0 when
        there are no examples.

    The scores are derived directly from per-class counts with NumPy, so no
    metric object has to be (re)loaded on every evaluation call.
    """
    preds = np.argmax(p.predictions, axis=1)
    labels = np.asarray(p.label_ids).astype(np.int64).ravel()

    total = labels.size
    if total == 0:
        # Nothing to score; avoid 0/0 below.
        return {"accuracy": 0.0, "f1": 0.0, "precision": 0.0, "recall": 0.0}

    hits = preds == labels
    num_classes = int(max(preds.max(), labels.max())) + 1

    support = np.bincount(labels, minlength=num_classes)      # true examples per class
    predicted = np.bincount(preds, minlength=num_classes)     # predictions per class
    true_pos = np.bincount(labels[hits], minlength=num_classes)

    def _safe_ratio(numerator, denominator):
        # Element-wise division that yields 0 wherever the denominator is 0.
        out = np.zeros(num_classes, dtype=float)
        np.divide(numerator, denominator, out=out, where=denominator > 0)
        return out

    precision_per_class = _safe_ratio(true_pos, predicted)
    recall_per_class = _safe_ratio(true_pos, support)
    # F1 = 2*TP / (2*TP + FP + FN) = 2*TP / (predicted + support)
    f1_per_class = _safe_ratio(2 * true_pos, predicted + support)

    weights = support / total

    return {
        "accuracy": float(hits.mean()),
        "f1": float(f1_per_class @ weights),
        "precision": float(precision_per_class @ weights),
        "recall": float(recall_per_class @ weights),
    }

In [None]:
# Fine-tuning configuration: evaluate and checkpoint once per epoch, and reload
# the best checkpoint (selected by eval_loss) when training finishes.
training_options = dict(
    output_dir='./results',
    evaluation_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=2,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)
training_args = TrainingArguments(**training_options)

# Wire the model, configuration, data splits and metric callback into a Trainer.
# The dev split is the evaluation set used during training.
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=dev_dataset,
)

# Run fine-tuning; as the cell's last expression, the training summary is displayed.
trainer.train()

In [None]:
# Evaluate on the test set
test_results = trainer.predict(test_dataset)

# Ground-truth ids and the highest-scoring class per example
y_true = test_results.label_ids
y_pred = np.argmax(test_results.predictions, axis=1)

# Pin the label set explicitly (three classes, matching num_labels=3 used when
# the model was created). Without `labels`, classification_report raises a
# ValueError whenever a class is absent from both y_true and y_pred (its class
# count no longer matches the three target_names), and the confusion matrix
# would silently shrink below 3x3.
class_ids = [0, 1, 2]
class_names = ['Class 0', 'Class 1', 'Class 2']

# Calculate the confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(y_true, y_pred, labels=class_ids)

# Generate classification report
class_report = classification_report(
    y_true,
    y_pred,
    labels=class_ids,
    target_names=class_names,
    digits=4,
)

# Print the results (print keeps the report's line breaks intact)
print("Confusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)