# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments, TrainerCallback, TrainerState, TrainerControl
from datasets import Dataset
import torch
from torch import nn, optim
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np
import matplotlib.pyplot as plt
import os

# Load dataset; missing texts become empty strings so the tokenizer always
# receives a str
data = pd.read_csv('final.csv')
data['text'] = data['text'].fillna('')

# Encode labels as consecutive integer ids (0 .. n_classes - 1)
label_encoder = LabelEncoder()
data['Class'] = label_encoder.fit_transform(data['Class'])

# Split data: 80% train / 20% eval, with a fixed seed for a reproducible split
train_texts, eval_texts, train_labels, eval_labels = train_test_split(data['text'].tolist(), data['Class'].tolist(), test_size=0.2, random_state=42)

# Load tokenizer and model.
# The classification head is sized from the classes actually found in the
# data instead of a hard-coded count, so the script keeps working when the
# CSV gains or loses a class.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=len(label_encoder.classes_),
)

# Tokenize data (each split is padded to its own longest sequence)
train_encodings = tokenizer(train_texts, truncation=True, padding=True)
eval_encodings = tokenizer(eval_texts, truncation=True, padding=True)

# Convert to datasets; rows stay in the same order as the text lists above
train_dataset = Dataset.from_dict({
    'input_ids': train_encodings['input_ids'],
    'attention_mask': train_encodings['attention_mask'],
    'labels': train_labels
})

eval_dataset = Dataset.from_dict({
    'input_ids': eval_encodings['input_ids'],
    'attention_mask': eval_encodings['attention_mask'],
    'labels': eval_labels
})

# Normalise every label to a plain Python int
def convert_labels_to_long(example):
    """Coerce ``example['labels']`` to ``int`` in place and return the row.

    Args:
        example: Mapping for a single dataset row; must contain 'labels'.

    Returns:
        The same ``example`` object, with 'labels' replaced by its int value.
    """
    label_as_int = int(example['labels'])
    example['labels'] = label_as_int
    return example

# Apply the label normalisation to both splits
train_dataset, eval_dataset = (
    split.map(convert_labels_to_long) for split in (train_dataset, eval_dataset)
)

# Training arguments
training_args = TrainingArguments(
    # Where checkpoints and logs are written
    output_dir='./results',
    logging_dir='./logs',
    # Optimisation schedule
    num_train_epochs=50,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # Evaluate and log once per epoch; checkpoint every 5000 steps
    eval_strategy="epoch",
    logging_strategy="epoch",
    save_strategy="steps",
    save_steps=5000,
)

# Compute metrics
def compute_metrics(p):
    """Compute accuracy and weighted precision/recall/F1 for one evaluation.

    Args:
        p: Object exposing ``predictions`` (per-class scores; the arg-max
            over axis 1 is used as the predicted class) and ``label_ids``
            (the reference labels).

    Returns:
        dict with the keys 'accuracy', 'precision', 'recall' and 'f1'.
    """
    preds = np.argmax(p.predictions, axis=1)
    # zero_division=0 gives the same score scikit-learn already falls back to
    # for a class with no predicted samples, but without emitting an
    # UndefinedMetricWarning on every evaluation pass.
    precision, recall, f1, _ = precision_recall_fscore_support(
        p.label_ids,
        preds,
        average='weighted',
        zero_division=0,
    )
    acc = accuracy_score(p.label_ids, preds)
    return {
        'accuracy': acc,
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

# Custom callback
class AccuracyPerEpochCallback(TrainerCallback):
    """Record training-set and eval-set accuracy at the end of every epoch.

    The callback uses the module-level ``trainer``, ``train_dataset`` and
    ``eval_dataset``; they must be defined before training starts.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self):
        super().__init__()
        # One entry per finished epoch, in epoch order
        self.train_accuracies = []
        self.eval_accuracies = []

    def on_epoch_end(self, args, state, control, **kwargs):
        """Evaluate on both splits, store the accuracies and print them."""
        # Use a dedicated "train" prefix so the training-set pass is not
        # reported under "eval_*" keys, where it would be indistinguishable
        # from the real evaluation metrics.
        train_results = trainer.evaluate(train_dataset, metric_key_prefix="train")
        train_accuracy = train_results['train_accuracy']
        self.train_accuracies.append(train_accuracy)

        eval_results = trainer.evaluate(eval_dataset)
        eval_accuracy = eval_results['eval_accuracy']
        self.eval_accuracies.append(eval_accuracy)

        print(f"Epoch {state.epoch}: Train Accuracy = {train_accuracy}, Eval Accuracy = {eval_accuracy}")

# Build the Trainer from the model, its arguments, both splits and the
# metric function
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

# Register the per-epoch accuracy recorder, then run training
accuracy_callback = AccuracyPerEpochCallback()
trainer.add_callback(accuracy_callback)
train_metrics = trainer.train()

# Plot both accuracy curves against 1-based epoch numbers
epochs = range(1, len(accuracy_callback.train_accuracies) + 1)
for accuracy_series, curve_label in (
    (accuracy_callback.train_accuracies, 'Train Accuracy'),
    (accuracy_callback.eval_accuracies, 'Eval Accuracy'),
):
    plt.plot(epochs, accuracy_series, label=curve_label)
plt.title('Train and Eval Accuracy per Epoch')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Reinforcement learning component
class RLTrainer:
    """Fine-tune a classifier one example at a time from corrected labels.

    Despite the name, each update is an ordinary supervised gradient step on
    the loss the model returns for the corrected label; no reward value
    enters the optimisation.

    Args:
        model: Model called as ``model(**inputs)`` / ``model(**inputs,
            labels=...)`` whose output exposes ``logits`` and ``loss``.
        tokenizer: Callable turning a text into a mapping of tensors.
        learning_rate: Learning rate of the Adam optimizer.
    """

    def __init__(self, model, tokenizer, learning_rate=1e-5):
        self.model = model
        self.tokenizer = tokenizer
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
        # Not used by update_model (the model computes its own loss); kept so
        # existing code reading this attribute keeps working.
        self.loss_fn = nn.CrossEntropyLoss()

    def _encode(self, text):
        """Tokenize *text*; return (inputs on the model's device, device)."""
        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, padding=True)
        # The model may live on an accelerator after training while the
        # tokenizer produces CPU tensors; mixing devices raises at call time.
        device = next(self.model.parameters()).device
        return {name: tensor.to(device) for name, tensor in inputs.items()}, device

    def predict(self, text):
        """Return the arg-max class index for *text* as a Python int."""
        inputs, _ = self._encode(text)
        # update_model leaves the model in training mode; switch to eval mode
        # so dropout does not randomise the prediction.
        self.model.eval()
        with torch.no_grad():  # inference only, no autograd graph needed
            logits = self.model(**inputs).logits
        return torch.argmax(logits, dim=1).item()

    def update_model(self, text, true_label):
        """Take one optimizer step on the pair (*text*, *true_label*).

        The model is left in training mode afterwards.
        """
        inputs, device = self._encode(text)
        # One class index per example: shape (batch,) with batch == 1
        labels = torch.tensor([true_label], dtype=torch.long, device=device)
        self.model.train()
        self.optimizer.zero_grad()
        loss = self.model(**inputs, labels=labels).loss
        loss.backward()
        self.optimizer.step()

def predict_and_collect_feedback(rl_trainer, texts):
    """Predict each text, ask the user for the right label, and learn from it.

    For every text the predicted label is printed and the user is prompted.
    Entering -1 or the predicted label itself confirms the prediction
    (reward 1, no update). Any other integer is treated as a correction
    (reward -1) and ``rl_trainer.update_model`` is called with it.

    Args:
        rl_trainer: Object exposing ``model``, ``predict(text)`` and
            ``update_model(text, label)``.
        texts: Iterable of texts to review.

    Returns:
        Tuple ``(predictions, rewards)`` of two lists aligned with ``texts``;
        ``predictions`` holds the confirmed or corrected label per text.
    """
    predictions = []
    rewards = []
    for text in texts:
        # update_model switches the model to training mode, so go back to
        # evaluation mode before every prediction, using the trainer's own
        # model rather than a module-level global.
        rl_trainer.model.eval()
        predicted_label = rl_trainer.predict(text)
        print(f"Text: {text}")
        print(f"Predicted Label: {predicted_label}")

        # Re-prompt on non-integer input instead of aborting the session
        while True:
            answer = input("Enter the correct label (or -1 to keep the predicted label): ")
            try:
                correct_label = int(answer)
            except ValueError:
                print("Please enter an integer label.")
                continue
            break

        if correct_label in (-1, predicted_label):
            reward = 1  # Reward correct prediction
            final_label = predicted_label
        else:
            reward = -1  # Penalize incorrect prediction
            rl_trainer.update_model(text, correct_label)
            final_label = correct_label
        predictions.append(final_label)
        rewards.append(reward)
    return predictions, rewards

def evaluate_model(trainer, dataset):
    """Evaluate *dataset* with *trainer* and return the headline metrics.

    Args:
        trainer: Object whose ``evaluate(dataset)`` returns a mapping that
            contains 'eval_accuracy', 'eval_precision', 'eval_recall' and
            'eval_f1'.
        dataset: Dataset handed to ``trainer.evaluate`` unchanged.

    Returns:
        dict with the keys 'accuracy', 'precision', 'recall' and 'f1'.
    """
    raw_metrics = trainer.evaluate(dataset)
    key_pairs = (
        ('accuracy', 'eval_accuracy'),
        ('precision', 'eval_precision'),
        ('recall', 'eval_recall'),
        ('f1', 'eval_f1'),
    )
    return {short_name: raw_metrics[raw_key] for short_name, raw_key in key_pairs}

# Texts shown to the user for feedback. They are the first rows of the eval
# split (eval_dataset rows are in the same order as eval_texts), and the model
# is fine-tuned on them below. They are therefore left out of the before/after
# comparison, so the post-feedback metrics are not measured on examples the
# model was just trained on.
texts_for_feedback = eval_texts[:10]  # Example texts for feedback
num_feedback_texts = len(texts_for_feedback)
if len(eval_dataset) > num_feedback_texts:
    comparison_dataset = eval_dataset.select(range(num_feedback_texts, len(eval_dataset)))
else:
    # Too few eval rows to hold any out; fall back to the full eval split
    comparison_dataset = eval_dataset

# Initial evaluation (on the rows not used for feedback)
initial_eval_metrics = evaluate_model(trainer, comparison_dataset)
print("Initial Evaluation Metrics:", initial_eval_metrics)

# Initialize RLTrainer
rl_trainer = RLTrainer(model, tokenizer)

# Predict and collect feedback
predicted_labels, rewards = predict_and_collect_feedback(rl_trainer, texts_for_feedback)

# Post-RL evaluation (same rows as the initial evaluation)
post_rl_eval_metrics = evaluate_model(trainer, comparison_dataset)
print("Post-RL Evaluation Metrics:", post_rl_eval_metrics)

# Compare initial and post-RL evaluation metrics
print("Initial vs Post-RL Evaluation Metrics:")
for metric in initial_eval_metrics:
    print(f"{metric.capitalize()}: {initial_eval_metrics[metric]:.4f} -> {post_rl_eval_metrics[metric]:.4f}")