# **Classification of app reviews for requirements engineering using deep learning models**

## **Data loading and processing**

In [None]:
# Load multi-label dataset from CSV file
from datasets import load_dataset

# Load dataset
# The 'csv' builder reads the file passed as `data_files`; its rows are accessed
# below through the 'train' split.
dataset = load_dataset('csv', data_files='dataset/gpt_multi_label_100.csv')

# Print the dataset summary and the first training row as a quick sanity check
print(dataset)
print(dataset['train'][0])


In [None]:
# Convert labels into a list
def process_labels(example, label_columns=('feature request', 'bug report',
                                           'rating', 'user experience')):
    """Collect the per-category columns of one row into a single 'label' list.

    Args:
        example: Mapping for one dataset row; it must contain every name in
            ``label_columns``. It is modified in place.
        label_columns: Column names to gather, in the order they should appear
            in the resulting list. The default is the four review categories
            used throughout this notebook.

    Returns:
        The same ``example`` object with an added/overwritten 'label' entry.

    Raises:
        KeyError: If one of ``label_columns`` is missing from ``example``.
    """
    example['label'] = [example[column] for column in label_columns]
    return example

# Run process_labels over the dataset so each row gains a combined 'label' list
dataset = dataset.map(process_labels)

# Inspect the first training row to confirm the new 'label' field is present
print(dataset['train'][0])


*K-Fold Cross-Validation*

In [None]:
from datasets import Dataset, load_dataset
from sklearn.model_selection import train_test_split, KFold
import pandas as pd
# Fixed seed so the held-out test split and the fold assignment are identical
# on every run. Without it, re-running the notebook reshuffles the data, and
# checkpoints saved by an earlier run no longer match the current test split.
SEED = 42

# Convert data to a Pandas DataFrame for processing
df = pd.DataFrame(dataset['train'])

# Split the dataset into a train+validation(90%) and test(10%)
train_val_df, test_df = train_test_split(df, test_size=0.1, random_state=SEED)

# Convert to Hugging Face Dataset. preserve_index=False stops the shuffled
# pandas index from being stored as an extra '__index_level_0__' column, so
# there is nothing to strip afterwards.
test_dataset = Dataset.from_pandas(test_df, preserve_index=False)

# Perform K-Fold cross-validation on the train+validation set
kf = KFold(n_splits=5, shuffle=True, random_state=SEED)

# Save data for each fold: a list of {'train': Dataset, 'validation': Dataset}
folds = []
for train_index, val_index in kf.split(train_val_df):
    # kf.split yields positional indices, hence .iloc
    train_data = train_val_df.iloc[train_index]
    val_data = train_val_df.iloc[val_index]

    folds.append({
        'train': Dataset.from_pandas(train_data, preserve_index=False),
        'validation': Dataset.from_pandas(val_data, preserve_index=False),
    })


# Print the structure of the first fold's datasets
print("Fold 1 Training Dataset:")
print(folds[0]['train'])
print("Fold 1 Validation Dataset:")
print(folds[0]['validation'])
print("Test Dataset:")
print(test_dataset)


## **BERT Model**

In [None]:
from transformers import BertTokenizer, BertForSequenceClassification
from torch.optim import AdamW
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader
from transformers import get_linear_schedule_with_warmup
import torch
import torch.nn as nn
from sklearn.metrics import classification_report
import os
import datetime

# Tokenizer matching the 'bert-base-uncased' checkpoint used for the model below
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


def preprocess_data(example):
    """Tokenize the 'review' text and carry the multi-label target along.

    Called through ``Dataset.map(..., batched=True)`` in this notebook.
    Every review is padded or truncated to exactly 128 tokens, and the value
    stored under 'label' is copied into the encoding as 'labels'.
    """
    encoding = tokenizer(
        example['review'],
        max_length=128,
        truncation=True,
        padding='max_length',
    )
    encoding['labels'] = example['label']  # multi-label target vector
    return encoding


# Training hyper-parameters
epochs = 10
patience = 3        # early-stopping patience, in epochs without improvement
batch_size = 32
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Checkpoint locations; the timestamp keeps files from different runs apart
save_dir = os.path.join(".", "models", "kf_bert")
os.makedirs(save_dir, exist_ok=True)
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
save_path = os.path.join(save_dir, f"best_model_{timestamp}.pth")
backup_path = os.path.join(save_dir, f"backup_model_{timestamp}.pth")

# One entry per fold is appended during training
best_model_paths = []

# Define a function to save the model
def save_model(model, path):
    """Write ``model.state_dict()`` to ``path`` without clobbering on failure.

    The weights are first written to ``<path>.tmp`` and then moved over
    ``path`` with ``os.replace``. If serialization fails part-way (for example
    on a full disk), a checkpoint already stored at ``path`` is left untouched
    instead of being truncated.

    Args:
        model: Object exposing ``state_dict()`` (e.g. a ``torch.nn.Module``).
        path: Destination file path.

    Returns:
        True if the checkpoint was written, False otherwise. Failures are
        reported with ``print`` and never raised, so the caller can fall back
        to another location.
    """
    tmp_path = f"{path}.tmp"
    try:
        torch.save(model.state_dict(), tmp_path)
        os.replace(tmp_path, path)
    except Exception as e:
        print(f"Failed to save the model to {path}: {e}")
        # Best-effort cleanup of a partially written temporary file
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        return False
    print(f"Model saved to {path}")
    return True

# sys.exit() is called below when a checkpoint cannot be written anywhere;
# the module is not imported anywhere earlier in this notebook.
import sys

# Loss function (multi-label: one independent sigmoid/BCE term per category)
loss_fn = nn.BCEWithLogitsLoss()

# Store the validation losses for each fold
all_val_losses = []

# Tokenize the held-out test set once, into its own variable. Doing this inside
# the fold loop repeated identical work for every fold and overwrote the raw
# `test_dataset`, which the later model sections tokenize with other tokenizers.
test_dataset_mapped = test_dataset.map(preprocess_data, batched=True)
test_dataset_mapped.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
test_dataloader = DataLoader(test_dataset_mapped, batch_size=batch_size, shuffle=False)

# Loop through each fold
for fold_id, fold_data in enumerate(folds):
    print(f"\nTraining Fold {fold_id + 1}:")

    # Get training and validation datasets for the current fold
    train_dataset = fold_data['train'].map(preprocess_data, batched=True)
    valid_dataset = fold_data['validation'].map(preprocess_data, batched=True)

    # Convert datasets to PyTorch format
    train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
    valid_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

    # Data loaders
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True)

    # Load BERT model (a fresh copy of the pretrained weights for every fold)
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=4)
    model.classifier = nn.Linear(768, 4)  # Replace classifier head for multi-label task
    model.to(device)

    # Optimizer and learning rate schedulers.
    # The linear warm-up scheduler is stepped only for the first `warmup_steps`
    # optimizer steps; after that the learning rate is driven solely by
    # ReduceLROnPlateau on the validation loss.
    optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=1e-5)
    total_steps = len(train_dataloader) * epochs
    warmup_steps = int(0.1 * total_steps)
    warmup_scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=warmup_steps, num_training_steps=total_steps
    )
    # NOTE(review): `verbose` is deprecated in newer PyTorch releases — confirm
    # against the installed version.
    reduce_scheduler = ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=2, verbose=True
    )

    # Early stopping and best model saving
    val_losses = []
    global_step = 0
    epochs_no_improve = 0
    best_val_loss = float('inf')
    save_path = os.path.join(save_dir, f"fold{fold_id + 1}_best_model_{timestamp}.pth")
    # Per-fold backup file, so a fallback save of one fold cannot be
    # overwritten by a fallback save of another fold.
    fold_backup_path = os.path.join(save_dir, f"fold{fold_id + 1}_backup_model_{timestamp}.pth")
    best_model_paths.append(save_path)

    # Training and validation loop
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for batch in train_dataloader:
            # Prepare data
            inputs = {key: val.to(device) for key, val in batch.items() if key != 'labels'}
            labels = batch['labels'].to(device)

            # Forward pass
            outputs = model(**inputs)
            loss = loss_fn(outputs.logits, labels.float())
            total_loss += loss.item()

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Update learning rate with warm-up scheduler
            if global_step < warmup_steps:
                warmup_scheduler.step()
            global_step += 1

        # Calculate average training loss
        avg_train_loss = total_loss / len(train_dataloader)

        # Validation
        model.eval()
        val_loss = 0
        all_labels = []
        all_preds = []
        with torch.no_grad():
            for batch in valid_dataloader:
                inputs = {key: val.to(device) for key, val in batch.items() if key != 'labels'}
                labels = batch['labels'].to(device)

                outputs = model(**inputs)
                loss = loss_fn(outputs.logits, labels.float())
                val_loss += loss.item()

                preds = torch.sigmoid(outputs.logits).cpu().numpy() > 0.5
                all_labels.extend(labels.cpu().numpy())
                all_preds.extend(preds)

        # Calculate average validation loss
        avg_val_loss = val_loss / len(valid_dataloader)
        val_losses.append(avg_val_loss)

        # Adjust learning rate using reduce-on-plateau scheduler
        if global_step >= warmup_steps:
            reduce_scheduler.step(avg_val_loss)

        # Print results
        print(f"Epoch {epoch + 1}/{epochs} - Training Loss: {avg_train_loss:.4f}, Validation Loss: {avg_val_loss:.4f}")

        # Save the best model based on validation loss
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            epochs_no_improve = 0
            if save_model(model, save_path):
                best_model_path = save_path
            elif save_model(model, fold_backup_path):
                # Primary location failed; the backup now holds the best weights
                best_model_path = fold_backup_path
            else:
                print("Failed to save the model to both primary and backup paths. Stopping training.")
                sys.exit(1)
            # Record where this fold's best weights actually are, so the
            # retraining cell loads an existing file even after a fallback.
            best_model_paths[-1] = best_model_path
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print("Early stopping due to no improvement in validation loss.")
                break

    # Save the validation losses for this fold
    all_val_losses.append(val_losses)

print("Training completed for all folds!")


In [None]:
# Draw the validation loss curve
import matplotlib.pyplot as plt

# One curve per fold (x = epoch index) on a single set of axes
fig, ax = plt.subplots(figsize=(6, 4))
for fold_number, fold_losses in enumerate(all_val_losses, start=1):
    ax.plot(fold_losses, label=f'Fold {fold_number}')
ax.set_title('Validation Loss Curve for Each Fold')
ax.set_xlabel('Epoch')
ax.set_ylabel('Validation Loss')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
import numpy as np
from pathlib import Path
# Continue training the best cross-validation checkpoint on train + validation data
print("\nRetraining the best model using the full training + validation set...")

# The winning fold is the one whose lowest per-epoch validation loss is smallest
fold_minima = [min(fold_losses) for fold_losses in all_val_losses]
best_model_index = np.argmin(fold_minima)
best_model_path = best_model_paths[best_model_index]
print(f"Path of the model with the lowest validation loss: {best_model_path}")

# Rebuild the same architecture as in training, then restore the saved weights
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=4)
model.classifier = nn.Linear(768, 4)  # same replacement head as during fold training
model.load_state_dict(torch.load(best_model_path, weights_only=True))
model.to(device)

# Tokenize the complete train + validation frame and expose it as torch tensors
full_train_dataset = Dataset.from_pandas(train_val_df).map(preprocess_data, batched=True)
full_train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
full_train_dataloader = DataLoader(full_train_dataset, batch_size=batch_size, shuffle=True)

# Fine-tuning runs for half the cross-validation epoch budget
fine_tune_epochs = epochs // 2
optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=1e-5)
total_steps = len(full_train_dataloader) * fine_tune_epochs
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=int(0.1 * total_steps),
    num_training_steps=total_steps,
)

# Fine-tune the model
model.train()
for epoch in range(fine_tune_epochs):
    total_loss = 0
    for batch in full_train_dataloader:
        labels = batch['labels'].to(device)
        inputs = {name: tensor.to(device) for name, tensor in batch.items() if name != 'labels'}

        # Forward pass
        logits = model(**inputs).logits
        loss = loss_fn(logits, labels.float())

        # Backward pass and parameter / learning-rate update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        total_loss += loss.item()

    # Mean of the per-batch losses for this epoch
    avg_train_loss = total_loss / len(full_train_dataloader)
    print(f"Fine-tuning Epoch {epoch + 1}/{fine_tune_epochs} - Training Loss: {avg_train_loss:.4f}")

# Save the final fine-tuned model
final_model_path = Path(save_dir) / "final_model_fine_tuned_bert.pth"
torch.save(model.state_dict(), final_model_path)
print(f"The final fine-tuned model has been saved to: {final_model_path}")

In [None]:
# Reload the final fine-tuned weights (final_model_path) and switch to inference mode
model.load_state_dict(torch.load(final_model_path, weights_only=True))
model.to(device)
model.eval()

# Collect per-example targets and thresholded predictions over the test loader
all_labels, all_preds = [], []
with torch.no_grad():
    for batch in test_dataloader:
        labels = batch['labels'].to(device)
        inputs = {name: tensor.to(device) for name, tensor in batch.items() if name != 'labels'}

        probabilities = torch.sigmoid(model(**inputs).logits)
        # A category is predicted when its sigmoid probability exceeds 0.5
        all_preds.extend(probabilities.cpu().numpy() > 0.5)
        all_labels.extend(labels.cpu().numpy())

label_names = ['feature request', 'bug report', 'rating', 'user experience']
print("Test Classification Report:")
print(classification_report(all_labels, all_preds, target_names=label_names, zero_division=0))

## **RoBERTa Model**

In [None]:
from transformers import RobertaTokenizer, RobertaForSequenceClassification

# Training hyper-parameters (same values as the BERT section)
epochs = 10
patience = 3        # early-stopping patience, in epochs without improvement
batch_size = 32
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Checkpoint directory for this model family; the timestamp separates runs
save_dir = os.path.join(".", "models", "kf_roberta")
os.makedirs(save_dir, exist_ok=True)
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
backup_path = os.path.join(save_dir, f"backup_model_{timestamp}.pth")

# Tokenizer matching the 'roberta-base' checkpoint
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')


def preprocess_data(example):
    """Tokenize the 'review' text and carry the multi-label target along.

    Called through ``Dataset.map(..., batched=True)`` in this notebook.
    Every review is padded or truncated to exactly 128 tokens, and the value
    stored under 'label' is copied into the encoding as 'labels'.
    """
    encoding = tokenizer(
        example['review'],
        max_length=128,
        truncation=True,
        padding='max_length',
    )
    encoding['labels'] = example['label']
    return encoding


# Per-fold bookkeeping, filled in by the training loop
all_val_losses = []
best_model_paths = []
loss_fn = nn.BCEWithLogitsLoss()

def save_model(model, path):
    """Write ``model.state_dict()`` to ``path`` without clobbering on failure.

    The weights are first written to ``<path>.tmp`` and then moved over
    ``path`` with ``os.replace``. If serialization fails part-way (for example
    on a full disk), a checkpoint already stored at ``path`` is left untouched
    instead of being truncated.

    Args:
        model: Object exposing ``state_dict()`` (e.g. a ``torch.nn.Module``).
        path: Destination file path.

    Returns:
        True if the checkpoint was written, False otherwise. Failures are
        reported with ``print`` and never raised, so the caller can fall back
        to another location.
    """
    tmp_path = f"{path}.tmp"
    try:
        torch.save(model.state_dict(), tmp_path)
        os.replace(tmp_path, path)
    except Exception as e:
        print(f"Failed to save the model to {path}: {e}")
        # Best-effort cleanup of a partially written temporary file
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        return False
    print(f"Model saved to {path}")
    return True

# sys.exit() is the last resort below when no checkpoint can be written.
import sys

# Tokenize the held-out test set once, into its own variable. Doing this inside
# the fold loop repeated identical work for every fold and replaced the shared
# `test_dataset` with a model-specific tokenized copy.
test_dataset_mapped = test_dataset.map(preprocess_data, batched=True)
test_dataset_mapped.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
test_dataloader = DataLoader(test_dataset_mapped, batch_size=batch_size, shuffle=False)

# Train and validate for each fold
for fold_id, fold_data in enumerate(folds):
    print(f"\nTraining Fold {fold_id + 1}:")

    # Preprocess the training and validation data for the current fold
    train_dataset = fold_data['train'].map(preprocess_data, batched=True)
    valid_dataset = fold_data['validation'].map(preprocess_data, batched=True)

    train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
    valid_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True)

    # Load the Roberta model (a fresh copy of the pretrained weights per fold)
    model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=4)
    # The output projection of the classification head is replaced with a new
    # 4-way linear layer of the same input width.
    model.classifier.out_proj = nn.Linear(model.classifier.out_proj.in_features, 4)
    model.to(device)

    # The linear warm-up scheduler is stepped only for the first `warmup_steps`
    # optimizer steps; afterwards ReduceLROnPlateau alone adjusts the rate.
    optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=1e-5)
    total_steps = len(train_dataloader) * epochs
    warmup_steps = int(0.1 * total_steps)
    warmup_scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=warmup_steps, num_training_steps=total_steps
    )
    # NOTE(review): `verbose` is deprecated in newer PyTorch releases — confirm
    # against the installed version.
    reduce_scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True)

    val_losses = []
    global_step = 0
    epochs_no_improve = 0
    best_val_loss = float('inf')
    fold_best_path = os.path.join(save_dir, f"fold{fold_id + 1}_best_model_{timestamp}.pth")
    # Per-fold backup file, so a fallback save of one fold cannot be
    # overwritten by a fallback save of another fold.
    fold_backup_path = os.path.join(save_dir, f"fold{fold_id + 1}_backup_model_{timestamp}.pth")
    best_model_paths.append(fold_best_path)

    # Start the training loop
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for batch in train_dataloader:
            inputs = {k: v.to(device) for k, v in batch.items() if k != 'labels'}
            labels = batch['labels'].to(device)

            outputs = model(**inputs)
            loss = loss_fn(outputs.logits, labels.float())
            total_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if global_step < warmup_steps:
                warmup_scheduler.step()
            global_step += 1

        avg_train_loss = total_loss / len(train_dataloader)

        # Validation
        model.eval()
        val_loss = 0
        all_labels = []
        all_preds = []
        with torch.no_grad():
            for batch in valid_dataloader:
                inputs = {k: v.to(device) for k, v in batch.items() if k != 'labels'}
                labels = batch['labels'].to(device)

                outputs = model(**inputs)
                loss = loss_fn(outputs.logits, labels.float())
                val_loss += loss.item()

                preds = (torch.sigmoid(outputs.logits).cpu().numpy() > 0.5).astype(int)
                all_labels.extend(labels.cpu().numpy())
                all_preds.extend(preds)

        avg_val_loss = val_loss / len(valid_dataloader)
        val_losses.append(avg_val_loss)

        # Use ReduceLROnPlateau only after warm-up
        if global_step >= warmup_steps:
            reduce_scheduler.step(avg_val_loss)

        print(f"Epoch {epoch + 1}/{epochs} - Training Loss: {avg_train_loss:.4f}, Validation Loss: {avg_val_loss:.4f}")

        # Save the best model
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            epochs_no_improve = 0
            if save_model(model, fold_best_path):
                best_model_paths[-1] = fold_best_path
            elif save_model(model, fold_backup_path):
                # Primary location failed: record the backup so the retraining
                # cell loads the file that actually holds the best weights.
                best_model_paths[-1] = fold_backup_path
            else:
                print("Failed to save model to both paths. Exiting.")
                sys.exit(1)
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print("Early stopping due to no improvement in validation loss.")
                break

    all_val_losses.append(val_losses)

print("Training completed for all folds!")


In [None]:
# Plot the validation loss curve for each fold
# One curve per fold (x = epoch index) on a single set of axes
fig, ax = plt.subplots(figsize=(6, 4))
for fold_number, fold_losses in enumerate(all_val_losses, start=1):
    ax.plot(fold_losses, label=f'Fold {fold_number}')
ax.set_title('Validation Loss Curve for Each Fold (Roberta)')
ax.set_xlabel('Epoch')
ax.set_ylabel('Validation Loss')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Select the model with the best validation loss across all folds
print("\nRetraining the best model using the full training + validation set...")
best_model_index = np.argmin([min(val_loss) for val_loss in all_val_losses])
best_model_path = best_model_paths[best_model_index]
print(f"Path of the model with the lowest validation loss: {best_model_path}")

# Reload the best model: rebuild the training-time architecture, then restore
# the weights. weights_only=True matches the BERT section and restricts
# unpickling to tensor data; map_location lets a checkpoint written on another
# device be restored onto the current one.
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=4)
model.classifier.out_proj = nn.Linear(model.classifier.out_proj.in_features, 4)
model.load_state_dict(torch.load(best_model_path, map_location=device, weights_only=True))
model.to(device)

# Fine-tune using the full training + validation set
full_train_dataset = Dataset.from_pandas(train_val_df)
full_train_dataset = full_train_dataset.map(preprocess_data, batched=True)
full_train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

full_train_dataloader = DataLoader(full_train_dataset, batch_size=batch_size, shuffle=True)

# Fine-tuning uses half of the cross-validation epoch budget
fine_tune_epochs = epochs // 2
optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=1e-5)
total_steps = len(full_train_dataloader) * fine_tune_epochs
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=int(0.1 * total_steps), num_training_steps=total_steps
)

model.train()
for epoch in range(fine_tune_epochs):
    total_loss = 0
    for batch in full_train_dataloader:
        inputs = {k: v.to(device) for k, v in batch.items() if k != 'labels'}
        labels = batch['labels'].to(device)

        outputs = model(**inputs)
        loss = loss_fn(outputs.logits, labels.float())
        total_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

    # Mean of the per-batch losses for this epoch
    avg_train_loss = total_loss / len(full_train_dataloader)
    print(f"Fine-tuning Epoch {epoch + 1}/{fine_tune_epochs} - Training Loss: {avg_train_loss:.4f}")

final_model_path = Path(save_dir) / "final_model_fine_tuned_roberta.pth"
torch.save(model.state_dict(), final_model_path)
print(f"The final fine-tuned model has been saved to: {final_model_path}")

In [None]:
# Load the final model and evaluate on the test set.
# weights_only=True matches the BERT section and restricts unpickling to tensor
# data; map_location restores the weights onto the current device.
model.load_state_dict(torch.load(final_model_path, map_location=device, weights_only=True))
model.to(device)
model.eval()

# Per-example targets and thresholded predictions, gathered batch by batch
all_labels = []
all_preds = []
with torch.no_grad():
    for batch in test_dataloader:
        inputs = {k: v.to(device) for k, v in batch.items() if k != 'labels'}
        labels = batch['labels'].to(device)

        outputs = model(**inputs)
        # A category is predicted when its sigmoid probability exceeds 0.5
        preds = (torch.sigmoid(outputs.logits).cpu().numpy() > 0.5).astype(int)
        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds)

print("Test Classification Report:")
print(classification_report(all_labels, all_preds,
                            target_names=['feature request', 'bug report', 'rating', 'user experience'],
                            zero_division=0))

## **BART Model**

In [None]:
from transformers import BartTokenizer, BartForSequenceClassification

# Training hyper-parameters (same values as the other model sections)
epochs = 10
patience = 3        # early-stopping patience, in epochs without improvement
batch_size = 32
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Checkpoint directory for this model family; the timestamp separates runs
save_dir = os.path.join(".", "models", "kf_bart")
os.makedirs(save_dir, exist_ok=True)
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
backup_path = os.path.join(save_dir, f"backup_model_{timestamp}.pth")

# Tokenizer matching the 'facebook/bart-base' checkpoint
tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')


def preprocess_data(example):
    """Tokenize the 'review' text and carry the multi-label target along.

    Called through ``Dataset.map(..., batched=True)`` in this notebook.
    Every review is padded or truncated to exactly 128 tokens, and the value
    stored under 'label' is copied into the encoding as 'labels'.
    """
    encoding = tokenizer(
        example['review'],
        max_length=128,
        truncation=True,
        padding='max_length',
    )
    encoding['labels'] = example['label']
    return encoding


# Per-fold bookkeeping, filled in by the training loop
all_val_losses = []
best_model_paths = []
loss_fn = nn.BCEWithLogitsLoss()

def save_model(model, path):
    """Write ``model.state_dict()`` to ``path`` without clobbering on failure.

    The weights are first written to ``<path>.tmp`` and then moved over
    ``path`` with ``os.replace``. If serialization fails part-way (for example
    on a full disk), a checkpoint already stored at ``path`` is left untouched
    instead of being truncated.

    Args:
        model: Object exposing ``state_dict()`` (e.g. a ``torch.nn.Module``).
        path: Destination file path.

    Returns:
        True if the checkpoint was written, False otherwise. Failures are
        reported with ``print`` and never raised, so the caller can fall back
        to another location.
    """
    tmp_path = f"{path}.tmp"
    try:
        torch.save(model.state_dict(), tmp_path)
        os.replace(tmp_path, path)
    except Exception as e:
        print(f"Failed to save the model to {path}: {e}")
        # Best-effort cleanup of a partially written temporary file
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        return False
    print(f"Model saved to {path}")
    return True

# sys.exit() is the last resort below when no checkpoint can be written.
import sys

# Tokenize the held-out test set once; it is the same for every fold, so
# repeating this inside the fold loop only duplicated work.
test_dataset_mapped = test_dataset.map(preprocess_data, batched=True)
test_dataset_mapped.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
test_dataloader = DataLoader(test_dataset_mapped, batch_size=batch_size, shuffle=False)

# Train and validate for each fold
for fold_id, fold_data in enumerate(folds):
    print(f"\nTraining Fold {fold_id + 1}:")

    # Preprocess the training and validation data for the current fold
    train_dataset = fold_data['train'].map(preprocess_data, batched=True)
    valid_dataset = fold_data['validation'].map(preprocess_data, batched=True)

    train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
    valid_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True)

    # Load the BART model (a fresh copy of the pretrained weights per fold)
    model = BartForSequenceClassification.from_pretrained('facebook/bart-base', num_labels=4)
    model.to(device)

    # The linear warm-up scheduler is stepped only for the first `warmup_steps`
    # optimizer steps; afterwards ReduceLROnPlateau alone adjusts the rate.
    optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=1e-5)
    total_steps = len(train_dataloader) * epochs
    warmup_steps = int(0.1 * total_steps)
    warmup_scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=warmup_steps, num_training_steps=total_steps
    )
    # NOTE(review): `verbose` is deprecated in newer PyTorch releases — confirm
    # against the installed version.
    reduce_scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True)

    val_losses = []
    global_step = 0
    epochs_no_improve = 0
    best_val_loss = float('inf')
    fold_best_path = os.path.join(save_dir, f"fold{fold_id + 1}_best_model_{timestamp}.pth")
    # Per-fold backup file, so a fallback save of one fold cannot be
    # overwritten by a fallback save of another fold.
    fold_backup_path = os.path.join(save_dir, f"fold{fold_id + 1}_backup_model_{timestamp}.pth")
    best_model_paths.append(fold_best_path)

    # Start the training loop
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for batch in train_dataloader:
            inputs = {k: v.to(device) for k, v in batch.items() if k != 'labels'}
            labels = batch['labels'].to(device)

            outputs = model(**inputs)
            loss = loss_fn(outputs.logits, labels.float())
            total_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if global_step < warmup_steps:
                warmup_scheduler.step()
            global_step += 1

        avg_train_loss = total_loss / len(train_dataloader)

        # Validation
        model.eval()
        val_loss = 0
        all_labels = []
        all_preds = []
        with torch.no_grad():
            for batch in valid_dataloader:
                inputs = {k: v.to(device) for k, v in batch.items() if k != 'labels'}
                labels = batch['labels'].to(device)

                outputs = model(**inputs)
                loss = loss_fn(outputs.logits, labels.float())
                val_loss += loss.item()

                preds = (torch.sigmoid(outputs.logits).cpu().numpy() > 0.5).astype(int)
                all_labels.extend(labels.cpu().numpy())
                all_preds.extend(preds)

        avg_val_loss = val_loss / len(valid_dataloader)
        val_losses.append(avg_val_loss)

        # ReduceLROnPlateau takes over once the warm-up steps are done
        if global_step >= warmup_steps:
            reduce_scheduler.step(avg_val_loss)

        print(f"Epoch {epoch + 1}/{epochs} - Training Loss: {avg_train_loss:.4f}, Validation Loss: {avg_val_loss:.4f}")

        # Save the best model
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            epochs_no_improve = 0
            if save_model(model, fold_best_path):
                best_model_paths[-1] = fold_best_path
            elif save_model(model, fold_backup_path):
                # Primary location failed: record the backup so the retraining
                # cell loads the file that actually holds the best weights.
                best_model_paths[-1] = fold_backup_path
            else:
                print("Failed to save model to both paths. Exiting.")
                sys.exit(1)
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print("Early stopping due to no improvement in validation loss.")
                break

    all_val_losses.append(val_losses)

print("Training completed for all folds!")


In [None]:
# Plot the validation loss curve for each fold
# One curve per fold (x = epoch index) on a single set of axes
fig, ax = plt.subplots(figsize=(6, 4))
for fold_number, fold_losses in enumerate(all_val_losses, start=1):
    ax.plot(fold_losses, label=f'Fold {fold_number}')
ax.set_title('Validation Loss Curve for Each Fold (BART)')
ax.set_xlabel('Epoch')
ax.set_ylabel('Validation Loss')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Select the model with the best validation loss across all folds
print("\nRetraining the best model using the full training + validation set...")
best_model_index = np.argmin([min(val_loss) for val_loss in all_val_losses])
best_model_path = best_model_paths[best_model_index]
print(f"Path of the model with the lowest validation loss: {best_model_path}")

# Reload the best model. weights_only=True matches the BERT section and
# restricts unpickling to tensor data; map_location lets a checkpoint written
# on another device be restored onto the current one.
model = BartForSequenceClassification.from_pretrained('facebook/bart-base', num_labels=4)
model.load_state_dict(torch.load(best_model_path, map_location=device, weights_only=True))
model.to(device)

# Fine-tune using the full training + validation set
full_train_dataset = Dataset.from_pandas(train_val_df)
full_train_dataset = full_train_dataset.map(preprocess_data, batched=True)
full_train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

full_train_dataloader = DataLoader(full_train_dataset, batch_size=batch_size, shuffle=True)

# Fine-tuning uses half of the cross-validation epoch budget
fine_tune_epochs = epochs // 2
optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=1e-5)
total_steps = len(full_train_dataloader) * fine_tune_epochs
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=int(0.1 * total_steps), num_training_steps=total_steps
)

model.train()
for epoch in range(fine_tune_epochs):
    total_loss = 0
    for batch in full_train_dataloader:
        inputs = {k: v.to(device) for k, v in batch.items() if k != 'labels'}
        labels = batch['labels'].to(device)

        outputs = model(**inputs)
        loss = loss_fn(outputs.logits, labels.float())
        total_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

    # Mean of the per-batch losses for this epoch
    avg_train_loss = total_loss / len(full_train_dataloader)
    print(f"Fine-tuning Epoch {epoch + 1}/{fine_tune_epochs} - Training Loss: {avg_train_loss:.4f}")

final_model_path = Path(save_dir) / "final_model_fine_tuned_bart.pth"
torch.save(model.state_dict(), final_model_path)
print(f"The final fine-tuned model has been saved to: {final_model_path}")

In [None]:
# Load the final model for test set evaluation.
# weights_only=True matches the BERT section and restricts unpickling to tensor
# data; map_location restores the weights onto the current device.
model.load_state_dict(torch.load(final_model_path, map_location=device, weights_only=True))
model.to(device)
model.eval()

# Evaluate on the test set
all_labels = []
all_preds = []

# Tokenize the test set with the current (BART) preprocess_data and build the
# loader here, so this cell does not depend on a loader left over from training.
test_dataset_mapped = test_dataset.map(preprocess_data, batched=True)
test_dataset_mapped.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
test_dataloader = DataLoader(test_dataset_mapped, batch_size=batch_size, shuffle=False)

with torch.no_grad():
    for batch in test_dataloader:
        inputs = {k: v.to(device) for k, v in batch.items() if k != 'labels'}
        labels = batch['labels'].to(device)

        outputs = model(**inputs)
        # A category is predicted when its sigmoid probability exceeds 0.5
        preds = (torch.sigmoid(outputs.logits).cpu().numpy() > 0.5).astype(int)
        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds)

print("Test Classification Report:")
print(classification_report(all_labels, all_preds,
                            target_names=['feature request', 'bug report', 'rating', 'user experience'],
                            zero_division=0))