In [103]:
# Imports and Initial Setup
import torch
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
import pandas as pd
import numpy as np
from torch.nn import CrossEntropyLoss
from tqdm import tqdm
import itertools

# Name to use for saving the model
model_path = 'BERT Sentiment Model'

# Dataset (CSV) Column Names
sentence_column_name = "Sentence"
sentiment_column_name = "Final_Sent"

# To Use GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

device

device(type='cpu')

In [104]:
# Data Splitting
# Load the dataset from CSV file
df = pd.read_csv('annotated_dataset.csv')
# Split data into 80% training+validation and 20% test
remaining, test = train_test_split(df, test_size=0.2, random_state=42)
# Split the remaining 80% into 70% training and 10% validation (0.125 of 80% = 10% overall)
train, val = train_test_split(remaining, test_size=0.125, random_state=42)

train, val, test

(                                               Sentence Presidential_Candidate  ... Vote_2  Final_Sent
 1292  As the Democratic National Convention approach...          Kamala Harris  ...      1           1
 2200                    He's like fighter, fighter man.           Donald Trump  ...      1           1
 2780  Donald Trump over performed by almost 5% accor...           Donald Trump  ...      0           1
 2960  If Kamala wants my vote, I want her to take a ...          Kamala Harris  ...     -1           0
 2870  Around 80% of the African Americans here in Mi...           Donald Trump  ...     -1          -1
 ...                                                 ...                    ...  ...    ...         ...
 2748     If Michigan goes to Donald Trump, he will win.           Donald Trump  ...      1           1
 636   And Harris's ability to appeal to them could p...          Kamala Harris  ...      1           1
 861   Donald Trump, on the other hand, in a town hal...        

In [105]:
# Set up the model and compute class weights
def compute_class_weights(labels):
    """
    Calculate weights for each class to handle imbalanced data
    For example, if we have 100 positive but only 10 negative samples,
    negative samples will get higher weight to balance their importance
    """
    # Shift labels for model [-1, 0, 1] to [0, 1, 2]
    mapped_labels = labels + 1
    # Count how many samples we have of each class
    class_counts = np.bincount(mapped_labels)
    # Give higher weights to classes with fewer samples
    weights = 1. / class_counts
    # Normalize weights to sum to number of classes
    weights = weights * len(class_counts) / weights.sum()
    return torch.FloatTensor(weights)

# Calculate weights for each class from training data
class_weights = compute_class_weights(train[sentiment_column_name].values)
class_weights = class_weights.to(device)  # Move weights to GPU if available

class_weights

tensor([1.0569, 1.0804, 0.8626])

In [106]:
# Base BERT model to use
model_name = 'bert-base-uncased'

# Create a custom BERT model that can handle weighted loss
class BertWithWeightedLoss(BertForSequenceClassification):
    """
    Custom BERT model that applies different weights to each class
    This helps handle imbalanced datasets better
    """
    def __init__(self, config, class_weights):
        super().__init__(config)
        self.class_weights = class_weights # Store class weights for loss calculation
    
    def forward(self, input_ids, attention_mask=None, labels=None):
        # Get model outputs without computing loss
        outputs = super().forward(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=None # Set to None to prevent automatic loss calculation
        )
        
        # Calculate weighted loss if labels are provided (training phase)
        if labels is not None:
            # Create loss function with class weights
            loss_fct = CrossEntropyLoss(weight=self.class_weights)
            # Calculate loss using model predictions and true labels
            loss = loss_fct(
                outputs.logits.view(-1, self.num_labels),  # Reshape predictions
                labels.view(-1)                            # Reshape labels
            )
            outputs.loss = loss  # Add loss to outputs
        
        return outputs

# Initialize the tokenizer that will convert text to numbers
tokenizer = BertTokenizer.from_pretrained(model_name)

tokenizer

BertTokenizer(name_or_path='bert-base-uncased', vocab_size=30522, model_max_length=512, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True, added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	100: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	101: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	102: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	103: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}
)

In [107]:
# Data Preparation Functions
def create_data_loader(data, tokenizer, batch_size):
    """
    Convert text data into a format BERT can understand and create batches
    
    Args:
        data: DataFrame containing text and labels
        tokenizer: BERT tokenizer to convert text to numbers
        batch_size: How many samples to process at once
    
    Returns:
        DataLoader that yields batches of processed data
    """
    # Convert text to BERT input format with progress bar
    encodings = tokenizer(
        data[sentence_column_name].tolist(), # Convert sentences to list
        truncation=True, # Cut texts longer than max_length
        padding=True, # Pad texts shorter than max_length
        max_length=128, # Maximum sequence length
        return_tensors='pt', # Return PyTorch tensors
        verbose=True # Show progress
    )

    # Create dataset by combining inputs and labels
    dataset = torch.utils.data.TensorDataset(
        encodings['input_ids'], # Tokenized text
        encodings['attention_mask'], # Attention mask for padding
        torch.tensor(data[sentiment_column_name].tolist()) # Labels
    )
    
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)


In [108]:
# Evaluation Function
def evaluate_model(model, data_loader, device):
    """
    Evaluate model performance using various metrics
    
    Args:
        model: The BERT model to evaluate
        data_loader: DataLoader containing validation or test data
        device: CPU or GPU
    
    Returns:
        Dictionary containing various performance metrics
    """
    model.eval() # Set model to evaluation mode
    val_loss = 0
    all_preds = []
    all_labels = []
    
    with torch.no_grad(): # Don't compute gradients during evaluation
        for batch in tqdm(data_loader, desc="Evaluation"):
            # Move batch to GPU if available
            input_ids, attention_mask, labels = [b.to(device) for b in batch]
            adjusted_labels = labels + 1 # Shift labels for model

            # Get model predictions
            outputs = model(input_ids, attention_mask, labels=adjusted_labels)
            val_loss += outputs.loss.item()

            # Store predictions and true labels
            _, predicted = torch.max(outputs.logits, 1)
            all_preds.extend((predicted - 1).cpu().numpy())
            all_labels.extend((adjusted_labels - 1).cpu().numpy())

    # Calculate various performance metrics
    accuracy = accuracy_score(all_labels, all_preds)
    precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='weighted')
    
    return {
        'loss': val_loss / len(data_loader),
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

In [109]:
# Training Function
def train_model(model, train_loader, val_loader, device, epochs, learning_rate):
    """
    Train the model and periodically evaluate its performance
    
    Args:
        model: The BERT model to train
        train_loader: DataLoader with training data
        val_loader: DataLoader with validation data
        device: CPU or GPU
        epochs: Number of times to process all training data
        learning_rate: How quickly the model should learn
    
    Returns:
        Trained model and its best validation metrics
    """
    # Initialize optimizer
    optimizer = AdamW(model.parameters(), lr=learning_rate)
    best_metrics = None
    best_model = None
    
    # Training loop
    for epoch in range(epochs):
        model.train() # Set model to training mode
        total_loss = 0
        
        # Process each batch
        pbar = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs} Training")
        for batch in pbar:
            # Move batch to GPU if available
            input_ids, attention_mask, labels = [b.to(device) for b in batch]
            adjusted_labels = labels + 1 # Shift labels for model [-1, 0, 1] to [0, 1, 2]

            # Training step
            optimizer.zero_grad() # Clear previous gradients
            outputs = model(input_ids, attention_mask, labels=adjusted_labels) # Forward pass
            
            loss = outputs.loss
            total_loss += loss.item() # Accumulate loss

            # Update model weights
            loss.backward() # Backward pass
            optimizer.step() # Update weights

            # Update progress bar with current loss
            pbar.set_postfix(loss=loss.item())

        # Calculate average loss for this epoch
        avg_train_loss = total_loss / len(train_loader)
        print(f'Epoch {epoch + 1}/{epochs}, Average training loss: {avg_train_loss:.4f}')
        
        val_metrics = evaluate_model(model, val_loader, device)
        print(f'Validation metrics: {val_metrics}')
        
         # Update best parameters if accuracy score improves
        if best_metrics == None or val_metrics['accuracy'] > best_metrics['accuracy']:
            best_metrics = val_metrics
            best_model = model
    
    return best_model, best_metrics

In [111]:
# Define different values to try for each parameter
param_grid = {
    'learning_rate': [2e-5, 5e-5], # BERT is best on -5
    'batch_size': [32], # If the max length is 128 or 256, then 32 would be a good number.
    'epochs': [5] # Amount of epochs required were small 3 to 5
}

# Hyperparameter Tuning Function
def hyperparameter_tuning(train_data, val_data, device, class_weights):
    """
    Try different combinations of hyperparameters to find the best ones
    
    Args:
        train_data: Training data DataFrame
        val_data: Validation data DataFrame
        device: CPU or GPU
        class_weights: Weights for each class
    
    Returns:
        Best parameters and their corresponding metrics
    """    

    # Create all possible combinations of parameters
    param_combinations = [
        dict(zip(param_grid.keys(), v)) 
        for v in itertools.product(*param_grid.values())
    ]

    best_model = None
    best_metrics = None
    best_params = None

    # Try each combination of parameters
    for params in param_combinations:
        print(f"\nTrying parameters: {params}")

        # Create data loaders with current batch size
        train_loader = create_data_loader(train_data, tokenizer, params['batch_size'])
        val_loader = create_data_loader(val_data, tokenizer, params['batch_size'])

        # Initialize the custom BERT model
        model = BertWithWeightedLoss.from_pretrained(
            model_name,
            # Configure BERT for binary classification
            config=BertForSequenceClassification.from_pretrained(
                model_name,
                num_labels=3,
                output_attentions=False, # Don't output attention weights
                output_hidden_states=False, # Don't output hidden states
            ).config,
            class_weights=class_weights
        )
        # Move model to GPU if available
        model.to(device)

        # Train model with current parameters
        model, val_metrics = train_model(
            model, 
            train_loader, 
            val_loader,
            device,
            params['epochs'],
            params['learning_rate']
        )

        # Update best parameters if accuracy score improves
        if best_metrics == None or val_metrics['accuracy'] > best_metrics['accuracy']:
            best_model = model
            best_params = params
            best_metrics = val_metrics
    
    return best_model, best_params, best_metrics

In [112]:
# Run Hyperparameter Tuning
best_model, best_params, best_metrics = hyperparameter_tuning(train, val, device, class_weights)
print(f"\nBest parameters: {best_params}")
print(f"Best validation metrics: {best_metrics}")

# Save the best model
best_model.save_pretrained(model_path)
tokenizer.save_pretrained(model_path)

best_model


Trying parameters: {'learning_rate': 2e-05, 'batch_size': 32, 'epochs': 5}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertWithWeightedLoss were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5 Training: 100%|██████████████████████████████████████████████████| 83/83 [14:32<00:00, 10.52s/it, loss=0.686]


Epoch 1/5, Average training loss: 0.9529


Evaluation: 100%|██████████████████████████████████████████████████████████████████████| 12/12 [00:22<00:00,  1.91s/it]


Validation metrics: {'loss': 0.7400287439425787, 'accuracy': 0.6851851851851852, 'precision': 0.6833075654504226, 'recall': 0.6851851851851852, 'f1': 0.6770897761219201}


Epoch 2/5 Training: 100%|███████████████████████████████████████████████████| 83/83 [14:56<00:00, 10.80s/it, loss=0.59]


Epoch 2/5, Average training loss: 0.6529


Evaluation: 100%|██████████████████████████████████████████████████████████████████████| 12/12 [00:24<00:00,  2.03s/it]


Validation metrics: {'loss': 0.6706100727121035, 'accuracy': 0.701058201058201, 'precision': 0.694312524913797, 'recall': 0.701058201058201, 'f1': 0.6949520620027532}


Epoch 3/5 Training: 100%|██████████████████████████████████████████████████| 83/83 [15:48<00:00, 11.43s/it, loss=0.355]


Epoch 3/5, Average training loss: 0.4043


Evaluation: 100%|██████████████████████████████████████████████████████████████████████| 12/12 [00:23<00:00,  1.96s/it]


Validation metrics: {'loss': 0.7281094739834467, 'accuracy': 0.7328042328042328, 'precision': 0.7349320873130397, 'recall': 0.7328042328042328, 'f1': 0.7300415897933018}


Epoch 4/5 Training: 100%|██████████████████████████████████████████████████| 83/83 [14:42<00:00, 10.64s/it, loss=0.126]


Epoch 4/5, Average training loss: 0.2370


Evaluation: 100%|██████████████████████████████████████████████████████████████████████| 12/12 [00:26<00:00,  2.20s/it]


Validation metrics: {'loss': 0.849938211341699, 'accuracy': 0.716931216931217, 'precision': 0.7167796288215753, 'recall': 0.716931216931217, 'f1': 0.7161571734685311}


Epoch 5/5 Training: 100%|██████████████████████████████████████████████████| 83/83 [33:41<00:00, 24.35s/it, loss=0.125]


Epoch 5/5, Average training loss: 0.1594


Evaluation: 100%|██████████████████████████████████████████████████████████████████████| 12/12 [00:58<00:00,  4.88s/it]


Validation metrics: {'loss': 0.9804117580254873, 'accuracy': 0.6851851851851852, 'precision': 0.6996588736431473, 'recall': 0.6851851851851852, 'f1': 0.6860579958692612}

Trying parameters: {'learning_rate': 5e-05, 'batch_size': 32, 'epochs': 5}


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertWithWeightedLoss were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/5 Training: 100%|██████████████████████████████████████████████████| 83/83 [30:34<00:00, 22.10s/it, loss=0.525]


Epoch 1/5, Average training loss: 0.8809


Evaluation: 100%|██████████████████████████████████████████████████████████████████████| 12/12 [00:24<00:00,  2.06s/it]


Validation metrics: {'loss': 0.7059457004070282, 'accuracy': 0.701058201058201, 'precision': 0.692152787491736, 'recall': 0.701058201058201, 'f1': 0.6858338149795068}


Epoch 2/5 Training: 100%|██████████████████████████████████████████████████| 83/83 [15:26<00:00, 11.16s/it, loss=0.696]


Epoch 2/5, Average training loss: 0.5485


Evaluation: 100%|██████████████████████████████████████████████████████████████████████| 12/12 [00:24<00:00,  2.07s/it]


Validation metrics: {'loss': 0.6784906660517057, 'accuracy': 0.701058201058201, 'precision': 0.6975855201708459, 'recall': 0.701058201058201, 'f1': 0.6988953916187093}


Epoch 3/5 Training: 100%|██████████████████████████████████████████████████| 83/83 [15:53<00:00, 11.49s/it, loss=0.194]


Epoch 3/5, Average training loss: 0.3132


Evaluation: 100%|██████████████████████████████████████████████████████████████████████| 12/12 [00:25<00:00,  2.12s/it]


Validation metrics: {'loss': 0.8028487414121628, 'accuracy': 0.7116402116402116, 'precision': 0.7235556827930305, 'recall': 0.7116402116402116, 'f1': 0.7082303887486773}


Epoch 4/5 Training: 100%|██████████████████████████████████████████████████| 83/83 [15:32<00:00, 11.24s/it, loss=0.134]


Epoch 4/5, Average training loss: 0.1640


Evaluation: 100%|██████████████████████████████████████████████████████████████████████| 12/12 [00:24<00:00,  2.06s/it]


Validation metrics: {'loss': 1.0634979108969371, 'accuracy': 0.6904761904761905, 'precision': 0.6969949654216449, 'recall': 0.6904761904761905, 'f1': 0.6917576179342783}


Epoch 5/5 Training: 100%|██████████████████████████████████████████████████| 83/83 [16:57<00:00, 12.26s/it, loss=0.304]


Epoch 5/5, Average training loss: 0.0980


Evaluation: 100%|██████████████████████████████████████████████████████████████████████| 12/12 [00:24<00:00,  2.05s/it]


Validation metrics: {'loss': 1.1518813719352086, 'accuracy': 0.6746031746031746, 'precision': 0.6781348531832091, 'recall': 0.6746031746031746, 'f1': 0.6758853744255204}

Best parameters: {'learning_rate': 2e-05, 'batch_size': 32, 'epochs': 5}
Best validation metrics: {'loss': 0.7281094739834467, 'accuracy': 0.7328042328042328, 'precision': 0.7349320873130397, 'recall': 0.7328042328042328, 'f1': 0.7300415897933018}


BertWithWeightedLoss(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elem

In [None]:
# Training Time
# 14:32 00:22
# 14:56 00:24
# 15:48 00:23
# 14:42 00:26
# 33:41 00:58

# 30:34 00:24
# 15:26 00:24
# 15:53 00:25
# 15:32 00:24
# 16:57 00:24

In [114]:
# Final Evaluation on Test Set
test_loader = create_data_loader(test, tokenizer, best_params['batch_size'])
print("\nEvaluating final model on test set...")
test_metrics = evaluate_model(best_model, test_loader, device)
print(f"Test set metrics: {test_metrics}")


Evaluating final model on test set...


Evaluation: 100%|██████████████████████████████████████████████████████████████████████| 24/24 [01:06<00:00,  2.77s/it]

Test set metrics: {'loss': 0.9251180763045946, 'accuracy': 0.6838624338624338, 'precision': 0.6979576767654118, 'recall': 0.6838624338624338, 'f1': 0.6873617524080348}





In [118]:
# Set the model to evaluation mode
best_model.eval()

# Example text for prediction
example_text = "harris leads by 1%"

# Tokenize the input text
encoded_input = tokenizer(
    example_text,
    return_tensors="pt", # Return PyTorch tensors
    truncation=True,
    padding=True
)
# Remove token_type_ids if not used
encoded_input.pop("token_type_ids", None)

# Perform prediction without gradient computation
with torch.no_grad():
    outputs = best_model(**encoded_input)

# Get the logits from the model's output
logits = outputs.logits

# Get the predicted class (0, 1, or 2)
predicted_class = torch.argmax(logits, dim=1).item() - 1
print("Predicted class (-1, 0, 1):", predicted_class)

Predicted class (-1, 0, 1): 1
