In [1]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertModel, AdamW
from sklearn.model_selection import train_test_split
from torch import nn
import torch.optim as optim
from sklearn.metrics import classification_report
import numpy as np

# Load the data from Excel
data = pd.read_excel('cleaned_data.xlsx')

# Define the dataset class
class McDonaldsDataset(Dataset):
    def __init__(self, data, tokenizer, max_len):
        self.data = data
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        review = self.data.iloc[index]['review']
        # Get the sentiment for each aspect (0, 1, 2, 3)
        targets = self.data.iloc[index][['Food', 'Service', 'Cleanliness', 'Price', 'Others']].values.astype(int)
        encoding = self.tokenizer.encode_plus(
            review,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'review_text': review,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'targets': torch.tensor(targets, dtype=torch.long)  # Change to long for multi-class classification
        }

# Define the model class
class SentimentClassifier(nn.Module):
    def __init__(self, bert_model_name, num_labels_per_aspect):
        super(SentimentClassifier, self).__init__()
        self.bert = BertModel.from_pretrained(bert_model_name)
        self.drop = nn.Dropout(p=0.3)
        # A separate classifier head for each aspect (each with 4 possible classes: 0, 1, 2, 3)
        self.food_out = nn.Linear(self.bert.config.hidden_size, num_labels_per_aspect)
        self.service_out = nn.Linear(self.bert.config.hidden_size, num_labels_per_aspect)
        self.cleanliness_out = nn.Linear(self.bert.config.hidden_size, num_labels_per_aspect)
        self.price_out = nn.Linear(self.bert.config.hidden_size, num_labels_per_aspect)
        self.others_out = nn.Linear(self.bert.config.hidden_size, num_labels_per_aspect)
        
    def forward(self, input_ids, attention_mask):
        pooled_output = self.bert(input_ids=input_ids, attention_mask=attention_mask)[1]
        output = self.drop(pooled_output)
        # Generate separate outputs for each aspect
        food_output = self.food_out(output)
        service_output = self.service_out(output)
        cleanliness_output = self.cleanliness_out(output)
        price_output = self.price_out(output)
        others_output = self.others_out(output)
        return food_output, service_output, cleanliness_output, price_output, others_output

# Create a function for training the model
def train_model(model, data_loader, loss_fn, optimizer, device):
    model = model.train()
    losses = 0

    for d in data_loader:
        input_ids = d["input_ids"].to(device)
        attention_mask = d["attention_mask"].to(device)
        targets = d["targets"].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        
        loss = sum([loss_fn(o, t) for o, t in zip(outputs, torch.unbind(targets, dim=1))])
        
        losses += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return losses / len(data_loader)

# Create a function for evaluating the model
def eval_model(model, data_loader, loss_fn, device):
    model = model.eval()
    losses = 0
    predictions = []
    targets_all = []

    with torch.no_grad():
        for d in data_loader:
            input_ids = d["input_ids"].to(device)
            attention_mask = d["attention_mask"].to(device)
            targets = d["targets"].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            
            loss = sum([loss_fn(o, t) for o, t in zip(outputs, torch.unbind(targets, dim=1))])
            
            losses += loss.item()

            preds = [torch.argmax(o, dim=1) for o in outputs]
            predictions.append(torch.stack(preds, dim=1).cpu().numpy())
            targets_all.append(targets.cpu().numpy())

    return losses / len(data_loader), np.vstack(predictions), np.vstack(targets_all)

# Hyperparameters
MAX_LEN = 128
BATCH_SIZE = 16
EPOCHS = 10
LEARNING_RATE = 2e-5
PATIENCE = 3  # Early stopping patience
BERT_MODEL_NAME = 'bert-base-uncased'

# Load BERT tokenizer
tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)

# Split dataset into train (70%), validation (15%), and test (15%)
train_data, temp_data = train_test_split(data, test_size=0.3, random_state=42)
val_data, test_data = train_test_split(temp_data, test_size=0.5, random_state=42)

# Create dataset and dataloaders
train_dataset = McDonaldsDataset(train_data, tokenizer, MAX_LEN)
val_dataset = McDonaldsDataset(val_data, tokenizer, MAX_LEN)
test_dataset = McDonaldsDataset(test_data, tokenizer, MAX_LEN)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Initialize the model, optimizer, and loss function
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SentimentClassifier(BERT_MODEL_NAME, num_labels_per_aspect=4)
model = model.to(device)

optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)
loss_fn = nn.CrossEntropyLoss().to(device)  # Use CrossEntropyLoss for multi-class classification

# Early stopping setup
best_loss = np.inf
early_stop_count = 0

# Training loop with early stopping
for epoch in range(EPOCHS):
    print(f'Epoch {epoch + 1}/{EPOCHS}')

    train_loss = train_model(model, train_loader, loss_fn, optimizer, device)
    val_loss, _, _ = eval_model(model, val_loader, loss_fn, device)

    print(f'Training loss: {train_loss}, Validation loss: {val_loss}')
    
    # Early stopping check
    if val_loss < best_loss:
        best_loss = val_loss
        torch.save(model.state_dict(), 'best_bert_sentiment_model.pt')
        early_stop_count = 0
    else:
        early_stop_count += 1
        if early_stop_count >= PATIENCE:
            print("Early stopping triggered")
            break

# Load the best model
model.load_state_dict(torch.load('best_bert_sentiment_model.pt'))

# Evaluate on the test set
test_loss, predictions, true_labels = eval_model(model, test_loader, loss_fn, device)

# Classification report
target_names = ['Does not exist', 'Negative', 'Neutral', 'Positive']
for i, aspect in enumerate(['Food', 'Service', 'Cleanliness', 'Price', 'Others']):
    print(f"Classification report for {aspect}:")
    print(classification_report(true_labels[:, i], predictions[:, i], target_names=target_names, zero_division=0))




Epoch 1/10
Training loss: 4.401010650772232, Validation loss: 3.3731067776679993
Epoch 2/10
Training loss: 2.9843916194932953, Validation loss: 2.4793894290924072
Epoch 3/10
Training loss: 2.205920749956423, Validation loss: 2.090814153353373
Epoch 4/10
Training loss: 1.7024906293765918, Validation loss: 1.9599006126324336
Epoch 5/10
Training loss: 1.3609540253072172, Validation loss: 1.8824256360530853
Epoch 6/10
Training loss: 1.0705540427216538, Validation loss: 1.919942984978358
Epoch 7/10
Training loss: 0.8523963850897711, Validation loss: 1.9851507743199666
Epoch 8/10
Training loss: 0.6950080370580828, Validation loss: 1.994509259859721
Early stopping triggered


  model.load_state_dict(torch.load('best_bert_sentiment_model.pt'))


Classification report for Food:
                precision    recall  f1-score   support

Does not exist       0.92      0.98      0.95       266
      Negative       0.85      0.70      0.77        50
       Neutral       0.00      0.00      0.00         6
      Positive       0.84      0.79      0.81        58

      accuracy                           0.90       380
     macro avg       0.65      0.62      0.63       380
  weighted avg       0.88      0.90      0.89       380

Classification report for Service:
                precision    recall  f1-score   support

Does not exist       0.90      0.89      0.89       172
      Negative       0.85      0.92      0.88       132
       Neutral       0.00      0.00      0.00         4
      Positive       0.91      0.86      0.89        72

      accuracy                           0.88       380
     macro avg       0.67      0.67      0.67       380
  weighted avg       0.88      0.88      0.88       380

Classification report for Clean

**1. Data Preparation**

The data is loaded from an Excel file (cleaned_data.xlsx) containing customer reviews and their sentiment labels for aspects like Food, Service, Cleanliness, Price, and Others.  

Example: A review might be: "Food seems to be getting worse. Tables dirty. Sketchy crowd loitering within the store." with labels:  
Food: 1 (Negative)  
Service: 0 (Does not exist)  
Cleanliness: 1 (Negative)  
Price: 0 (Does not exist)  
Others: 1 (Negative)  

Tokenization: The BERT tokenizer encodes each review by converting it into input IDs and attention masks to be fed into the BERT model. For instance, the above review is converted into tokens, padded/truncated to a maximum length (MAX_LEN), and fed into the model.  

**2. Dataset Class Creation**  

The aspect sentiment targets are represented as multi-class labels (0 = Does not exist, 1 = Negative, 2 = Neutral, 3 = Positive).  
Example: For the above review, the sentiment targets would be [1, 0, 1, 0, 1] corresponding to Food, Service, Cleanliness, Price, and Others, respectively.   

**3. Model Definition**  
Pre-trained BERT Model: The BERT model processes the tokenized input and generates a pooled output, which is a fixed-size representation of the review (sentence embedding).  

Separate Classifier Heads: For each aspect (Food, Service, Cleanliness, Price, Others), there is a separate linear classifier head. Each classifier predicts one of the four possible sentiment classes: 0 (Does not exist), 1 (Negative), 2 (Neutral), or 3 (Positive).  

Example: For the input review, the model outputs predictions like:  
Food: 1 (Negative)  
Service: 0 (Does not exist)  
Cleanliness: 1 (Negative)  
Price: 0 (Does not exist)  
Others: 1 (Negative)  

**4. Training Process**  
Forward Pass: The review is passed through BERT, which generates a pooled output, then each classifier head generates predictions for each aspect.
Loss Calculation: The model compares its predicted sentiment for each aspect with the true labels and calculates the loss using CrossEntropyLoss. This loss is used to adjust the model’s parameters.  
Optimization: The optimizer (AdamW) updates the model parameters after each batch to minimize the loss.  

**5. Early Stopping**  
Early Stopping Mechanism: After each training epoch, the model’s performance is validated. If the validation loss does not improve for several epochs (PATIENCE), the training stops early to avoid overfitting.  

**6. Model Evaluation**  
After training, the model is evaluated on the test set, where it generates predictions for each aspect’s sentiment (Food, Service, Cleanliness, Price, Others).  
Classification Report: A classification report is generated for each aspect, displaying precision, recall, and F1-score for all sentiment classes (Does not exist, Negative, Neutral, Positive).  

Example: For the test review "Food was slow. Manager was rude.", the model might predict:  
Food: 1 (Negative)  
Service: 1 (Negative)  
Cleanliness: 0 (Does not exist)  
Price: 0 (Does not exist)  
Others: 0 (Does not exist)  


Example:  
The following review: "The food was great, but the service was slow. The place was clean, and the price was reasonable."  

Input (X):  
X is the review text, i.e., the entire sentence:  
"The food was great, but the service was slow. The place was clean, and the price was reasonable."
Output (Y):  
Y represents the sentiment for each aspect (Food, Service, Cleanliness, Price, Others), and each aspect can have one of the four sentiment labels:  
0: Does not exist  
1: Negative  
2: Neutral  
3: Positive  

For this review, the output (Y) would look like this:  

Food: 3 (Positive)  
Service: 1 (Negative)  
Cleanliness: 3 (Positive)  
Price: 3 (Positive)  
Others: 0 (Does not exist)  
So, Y would be: [3, 1, 3, 3, 0], representing the sentiment for each aspect.  

The BERT model processes the entire review (X) and generates an embedding (a vector representation) for the whole review.  
Then, there are five classification heads (one for each aspect: Food, Service, Cleanliness, Price, and Others).  
Each head takes the BERT output and predicts the sentiment (Y) for its corresponding aspect.  
For Food, the model predicts 3 (Positive), for Service, it predicts 1 (Negative), and so on.  
As such, the model is able to simultaneously predict sentiments for all five aspects from one review.  