<a href="https://colab.research.google.com/github/yaman9675/Pesto/blob/main/Automated_Response_Generation_for_Customer_Support.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BartTokenizer, BartForConditionalGeneration, AdamW
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from IPython.display import clear_output
import warnings
warnings.filterwarnings("ignore")

In [15]:
df = pd.read_csv("hf://datasets/Kaludi/Customer-Support-Responses/Customer-Support.csv")

# Split the data into train and test sets
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Initialize the BART tokenizer
tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

In [16]:
# Custom dataset class for customer support data
class CustomerSupportDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_length=128):
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        query = self.data.iloc[idx]['query']
        response = self.data.iloc[idx]['response']
        # Tokenize the input query
        inputs = self.tokenizer.encode_plus(
            query,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        # Tokenize the target response
        targets = self.tokenizer.encode_plus(
            response,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        return {
            'input_ids': inputs['input_ids'].flatten(),
            'attention_mask': inputs['attention_mask'].flatten(),
            'labels': targets['input_ids'].flatten()
        }

# Create dataset objects
train_dataset = CustomerSupportDataset(train_df, tokenizer)
test_dataset = CustomerSupportDataset(test_df, tokenizer)

In [17]:
# Initialize the model
model = BartForConditionalGeneration.from_pretrained('facebook/bart-base')

# Set up the device (GPU if available, else CPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

BartForConditionalGeneration(
  (model): BartModel(
    (shared): Embedding(50265, 768, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): BartScaledWordEmbedding(50265, 768, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 768)
      (layers): ModuleList(
        (0-5): 6 x BartEncoderLayer(
          (self_attn): BartSdpaAttention(
            (k_proj): Linear(in_features=768, out_features=768, bias=True)
            (v_proj): Linear(in_features=768, out_features=768, bias=True)
            (q_proj): Linear(in_features=768, out_features=768, bias=True)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (final_layer_norm): Laye

In [18]:
# Set up the data loader for training
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
# Set up the optimizer
optimizer = AdamW(model.parameters(), lr=2e-5)
# Training loop
num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for batch in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs}"):
        # Move batch to device
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        # Zero out any existing gradients
        optimizer.zero_grad()
        # Forward pass
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        # Backward pass
        loss.backward()
        # Update model parameters
        optimizer.step()

        total_loss += loss.item()

    # Print average loss for the epoch
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch + 1}/{num_epochs}, Average Loss: {avg_loss:.4f}")

# Save the model
torch.save(model.state_dict(), 'customer_support_bart_model.pth')

Epoch 1/5: 100%|██████████| 8/8 [02:44<00:00, 20.51s/it]


Epoch 1/5, Average Loss: 12.1804


Epoch 2/5: 100%|██████████| 8/8 [02:16<00:00, 17.05s/it]


Epoch 2/5, Average Loss: 10.2635


Epoch 3/5: 100%|██████████| 8/8 [02:19<00:00, 17.41s/it]


Epoch 3/5, Average Loss: 9.1785


Epoch 4/5: 100%|██████████| 8/8 [02:26<00:00, 18.37s/it]


Epoch 4/5, Average Loss: 7.5237


Epoch 5/5: 100%|██████████| 8/8 [02:17<00:00, 17.13s/it]


Epoch 5/5, Average Loss: 5.8143


In [19]:
# Evaluation loop
model.eval()
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

total_loss = 0

with torch.no_grad():
    for batch in tqdm(test_loader, desc="Evaluating"):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()

avg_loss = total_loss / len(test_loader)
print(f"Test Loss: {avg_loss:.4f}")

Evaluating: 100%|██████████| 2/2 [00:10<00:00,  5.49s/it]

Test Loss: 4.1789





In [20]:
# Function to generate responses
def generate_response(query, model, tokenizer, max_length=128):
    model.eval()

    inputs = tokenizer.encode_plus(
        query,
        add_special_tokens=True,
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_tensors='pt'
    )

    input_ids = inputs['input_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            num_beams=5,
            early_stopping=True
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

In [21]:
# Demo
query = "Where is my order?"
response = generate_response(query, model, tokenizer)
print(f"Query: {query}")
print(f"Generated Response: {response}")

Query: Where is my order?
Generated Response: We'd be happy to help. Can you please provide your order number?


In [22]:
# Demo function
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = BartForConditionalGeneration.from_pretrained('facebook/bart-base')
model.load_state_dict(torch.load('customer_support_bart_model.pth', map_location=device))
model.to(device)
model.eval()

tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')

def generate_response(query, max_length=128):
    inputs = tokenizer.encode_plus(
        query,
        add_special_tokens=True,
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_tensors='pt'
    )

    input_ids = inputs['input_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            num_beams=5,
            early_stopping=True
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

def run_demo():
    print("Welcome to the Customer Support Bot!")
    print("Type your questions and press Enter to get a response.")
    print("Press 'q' and Enter to quit the demo.")
    print("\n" + "="*50 + "\n")

    while True:
        query = input("Your question (or 'q' to quit): ")

        if query.lower() == 'q':
            print("\nThank you for using the Customer Support Bot. Goodbye!")
            break

        if query:
            response = generate_response(query)
            print(f"\nBot: {response}\n")
        else:
            print("\nPlease enter a question.\n")

        print("="*50 + "\n")

# Run the demo
run_demo()

Welcome to the Customer Support Bot!
Type your questions and press Enter to get a response.
Press 'q' and Enter to quit the demo.


Your question (or 'q' to quit): How long does shipping take?

Bot: How long does shipping take? Can you please provide your order number?


Your question (or 'q' to quit): q

Thank you for using the Customer Support Bot. Goodbye!
