In [1]:

import pandas as pd
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer


In [3]:

# Load data
data = pd.read_csv("sample.csv")


In [6]:
# Preprocess data and tokenize text
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
inputs = tokenizer.encode_plus(data["text"].tolist(), 
                                 add_special_tokens=True, 
                                 max_length=512, 
                                 return_attention_mask=True, 
                                 return_tensors="pt")

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


In [7]:
# Train GPT-2 model
model = GPT2LMHeadModel.from_pretrained('gpt2')
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

for epoch in range(5):
    model.train()
    for batch in inputs:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["label"].to(device)
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
    model.eval()
    print(f"Epoch {epoch+1}, Loss: {loss.item()}")


TypeError: string indices must be integers, not 'str'

In [8]:
inputs = []
for text in data["text"]:
    encoded_input = tokenizer.encode_plus(text, add_special_tokens=True, max_length=512, return_attention_mask=True, return_tensors="pt")
    inputs.append(encoded_input)

# Create a DataLoader for batching
batch_size = 32
data_loader = torch.utils.data.DataLoader(inputs, batch_size=batch_size, shuffle=True)


In [16]:
from torch.nn.utils.rnn import pad_sequence

inputs = []
for text in data["text"]:
    encoded_input = tokenizer.encode_plus(text, add_special_tokens=True, max_length=512, return_attention_mask=True, return_tensors="pt")
    inputs.append(encoded_input["input_ids"].flatten())

# Pad the inputs to the same length
padded_inputs = pad_sequence(inputs, batch_first=True)


In [26]:
def generate_response(input_text):
    inputs = tokenizer.encode_plus(input_text, add_special_tokens=True, max_length=512, return_attention_mask=True, return_tensors="pt")
    outputs = model.generate(inputs["input_ids"], attention_mask=inputs["attention_mask"], max_length=30, early_stopping=True, eos_token_id=tokenizer.encode("[END]")[0])
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response



In [27]:
input_text =  "Can I cancel my order?"
response = generate_response(input_text)
print(response)


Setting `pad_token_id` to `eos_token_id`:58 for open-end generation.


Can I cancel my order?

I don't know what's wrong with that. I'm not sure. I'm not sure. I'm


In [19]:
# Personalized Customer Engagement
def generate_engagement_message(customer_data, interaction_history):
    input_text = customer_data + interaction_history
    inputs = tokenizer.encode_plus(input_text, 
                                     add_special_tokens=True, 
                                     max_length=512, 
                                     return_attention_mask=True, 
                                     return_tensors="pt")
    outputs = model.generate(inputs["input_ids"], 
                              attention_mask=inputs["attention_mask"], 
                              max_length=128)
    message = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return message


In [25]:
customer_data = "Sophia Patel, age 29, interests: yoga, wellness"
interaction_history = "Previous purchases: gaming console, tech accessories"
message = generate_engagement_message(customer_data, interaction_history)
print(message)



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Sophia Patel, age 29, interests: yoga, wellnessPrevious purchases: gaming console, tech accessories, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer, computer,
