In [34]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from transformers import AutoModelForCausalLM, AutoTokenizer
from torch.utils.data import DataLoader, Dataset
from tqdm.auto import tqdm

In [13]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [22]:
df = pd.read_csv("data/train.csv").dropna(how="any")

train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

In [46]:
df.iloc[0, 1]

"If everyone thinks you're worthless, then maybe you need to find new people to hang out with.Seriously, the social context in which a person lives is a big influence in self-esteem.Otherwise, you can go round and round trying to understand why you're not worthless, then go back to the same crowd and be knocked down again.There are many inspirational messages you can find in social media. \xa0Maybe read some of the ones which state that no person is worthless, and that everyone has a good purpose to their life.Also, since our culture is so saturated with the belief that if someone doesn't feel good about themselves that this is somehow terrible.Bad feelings are part of living. \xa0They are the motivation to remove ourselves from situations and relationships which do us more harm than good.Bad feelings do feel terrible. \xa0 Your feeling of worthlessness may be good in the sense of motivating you to find out that you are much better than your feelings today."

### Load pre-trained model

In [None]:
model_name = "microsoft/DialoGPT-small"
model = AutoModelForCausalLM.from_pretrained(model_name)

In [17]:
tokenizer = AutoTokenizer.from_pretrained(model_name)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

In [25]:
def tokenizer_function(examples):
    inputs = tokenizer(examples["Context"], truncation=True, padding="max_length", max_length=128)
    outputs = tokenizer(examples["Response"], truncation=True, padding="max_length", max_length=128)
    return {"input_ids": inputs["input_ids"], "attention_mask": inputs["attention_mask"], "labels": outputs["input_ids"]}

train_tokenized = train_data.apply(tokenizer_function, axis=1)
test_tokenized = test_data.apply(tokenizer_function, axis=1)

In [30]:
class ConversationDataset(Dataset):
    def __init__(self, tokenized_data):
        self.tokenized_data = tokenized_data

    def __len__(self):
        return len(self.tokenized_data)

    def __getitem__(self, idx):
        item = self.tokenized_data.iloc[idx]
        return {k: torch.tensor(v, dtype=torch.long) for k, v in item.items()}

In [36]:
train_dataset = ConversationDataset(train_tokenized)
test_dataset = ConversationDataset(test_tokenized)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8)

In [48]:
from torch.optim import AdamW
from transformers import get_scheduler

model.to(device)

optimizer = AdamW(params=model.parameters(), lr=1e-4)
num_epochs = 3
num_training_steps = num_epochs * len(train_loader)

scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

In [54]:
for epoch in range(num_epochs):
    # Training phase
    model.train()
    total_train_loss = 0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1} - Training")

    for batch in progress_bar:
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(input_ids=batch['input_ids'], attention_mask=batch['attention_mask'], labels=batch['labels'])
        loss = outputs.loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        total_train_loss += loss.item()
        progress_bar.set_postfix({'Training Loss': loss.item()})

    avg_train_loss = total_train_loss / len(train_loader)
    print(f"Epoch {epoch+1}: Average Training Loss = {avg_train_loss:.4f}")

    # Evaluation phase
    model.eval()
    total_eval_loss = 0
    progress_bar = tqdm(test_loader, desc=f"Epoch {epoch+1} - Evaluation")

    with torch.no_grad():
        for batch in progress_bar:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(input_ids=batch['input_ids'], attention_mask=batch['attention_mask'], labels=batch['labels'])
            loss = outputs.loss

            total_eval_loss += loss.item()
            progress_bar.set_postfix({'Evaluation Loss': loss.item()})

    avg_eval_loss = total_eval_loss / len(test_loader)
    print(f"Epoch {epoch+1}: Average Evaluation Loss = {avg_eval_loss:.4f}")

Epoch 1 - Training:   0%|          | 0/351 [00:00<?, ?it/s]

Epoch 1: Average Training Loss = 5.7185


Epoch 1 - Evaluation:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch 1: Average Evaluation Loss = 5.8267


Epoch 2 - Training:   0%|          | 0/351 [00:00<?, ?it/s]

Epoch 2: Average Training Loss = 5.7190


Epoch 2 - Evaluation:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch 2: Average Evaluation Loss = 5.8267


Epoch 3 - Training:   0%|          | 0/351 [00:00<?, ?it/s]

Epoch 3: Average Training Loss = 5.7192


Epoch 3 - Evaluation:   0%|          | 0/88 [00:00<?, ?it/s]

Epoch 3: Average Evaluation Loss = 5.8267


In [53]:
model.eval()

def chat_with_model(prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    reply_ids = model.generate(**inputs, max_length=100, pad_token_id=tokenizer.eos_token_id)
    reply = tokenizer.decode(reply_ids[0], skip_special_tokens=True)
    return reply

# Test the conversational agent
user_input = "I'm not feeling good"
response = chat_with_model(user_input)
print("Model:", response)

Model: I'm not feeling good you you you you you you you you you you you you you you you you you you you you you you you you you you you you you you you you you you you you you you you you you you you you you you
