In [1]:
# 8b
import torch
import torch.nn as nn
import torch.optim as optim
import random

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

# Toy corpus for the demo: each entry is a (user utterance, bot reply) pair.
pairs = [
    ("hi", "hello"),
    ("how are you?", "i am fine"),
    ("what is your name?", "i am a chatbot"),
    ("bye", "goodbye"),
]

# Vocabulary setup
def tokenize(sentence):
    """Lower-case *sentence* and split it on runs of whitespace.

    Punctuation is not separated from words, so ``"you?"`` remains a single
    token. An empty or all-whitespace sentence yields an empty list.
    """
    lowered = sentence.lower()
    return lowered.split()

def build_vocab(pairs):
    """Build the word <-> index lookup tables for a set of conversation pairs.

    Index layout: ``<pad>`` is 0, the corpus words occupy 1..N in sorted
    order, and ``<eos>`` is N + 1.

    Args:
        pairs: Iterable of sentence groups (e.g. ``(question, answer)``
            tuples); every sentence in every group contributes its tokens.

    Returns:
        A ``(word2idx, idx2word)`` tuple of dicts that are inverses of each
        other.
    """
    vocab = set()
    for pair in pairs:
        for sentence in pair:
            vocab.update(tokenize(sentence))

    # The reserved tokens get fixed slots below. If the corpus happens to
    # contain them literally, keeping them here would make the <eos> index
    # collide with a regular word.
    vocab -= {"<pad>", "<eos>"}

    # Set iteration order for strings changes between interpreter runs (hash
    # randomization); sorting keeps the word ids reproducible.
    word2idx = {word: idx for idx, word in enumerate(sorted(vocab), 1)}
    word2idx["<pad>"] = 0  # Padding token
    word2idx["<eos>"] = len(word2idx)  # EOS token: first index after the words
    idx2word = {idx: word for word, idx in word2idx.items()}
    return word2idx, idx2word

# Build the lookup tables once from the demo corpus. encode()/decode() and the
# model construction further down read these module-level names.
word2idx, idx2word = build_vocab(pairs)
vocab_size = len(word2idx)  # includes the <pad> and <eos> entries

# Encode sentences to token IDs
def encode(sentence):
    """Convert *sentence* into a list of token ids terminated by ``<eos>``.

    Words missing from ``word2idx`` are mapped to 0, the ``<pad>`` id.
    """
    token_ids = [word2idx.get(word, 0) for word in tokenize(sentence)]
    token_ids.append(word2idx["<eos>"])  # Add EOS at the end
    return token_ids

def decode(indices):
    """Turn token ids back into a space-joined sentence.

    Ids equal to 0 (``<pad>``) or to the ``<eos>`` id are skipped; every other
    id is looked up in ``idx2word``.
    """
    words = []
    for idx in indices:
        if idx == 0 or idx == word2idx["<eos>"]:
            continue
        words.append(idx2word[idx])
    return " ".join(words)

# Define Encoder Model
class Encoder(nn.Module):
    """GRU encoder that condenses a token-id sequence into a hidden state."""

    def __init__(self, input_size, hidden_size):
        """Create the embedding table and the GRU.

        Args:
            input_size: Number of rows in the embedding table (vocabulary size).
            hidden_size: Width of both the embeddings and the GRU state.
        """
        super().__init__()
        self.embedding = nn.Embedding(
            num_embeddings=input_size, embedding_dim=hidden_size
        )
        self.rnn = nn.GRU(
            input_size=hidden_size, hidden_size=hidden_size, batch_first=True
        )

    def forward(self, x):
        """Return the GRU's final hidden state for the token ids in *x*.

        The GRU is batch-first, so *x* is expected as ``(batch, seq_len)``;
        the per-step outputs are discarded and only the last hidden state is
        returned.
        """
        _, final_hidden = self.rnn(self.embedding(x))
        return final_hidden

# Define Decoder Model
class Decoder(nn.Module):
    """GRU decoder that maps input tokens plus a hidden state to vocabulary logits."""

    def __init__(self, hidden_size, output_size):
        """Create the embedding table, the GRU and the output projection.

        Args:
            hidden_size: Width of both the embeddings and the GRU state.
            output_size: Number of vocabulary entries to embed and to score.
        """
        super().__init__()
        self.embedding = nn.Embedding(
            num_embeddings=output_size, embedding_dim=hidden_size
        )
        self.rnn = nn.GRU(
            input_size=hidden_size, hidden_size=hidden_size, batch_first=True
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x, hidden):
        """Run the GRU over *x* starting from *hidden*.

        Returns:
            ``(prediction, hidden)``: unnormalised scores over the vocabulary
            for every input position, and the updated GRU hidden state.
        """
        rnn_out, next_hidden = self.rnn(self.embedding(x), hidden)
        return self.fc(rnn_out), next_hidden

# Hyperparameters
hidden_size = 32  # shared width of the embeddings and the GRU states

# The encoder is built before the decoder, so parameter initialisation
# consumes the random number generator in the same order on every run.
encoder = Encoder(input_size=vocab_size, hidden_size=hidden_size).to(device)
decoder = Decoder(hidden_size=hidden_size, output_size=vocab_size).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss(ignore_index=0)  # targets equal to 0 (<pad>) add no loss
trainable_params = [*encoder.parameters(), *decoder.parameters()]
optimizer = optim.Adam(trainable_params, lr=0.01)  # one optimizer updates both networks

# Training loop: 100 passes over the corpus, one optimizer step per pair.
for epoch in range(100):
    for source_text, reply_text in pairs:
        # Each sentence becomes a batch of one: shape (1, seq_len), ending in <eos>.
        source_ids = torch.tensor(
            [encode(source_text)], dtype=torch.long, device=device
        )
        reply_ids = torch.tensor(
            [encode(reply_text)], dtype=torch.long, device=device
        )

        # The encoder's final hidden state seeds the decoder.
        decoder_hidden = encoder(source_ids)
        # Index 0 (<pad>) doubles as the start-of-sequence input.
        decoder_input = torch.tensor([[0]], dtype=torch.long, device=device)

        step_losses = []
        for t in range(reply_ids.shape[1]):
            gold = reply_ids[:, t]
            logits, decoder_hidden = decoder(decoder_input, decoder_hidden)
            step_losses.append(criterion(logits.squeeze(1), gold))
            # Teacher forcing: the true token, not the prediction, is fed next.
            decoder_input = gold.unsqueeze(1)
        # Summed (not averaged) over the reply's time steps.
        loss = sum(step_losses)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # The reported value is the loss of the last pair processed in this epoch.
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item():.4f}')

# Inference: Chat with the chatbot
def chat(input_text, max_len=10):
    """Generate a reply to *input_text* by greedy decoding.

    The input is encoded to a hidden state, then the decoder is stepped one
    token at a time, always taking the highest-scoring token, until it emits
    ``<eos>`` or *max_len* tokens have been produced.

    Args:
        input_text: The user's utterance.
        max_len: Upper bound on the number of generated tokens.

    Returns:
        The decoded reply as a space-joined string (may be empty).
    """
    eos_id = word2idx["<eos>"]
    output_sentence = []
    # Inference needs no gradients; without no_grad every step would record
    # an autograd graph that is never used.
    with torch.no_grad():
        input_tensor = torch.tensor(
            [encode(input_text)], dtype=torch.long
        ).to(device)
        decoder_hidden = encoder(input_tensor)
        # Index 0 (<pad>) is the start-of-sequence input, as in the training loop.
        decoder_input = torch.tensor([[0]], dtype=torch.long).to(device)
        for _ in range(max_len):  # Limit the max response length
            output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            top1 = output.argmax(2).item()
            if top1 == eos_id:  # Stop if <eos> token is generated
                break
            output_sentence.append(top1)
            # Feed the model's own prediction back in as the next input.
            decoder_input = torch.tensor([[top1]], dtype=torch.long).to(device)
    return decode(output_sentence)

# Chatting with the chatbot: read lines until the user types "bye" or "exit".
while True:
    try:
        # Strip so that stray whitespace (e.g. "bye ") still ends the session.
        user_input = input("You: ").strip()
    except EOFError:
        # stdin was closed (piped or non-interactive run): leave the loop
        # through the normal exit path instead of crashing with a traceback.
        user_input = "exit"
    if user_input.lower() in ("bye", "exit"):
        print("Chatbot: Goodbye!")
        break
    if not user_input:
        # Nothing was typed; prompt again rather than query the model with
        # an empty sentence.
        continue
    response = chat(user_input)
    print(f"Chatbot: {response}")


Epoch 0, Loss: 5.9981
Epoch 10, Loss: 0.3876
Epoch 20, Loss: 0.0687
Epoch 30, Loss: 0.0335
Epoch 40, Loss: 0.0210
Epoch 50, Loss: 0.0144
Epoch 60, Loss: 0.0105
Epoch 70, Loss: 0.0082
Epoch 80, Loss: 0.0066
Epoch 90, Loss: 0.0055


You:  hello


Chatbot: goodbye


You:  hi


Chatbot: hello


You:  iwhat is your name


Chatbot: i am a chatbot


You:  bye


Chatbot: Goodbye!
