In [1]:
import pandas as pd
import torch
from transformers import BertTokenizer, BertModel
from sklearn.metrics.pairwise import cosine_similarity


In [2]:
# Load the FAQ table; `df` keeps the rows exactly as read from disk
df = pd.read_csv('faqs.csv')

# Drop rows lacking a question or an answer. pandas reads an empty cell as
# NaN (a float), which is not text the tokenizer can encode and which would
# otherwise be handed back to the user as the answer "nan".
faq_df = df.dropna(subset=['Question', 'Answer'])

# Extract questions and answers as parallel lists: the same position in both
# lists refers to the same FAQ entry, so rows are filtered before splitting.
questions = faq_df['Question'].astype(str).tolist()
answers = faq_df['Answer'].astype(str).tolist()


In [5]:
# Load the pretrained 'bert-base-uncased' encoder and its matching tokenizer
model = BertModel.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Put the model in evaluation mode (inference only). As the last expression
# of the cell, this call also makes the notebook display the layer summary.
model.eval()


BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False

In [9]:
# Step 4: Tokenize the FAQ questions as one padded batch, then embed them
encodings = tokenizer(
    questions,
    padding=True,
    truncation=True,
    return_tensors="pt",
)

# Inference only, so gradient tracking is disabled. Each question is
# represented by the hidden state at position 0 of its sequence.
with torch.no_grad():
    question_embeddings = (
        model(
            encodings['input_ids'],
            attention_mask=encodings['attention_mask'],
        )[0][:, 0, :]
        .numpy()
    )


In [11]:
def get_response(user_input, threshold=0.5):
    """Return the FAQ answer whose question is most similar to ``user_input``.

    The input is embedded the same way as the stored questions (hidden state
    at position 0 of the BERT output) and compared with every row of the
    module-level ``question_embeddings`` using cosine similarity.

    Args:
        user_input: The text typed by the user.
        threshold: Minimum cosine similarity the best match must reach for
            its answer to be returned. Defaults to 0.5.

    Returns:
        The entry of ``answers`` at the index of the most similar question,
        or a fixed apology message when the input is blank or the best
        similarity is below ``threshold``.
    """
    fallback = "I'm sorry, I didn't understand that."

    # Blank input carries nothing to match against; skip the model call.
    if not user_input or not user_input.strip():
        return fallback

    # Tokenize user input. Truncation mirrors how the FAQ questions were
    # encoded and keeps over-long messages within the model's position limit
    # instead of failing inside the encoder.
    user_input_encoded = tokenizer(user_input, truncation=True, return_tensors='pt')

    with torch.no_grad():
        user_input_embedding = model(
            user_input_encoded['input_ids'],
            attention_mask=user_input_encoded['attention_mask'],
        )[0][:, 0, :].numpy()

    # Score the query against every stored question in one vectorized call;
    # the result has shape (1, n_questions), so take its only row.
    similarities = cosine_similarity(user_input_embedding, question_embeddings)[0]

    # argmax returns the first index of the maximum, so ties resolve to the
    # earliest FAQ entry.
    best_match_index = int(similarities.argmax())

    # NOTE(review): the default threshold may rarely reject anything if these
    # embeddings all score high against each other -- confirm on real data.
    if similarities[best_match_index] < threshold:
        return fallback

    return answers[best_match_index]


In [17]:
print("Welcome to the LMS Help Chatbot! How can I assist you today?")
while True:
    try:
        user_input = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # A closed input stream or an interrupted prompt ends the chat
        # cleanly instead of leaving a traceback in the cell output.
        print("Bot: Goodbye!")
        break
    # Ignore surrounding whitespace and case so that inputs such as "Bye "
    # or " EXIT" are still recognised as exit commands.
    if user_input.strip().lower() in ['exit', 'quit', 'bye']:
        print("Bot: Goodbye!")
        break
    response = get_response(user_input)
    print("Bot:", response)


Welcome to the LMS Help Chatbot! How can I assist you today?


You:  how reset password


Bot: To reset your password, visit the 'Forgot Password' link on the login page and follow the provided instructions to create a new password.


You:  bye


Bot: Goodbye!
