In [1]:
import pandas as pd
import numpy as np
from transformers import BertTokenizer
# Load the pretrained, case-preserving BERT tokenizer used to split text into tokens.
bert_checkpoint = 'bert-base-cased'
tokenizer = BertTokenizer.from_pretrained(bert_checkpoint)

In [2]:
# Load the question/answer pairs (columns 'Q' and 'A') that back the chatbot.
qa_csv_path = 'QA.csv'
QA = pd.read_csv(qa_csv_path)
QA

Unnamed: 0,Q,A
0,How can I unlock my iPhone if I forgot my pass...,You can unlock your iPhone by following the st...
1,What should I do if my iPhone is not charging?,"If your iPhone is not charging, try using a di..."
2,Can I get help with using a specific app from ...,"Yes, Apple customer support can offer assistan..."
3,How can I download the latest software updates...,You can download the latest software updates b...
4,Can I get a refund for an app or subscription ...,"Yes, you may be eligible for a refund for cert..."
...,...,...
573,"Okay, I'll do that now.","Once your phone is updated, download a reputab..."
574,"Hi, I accidentally deleted some important phot...","Yes, there is! Do you have a backup of your ph..."
575,"Yes, I do.","Great, you can restore your phone from the bac..."
576,"Hi, my iPhone won't turn on. What should I do?",I'm sorry to hear that! Let's try force restar...


In [3]:
# Use the BERT tokenizer to split every question into tokens, then store the
# tokens as a single space-separated string in a new 'Q_tokenized' column.
def _tokens_as_text(question):
    """Return the BERT tokens of `question` joined by single spaces."""
    return " ".join(tokenizer.tokenize(question))

QA['Q_tokenized'] = QA['Q'].map(_tokens_as_text)
QA

Unnamed: 0,Q,A,Q_tokenized
0,How can I unlock my iPhone if I forgot my pass...,You can unlock your iPhone by following the st...,How can I unlock my iPhone if I forgot my pass...
1,What should I do if my iPhone is not charging?,"If your iPhone is not charging, try using a di...",What should I do if my iPhone is not charging ?
2,Can I get help with using a specific app from ...,"Yes, Apple customer support can offer assistan...",Can I get help with using a specific app from ...
3,How can I download the latest software updates...,You can download the latest software updates b...,How can I download the latest software updates...
4,Can I get a refund for an app or subscription ...,"Yes, you may be eligible for a refund for cert...",Can I get a re ##fu ##nd for an app or subscri...
...,...,...,...
573,"Okay, I'll do that now.","Once your phone is updated, download a reputab...","Okay , I ' ll do that now ."
574,"Hi, I accidentally deleted some important phot...","Yes, there is! Do you have a backup of your ph...","Hi , I accidentally deleted some important pho..."
575,"Yes, I do.","Great, you can restore your phone from the bac...","Yes , I do ."
576,"Hi, my iPhone won't turn on. What should I do?",I'm sorry to hear that! Let's try force restar...,"Hi , my iPhone won ' t turn on . What should I..."


In [4]:
# Use TF-IDF to represent the tokenized sentences as numeric vectors.
# NOTE(review): TfidfVectorizer is used with its defaults, so it applies its own
# lowercasing and token pattern on top of the BERT tokens -- confirm this is intended.
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer()
# Fit the vocabulary on the stored questions and keep a dense copy of the
# matrix: one row per question, one column per vocabulary term.
question_tfidf_sparse = vectorizer.fit_transform(QA['Q_tokenized'])
QuestionDB = question_tfidf_sparse.toarray()

In [5]:
# Inspect the dense TF-IDF matrix built from the tokenized questions.
QuestionDB

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [7]:
# Build the TF-IDF vector for a sample query, using the same BERT tokenization
# and the already-fitted vectorizer that produced QuestionDB.
sample_question = "How can I unlock my iPhone if I forgot my passcode?"
sample_tokens = tokenizer.tokenize(sample_question)
q = vectorizer.transform([" ".join(sample_tokens)]).toarray()

In [8]:
from sklearn.metrics.pairwise import cosine_similarity

# Score the query against every stored question in a single call instead of
# calling cosine_similarity twice per row inside a Python loop.
# q has one row, so the result has shape (1, n_questions); take that row.
scores = cosine_similarity(q, QuestionDB)[0]

# np.argmax returns the first index of the maximum, which matches the original
# "strictly greater" scan: on ties the earliest question wins.
idx = int(np.argmax(scores))
similarity = float(scores[idx])

# idx is a row position in QuestionDB, so look the answer up positionally.
print(QA['A'].iloc[idx])

You can unlock your iPhone by following the steps outlined on the Apple support website, which involve using iTunes or iCloud to reset your device.


In [8]:
# Interactive chatbot loop.
# For each user message: tokenize it with the BERT tokenizer, project it into
# the fitted TF-IDF space, compute its cosine similarity with every stored
# question, and reply with the answer of the most similar question.
# Typing "exit" (surrounding whitespace and letter case are ignored) ends the chat.
from sklearn.metrics.pairwise import cosine_similarity

print("Chatbot: How can I help you?")
q = input("User:")
while q.strip().lower() != "exit":
    # Keep the raw text in q; use separate names for the derived vector so the
    # variable does not change type mid-loop.
    query_text = " ".join(tokenizer.tokenize(q))
    query_vec = vectorizer.transform([query_text]).toarray()

    # One batched similarity call; result has shape (1, n_questions).
    scores = cosine_similarity(query_vec, QuestionDB)[0]
    # First index of the maximum, i.e. the earliest question wins on ties.
    idx = int(np.argmax(scores))
    similarity = float(scores[idx])

    if similarity > 0:
        # idx is a row position in QuestionDB, so index the answers positionally.
        print("Chatbot:", QA['A'].iloc[idx])
    else:
        # TF-IDF weights are non-negative, so a best score of 0 means the input
        # shares no vocabulary term with any stored question (e.g. empty input).
        # Say so instead of silently returning the first answer in the table.
        print("Chatbot:", "Sorry, I don't have an answer for that. Could you rephrase your question?")
    print("")
    q = input("User:")

Chatbot: How can I help you?
User:How can I troubleshoot issues with my Mac computer not recognizing external hard drives or storage devices?
Chatbot: You can troubleshoot issues with your Mac computer not recognizing external hard drives or storage devices by following the steps outlined on the Apple support website, such as checking the settings and connections or using Disk Utility.

User:How can I troubleshoot issues with my Mac computer not recognizing external hard drives or storage devices?
Chatbot: You can troubleshoot issues with your Mac computer not recognizing external hard drives or storage devices by following the steps outlined on the Apple support website, such as checking the settings and connections or using Disk Utility.

User:How can I unlock my iPhone if I forgot my passcode?
Chatbot: You can unlock your iPhone by following the steps outlined on the Apple support website, which involve using iTunes or iCloud to reset your device.

User:How can I troubleshoot issues