# NLP Business Chatbot Based on Tweets

**Making chatbot using tweets dataset**

In [1]:
!pip install pyspellchecker

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyspellchecker
  Downloading pyspellchecker-0.7.1-py3-none-any.whl (2.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m28.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyspellchecker
Successfully installed pyspellchecker-0.7.1


In [2]:
# Import Module

import numpy as np
import pandas as pd
import string
import re
import random
import nltk

import seaborn as sns

from spellchecker import SpellChecker
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer

from nltk import word_tokenize
from nltk import pos_tag
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import pairwise_distances
from sklearn.metrics.pairwise import cosine_similarity



import warnings
warnings.filterwarnings('ignore')

In [3]:
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download("stopwords")
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [4]:
# Read dataset

df = pd.read_csv('/content/drive/MyDrive/twcs.csv')
df

Unnamed: 0,tweet_id,author_id,inbound,created_at,text,response_tweet_id,in_response_to_tweet_id
0,1,sprintcare,False,Tue Oct 31 22:10:47 +0000 2017,@115712 I understand. I would like to assist y...,2,3.0
1,2,115712,True,Tue Oct 31 22:11:45 +0000 2017,@sprintcare and how do you propose we do that,,1.0
2,3,115712,True,Tue Oct 31 22:08:27 +0000 2017,@sprintcare I have sent several private messag...,1,4.0
3,4,sprintcare,False,Tue Oct 31 21:54:49 +0000 2017,@115712 Please send us a Private Message so th...,3,5.0
4,5,115712,True,Tue Oct 31 21:49:35 +0000 2017,@sprintcare I did.,4,6.0
...,...,...,...,...,...,...,...
2811769,2987947,sprintcare,False,Wed Nov 22 08:43:51 +0000 2017,"@823869 Hey, we'd be happy to look into this f...",,2987948.0
2811770,2987948,823869,True,Wed Nov 22 08:35:16 +0000 2017,@115714 wtf!? I’ve been having really shitty s...,2987947,
2811771,2812240,121673,True,Thu Nov 23 04:13:07 +0000 2017,@143549 @sprintcare You have to go to https://...,,2812239.0
2811772,2987949,AldiUK,False,Wed Nov 22 08:31:24 +0000 2017,"@823870 Sounds delicious, Sarah! 😋 https://t.c...",,2987950.0


In [5]:
# Checking the shape and information

print(df.shape)
df.info()

(2811774, 7)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2811774 entries, 0 to 2811773
Data columns (total 7 columns):
 #   Column                   Dtype  
---  ------                   -----  
 0   tweet_id                 int64  
 1   author_id                object 
 2   inbound                  bool   
 3   created_at               object 
 4   text                     object 
 5   response_tweet_id        object 
 6   in_response_to_tweet_id  float64
dtypes: bool(1), float64(1), int64(1), object(4)
memory usage: 131.4+ MB


In [6]:
# Checking the Company List

df['author_id'].unique()

array(['sprintcare', '115712', '115713', ..., '823868', '823869',
       '823870'], dtype=object)

In [7]:
# For example we will use AskPlayStation
# Filter for answers only made by that company

answers = df.loc[df['author_id'] == 'AskPlayStation']
answers

Unnamed: 0,tweet_id,author_id,inbound,created_at,text,response_tweet_id,in_response_to_tweet_id
98,178,AskPlayStation,False,Tue Oct 31 22:14:49 +0000 2017,@115743 There is no info to share at the momen...,179,180.0
101,182,AskPlayStation,False,Tue Oct 31 22:09:50 +0000 2017,"@115745 Glad to know that is downloading, ple...",,183.0
103,184,AskPlayStation,False,Tue Oct 31 22:05:06 +0000 2017,@115745 Sorry for the inconvenience. Do you se...,183,185.0
105,187,AskPlayStation,False,Tue Oct 31 22:06:34 +0000 2017,@115746 Glad to help. Please share the detai...,,188.0
880,1450,AskPlayStation,False,Tue Oct 31 22:27:36 +0000 2017,@116001 Glad to help! We have sent you a DM lo...,,1451.0
...,...,...,...,...,...,...,...
2810647,2986855,AskPlayStation,False,Tue Oct 31 21:58:28 +0000 2017,@640122 Please run a network connection test. ...,,2986856.0
2810649,2986857,AskPlayStation,False,Tue Oct 31 21:46:59 +0000 2017,@640122 Let's take a look! Check out the next ...,2986856,2986858.0
2810651,2986859,AskPlayStation,False,Tue Oct 31 21:56:02 +0000 2017,@823562 Hello there. There isn't information a...,,2986860.0
2810653,2986861,AskPlayStation,False,Tue Oct 31 21:55:36 +0000 2017,@823563 Glad to help! Please follow us via Tw...,,2986862.0


In [8]:
answers = df.loc[df['author_id'] == 'AskPlayStation']
answers.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 19098 entries, 98 to 2810681
Data columns (total 7 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   tweet_id                 19098 non-null  int64  
 1   author_id                19098 non-null  object 
 2   inbound                  19098 non-null  bool   
 3   created_at               19098 non-null  object 
 4   text                     19098 non-null  object 
 5   response_tweet_id        8065 non-null   object 
 6   in_response_to_tweet_id  18694 non-null  float64
dtypes: bool(1), float64(1), int64(1), object(4)
memory usage: 1.0+ MB


Cleaning steps include :


*   Lower casing
*   Removal of Punctuations
* Removal of Stopwords
* Removal of Frequent words
* Removal of Rare words
* Removal of URLs
* Removal of HTML tags
* Removal of emojis
* Removal of emoticons
* Conversion of emoticons to words
* Conversion of emojis to words
* Chat words conversion
* Spelling correction
* Stemming
* Lemmatization



In [9]:
#  Remove any mentions to users e.g. "@johnsmith, @019190, etc...."

answers['text'] = answers.apply(lambda x: re.sub("\B@\w+", "", x['text']), axis=1)
answers.head(3)

Unnamed: 0,tweet_id,author_id,inbound,created_at,text,response_tweet_id,in_response_to_tweet_id
98,178,AskPlayStation,False,Tue Oct 31 22:14:49 +0000 2017,There is no info to share at the moment. Feel...,179.0,180.0
101,182,AskPlayStation,False,Tue Oct 31 22:09:50 +0000 2017,"Glad to know that is downloading, please fee...",,183.0
103,184,AskPlayStation,False,Tue Oct 31 22:05:06 +0000 2017,Sorry for the inconvenience. Do you see the d...,183.0,185.0


In [10]:
df_text = answers['text']
df_text

98          There is no info to share at the moment. Feel...
101          Glad to know that is downloading, please fee...
103         Sorry for the inconvenience. Do you see the d...
105          Glad to help.  Please share the details of t...
880         Glad to help! We have sent you a DM looking f...
                                 ...                        
2810647     Please run a network connection test. Go to S...
2810649     Let's take a look! Check out the next article...
2810651     Hello there. There isn't information about it...
2810653     Glad to help!  Please follow us via Twitter a...
2810681     Hello James! Please follow the steps in the l...
Name: text, Length: 19098, dtype: object

In [11]:
# Lower Casing

df_text = df_text.str.lower()
df_text

98          there is no info to share at the moment. feel...
101          glad to know that is downloading, please fee...
103         sorry for the inconvenience. do you see the d...
105          glad to help.  please share the details of t...
880         glad to help! we have sent you a dm looking f...
                                 ...                        
2810647     please run a network connection test. go to s...
2810649     let's take a look! check out the next article...
2810651     hello there. there isn't information about it...
2810653     glad to help!  please follow us via twitter a...
2810681     hello james! please follow the steps in the l...
Name: text, Length: 19098, dtype: object

In [12]:
# Remove Punctuation

df_text = df_text.apply(lambda x:x.translate(str.maketrans('', '', string.punctuation)))
df_text

98          there is no info to share at the moment feel ...
101          glad to know that is downloading please feel...
103         sorry for the inconvenience do you see the da...
105          glad to help  please share the details of th...
880         glad to help we have sent you a dm looking fo...
                                 ...                        
2810647     please run a network connection test go to se...
2810649     lets take a look check out the next article t...
2810651     hello there there isnt information about it c...
2810653     glad to help  please follow us via twitter an...
2810681     hello james please follow the steps in the li...
Name: text, Length: 19098, dtype: object

# Chatbot Test 1 : using raw answer array 


In [21]:
# Convert all our text to lower case, strip off any trailing full stops, remove any mentions to users 
answers['text'] = answers.apply(lambda row: row['text'].lower(), axis=1)
answers['text'] = answers.apply(lambda row: row['text'].rstrip('.'), axis=1)
answers['text'] = answers.apply(lambda row: re.sub("\B@\w+", "", row['text']), axis=1)

In [22]:
# Concatinate answers into raw variable
raw = ""
for index, row in answers.iterrows():
    raw += ". " + row['text']
raw

'.  there is no info to share at the moment. feel free to keep an eye on the ps blog for news and updates: https://t.co/altfbaztyc.   glad to know that is downloading, please feel free to contact us if you have further concerns.  sorry for the inconvenience. do you see the data corrupted message when you try to open the game or download it?.   glad to help.  please share the details of this issue in our community forum for further investigation: https://t.co/f3j4l2vjhb.  glad to help! we have sent you a dm looking forward help you?.  hi there, sorry to hear that, are you getting an error message or an error code, let us know more details please?.  glad to hear that, feel free to message us again here if you need help.  lamentamos el inconveniente. por favor siguenos en twitter para brindarte asistencia por mensajes directos.  very odd. please turn off the console and disconnect all the cables from it for 3 minutes, then try again.  glad to help! we have sent you a direct message via tw

In [23]:
# Download nltk assets
nltk.download('punkt')
nltk.download('wordnet')

# convert our raw sentences into sentence tokens
sentence_tokens = nltk.sent_tokenize(raw)
# convert our raw sentences into word tokens
word_tokens = nltk.word_tokenize(raw);

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [24]:
sentence_tokens[:2]

['.', 'there is no info to share at the moment.']

In [25]:
word_tokens[:2]

['.', 'there']

In [26]:
lemmer = nltk.stem.WordNetLemmatizer()

def LemTokens(tokens):
    return [lemmer.lemmatize(token) for token in tokens]
remove_punct_dict = dict((ord(punct), None) for punct in string.punctuation)
def LemNormalize(text):
    return LemTokens(nltk.word_tokenize(text.lower().translate(remove_punct_dict)))

In [27]:
normal_token = LemNormalize(raw)
normal_token[:5]

['there', 'is', 'no', 'info', 'to']

In [28]:
# Define function for processing a response

def response(user_response):
    #define our response variable
    robo_response=''
    #add our users input as a response
    sentence_tokens.append(user_response)
    #create out vectorizer
    vectorizer = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    #process our tokens
    diff = vectorizer.fit_transform(sentence_tokens)
    #find the similarity
    vals = cosine_similarity(diff[-1], diff)
    #select our sentence
    idx = vals.argsort()[0][-2]
    #calculate accuracy
    flat = vals.flatten()
    flat.sort()
    req_diff = flat[-2]
    if(req_diff==0):
        #if no appropriate response can be made
        robo_response=robo_response+"Sorry! I don't think I can help you with that."
        return robo_response
    else:
        #if an appropriate response is found
        robo_response = sentence_tokens[idx]        
        return robo_response

In [29]:
# Running a Chatbot 
# Define flag to exit the loop
flag=True
company = "AskPlayStation"

# print welcome message for our chosen company
print("{companyname} Support: \nWelcome to {companyname} Support.".format(companyname = company))
print("I will answer your queries about {companyname}.\nIf you wish to end the chat, type bye!".format(companyname = company))
while(flag==True):
    #get an input
    print('User:')
    user_response = input()
    #convert to lower
    user_response=user_response.lower()
    #if they type something other than 'bye'
    if(user_response!='bye'):
        #if they show appriciation
        if(user_response=='thanks' or user_response=='thank you' ):
            #exit the loop
            flag=False
            #thank you message
            print("{companyname} Support: You are welcome.".format(companyname = company))
        else:
            #show bot is typing
            print("{companyname} Support: \n".format(companyname = company), end="")
            #print our AI response
            print(response(user_response))
            sentence_tokens.remove(user_response)
    else:
        #exit the loop
        flag=False
        #exit message
        print("{companyname} Support: \nThanks for chatting! \nI hope we could assist you today.".format(companyname = company))

AskPlayStation Support: 
Welcome to AskPlayStation Support.
I will answer your queries about AskPlayStation.
If you wish to end the chat, type bye!
User:
bye
AskPlayStation Support: 
Thanks for chatting! 
I hope we could assist you today.


# Chatbot Test 2 : Using question and answer in dataframe 


In [82]:
# Checking the null
answers.columns[answers.isnull().any()]

Index(['response_tweet_id'], dtype='object')

In [89]:
# Making the tweet ID list and replace null
questionTweetId = []
answers["in_response_to_tweet_id"] = answers["in_response_to_tweet_id"].fillna(0.0).astype(int)

questionTweetId = answers["in_response_to_tweet_id"].to_list()
len(questionTweetId)

19098

In [90]:
answers.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 19098 entries, 98 to 2810681
Data columns (total 7 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   tweet_id                 19098 non-null  int64 
 1   author_id                19098 non-null  object
 2   inbound                  19098 non-null  bool  
 3   created_at               19098 non-null  object
 4   text                     19098 non-null  object
 5   response_tweet_id        8065 non-null   object
 6   in_response_to_tweet_id  19098 non-null  int64 
dtypes: bool(1), int64(2), object(4)
memory usage: 1.0+ MB


In [91]:
# Creating the dataframe with question-answer

QandA = []
i = len(questionTweetId)
for j in range(i):
    tweetId = questionTweetId[j]
    if tweetId != 0:
        index = df.index
        condition = df["tweet_id"] == tweetId
        indices = index[condition]
        indices_list = indices.tolist()
        condition1 = df["in_response_to_tweet_id"] == tweetId
        indices1 = index[condition1]
        indices_list1 = indices1.tolist()
        if indices_list:
            Q = df.at[indices_list[0], "text"]   
        if indices_list1:
            A = df.at[indices_list1[0], "text"]
        QandA.append([Q, A])

data_clean = pd.DataFrame(QandA, columns =['Question', 'Answer'])
print(data_clean.shape)

(18694, 2)


In [102]:
# Define new dataframe containing question-answer

data_clean = pd.DataFrame(QandA, columns =['Question', 'Answer'])
print(data_clean.shape)
data_clean

(18694, 2)


Unnamed: 0,Question,Answer
0,"@AskPlayStation So, what's the november ps plu...",@115743 There is no info to share at the momen...
1,@AskPlayStation It was when I would go to down...,"@115745 Glad to know that is downloading, ple..."
2,@AskPlayStation I bought Lego Star Wars in the...,@115745 Sorry for the inconvenience. Do you se...
3,@AskPlayStation can i block a community from s...,@115746 Glad to help. Please share the detai...
4,@AskPlayStation can you dm me I have a question,@116001 Glad to help! We have sent you a DM lo...
...,...,...
18689,@AskPlayStation I have already read and went o...,@640122 Please run a network connection test. ...
18690,@AskPlayStation I can not fully connect to my ...,@640122 Let's take a look! Check out the next ...
18691,@AskPlayStation i want to buy a ps4 pro . will...,@823562 Hello there. There isn't information a...
18692,"@AskPlayStation any idea when ""Steven Universe...",@823563 Glad to help! Please follow us via Tw...


In [103]:
# Use sampling to reduce the training time
# Caution: Sampling will reduce the question (training data) robustness 
# Sampling [Options]

data_clean = data_clean.sample(n=100)
data_clean

Unnamed: 0,Question,Answer
5265,@AskPlayStation Yeah everytime I turn on my ps...,@290756 Please power cycle your network device...
2955,@AskPlayStation PlayStation Vue directly is th...,@203520 Thank you for that info! Please follow...
8182,"@AskPlayStation tried it, got no help on the i...",@390244 Please follow us on Twitter and let us...
11759,@AskPlayStation Our cars are not visible in gr...,@504147 That's odd. Please power cycle your ne...
1430,@AskPlayStation \n\nI'm having a problem with ...,@174093 Hello there. The process needs to be d...
...,...,...
18372,@AskPlayStation I can't appear offline even wh...,@805897 We have DM you.
1667,@AskPlayStation Account name: manoleedles (pig...,@178715 No worries! Please follow us via Twitt...
7425,@AskPlayStation I can’t login in to my account...,@363237 Hi there. So sorry you're seeing issue...
9845,@AskPlayStation I'm following you guys.,@326505 Thanks for the follow. Please check yo...


In [104]:
# Convert to string, remove @user and hastag #

data_clean["Original_Question"] = data_clean["Question"].astype(str)
data_clean["Original_Answer"] = data_clean["Answer"].astype(str)

data_clean["Answer"] = data_clean["Answer"].astype(str)
data_clean["Answer"] = data_clean.apply(
    lambda row: re.sub("\B#\w+", "", row['Answer']), axis=1)
data_clean["Answer"] = data_clean.apply(
    lambda row: re.sub("\B@\w+", "", row['Answer']), axis=1)
data_clean["Question"] = data_clean["Question"].astype(str)
data_clean["Question"] = data_clean.apply(
    lambda row: re.sub("\B#\w+", "", row['Question']), axis=1)
data_clean["Question"] = data_clean.apply(
    lambda row: re.sub("\B@\w+", "", row['Question']), axis=1)
data_clean

Unnamed: 0,Question,Answer,Original_Question,Original_Answer
5265,Yeah everytime I turn on my ps4 it says plays...,Please power cycle your network devices and t...,@AskPlayStation Yeah everytime I turn on my ps...,@290756 Please power cycle your network device...
2955,PlayStation Vue directly is the only one not ...,Thank you for that info! Please follow us on ...,@AskPlayStation PlayStation Vue directly is th...,@203520 Thank you for that info! Please follow...
8182,"tried it, got no help on the issue",Please follow us on Twitter and let us know o...,"@AskPlayStation tried it, got no help on the i...",@390244 Please follow us on Twitter and let us...
11759,Our cars are not visible in granturismo in du...,That's odd. Please power cycle your network d...,@AskPlayStation Our cars are not visible in gr...,@504147 That's odd. Please power cycle your ne...
1430,\n\nI'm having a problem with the family mana...,Hello there. The process needs to be done fro...,@AskPlayStation \n\nI'm having a problem with ...,@174093 Hello there. The process needs to be d...
...,...,...,...,...
18372,I can't appear offline even when I try. New 1...,We have DM you.,@AskPlayStation I can't appear offline even wh...,@805897 We have DM you.
1667,Account name: manoleedles (pig icon w/ PS+),No worries! Please follow us via Twitter and ...,@AskPlayStation Account name: manoleedles (pig...,@178715 No worries! Please follow us via Twitt...
7425,I can’t login in to my account I know Passwor...,Hi there. So sorry you're seeing issues. Plea...,@AskPlayStation I can’t login in to my account...,@363237 Hi there. So sorry you're seeing issue...
9845,I'm following you guys.,Thanks for the follow. Please check your DM's...,@AskPlayStation I'm following you guys.,@326505 Thanks for the follow. Please check yo...


In [95]:
# Processing question
# lower-casing

data_clean["Question"] = data_clean["Question"].str.lower()
data_clean

Unnamed: 0,Question,Answer,Original_Question,Original_Answer
14361,dint recieved fifa 18 ronaldo edition fut ben...,"Please explain to us more about the issue, an...",@AskPlayStation dint recieved fifa 18 ronaldo ...,@671498 Please explain to us more about the is...
18667,\nit says dontdisconnect from ac power. \niv...,Hi there! Please press and hold the power but...,@AskPlayStation @822638 \nIt says dontdisconne...,@487086 Hi there! Please press and hold the po...
1931,"hello again, i need help in getting my preord...",Sure we can help! Please try Restore Licenses...,"@AskPlayStation Hello again, I need help in ge...",@185555 Sure we can help! Please try Restore L...
11300,2 games courupted in the last mouth anything ...,Hello Mark. Let's look into that. Do you have...,@AskPlayStation 2 games courupted in the last ...,@487740 Hello Mark. Let's look into that. Do y...
570,my account was banned on the 15th of november...,That's not good. Please check your DM's for m...,@AskPlayStation my account was banned on the 1...,@142222 That's not good. Please check your DM'...
...,...,...,...,...
5220,why does fortnite stop downloading halfway th...,That's odd! Do you have double the amount of ...,@AskPlayStation WHY does fortnite stop downloa...,@290232 That's odd! Do you have double the amo...
2135,"i cant chat the support team, i wrote everyth...",Hello Eddi. Let's look into that. Please make...,"@AskPlayStation i Cant chat the support team, ...",@189879 Hello Eddi. Let's look into that. Plea...
10515,"""connect a usb storage device that contains a...",Hi! Try formatting a USB to exFAT and reinsta...,"@AskPlayStation ""connect a usb storage device ...",@462456 Hi! Try formatting a USB to exFAT and ...
2405,"infact i did and , i have tried many things b...",did you just hijack my post lmao,"@AskPlayStation Infact i did and , i have trie...",@195868 @AskPlayStation did you just hijack my...


In [96]:
# Checking Stopwords
from nltk.corpus import stopwords
", ".join(stopwords.words('english'))

"i, me, my, myself, we, our, ours, ourselves, you, you're, you've, you'll, you'd, your, yours, yourself, yourselves, he, him, his, himself, she, she's, her, hers, herself, it, it's, its, itself, they, them, their, theirs, themselves, what, which, who, whom, this, that, that'll, these, those, am, is, are, was, were, be, been, being, have, has, had, having, do, does, did, doing, a, an, the, and, but, if, or, because, as, until, while, of, at, by, for, with, about, against, between, into, through, during, before, after, above, below, to, from, up, down, in, out, on, off, over, under, again, further, then, once, here, there, when, where, why, how, all, any, both, each, few, more, most, other, some, such, no, nor, not, only, own, same, so, than, too, very, s, t, can, will, just, don, don't, should, should've, now, d, ll, m, o, re, ve, y, ain, aren, aren't, couldn, couldn't, didn, didn't, doesn, doesn't, hadn, hadn't, hasn, hasn't, haven, haven't, isn, isn't, ma, mightn, mightn't, mustn, mus

In [97]:
# Remove stop words

sw = set(stopwords.words('english'))
def remove_stopwords(text):
    """custom function to remove the stopwords"""
    return " ".join([word for word in str(text).split() if word not in sw])

data_clean["Question"] = data_clean["Question"].apply(lambda text: remove_stopwords(text))
data_clean

Unnamed: 0,Question,Answer,Original_Question,Original_Answer
14361,dint recieved fifa 18 ronaldo edition fut bene...,"Please explain to us more about the issue, an...",@AskPlayStation dint recieved fifa 18 ronaldo ...,@671498 Please explain to us more about the is...
18667,says dontdisconnect ac power. iv got pro prepa...,Hi there! Please press and hold the power but...,@AskPlayStation @822638 \nIt says dontdisconne...,@487086 Hi there! Please press and hold the po...
1931,"hello again, need help getting preordered game...",Sure we can help! Please try Restore Licenses...,"@AskPlayStation Hello again, I need help in ge...",@185555 Sure we can help! Please try Restore L...
11300,2 games courupted last mouth anything i'm wrong,Hello Mark. Let's look into that. Do you have...,@AskPlayStation 2 games courupted in the last ...,@487740 Hello Mark. Let's look into that. Do y...
570,account banned 15th november email said unbann...,That's not good. Please check your DM's for m...,@AskPlayStation my account was banned on the 1...,@142222 That's not good. Please check your DM'...
...,...,...,...,...
5220,fortnite stop downloading halfway through??? h...,That's odd! Do you have double the amount of ...,@AskPlayStation WHY does fortnite stop downloa...,@290232 That's odd! Do you have double the amo...
2135,"cant chat support team, wrote everything right...",Hello Eddi. Let's look into that. Please make...,"@AskPlayStation i Cant chat the support team, ...",@189879 Hello Eddi. Let's look into that. Plea...
10515,"""connect usb storage device contains update fi...",Hi! Try formatting a USB to exFAT and reinsta...,"@AskPlayStation ""connect a usb storage device ...",@462456 Hi! Try formatting a USB to exFAT and ...
2405,"infact , tried many things nothing working ive...",did you just hijack my post lmao,"@AskPlayStation Infact i did and , i have trie...",@195868 @AskPlayStation did you just hijack my...


In [98]:
# Stemming

stemmer = PorterStemmer()
def stem_words(text):
    return " ".join([stemmer.stem(word) for word in text.split()])

data_clean["Question"] = data_clean["Question"].apply(lambda text: stem_words(text))
data_clean

Unnamed: 0,Question,Answer,Original_Question,Original_Answer
14361,dint reciev fifa 18 ronaldo edit fut beneifits...,"Please explain to us more about the issue, an...",@AskPlayStation dint recieved fifa 18 ronaldo ...,@671498 Please explain to us more about the is...
18667,say dontdisconnect ac power. iv got pro prepar...,Hi there! Please press and hold the power but...,@AskPlayStation @822638 \nIt says dontdisconne...,@487086 Hi there! Please press and hold the po...
1931,"hello again, need help get preorder game psn, ...",Sure we can help! Please try Restore Licenses...,"@AskPlayStation Hello again, I need help in ge...",@185555 Sure we can help! Please try Restore L...
11300,2 game courupt last mouth anyth i'm wrong,Hello Mark. Let's look into that. Do you have...,@AskPlayStation 2 games courupted in the last ...,@487740 Hello Mark. Let's look into that. Do y...
570,account ban 15th novemb email said unban 22nd,That's not good. Please check your DM's for m...,@AskPlayStation my account was banned on the 1...,@142222 That's not good. Please check your DM'...
...,...,...,...,...
5220,fortnit stop download halfway through??? help plz,That's odd! Do you have double the amount of ...,@AskPlayStation WHY does fortnite stop downloa...,@290232 That's odd! Do you have double the amo...
2135,"cant chat support team, wrote everyth right......",Hello Eddi. Let's look into that. Please make...,"@AskPlayStation i Cant chat the support team, ...",@189879 Hello Eddi. Let's look into that. Plea...
10515,"""connect usb storag devic contain updat file r...",Hi! Try formatting a USB to exFAT and reinsta...,"@AskPlayStation ""connect a usb storage device ...",@462456 Hi! Try formatting a USB to exFAT and ...
2405,"infact , tri mani thing noth work ive tri cont...",did you just hijack my post lmao,"@AskPlayStation Infact i did and , i have trie...",@195868 @AskPlayStation did you just hijack my...


In [99]:
# Lemmatize

lemmatizer = WordNetLemmatizer()
def lemmatize_words(text):
    return " ".join([lemmatizer.lemmatize(word) for word in text.split()])

data_clean["Question"] = data_clean["Question"].apply(lambda text: lemmatize_words(text))
data_clean

Unnamed: 0,Question,Answer,Original_Question,Original_Answer
14361,dint reciev fifa 18 ronaldo edit fut beneifits...,"Please explain to us more about the issue, an...",@AskPlayStation dint recieved fifa 18 ronaldo ...,@671498 Please explain to us more about the is...
18667,say dontdisconnect ac power. iv got pro prepar...,Hi there! Please press and hold the power but...,@AskPlayStation @822638 \nIt says dontdisconne...,@487086 Hi there! Please press and hold the po...
1931,"hello again, need help get preorder game psn, ...",Sure we can help! Please try Restore Licenses...,"@AskPlayStation Hello again, I need help in ge...",@185555 Sure we can help! Please try Restore L...
11300,2 game courupt last mouth anyth i'm wrong,Hello Mark. Let's look into that. Do you have...,@AskPlayStation 2 games courupted in the last ...,@487740 Hello Mark. Let's look into that. Do y...
570,account ban 15th novemb email said unban 22nd,That's not good. Please check your DM's for m...,@AskPlayStation my account was banned on the 1...,@142222 That's not good. Please check your DM'...
...,...,...,...,...
5220,fortnit stop download halfway through??? help plz,That's odd! Do you have double the amount of ...,@AskPlayStation WHY does fortnite stop downloa...,@290232 That's odd! Do you have double the amo...
2135,"cant chat support team, wrote everyth right......",Hello Eddi. Let's look into that. Please make...,"@AskPlayStation i Cant chat the support team, ...",@189879 Hello Eddi. Let's look into that. Plea...
10515,"""connect usb storag devic contain updat file r...",Hi! Try formatting a USB to exFAT and reinsta...,"@AskPlayStation ""connect a usb storage device ...",@462456 Hi! Try formatting a USB to exFAT and ...
2405,"infact , tri mani thing noth work ive tri cont...",did you just hijack my post lmao,"@AskPlayStation Infact i did and , i have trie...",@195868 @AskPlayStation did you just hijack my...


In [None]:
"""
# Spellchecker

spell = SpellChecker()
def correct_spellings(text):
    corrected_text = []
    misspelled_words = spell.unknown(text.split())
    for word in text.split():
        if word in misspelled_words:
            corrected_text.append(spell.correction(word))
        else:
            corrected_text.append(word)
    return " ".join(corrected_text)

data_clean["Question"] = data_clean["Question"].apply(lambda text: correct_spellings(text))
data_clean
"""

In [100]:
# Defining UserAsk Function to vectorize the question and find the similarity with the answer

def userAsk(text):
    tfidf = TfidfVectorizer()
    x_tfidf = tfidf.fit_transform(data_clean["Question"]).toarray()
    df_tfidf = pd.DataFrame(x_tfidf,columns=tfidf.get_feature_names())
    Question_tfidf = tfidf.transform([text]).toarray()

    # Find cosine similarity
    cos=1-pairwise_distances(df_tfidf,Question_tfidf,metric='cosine')
    index_value = cos.argmax()
    
    # Resets the index of data_clean as otherwise it won't work with index-value
    data_clean.reset_index(inplace = True, drop = True)

    return data_clean["Answer"].loc[index_value]

In [101]:

# Define the chatbot loop

flag=True
company = "AskPlayStation"
#print welcome message for our chosen company
print("{companyname} Support: Welcome to {companyname} Support. I will answer your queries about {companyname}. If you wish to end the chat, type bye!".format(companyname = company))
while(flag==True):
    #get an input
    user_response = input()
    #convert to lower
    user_response = user_response.lower()
    #if they type something other than 'bye'
    if(user_response!='bye'):
        #if they show appriciation
        if(user_response=='thanks' or user_response=='thank you' ):
            #exit the loop
            flag=False
            #thank you message
            print("{companyname} Support: You are welcome.".format(companyname = company))
        else:
            #show bot is typing
            print("{companyname} Support: ".format(companyname = company), end="")
            #print our AI response
            UserAnswer = userAsk(user_response)
            print(UserAnswer)
    else:
        #exit the loop
        flag=False
        #exit message
        print("{companyname} Support: Thanks for chatting. I hope we could assist you today.".format(companyname = company))

AskPlayStation Support: Welcome to AskPlayStation Support. I will answer your queries about AskPlayStation. If you wish to end the chat, type bye!
hello
AskPlayStation Support:  may i get assistance now?
game crash
AskPlayStation Support:  Hello Wassim. Please make sure you are following us, so we can assist you better via a Direct Message.
game corrupt
AskPlayStation Support:  Hello Wassim. Please make sure you are following us, so we can assist you better via a Direct Message.
stop download
AskPlayStation Support:  That's odd! Do you have double the amount of space required for your download?
yes
AskPlayStation Support:  Please try to redeem it through the console.
switch account
AskPlayStation Support:  Glad to answer. Please click here to find more information: https://t.co/gw7qKPP7YA
cant chat support team
AskPlayStation Support:  Hello Eddi. Let's look into that. Please make sure you are following us, so we can assist you better via a Direct Message.
bye
AskPlayStation Support: T

## Custom function


In [109]:
# Defining Preproc_question function 

def Preproc_question(text):
    sw = set(stopwords.words('english'))
    stemmer = PorterStemmer()
    lemmatizer = WordNetLemmatizer()
    spell = SpellChecker()
    corrected_text = []

    text_quest = " ".join([word for word in str(text).split() if word not in sw])
    text_quest = " ".join([stemmer.stem(word) for word in text_quest.split()])
    text_quest = " ".join([lemmatizer.lemmatize(word) for word in text_quest.split()])

    misspelled_words = spell.unknown(text_quest.split())
    for word in text.split():
        if word in misspelled_words:
            corrected_text.append(spell.correction(word))
        else:
            corrected_text.append(word)
    text_quest = " ".join(filter(None,(corrected_text)))
    return text_quest

In [110]:
# Preproc dataset for Question Collumn

data_clean["Question"] = data_clean["Question"].apply(lambda text: Preproc_question(text))
data_clean

Unnamed: 0,Question,Answer,Original_Question,Original_Answer
5265,yeah everyth i turn psi say current maintain,Please power cycle your network devices and t...,@AskPlayStation Yeah everytime I turn on my ps...,@290756 Please power cycle your network device...
2955,due directly one working i tri uninstal reinstall,Thank you for that info! Please follow us on ...,@AskPlayStation PlayStation Vue directly is th...,@203520 Thank you for that info! Please follow...
8182,tri it got help issue,Please follow us on Twitter and let us know o...,"@AskPlayStation tried it, got no help on the i...",@390244 Please follow us on Twitter and let us...
11759,our car visible duel model is bug chang settings,That's odd. Please power cycle your network d...,@AskPlayStation Our cars are not visible in gr...,@504147 That's odd. Please power cycle your ne...
1430,i'm problem family management i need playstation,Hello there. The process needs to be done fro...,@AskPlayStation \n\nI'm having a problem with ...,@174093 Hello there. The process needs to be d...
...,...,...,...,...
18372,i can't appear offline even i try new to psi f...,We have DM you.,@AskPlayStation I can't appear offline even wh...,@805897 We have DM you.
1667,account name pig icon we psi,No worries! Please follow us via Twitter and ...,@AskPlayStation Account name: manoleedles (pig...,@178715 No worries! Please follow us via Twitt...
7425,i can't login account i know password say u ca...,Hi there. So sorry you're seeing issues. Plea...,@AskPlayStation I can’t login in to my account...,@363237 Hi there. So sorry you're seeing issue...
9845,i'm follow guys,Thanks for the follow. Please check your DM's...,@AskPlayStation I'm following you guys.,@326505 Thanks for the follow. Please check yo...


In [111]:
# Defining QuestionUser Function to vectorize the question and find the similarity with the answer

def QuestionUser(text):
    tfidf = TfidfVectorizer(lowercase=True)
    sw = set(stopwords.words('english'))
    stemmer = PorterStemmer()
    lemmatizer = WordNetLemmatizer()
    spell = SpellChecker()
    corrected_text = []

    text_quest = " ".join([word for word in str(text).split() if word not in sw])
    text_quest = " ".join([stemmer.stem(word) for word in text_quest.split()])
    text_quest = " ".join([lemmatizer.lemmatize(word) for word in text_quest.split()])

    misspelled_words = spell.unknown(text_quest.split())
    for word in text.split():
        if word in misspelled_words:
            corrected_text.append(spell.correction(word))
        else:
            corrected_text.append(word)
    text_quest = " ".join(filter(None,(corrected_text)))

    quest_vec = tfidf.fit_transform(data_clean["Question"]).toarray()
    df_quest_vec = pd.DataFrame(quest_vec,columns=tfidf.get_feature_names())
    text_vec = tfidf.transform([text_quest]).toarray()

    text_vec = text_vec[0].reshape(1,-1)
    similarity = cosine_similarity(text_vec, df_quest_vec)

    data_clean.reset_index(inplace = True, drop = True)
    answer_index = np.argmax(similarity[0])
    answer = data_clean["Answer"].loc[answer_index]
    return answer

In [112]:
# Exit words
exit_list = ['exit','bye','later','no','quit','break','stop','see you','see you later','thanks','thank you']

# Define the chatbot loop
flag=True
company = "AskPlayStation"
#print welcome message for our chosen company
print("{companyname} Support: Welcome to {companyname} Support. I will answer your queries about {companyname}. If you wish to end the chat, type bye!".format(companyname = company))
while(flag==True):
    #get an input
    user_response = input().lower()

    #loop chatbot
    if(user_response in exit_list):
        #exit the loop
        flag=False
        print("{companyname} Support: Thanks for chatting. I hope we could assist you today.".format(companyname = company))
    else:
        #show bot is typing
        print("{companyname} Support: ".format(companyname = company), end="")
        #print our AI response
        UserAnswer = QuestionUser(user_response)
        print(UserAnswer)

AskPlayStation Support: Welcome to AskPlayStation Support. I will answer your queries about AskPlayStation. If you wish to end the chat, type bye!
game crashed
AskPlayStation Support:  Sorry to hear that! Let's check out the next article to troubleshoot Wired or Wireless Network Connection: https://t.co/Vp85fRXkHX
data loss
AskPlayStation Support:  Please power cycle your network devices and try again: https://t.co/UUMNDRIFMj
ata corrupt
AskPlayStation Support:  Please power cycle your network devices and try again: https://t.co/UUMNDRIFMj
data corrupt
AskPlayStation Support:  Please power cycle your network devices and try again: https://t.co/UUMNDRIFMj
download fail
AskPlayStation Support:  Hello Greg. You must acquire the PS4 version in order to download it to your PS System: https://t.co/bHWYUrMquj
bye
AskPlayStation Support: Thanks for chatting. I hope we could assist you today.


In [None]:
d