In [1]:
# -*- coding: utf-8 -*-
import chatterbot.corpus
from chatterbot import comparisons
from chatterbot import response_selection
from chatterbot import ChatBot
from chatterbot.comparisons import LevenshteinDistance
from chatterbot.response_selection import get_first_response
from chatterbot.trainers import ChatterBotCorpusTrainer
from chatterbot.trainers import ListTrainer
import pandas as pd
import numpy as np
import random
import string
import sklearn
from sklearn.feature_extraction import text
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import glob
import os
import nltk
import logging
logging.basicConfig(level=logging.INFO)

# first-time use only
# nltk.download('punkt') 

# first-time use only
# nltk.download('wordnet')

In [2]:
def faq_chatbot_initialize(chatbot_name, threshold=0.9, excel_path='data/COVID_FAQ.xlsx', worksheet_name='FAQ'):
    covid_faq_chatbot = ChatBot(
        chatbot_name,
        logic_adapters=[
            {
                "import_path": "chatterbot.logic.BestMatch",
                "statement_comparison_function": LevenshteinDistance,
                "response_selection_method": get_first_response,
                "maximum_similarity_threshold": threshold
            }
        ],
        preprocessors=[
            'chatterbot.preprocessors.clean_whitespace'
        ],
        read_only=True,
    )
    trainer = ListTrainer(covid_faq_chatbot)
    #trainer.train("chatterbot.corpus.english")
    # read questions and answers
    data = pd.read_excel(excel_path, sheet_name=worksheet_name, engine='openpyxl')
    question = data.get('Queston')
    answer = data.get('Long_Answer')

    #for i in range(0, 3):
    #    print('[Q]', question[i], '\n[A]', answer[i], '\n\n')

    # Iteratively adding the question and answer
    train_list = []
    for i in range(len(question)):
        train_list.append(question[i])
        train_list.append(answer[i])
    # train the faq
    trainer.train(train_list)
    trainer.export_for_training('data/covid.yml')
    return covid_faq_chatbot




In [3]:
class NLP_Chatbot:
    
    def __init__(self, name, file_path):
        self.name = name
        self.file_path = file_path
        self.sents = self.generate_sents()
        self.TfidfVec, self.tfidf = self.generate_tfidf()

    def generate_sents(self):
        raw = []
        for filename in glob.glob(os.path.join(self.file_path, '*.txt')):
            with open(os.path.join(os.getcwd(), filename), 'r') as f: # open in readonly mode
                # do your stuff
                lines = f.readlines()
            raw.extend(lines)
        sents = [ nltk.sent_tokenize(r) for r in raw ] # converts each paragraph to a list of sentences 
        sents = [ s for sent in sents for s in sent  ] # flatten the list
        return sents
    
    def generate_tfidf(self):

        # prepare for lemmatization
        WNL = nltk.stem.WordNetLemmatizer()
        #for handling some known bugs while not using POS tag info.
        exceptions = ['has', 'was', 'as', 'us', 'less']
        def MyNormalize(text):
            tokens=nltk.word_tokenize(text.lower())
            tokens=[ t for t in tokens if t not in string.punctuation ]
            toks = [WNL.lemmatize(t) if t not in exceptions else t for t in tokens  ]
            return toks
        # Prepare a preprocessing function that will do tokenization,
        # case lowering, punctuation removal, and lemmatization
        my_stop_words = text.ENGLISH_STOP_WORDS

        # preprocess the sentences in data, remove stop words, and create a tf-idf vector
        TfidfVec = TfidfVectorizer(tokenizer=MyNormalize, stop_words=my_stop_words)
        tfidf = TfidfVec.fit_transform(self.sents)
        return TfidfVec, tfidf
    
    # function to match input to the preprocessed sentences
    def get_response(self, user_response):
        robo_response=''
        new = self.TfidfVec.transform([user_response])
        vals = cosine_similarity(new[0], self.tfidf)
        idx=vals.argsort()[0][-1]
        flat = vals.flatten()
        flat.sort()
        req_tfidf = flat[-1]
        if(req_tfidf==0):
            robo_response=robo_response+"I am sorry! I don't understand you."
            return robo_response
        else:
            robo_response = robo_response+self.sents[idx]
            return robo_response    
    



In [4]:
def nlp_chatbot_initialize(chatbot_name,file_path='data/'):
    chatbot = NLP_Chatbot(chatbot_name, file_path)
    return chatbot


In [5]:
def get_answer(faq_chatbot, nlp_chatbot, question, threshold=0.6):  # let's get a response to our input
    # try suggested corpora to find best fit. If first corpus < theshold, try another.
    # avoid random responses confidence 0
    response = faq_chatbot.get_response(question)
    if  response.confidence < threshold:  # not a good answer, look elsewhere
        response = nlp_chatbot.get_response(question)
    return response
#import logginglogging.basicConfig(level=logging.INFO)    # Enable info level logging


In [6]:
data_path = 'data/'
excel_name = 'COVID_FAQ.xlsx'
worksheet_name = 'FAQ'


covid_faq_chatbot = faq_chatbot_initialize("Covid FAQ Chat Bot", excel_path=data_path+excel_name, worksheet_name=worksheet_name)
covid_nlp_chatbot = nlp_chatbot_initialize("Covid NLP Chat Bot", data_path)



List Trainer: [####################] 100%


In [8]:
threshold=0.6
#question = "How to protect pregnant women against COVID-19?"
question = "what does Conversational AI refer to?"

print(get_answer(covid_faq_chatbot, covid_nlp_chatbot, question, threshold))

INFO:chatterbot.chatterbot:Beginning search for close text match
INFO:chatterbot.chatterbot:Processing search results
INFO:chatterbot.chatterbot:Using "what does Conversational AI refer to?" as a close match to "what does Conversational AI refer to?" with a confidence of 0
INFO:chatterbot.chatterbot:No responses found. Generating alternate response list.
INFO:chatterbot.chatterbot:No known response to the input was found. Selecting a random response.
INFO:chatterbot.chatterbot:BestMatch selected "No. As each international mass gathering is different, the factors to consider when determining if the event should be cancelled may also differ." as a response with a confidence of 0


A chatbot (also known as a spy, conversational bot, chatterbot, interactive agent, conversational interface, Conversational AI, talkbot or artificial spy entity) is a computer program or an artificial intelligence which conducts a conversation via auditory or textual methods.


In [31]:
#prepare some greeting words
GREETING_INPUTS = ("hello", "hi", "greetings", "sup", "what's up","hey",)
GREETING_RESPONSES = ["hi", "hey", "*nods*", "hi there", "hello", "I am glad! You are talking to me"]
def greeting(sentence): 
    for word in sentence.split():
        if word.lower() in GREETING_INPUTS:
            return random.choice(GREETING_RESPONSES)






In [None]:
#starting the bot
flag=True
print("CHATTY: My name is CHATTY. I will answer your queries about Chatbots. If you want to exit, type Bye!")
while(flag==True):
    user_response = input()
    user_response=user_response.lower()
    if(user_response!='bye'):
        if(user_response=='thanks' or user_response=='thank you' ):
            flag=False
            print("CHATTY: You are welcome..")
        else:
            if(greeting(user_response)!=None):
                print("CHATTY: "+greeting(user_response))
            else:
                print("CHATTY: ",end="")
                print(get_answer(covid_faq_chatbot, covid_nlp_chatbot, user_response, threshold))
    else:
        flag=False
        print("CHATTY: Bye! take care...")
        
# now chat with your bot...
# you may experiment with different similarity functions

In [None]:
d = {'key':'value'}
d['faq'] = covid_faq_chatbot
d['nlp'] = covid_nlp_chatbot
print(d)

In [9]:
covid_faq_chatbot.trainer.export_for_training('./covid.yml')

AttributeError: 'ChatBot' object has no attribute 'trainer'

In [11]:
chatbot = ChatBot(
    'Covid FAQ Chat Bot',

    logic_adapters= [
            {
                "import_path": "chatterbot.logic.BestMatch",
                "statement_comparison_function": "chatterbot.comparisons.LevenshteinDistance",
                "response_selection_method": "chatterbot.response_selection.get_first_response",
                "maximum_similarity_threshold": 0.9
            }
    ],
    read_only=True,
    trainer= 'chatterbot.trainers.ListTrainer',
    training_data= [
         'chatterbot.corpus.custom.covid'
    ]
)
chatbot.train('data/covid.yml')
question = "How to protect pregnant women against COVID-19?"
chatbot.get_response(question)

AttributeError: 'ChatBot' object has no attribute 'train'