In [0]:
#Description: This is a 'self' learning chatbot program

In [0]:
#Install the package NLTK
pip install nltk



In [0]:
#Install the package newpaper3k
pip install newspaper3k



In [0]:
#import libraries
from newspaper import Article
import random
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
import numpy as np
import warnings

In [0]:
#Ignore any warning messages
#NOTE: no category or module is given, so this silences every warning raised after this cell runs
warnings.filterwarnings('ignore')

In [0]:
#Download the NLTK data packages used by this notebook (quiet=True keeps the download log out of the output)
#'punkt' is presumably what nltk.sent_tokenize / nltk.word_tokenize rely on in the cells below - verify against the installed NLTK version
#NOTE(review): 'wordnet' is downloaded, but no visible cell uses a WordNet lemmatizer - confirm it is still needed
nltk.download('punkt', quiet=True)
nltk.download('wordnet', quiet=True)

True

In [0]:
#Download and parse the Mayo Clinic coronavirus article, then keep its plain text as the chatbot corpus
article = Article('https://www.mayoclinic.org/diseases-conditions/coronavirus/symptoms-causes/syc-20479963')
article.download()
article.parse()
#NOTE(review): only article.text is read in the visible cells; nothing produced by nlp() appears to be used - confirm before removing
article.nlp()
corpus = article.text

#Print the corpus/text
print(corpus)

Overview

Coronavirus Open pop-up dialog box Close Coronavirus Coronavirus Coronavirus is a family of viruses that can cause respiratory illnesses such as the common cold, severe acute respiratory syndrome (SARS) and Middle East respiratory syndrome (MERS).

Coronaviruses are a family of viruses that can cause illnesses such as the common cold, severe acute respiratory syndrome (SARS) and Middle East respiratory syndrome (MERS). In 2019, a new coronavirus was identified as the cause of a disease outbreak in China.

The virus is now known as the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2). The disease it causes is called coronavirus disease 2019 (COVID-19).

Cases of COVID-19 have been reported in a growing number of countries, including the U.S. Public health groups, such as the World Health Organization (WHO) and the U.S. Centers for Disease Control and Prevention (CDC), are monitoring the situation and posting updates on their websites. These groups have also issued 

In [0]:
#Tokenization
text = corpus #Alias for the article text; the LemNormalize demo cell further down reuses this name
sent_tokens = nltk.sent_tokenize(text) #Convert the text into a list of sentences; response() searches this list

#Print the list of sentences
print(sent_tokens)

['Overview\n\nCoronavirus Open pop-up dialog box Close Coronavirus Coronavirus Coronavirus is a family of viruses that can cause respiratory illnesses such as the common cold, severe acute respiratory syndrome (SARS) and Middle East respiratory syndrome (MERS).', 'Coronaviruses are a family of viruses that can cause illnesses such as the common cold, severe acute respiratory syndrome (SARS) and Middle East respiratory syndrome (MERS).', 'In 2019, a new coronavirus was identified as the cause of a disease outbreak in China.', 'The virus is now known as the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2).', 'The disease it causes is called coronavirus disease 2019 (COVID-19).', 'Cases of COVID-19 have been reported in a growing number of countries, including the U.S. Public health groups, such as the World Health Organization (WHO) and the U.S. Centers for Disease Control and Prevention (CDC), are monitoring the situation and posting updates on their websites.', 'These group

In [0]:
#Build a str.translate() table that maps the code point of every ASCII
#punctuation character to None, which makes translate() delete that character
remove_punct_dict = {ord(symbol): None for symbol in string.punctuation}

#Show the punctuation characters that will be removed
print(string.punctuation)

#Show the resulting translation table
print(remove_punct_dict)

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
{33: None, 34: None, 35: None, 36: None, 37: None, 38: None, 39: None, 40: None, 41: None, 42: None, 43: None, 44: None, 45: None, 46: None, 47: None, 58: None, 59: None, 60: None, 61: None, 62: None, 63: None, 64: None, 91: None, 92: None, 93: None, 94: None, 95: None, 96: None, 123: None, 124: None, 125: None, 126: None}


In [0]:
#Create a function that returns the lower-cased word tokens of a text with punctuation removed
#NOTE: despite its name, this function does not lemmatize - it only lower-cases, strips punctuation and tokenizes
def LemNormalize(text):
  """Lower-case `text`, delete punctuation using `remove_punct_dict`, and return NLTK's word tokens."""
  lowered = text.lower()
  without_punct = lowered.translate(remove_punct_dict)
  return nltk.word_tokenize(without_punct)

#Print the normalized tokens of the whole article text
print(LemNormalize(text))

['overview', 'coronavirus', 'open', 'popup', 'dialog', 'box', 'close', 'coronavirus', 'coronavirus', 'coronavirus', 'is', 'a', 'family', 'of', 'viruses', 'that', 'can', 'cause', 'respiratory', 'illnesses', 'such', 'as', 'the', 'common', 'cold', 'severe', 'acute', 'respiratory', 'syndrome', 'sars', 'and', 'middle', 'east', 'respiratory', 'syndrome', 'mers', 'coronaviruses', 'are', 'a', 'family', 'of', 'viruses', 'that', 'can', 'cause', 'illnesses', 'such', 'as', 'the', 'common', 'cold', 'severe', 'acute', 'respiratory', 'syndrome', 'sars', 'and', 'middle', 'east', 'respiratory', 'syndrome', 'mers', 'in', '2019', 'a', 'new', 'coronavirus', 'was', 'identified', 'as', 'the', 'cause', 'of', 'a', 'disease', 'outbreak', 'in', 'china', 'the', 'virus', 'is', 'now', 'known', 'as', 'the', 'severe', 'acute', 'respiratory', 'syndrome', 'coronavirus', '2', 'sarscov2', 'the', 'disease', 'it', 'causes', 'is', 'called', 'coronavirus', 'disease', '2019', 'covid19', 'cases', 'of', 'covid19', 'have', 'bee

In [0]:
#Keyword Matching

#Greeting Inputs
GREETING_INPUTS = ["hi", "hello", "hola", "greetings", "wasup", "hey"]

#Greeting responses back to the user
GREETING_RESPONSES = ["howdy", "hi", "hey", "what's good", "hello", "hey there"]

#Function to return a random greeting response to a user's greeting
def greeting(sentence):
  """Return a random greeting if `sentence` contains a greeting word, else None.

  The sentence is split on whitespace. Each word is compared case-insensitively
  against GREETING_INPUTS after surrounding punctuation is stripped, so inputs
  such as "Hello!" or "hi, doc" are recognised as greetings too.
  """
  for word in sentence.split():
    #Strip leading/trailing punctuation so "hello!" matches the entry "hello"
    if word.strip(string.punctuation).lower() in GREETING_INPUTS:
      return random.choice(GREETING_RESPONSES)
  #No greeting word found: callers test for None to fall back to response()
  return None

In [0]:
#Generate the response
def response(user_response):
  """Return the corpus sentence most similar to the user's query.

  The query is lower-cased and vectorized with TF-IDF together with the
  sentences in the global `sent_tokens` list. The sentence with the highest
  cosine similarity to the query is returned. If no sentence shares any
  weighted term with the query (best score is 0), or the corpus is empty,
  an apology message is returned instead.

  The global `sent_tokens` list is only read, never modified, so a failure
  part-way through cannot leave the user's query stuck in the corpus.
  """
  no_match_response = "I apologize, I don't understand."

  #The users response / query
  user_response = user_response.lower()

  #Nothing to compare against when there are no corpus sentences
  if not sent_tokens:
    return no_match_response

  #Vectorize a copy of the corpus with the query appended as the last row
  documents = list(sent_tokens) + [user_response]

  #Create a TfidfVectorizer Object
  TfidfVec = TfidfVectorizer(tokenizer = LemNormalize, stop_words = 'english')

  #Convert the text to a matrix of TF-IDF features
  tfidf = TfidfVec.fit_transform(documents)

  #Similarity of the query (last row) to every corpus sentence; the query row
  #itself is excluded so it can never be picked as its own best match
  scores = cosine_similarity(tfidf[-1], tfidf[:-1]).flatten()

  #Index and score of the most similar sentence to the users response
  idx = scores.argmax()
  score = scores[idx]

  #If the variable 'score' is 0 then there is no text similar to the users response
  if score == 0:
    return no_match_response

  return sent_tokens[idx]








In [0]:
#Interactive chat loop: runs until the user says bye or thanks the bot
flag = True
print("COVIDDOCBOT: I'm Doctor or COVIDOC or DOCBOT for short. I will answer your queries about COVID-19 Virus. If you want to exit, type Bye!")
while flag:
  #Normalize case and surrounding whitespace so "Bye " or "THANKS" are still recognised
  user_response = input().lower().strip()
  if user_response == 'bye':
    #Was `flag = false`, which raised NameError and left no clean way to exit
    flag = False
    print("DOCBOT: Chat with you later !")
  elif user_response in ('thanks', 'thank you'):
    flag = False
    print("DOCBot: You are welcome !")
  else:
    #Call greeting() once: it picks a random reply, so reuse the same result
    reply = greeting(user_response)
    if reply is not None:
      print("DOCBot: "+reply)
    else:
      print("DOCBot: "+response(user_response))

COVIDDOCBOT: I'm Doctor or COVIDOC or DOCBOT for short. I will answer your queries about COVID-19 Virus. If you want to exit, type Bye!
hello
DOCBot: hi
symptoms
DOCBot: Symptoms

Signs and symptoms of COVID-19 may appear two to 14 days after exposure and can include:

Fever

Cough

Shortness of breath or difficulty breathing

The severity of COVID-19 symptoms can range from very mild to severe.
what are causes?
DOCBot: Causes

It's unclear exactly how contagious the new coronavirus is.
risk
DOCBot: Risk factors

Risk factors for COVID-19 appear to include:

Recent travel from or residence in an area with ongoing spread of COVID-19 as determined by CDC or WHO

as determined by or Close contact with someone who has COVID-19 — such as when a family member or health care worker takes care of an infected person

Prevention

Although there is no vaccine available to prevent infection with the new coronavirus, you can take steps to reduce your risk of infection.


KeyboardInterrupt: ignored