<H1>Chapter 6: Natural Language Processing: How Chatbots Understand User Input</H1>

In [38]:
import nltk  # Natural Language Toolkit (NLTK)

# Corpora and models that the tokenizing, stop-word and POS-tagging cells load
# through NLTK. NLTK 3.9 and later look the tokenizer up as 'punkt_tab' and the
# English tagger as 'averaged_perceptron_tagger_eng', while older releases use
# the original names. Requesting both spellings keeps the notebook runnable on
# either generation of the library.
NLTK_RESOURCES = (
    'stopwords',                       # stop-word lists used to filter tokens
    'punkt',                           # Punkt tokenizer models for word_tokenize (older NLTK)
    'punkt_tab',                       # Punkt tokenizer tables for word_tokenize (NLTK 3.9+)
    'averaged_perceptron_tagger',      # POS tagger model for nltk.pos_tag (older NLTK)
    'averaged_perceptron_tagger_eng',  # English POS tagger model for nltk.pos_tag (NLTK 3.9+)
)

for resource in NLTK_RESOURCES:
    nltk.download(resource)  # only reports status when the resource is already up to date


In [39]:
import re  # regular expressions, used here to strip punctuation
from nltk.corpus import stopwords  # NLTK's stop-word lists
from nltk.tokenize import word_tokenize  # NLTK's word-level tokenizer

# Sample input, normalised in one pass: lowercase it, then delete every
# character that is neither a word character nor whitespace.
text = "Hello, World! This is a sample sentence."
text = re.sub(r'[^\w\s]', '', text.lower())

# English stop words, held in a set for membership tests.
stop_words = set(stopwords.words('english'))

# Split the cleaned string into word tokens.
word_tokens = word_tokenize(text)

# Keep, in order, only the tokens that are not stop words.
filtered_text = [token for token in word_tokens if token not in stop_words]

# Show the surviving tokens.
print(filtered_text)


['hello', 'world', 'sample', 'sentence']


In [42]:
from nltk.tokenize import word_tokenize  # NLTK's word-level tokenizer

# Sentence to split into tokens.
text = "This is a sample sentence."

# word_tokenize returns the tokens as a list of strings; in the recorded
# output the closing period comes back as a token of its own.
tokens = word_tokenize(text)

# Display the token list.
print(tokens)


['This', 'is', 'a', 'sample', 'sentence', '.']


In [37]:
import nltk  # Natural Language Toolkit (NLTK)

# POS tagger model needed by nltk.pos_tag. NLTK 3.9 and later load it under the
# name 'averaged_perceptron_tagger_eng', older releases under
# 'averaged_perceptron_tagger', so both names are requested.
for resource in ('averaged_perceptron_tagger', 'averaged_perceptron_tagger_eng'):
    nltk.download(resource)  # only reports status when the resource is already up to date


In [41]:
from nltk.tokenize import word_tokenize  # NLTK's word-level tokenizer

# Sentence to tokenize and tag.
text = "This is a sample sentence."

# Step 1: split the sentence into tokens.
tokens = word_tokenize(text)

# Step 2: label every token with its part of speech. The `nltk` name is not
# imported in this cell; it comes from an earlier cell's `import nltk`.
pos_tags = nltk.pos_tag(tokens)

# Display the (token, tag) pairs.
print(pos_tags)


[('This', 'DT'), ('is', 'VBZ'), ('a', 'DT'), ('sample', 'JJ'), ('sentence', 'NN'), ('.', '.')]


In [34]:
!pip install spacy # Install the spacy library using pip

In [35]:
!python -m spacy download en_core_web_sm # Download the English language model for spacy (en_core_web_sm)

In [43]:
import spacy  # spaCy NLP library

# Load spaCy's small English pipeline.
nlp = spacy.load('en_core_web_sm')

# Sentence to scan for named entities.
text = "Apple is looking to buy a startup in the UK for $1 billion."

# Run the pipeline over the sentence to obtain a processed document.
doc = nlp(text)

# List each named entity followed by its label
# (ORG, GPE and MONEY appear in the recorded output).
for ent in doc.ents:
    print(f"{ent.text} {ent.label_}")


Apple ORG
UK GPE
$1 billion MONEY


In [36]:
!pip install textblob # Install the textblob library using pip

In [44]:
from textblob import TextBlob  # TextBlob text-processing class

# Sentence whose sentiment is classified.
text = "I love this product!"

# Wrap the sentence in a TextBlob and read its polarity score, which lies
# between -1 (most negative) and 1 (most positive).
blob = TextBlob(text)
sentiment = blob.sentiment.polarity

# Turn the sign of the score into a label: above zero is positive,
# below zero is negative, anything else is neutral.
if sentiment > 0:
    label = "Positive"
elif sentiment < 0:
    label = "Negative"
else:
    label = "Neutral"

print(label)

Positive


In [45]:
import nltk  # Natural Language Toolkit (NLTK)

# Lowercase topic keywords mapped to the intent they signal. Deciding on the
# topic word, rather than on the mere presence of a verb, keeps unrelated
# sentences such as "She likes pizza" from being classified as a weather
# request (VBZ marks any third-person singular present verb, not only "is").
INTENT_BY_KEYWORD = {
    'weather': 'weather',
    'forecast': 'weather',
    'temperature': 'weather',
}

text = "What's the weather like today?"  # User utterance to classify
words = nltk.word_tokenize(text)  # Split the utterance into tokens
pos_tags = nltk.pos_tag(words)  # Label every token with its part of speech

intent = None  # Stays None when no known intent is recognised

# Scan the tagged tokens for a noun that is a known topic keyword.
# The Penn Treebank noun tags (NN, NNS, NNP, NNPS) all start with "NN".
for word, pos in pos_tags:
    keyword = word.lower()
    if pos.startswith('NN') and keyword in INTENT_BY_KEYWORD:
        intent = INTENT_BY_KEYWORD[keyword]
        break  # The first matching keyword decides the intent

print(intent)  # Print the detected intent (None if nothing matched)



weather
