# Textblob, N-grams, NRCLex and RAKE

N-grams are contiguous sequences of n-items in a sentence. N can be 1, 2 or any other positive integers, although usually we do not consider very large N because those n-grams rarely appears in many different places.

When performing machine learning tasks related to natural language processing, we usually need to generate n-grams from input sentences. For example, in text classification tasks, in addition to using each individual token found in the corpus, we may want to add bi-grams or tri-grams as features to represent our documents.

In [None]:
# pip install NRCLex
# pip install rake-nltk

In [7]:
# import packages
import pandas as pd
import re
import nltk
from nltk.util import ngrams
from gensim.parsing.preprocessing import STOPWORDS, strip_tags, strip_numeric, strip_punctuation, strip_multiple_whitespaces, remove_stopwords, strip_short, stem_text
from textblob import TextBlob
import json
#pip install NRCLex
from nrclex import NRCLex
from rake_nltk import Rake


In [2]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\deniz\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [3]:
# data
data = "Getting back to work made a difference. After 10 days of nothing but the business of moving and all of its seemingly obligatory messy emotions, it was nice to think of nothing but my patients. I worked Wednesday through Friday, and even with a couple of long days in there, it was a relief to be away from home. It was a relief to be away from unpacking, and contemplating, and deciding. It was a pleasure to think about somebody other than myself for 3 days. I needed that. Those 3 days away, combined with a long run/walk/dip into Lake Superior with Jet yesterday, gave me the energy to unpack nearly my entire basement today. I ve still got a lot to do, but things are starting to take shape. My bedroom is almost completely put together. My bathroom and kitchen are done. I ve still got boxes in the living room, dining room and the other 2 bedrooms, but I m getting there. Tomorrow I m heading south to Mayo Clinic for a ketamine infusion. Im pleased its not an urgent need at this time, just a regular maintenance dose. Returning to work, getting some exercise, and progressing with my unpacking have each helped stabilize my mood. Im  no longer daily wiping tears from my eyes. In fact, I havent cried for several days. That, in and of itself, is quite a feat! I m taking my time with unpacking. I m doing my best to remain patient. Taking the next right action and maintaining my attitude of gratitude are my focus now. Its still hard, but its not impossible. Settling into my new home, new routine, and new city will take time. I m keeping that fact forefront in my mind. I can do this. But I cant do it all today, nor do I have to. Patiently, Ill get it done."

# TextBlob

In [79]:
# use blob to extract easy nouns
blob = TextBlob(data)
print(blob.noun_phrases)
nouns_of_data = blob.noun_phrases

['obligatory messy emotions', 'long days', 'long run/walk/dip', 'jet', 'entire basement', 'tomorrow', 'mayo clinic', 'ketamine infusion', 'im', 'urgent need', 'regular maintenance dose', 'im', 'right action', 'settling', 'new home', 'new city', 'fact forefront', 'patiently', 'ill']


In [80]:
# use nouns for ngrams
# nouns_of_data = ['obligatory messy emotions', 'long days', 'long run/walk/dip', 'jet', 'entire basement', 'tomorrow', 'mayo clinic', 'ketamine infusion', 'im', 'urgent need', 'regular maintenance dose', 'im', 'haven t', 'right action', 'settling', 'new home', 'new city', 'fact forefront', 'patiently', 'ill']
# nouns_of_data=str(s)

# N_grams

In [81]:
def n_grams(s, n):
    '''returns n_grams
    s = data str
    n = number of grams'''

    s = s.lower()
    s = re.sub(r'[^a-zA-Z0-9\s]', ' ', s)
    tokens = [token for token in s.split(" ") if token != ""]
    output = list(ngrams(tokens, n))[1:10]
    return output;

# NRCLex

In [82]:
#Instantiate text object (for best results, 'text' should be unicode).
text_object = NRCLex(data)

In [83]:
#Return words list.
print(len(text_object.words))
print(text_object.words)

314
['Getting', 'back', 'to', 'work', 'made', 'a', 'difference', 'After', '10', 'days', 'of', 'nothing', 'but', 'the', 'business', 'of', 'moving', 'and', 'all', 'of', 'its', 'seemingly', 'obligatory', 'messy', 'emotions', 'it', 'was', 'nice', 'to', 'think', 'of', 'nothing', 'but', 'my', 'patients', 'I', 'worked', 'Wednesday', 'through', 'Friday', 'and', 'even', 'with', 'a', 'couple', 'of', 'long', 'days', 'in', 'there', 'it', 'was', 'a', 'relief', 'to', 'be', 'away', 'from', 'home', 'It', 'was', 'a', 'relief', 'to', 'be', 'away', 'from', 'unpacking', 'and', 'contemplating', 'and', 'deciding', 'It', 'was', 'a', 'pleasure', 'to', 'think', 'about', 'somebody', 'other', 'than', 'myself', 'for', '3', 'days', 'I', 'needed', 'that', 'Those', '3', 'days', 'away', 'combined', 'with', 'a', 'long', 'run/walk/dip', 'into', 'Lake', 'Superior', 'with', 'Jet', 'yesterday', 'gave', 'me', 'the', 'energy', 'to', 'unpack', 'nearly', 'my', 'entire', 'basement', 'today', 'I', 've', 'still', 'got', 'a', 'lo

In [84]:
#Return sentences list.
# 26 sentences
print(len(text_object.sentences))
text_object.sentences

26


[Sentence("Getting back to work made a difference."),
 Sentence("After 10 days of nothing but the business of moving and all of its seemingly obligatory messy emotions, it was nice to think of nothing but my patients."),
 Sentence("I worked Wednesday through Friday, and even with a couple of long days in there, it was a relief to be away from home."),
 Sentence("It was a relief to be away from unpacking, and contemplating, and deciding."),
 Sentence("It was a pleasure to think about somebody other than myself for 3 days."),
 Sentence("I needed that."),
 Sentence("Those 3 days away, combined with a long run/walk/dip into Lake Superior with Jet yesterday, gave me the energy to unpack nearly my entire basement today."),
 Sentence("I ve still got a lot to do, but things are starting to take shape."),
 Sentence("My bedroom is almost completely put together."),
 Sentence("My bathroom and kitchen are done."),
 Sentence("I ve still got boxes in the living room, dining room and the other 2 bedr

In [85]:
#Return affect list.
# 35 "emotions"
print(len(text_object.affect_list))
text_object.affect_list

35


['disgust',
 'negative',
 'anticipation',
 'positive',
 'positive',
 'anticipation',
 'positive',
 'positive',
 'joy',
 'positive',
 'anticipation',
 'fear',
 'negative',
 'surprise',
 'anticipation',
 'trust',
 'anticipation',
 'trust',
 'anticipation',
 'joy',
 'positive',
 'surprise',
 'anticipation',
 'anticipation',
 'positive',
 'positive',
 'joy',
 'positive',
 'positive',
 'negative',
 'sadness',
 'positive',
 'trust',
 'anticipation',
 'trust']

In [86]:
#Return affect dictionary.
# words that contains emotions and have been found by nrclex
print(len(text_object.affect_dict))
emo=text_object.affect_dict
emo

18


{'messy': ['disgust', 'negative'],
 'long': ['anticipation'],
 'relief': ['positive'],
 'shape': ['positive'],
 'completely': ['positive'],
 'pleased': ['joy', 'positive'],
 'urgent': ['anticipation', 'fear', 'negative', 'surprise'],
 'time': ['anticipation'],
 'maintenance': ['trust'],
 'daily': ['anticipation'],
 'fact': ['trust'],
 'feat': ['anticipation', 'joy', 'positive', 'surprise'],
 'patient': ['anticipation', 'positive'],
 'action': ['positive'],
 'gratitude': ['joy', 'positive'],
 'focus': ['positive'],
 'impossible': ['negative', 'sadness'],
 'routine': ['positive', 'trust']}

In [87]:
#Return highest emotions.
text_object.top_emotions

[('positive', 0.3142857142857143)]

In [95]:
pos = ['relief',
 'shape',
 'completely',
 'pleased',
 'feat',
 'patient',
 'action',
 'gratitude',
 'focus',
 'routine']

# sentences that were classified as positive
pos_sen = str("I worked Wednesday through Friday, and even with a couple of long days in there, it was a relief to be away from home. It was a relief to be away from unpacking, and contemplating, and deciding. I ve still got a lot to do, but things are starting to take shape.My bedroom is almost completely put together. Im pleased its not an urgent need at this time, just a regular maintenance dose. That, in and of itself, is quite a feat!. I m doing my best to remain patient. Taking the next right action and maintaining my attitude of gratitude are my focus now. Settling into my new home, new routine, and new city will take time.")

In [96]:
# use blob to extract easy nouns
blob = TextBlob(pos_sen)
print(blob.noun_phrases)
nouns_of_data = blob.noun_phrases

['long days', 'shape.my bedroom', 'im', 'urgent need', 'regular maintenance dose', 'right action', 'settling', 'new home', 'new city']


In [105]:
n_grams(s=pos_sen, n=2)

[('worked', 'wednesday'),
 ('wednesday', 'through'),
 ('through', 'friday'),
 ('friday', 'and'),
 ('and', 'even'),
 ('even', 'with'),
 ('with', 'a'),
 ('a', 'couple'),
 ('couple', 'of')]

# RAKE

In [30]:
# Uses stopwords for english from NLTK, and all puntuation characters.
r = Rake(min_length=2, max_length=4)

r.extract_keywords_from_text(data)

# To get keyword phrases ranked highest to lowest.
rake_words = str(r.get_ranked_phrases()) 

In [31]:
rake_words

"['seemingly obligatory messy emotions', 'longer daily wiping tears', 'almost completely put together', 'regular maintenance dose', 'next right action', 'entire basement today', 'still got boxes', '3 days away', 'still got', '3 days', 'still hard', 'several days', 'long days', '10 days', 'worked wednesday', 'work made', 'urgent need', 'unpack nearly', 'take time', 'take shape', 'remain patient', 'new routine', 'new home', 'new city', 'mayo clinic', 'long run', 'living room', 'lake superior', 'ketamine infusion', 'jet yesterday', 'im pleased', 'ill get', 'helped stabilize', 'heading south', 'havent cried', 'getting back', 'fact forefront', 'dining room', '2 bedrooms']"

In [32]:
# type of rake
type(rake_words)

str

# NRCLex

In [33]:
#Instantiate text object (for best results, 'text' should be unicode).
text_object = NRCLex(rake_words)

In [34]:
#Return words list.
print(len(text_object.words))
print(text_object.words)

89
["'seemingly", 'obligatory', 'messy', 'emotions', "'longer", 'daily', 'wiping', 'tears', "'almost", 'completely', 'put', 'together', "'regular", 'maintenance', 'dose', "'next", 'right', 'action', "'entire", 'basement', 'today', "'still", 'got', 'boxes', '3', 'days', 'away', "'still", 'got', '3', 'days', "'still", 'hard', "'several", 'days', "'long", 'days', "'10", 'days', "'worked", 'wednesday', "'work", 'made', "'urgent", 'need', "'unpack", 'nearly', "'take", 'time', "'take", 'shape', "'remain", 'patient', "'new", 'routine', "'new", 'home', "'new", 'city', "'mayo", 'clinic', "'long", 'run', "'living", 'room', "'lake", 'superior', "'ketamine", 'infusion', "'jet", 'yesterday', "'im", 'pleased', "'ill", 'get', "'helped", 'stabilize', "'heading", 'south', "'havent", 'cried', "'getting", 'back', "'fact", 'forefront', "'dining", 'room', '2', 'bedrooms']


In [35]:
#Return sentences list.
# 26 sentences
print(len(text_object.sentences))
text_object.sentences

1


[Sentence("['seemingly obligatory messy emotions', 'longer daily wiping tears', 'almost completely put together', 'regular maintenance dose', 'next right action', 'entire basement today', 'still got boxes', '3 days away', 'still got', '3 days', 'still hard', 'several days', 'long days', '10 days', 'worked wednesday', 'work made', 'urgent need', 'unpack nearly', 'take time', 'take shape', 'remain patient', 'new routine', 'new home', 'new city', 'mayo clinic', 'long run', 'living room', 'lake superior', 'ketamine infusion', 'jet yesterday', 'im pleased', 'ill get', 'helped stabilize', 'heading south', 'havent cried', 'getting back', 'fact forefront', 'dining room', '2 bedrooms']")]

In [36]:
#Return affect list.
# 35 "emotions"
print(len(text_object.affect_list))
text_object.affect_list

15


['disgust',
 'negative',
 'anticipation',
 'positive',
 'trust',
 'positive',
 'anticipation',
 'positive',
 'anticipation',
 'positive',
 'positive',
 'trust',
 'positive',
 'joy',
 'positive']

In [37]:
#Return affect dictionary.
# words that contains emotions and have been found by nrclex
print(len(text_object.affect_dict))
emo=text_object.affect_dict
emo

11


{'messy': ['disgust', 'negative'],
 'daily': ['anticipation'],
 'completely': ['positive'],
 'maintenance': ['trust'],
 'action': ['positive'],
 'time': ['anticipation'],
 'shape': ['positive'],
 'patient': ['anticipation', 'positive'],
 'routine': ['positive', 'trust'],
 'superior': ['positive'],
 'pleased': ['joy', 'positive']}

In [39]:
type(emo)

dict

In [56]:
rake_words[1:36]

"'seemingly obligatory messy emotion"

In [50]:
def print_first_word(word, no):
    words = word
    print(words.split().pop(no))
    #to print the last word use pop(-1)
print_first_word(word=rake_words, no=[3:4])

SyntaxError: invalid syntax (<ipython-input-50-a53af64f2000>, line 5)