In [1]:
%pip install nltk
%pip install spacy



#Download Stopwords

In [1]:
# import the existing word and sentence tokenizing
# libraries
import nltk
from nltk.corpus import stopwords
nltk.download('punkt')
from nltk.tokenize import sent_tokenize, word_tokenize

nltk.download('averaged_perceptron_tagger')
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
print(stop_words)

{"that'll", 'who', 'he', "needn't", 'for', 'what', 'before', 'my', 'where', 'about', 'am', 'whom', 'can', 'during', 'them', 'should', "aren't", 'wasn', 'shan', 'few', "wouldn't", 'will', "shan't", "should've", 'yourself', 'were', 'myself', 'off', 'out', 'being', 'from', 'ain', 'won', "didn't", 'ourselves', 'and', 'our', 'too', 'both', "mightn't", 'its', 'until', 'below', 'but', 'as', 'these', 'above', 'over', 'haven', 'hers', 'mightn', 'does', 'most', 'with', 'same', 'his', 'had', 'mustn', 'you', 'under', 'here', 'yours', 'this', 'theirs', 'between', 'such', 'down', 'themselves', 'hadn', 'an', 'now', 'm', 'while', "mustn't", "you're", 'when', 'isn', 'to', 'been', 'very', 'after', 'a', 'that', 'do', 'up', 'once', 'wouldn', "doesn't", 'she', 'couldn', 'shouldn', 'was', 'the', 'aren', "hasn't", 'her', 'no', 'then', 'on', 'some', 're', 'd', 's', 'against', 'needn', 'they', 'o', "hadn't", 've', 'own', 'by', 'didn', 't', 'only', "it's", "you've", 'or', "shouldn't", 'nor', "don't", 'him', 'ou

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


#Convert to lowercase and remove numericals, stopwords from Text

In [2]:
import spacy

# Load spaCy English model
nlp = spacy.load("en_core_web_sm")

# Sample text
text = "I'm a  very empathetic person and I always try 2 be kind and loving to people However, I'm constantly worried that I'm not doing it right, or that the things I enjoy and the way I think/act is offensive and not very caring. I'm fine with changing how I behave if what I'm doing bothers people, but I got so overwhelmed and frustrated on what if I I am doing is offensive to someone. I feel unknowingly I may have hurt someone, and the thought makes me feel guilt and makes me feel that I can't enjoy my life very much. I want to be courteous to everyone and respect things that bother them and then not do/say those things, but I feel like I wearing myself out by constantly overthinking of myself."
modified_str = ''.join([i for i in text if not i.isdigit()])


# Process the text using spaCy
doc = nlp(modified_str)

# Remove stopwords
filtered_words = [token.text.lower() for token in doc if not token.is_stop]


# Join the filtered words to form a clean text
clean_text = ' '.join(filtered_words)

print("Original Text:", text)
print("Text after Stopword Removal:", clean_text)

Original Text: I'm a  very empathetic person and I always try 2 be kind and loving to people However, I'm constantly worried that I'm not doing it right, or that the things I enjoy and the way I think/act is offensive and not very caring. I'm fine with changing how I behave if what I'm doing bothers people, but I got so overwhelmed and frustrated on what if I I am doing is offensive to someone. I feel unknowingly I may have hurt someone, and the thought makes me feel guilt and makes me feel that I can't enjoy my life very much. I want to be courteous to everyone and respect things that bother them and then not do/say those things, but I feel like I wearing myself out by constantly overthinking of myself.
Text after Stopword Removal:   empathetic person try   kind loving people , constantly worried right , things enjoy way think / act offensive caring . fine changing behave bothers people , got overwhelmed frustrated offensive . feel unknowingly hurt , thought makes feel guilt makes fee

#Tokenize the given Text

In [3]:
#text = "I'm a very empathetic person and I always try be kind and loving to people However, I'm constantly worried that I'm not doing it right, or that the things I enjoy and the way I think/act is offensive and not very caring. I'm fine with changing how I behave if what I'm doing bothers people, but I got so overwhelmed and frustrated on what if I I am doing is offensive to someone. I feel unknowingly I may have hurt someone, and the thought makes me feel guilt and makes me feel that I can't enjoy my life very much. I want to be courteous to everyone and respect things that bother them and then not do/say those things, but I feel like I wearing myself out by constantly overthinking of myself."
tokenized = sent_tokenize(clean_text)
print(tokenized)
wordsList = word_tokenize(clean_text)
print(wordsList)

['  empathetic person try   kind loving people , constantly worried right , things enjoy way think / act offensive caring .', 'fine changing behave bothers people , got overwhelmed frustrated offensive .', 'feel unknowingly hurt , thought makes feel guilt makes feel enjoy life .', 'want courteous respect things bother / things , feel like wearing constantly overthinking .']
['empathetic', 'person', 'try', 'kind', 'loving', 'people', ',', 'constantly', 'worried', 'right', ',', 'things', 'enjoy', 'way', 'think', '/', 'act', 'offensive', 'caring', '.', 'fine', 'changing', 'behave', 'bothers', 'people', ',', 'got', 'overwhelmed', 'frustrated', 'offensive', '.', 'feel', 'unknowingly', 'hurt', ',', 'thought', 'makes', 'feel', 'guilt', 'makes', 'feel', 'enjoy', 'life', '.', 'want', 'courteous', 'respect', 'things', 'bother', '/', 'things', ',', 'feel', 'like', 'wearing', 'constantly', 'overthinking', '.']


#Do POS Tagging

In [4]:
for i in tokenized:

    # Word tokenizers is used to find the words
    # and punctuation in a string
    #wordsList = nltk.word_tokenize(i)

    # removing stop words from wordList
    wordsList = [w for w in wordsList if not w in stop_words]

    #  Using a Tagger. Which is part-of-speech
    # tagger or POS-tagger.
    pos_tagged = nltk.pos_tag(wordsList)

    print(pos_tagged)

[('empathetic', 'JJ'), ('person', 'NN'), ('try', 'VB'), ('kind', 'NN'), ('loving', 'VBG'), ('people', 'NNS'), (',', ','), ('constantly', 'RB'), ('worried', 'VBD'), ('right', 'JJ'), (',', ','), ('things', 'NNS'), ('enjoy', 'VBP'), ('way', 'NN'), ('think', 'VBP'), ('/', 'NNP'), ('act', 'NN'), ('offensive', 'JJ'), ('caring', 'NN'), ('.', '.'), ('fine', 'JJ'), ('changing', 'VBG'), ('behave', 'JJ'), ('bothers', 'NNS'), ('people', 'NNS'), (',', ','), ('got', 'VBD'), ('overwhelmed', 'JJ'), ('frustrated', 'JJ'), ('offensive', 'NN'), ('.', '.'), ('feel', 'VB'), ('unknowingly', 'RB'), ('hurt', 'VBN'), (',', ','), ('thought', 'VBN'), ('makes', 'VBZ'), ('feel', 'NN'), ('guilt', 'JJ'), ('makes', 'VBZ'), ('feel', 'NN'), ('enjoy', 'JJ'), ('life', 'NN'), ('.', '.'), ('want', 'VBP'), ('courteous', 'JJ'), ('respect', 'JJ'), ('things', 'NNS'), ('bother', 'RB'), ('/', 'JJ'), ('things', 'NNS'), (',', ','), ('feel', 'VBP'), ('like', 'IN'), ('wearing', 'VBG'), ('constantly', 'RB'), ('overthinking', 'VBG'), (

#Obtaining the stem words – Lemmatization

In [5]:
# POS_TAGGER_FUNCTION : TYPE 1
def pos_tagger(nltk_tag):
    if nltk_tag.startswith('J'):
        return wordnet.ADJ
    elif nltk_tag.startswith('V'):
        return wordnet.VERB
    elif nltk_tag.startswith('N'):
        return wordnet.NOUN
    elif nltk_tag.startswith('R'):
        return wordnet.ADV
    else:
        return None

In [6]:
from nltk.stem import WordNetLemmatizer
nltk.download('averaged_perceptron_tagger')
from nltk.corpus import wordnet

lemmatizer = WordNetLemmatizer()


#Custom pos_tagger function to make things simpler to understand.
wordnet_tagged = list(map(lambda x: (x[0], pos_tagger(x[1])), pos_tagged))
print(wordnet_tagged)

lemmatized_sentence = []
for word, tag in wordnet_tagged:
    if tag is None:
        # if there is no available tag, append the token as is
        lemmatized_sentence.append(word)
    else:
        # else use the tag to lemmatize the token
        lemmatized_sentence.append(lemmatizer.lemmatize(word, tag))
lemmatized_sentence = " ".join(lemmatized_sentence)

print("Lemmatized Sentence", lemmatized_sentence)

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


[('empathetic', 'a'), ('person', 'n'), ('try', 'v'), ('kind', 'n'), ('loving', 'v'), ('people', 'n'), (',', None), ('constantly', 'r'), ('worried', 'v'), ('right', 'a'), (',', None), ('things', 'n'), ('enjoy', 'v'), ('way', 'n'), ('think', 'v'), ('/', 'n'), ('act', 'n'), ('offensive', 'a'), ('caring', 'n'), ('.', None), ('fine', 'a'), ('changing', 'v'), ('behave', 'a'), ('bothers', 'n'), ('people', 'n'), (',', None), ('got', 'v'), ('overwhelmed', 'a'), ('frustrated', 'a'), ('offensive', 'n'), ('.', None), ('feel', 'v'), ('unknowingly', 'r'), ('hurt', 'v'), (',', None), ('thought', 'v'), ('makes', 'v'), ('feel', 'n'), ('guilt', 'a'), ('makes', 'v'), ('feel', 'n'), ('enjoy', 'a'), ('life', 'n'), ('.', None), ('want', 'v'), ('courteous', 'a'), ('respect', 'a'), ('things', 'n'), ('bother', 'r'), ('/', 'a'), ('things', 'n'), (',', None), ('feel', 'v'), ('like', None), ('wearing', 'v'), ('constantly', 'r'), ('overthinking', 'v'), ('.', None)]
Lemmatized Sentence empathetic person try kind lo

#Positive/Negative Sentiment

In [14]:
from textblob import TextBlob
# function to calculate subjectivity
def getSubjectivity(review):
    return TextBlob(review).sentiment.subjectivity

# function to calculate polarity
def getPolarity(review):
    return TextBlob(review).sentiment.polarity

# function to analyze the reviews
def analysis(score):
    if score < 0.10:
        return 'Negative'
    elif score == 0.10:
        return 'Neutral'
    else:
        return 'Positive'

In [15]:
subjectivity = getSubjectivity(lemmatized_sentence)
polarity = getPolarity(lemmatized_sentence)
print(subjectivity)
print(polarity)
pscore= analysis(polarity)
print(pscore)

0.5402380952380953
0.25023809523809526
Positive
