<a href="https://colab.research.google.com/github/vallirajasekar/SentimentAnalysis_of_Roosevelt/blob/main/Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import nltk
from nltk.corpus import inaugural
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.probability import FreqDist
from nltk.collocations import BigramAssocMeasures, BigramCollocationFinder
from nltk.tokenize import sent_tokenize
from nltk.sentiment import SentimentIntensityAnalyzer


In [2]:
nltk.download('inaugural')
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
nltk.download('vader_lexicon')


[nltk_data] Downloading package inaugural to /root/nltk_data...
[nltk_data]   Unzipping corpora/inaugural.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [3]:
# Load the Roosevelt speech
speech = inaugural.raw('1933-Roosevelt.txt')


In [4]:
speech

"I am certain that my fellow Americans expect that on my induction into the Presidency I will address them with a candor and a decision which the present situation of our Nation impels. This is preeminently the time to speak the truth, the whole truth, frankly and boldly. Nor need we shrink from honestly facing conditions in our country today. This great Nation will endure as it has endured, will revive and will prosper. So, first of all, let me assert my firm belief that the only thing we have to fear is fear itself -- nameless, unreasoning, unjustified terror which paralyzes needed efforts to convert retreat into advance. In every dark hour of our national life a leadership of frankness and vigor has met with that understanding and support of the people themselves which is essential to victory. I am convinced that you will again give that support to leadership in these critical days.\n\nIn such a spirit on my part and on yours we face our common difficulties. They concern, thank God,

In [5]:
# Tokenization
tokens = word_tokenize(speech)


In [9]:
tokens

['I',
 'am',
 'certain',
 'that',
 'my',
 'fellow',
 'Americans',
 'expect',
 'that',
 'on',
 'my',
 'induction',
 'into',
 'the',
 'Presidency',
 'I',
 'will',
 'address',
 'them',
 'with',
 'a',
 'candor',
 'and',
 'a',
 'decision',
 'which',
 'the',
 'present',
 'situation',
 'of',
 'our',
 'Nation',
 'impels',
 '.',
 'This',
 'is',
 'preeminently',
 'the',
 'time',
 'to',
 'speak',
 'the',
 'truth',
 ',',
 'the',
 'whole',
 'truth',
 ',',
 'frankly',
 'and',
 'boldly',
 '.',
 'Nor',
 'need',
 'we',
 'shrink',
 'from',
 'honestly',
 'facing',
 'conditions',
 'in',
 'our',
 'country',
 'today',
 '.',
 'This',
 'great',
 'Nation',
 'will',
 'endure',
 'as',
 'it',
 'has',
 'endured',
 ',',
 'will',
 'revive',
 'and',
 'will',
 'prosper',
 '.',
 'So',
 ',',
 'first',
 'of',
 'all',
 ',',
 'let',
 'me',
 'assert',
 'my',
 'firm',
 'belief',
 'that',
 'the',
 'only',
 'thing',
 'we',
 'have',
 'to',
 'fear',
 'is',
 'fear',
 'itself',
 '--',
 'nameless',
 ',',
 'unreasoning',
 ',',
 'unj

In [7]:
# Stopword removal
stop_words = set(stopwords.words('english'))
filtered_words = [token for token in tokens if token.casefold() not in stop_words]


In [10]:
stop_words

{'a',
 'about',
 'above',
 'after',
 'again',
 'against',
 'ain',
 'all',
 'am',
 'an',
 'and',
 'any',
 'are',
 'aren',
 "aren't",
 'as',
 'at',
 'be',
 'because',
 'been',
 'before',
 'being',
 'below',
 'between',
 'both',
 'but',
 'by',
 'can',
 'couldn',
 "couldn't",
 'd',
 'did',
 'didn',
 "didn't",
 'do',
 'does',
 'doesn',
 "doesn't",
 'doing',
 'don',
 "don't",
 'down',
 'during',
 'each',
 'few',
 'for',
 'from',
 'further',
 'had',
 'hadn',
 "hadn't",
 'has',
 'hasn',
 "hasn't",
 'have',
 'haven',
 "haven't",
 'having',
 'he',
 'her',
 'here',
 'hers',
 'herself',
 'him',
 'himself',
 'his',
 'how',
 'i',
 'if',
 'in',
 'into',
 'is',
 'isn',
 "isn't",
 'it',
 "it's",
 'its',
 'itself',
 'just',
 'll',
 'm',
 'ma',
 'me',
 'mightn',
 "mightn't",
 'more',
 'most',
 'mustn',
 "mustn't",
 'my',
 'myself',
 'needn',
 "needn't",
 'no',
 'nor',
 'not',
 'now',
 'o',
 'of',
 'off',
 'on',
 'once',
 'only',
 'or',
 'other',
 'our',
 'ours',
 'ourselves',
 'out',
 'over',
 'own',
 'r

In [11]:
filtered_words

['certain',
 'fellow',
 'Americans',
 'expect',
 'induction',
 'Presidency',
 'address',
 'candor',
 'decision',
 'present',
 'situation',
 'Nation',
 'impels',
 '.',
 'preeminently',
 'time',
 'speak',
 'truth',
 ',',
 'whole',
 'truth',
 ',',
 'frankly',
 'boldly',
 '.',
 'need',
 'shrink',
 'honestly',
 'facing',
 'conditions',
 'country',
 'today',
 '.',
 'great',
 'Nation',
 'endure',
 'endured',
 ',',
 'revive',
 'prosper',
 '.',
 ',',
 'first',
 ',',
 'let',
 'assert',
 'firm',
 'belief',
 'thing',
 'fear',
 'fear',
 '--',
 'nameless',
 ',',
 'unreasoning',
 ',',
 'unjustified',
 'terror',
 'paralyzes',
 'needed',
 'efforts',
 'convert',
 'retreat',
 'advance',
 '.',
 'every',
 'dark',
 'hour',
 'national',
 'life',
 'leadership',
 'frankness',
 'vigor',
 'met',
 'understanding',
 'support',
 'people',
 'essential',
 'victory',
 '.',
 'convinced',
 'give',
 'support',
 'leadership',
 'critical',
 'days',
 '.',
 'spirit',
 'part',
 'face',
 'common',
 'difficulties',
 '.',
 'conc

In [8]:
# Lemmatization
lemmatizer = WordNetLemmatizer()
lemmatized_words = [lemmatizer.lemmatize(word) for word in filtered_words]


In [12]:
lemmatized_words

['certain',
 'fellow',
 'Americans',
 'expect',
 'induction',
 'Presidency',
 'address',
 'candor',
 'decision',
 'present',
 'situation',
 'Nation',
 'impels',
 '.',
 'preeminently',
 'time',
 'speak',
 'truth',
 ',',
 'whole',
 'truth',
 ',',
 'frankly',
 'boldly',
 '.',
 'need',
 'shrink',
 'honestly',
 'facing',
 'condition',
 'country',
 'today',
 '.',
 'great',
 'Nation',
 'endure',
 'endured',
 ',',
 'revive',
 'prosper',
 '.',
 ',',
 'first',
 ',',
 'let',
 'assert',
 'firm',
 'belief',
 'thing',
 'fear',
 'fear',
 '--',
 'nameless',
 ',',
 'unreasoning',
 ',',
 'unjustified',
 'terror',
 'paralyzes',
 'needed',
 'effort',
 'convert',
 'retreat',
 'advance',
 '.',
 'every',
 'dark',
 'hour',
 'national',
 'life',
 'leadership',
 'frankness',
 'vigor',
 'met',
 'understanding',
 'support',
 'people',
 'essential',
 'victory',
 '.',
 'convinced',
 'give',
 'support',
 'leadership',
 'critical',
 'day',
 '.',
 'spirit',
 'part',
 'face',
 'common',
 'difficulty',
 '.',
 'concern',

In [13]:
# Part-of-speech (POS) tagging
pos_tags = nltk.pos_tag(lemmatized_words)


In [14]:
pos_tags

[('certain', 'JJ'),
 ('fellow', 'JJ'),
 ('Americans', 'NNPS'),
 ('expect', 'VBP'),
 ('induction', 'NN'),
 ('Presidency', 'NNP'),
 ('address', 'NN'),
 ('candor', 'NN'),
 ('decision', 'NN'),
 ('present', 'JJ'),
 ('situation', 'NN'),
 ('Nation', 'NNP'),
 ('impels', 'NNS'),
 ('.', '.'),
 ('preeminently', 'RB'),
 ('time', 'NN'),
 ('speak', 'JJ'),
 ('truth', 'NN'),
 (',', ','),
 ('whole', 'JJ'),
 ('truth', 'NN'),
 (',', ','),
 ('frankly', 'RB'),
 ('boldly', 'RB'),
 ('.', '.'),
 ('need', 'VB'),
 ('shrink', 'VBP'),
 ('honestly', 'RB'),
 ('facing', 'VBG'),
 ('condition', 'NN'),
 ('country', 'NN'),
 ('today', 'NN'),
 ('.', '.'),
 ('great', 'JJ'),
 ('Nation', 'NN'),
 ('endure', 'NN'),
 ('endured', 'VBD'),
 (',', ','),
 ('revive', 'JJ'),
 ('prosper', 'NN'),
 ('.', '.'),
 (',', ','),
 ('first', 'RB'),
 (',', ','),
 ('let', 'VB'),
 ('assert', 'JJ'),
 ('firm', 'JJ'),
 ('belief', 'NN'),
 ('thing', 'NN'),
 ('fear', 'NN'),
 ('fear', 'NN'),
 ('--', ':'),
 ('nameless', 'NN'),
 (',', ','),
 ('unreasoning',

In [15]:
# Frequency distribution
freq_dist = FreqDist(lemmatized_words)
most_common_words = freq_dist.most_common(10)


In [16]:
most_common_words

[('.', 85),
 (',', 71),
 (';', 16),
 ('national', 9),
 ('must', 9),
 ('people', 8),
 ('leadership', 7),
 ('helped', 7),
 ('shall', 7),
 ('effort', 6)]

In [17]:
# Collocations (bigrams)
bigram_measures = BigramAssocMeasures()
finder = BigramCollocationFinder.from_words(lemmatized_words)
collocations = finder.nbest(bigram_measures.pmi, 10)


In [18]:
bigram_measures

<nltk.metrics.association.BigramAssocMeasures at 0x7f35cddb8cd0>

In [19]:
finder

<nltk.collocations.BigramCollocationFinder at 0x7f35cddbb460>

In [20]:
collocations

[('Action', 'image'),
 ('Americans', 'expect'),
 ('Compared', 'peril'),
 ('Constitution', 'simple'),
 ('Plenty', 'doorstep'),
 ('Practices', 'unscrupulous'),
 ('Primarily', 'ruler'),
 ('Recognition', 'falsity'),
 ('Small', 'wonder'),
 ('Stripped', 'lure')]

In [21]:
sentiment_analyzer = SentimentIntensityAnalyzer()
sentiment_scores = sentiment_analyzer.polarity_scores(speech)
sentiment_summary = "Positive: {}, Negative: {}, Neutral: {}, Compound: {}".format(
    sentiment_scores['pos'], sentiment_scores['neg'], sentiment_scores['neu'], sentiment_scores['compound']
)

In [22]:
sentences = sent_tokenize(speech)
summary = ' '.join(sentences[:3])  # Extract the first three sentences as a summary


In [23]:
sentences

['I am certain that my fellow Americans expect that on my induction into the Presidency I will address them with a candor and a decision which the present situation of our Nation impels.',
 'This is preeminently the time to speak the truth, the whole truth, frankly and boldly.',
 'Nor need we shrink from honestly facing conditions in our country today.',
 'This great Nation will endure as it has endured, will revive and will prosper.',
 'So, first of all, let me assert my firm belief that the only thing we have to fear is fear itself -- nameless, unreasoning, unjustified terror which paralyzes needed efforts to convert retreat into advance.',
 'In every dark hour of our national life a leadership of frankness and vigor has met with that understanding and support of the people themselves which is essential to victory.',
 'I am convinced that you will again give that support to leadership in these critical days.',
 'In such a spirit on my part and on yours we face our common difficulties

In [24]:
summary

'I am certain that my fellow Americans expect that on my induction into the Presidency I will address them with a candor and a decision which the present situation of our Nation impels. This is preeminently the time to speak the truth, the whole truth, frankly and boldly. Nor need we shrink from honestly facing conditions in our country today.'

In [25]:
print('Most Common Words:', most_common_words)


Most Common Words: [('.', 85), (',', 71), (';', 16), ('national', 9), ('must', 9), ('people', 8), ('leadership', 7), ('helped', 7), ('shall', 7), ('effort', 6)]


In [26]:
print('Collocations:', collocations)


Collocations: [('Action', 'image'), ('Americans', 'expect'), ('Compared', 'peril'), ('Constitution', 'simple'), ('Plenty', 'doorstep'), ('Practices', 'unscrupulous'), ('Primarily', 'ruler'), ('Recognition', 'falsity'), ('Small', 'wonder'), ('Stripped', 'lure')]


In [27]:
print('Sentiment Analysis:', sentiment_summary)


Sentiment Analysis: Positive: 0.184, Negative: 0.101, Neutral: 0.715, Compound: 0.9996


In [28]:
print('Summary:', summary)


Summary: I am certain that my fellow Americans expect that on my induction into the Presidency I will address them with a candor and a decision which the present situation of our Nation impels. This is preeminently the time to speak the truth, the whole truth, frankly and boldly. Nor need we shrink from honestly facing conditions in our country today.
