In [1]:
import pandas as pd
import flair
import requests
import re

# Load the sentiment model
sentiment_model = flair.models.TextClassifier.load('en-sentiment')

# Fetch the book from the URL
url = 'https://www.gutenberg.org/cache/epub/925/pg925.txt'
book = requests.get(url).text

  from .autonotebook import tqdm as notebook_tqdm


2024-01-02 16:29:09,435 https://nlp.informatik.hu-berlin.de/resources/models/sentiment-curated-distilbert/sentiment-en-mix-distillbert_4.pt not found in cache, downloading to /var/folders/pr/5m0zmmcj0z33ydt9c6r3pynh0000gn/T/tmpm0j2nvyc


100%|██████████| 253M/253M [00:53<00:00, 4.97MB/s] 

2024-01-02 16:30:03,276 copying /var/folders/pr/5m0zmmcj0z33ydt9c6r3pynh0000gn/T/tmpm0j2nvyc to cache at /Users/vipul/.flair/models/sentiment-en-mix-distillbert_4.pt





2024-01-02 16:30:03,480 removing temp file /var/folders/pr/5m0zmmcj0z33ydt9c6r3pynh0000gn/T/tmpm0j2nvyc


tokenizer_config.json: 100%|██████████| 28.0/28.0 [00:00<00:00, 119kB/s]
config.json: 100%|██████████| 483/483 [00:00<00:00, 2.81MB/s]
vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 5.49MB/s]
tokenizer.json: 100%|██████████| 466k/466k [00:00<00:00, 20.9MB/s]


In [2]:
# Cleaning the speech for Sentiment analysis
book1 = book.replace('\n', '').replace('\r', '')


In [3]:
# Splitting the book into speeches
speeches = re.split(r'\*\s+\*\s+\*\s+\*\s+\*', book1)

In [4]:
# Create a list to store the results
results = []

In [5]:
# Iterate through the speeches
for speech_text in speeches:
    # Extract president name and speech date
    match = re.search(r'(.*?), (.*? \d{1,2}, \d{4})', speech_text)
    if match:
        president_name = match.group(1).strip()
        speech_date = match.group(2).strip()
    else:
        president_name = "Unknown"
        speech_date = "Unknown"

    # Create a Flair Sentence
    sentence = flair.data.Sentence(speech_text)

    # Predict sentiment
    sentiment_model.predict(sentence)
      # Get sentiment value (positive, negative, neutral) and sentiment score
    sentiment_value = sentence.labels[0].value
    sentiment_score = sentence.labels[0].score

    # Append the results as a dictionary to the list
    results.append({
        "President Name": president_name,
        "Speech Date": speech_date,
        "Sentimental Value": sentiment_value,
        "Sentiment Score": sentiment_score
    })

In [6]:
# Create a DataFrame from the list of dictionaries
results_df = pd.DataFrame(results)

In [7]:
results_df

Unnamed: 0,President Name,Speech Date,Sentimental Value,Sentiment Score
0,﻿The Project Gutenberg eBook of United States ...,give it away or re-use it under the termsof th...,POSITIVE,0.506499
1,GEORGE WASHINGTON,FIRST INAUGURAL ADDRESSIN THE CITY OF NEW YORK...,NEGATIVE,0.550841
2,GEORGE WASHINGTON,SECOND INAUGURAL ADDRESSIN THE CITY OF PHILADE...,POSITIVE,0.982964
3,JOHN ADAMS INAUGURAL ADDRESSIN THE CITY OF PHI...,"MARCH 4, 1797",NEGATIVE,0.925691
4,THOMAS JEFFERSON FIRST INAUGURAL ADDRESSIN THE...,"D.C., WEDNESDAY, MARCH 4, 1801",POSITIVE,0.917978
5,THOMAS JEFFERSON SECOND INAUGURAL ADDRESSIN WA...,"MONDAY, MARCH 4, 1805",POSITIVE,0.819593
6,JAMES MADISON FIRST INAUGURAL ADDRESSSATURDAY,"MARCH 4, 1809",POSITIVE,0.885548
7,JAMES MADISON,"SECOND INAUGURAL ADDRESSTHURSDAY, MARCH 4, 1813",POSITIVE,0.590762
8,JAMES MONROE FIRST INAUGURAL ADDRESSTUESDAY,"MARCH 4, 1817",POSITIVE,0.946114
9,JAMES MONROE,"SECOND INAUGURAL ADDRESSMONDAY, MARCH 5, 1821",POSITIVE,0.62614


In [8]:
results_df = results_df.drop([0, 56])
# Reset the index to maintain consecutive row numbers
results_df = results_df.reset_index(drop=True)

In [9]:
results_df

Unnamed: 0,President Name,Speech Date,Sentimental Value,Sentiment Score
0,GEORGE WASHINGTON,FIRST INAUGURAL ADDRESSIN THE CITY OF NEW YORK...,NEGATIVE,0.550841
1,GEORGE WASHINGTON,SECOND INAUGURAL ADDRESSIN THE CITY OF PHILADE...,POSITIVE,0.982964
2,JOHN ADAMS INAUGURAL ADDRESSIN THE CITY OF PHI...,"MARCH 4, 1797",NEGATIVE,0.925691
3,THOMAS JEFFERSON FIRST INAUGURAL ADDRESSIN THE...,"D.C., WEDNESDAY, MARCH 4, 1801",POSITIVE,0.917978
4,THOMAS JEFFERSON SECOND INAUGURAL ADDRESSIN WA...,"MONDAY, MARCH 4, 1805",POSITIVE,0.819593
5,JAMES MADISON FIRST INAUGURAL ADDRESSSATURDAY,"MARCH 4, 1809",POSITIVE,0.885548
6,JAMES MADISON,"SECOND INAUGURAL ADDRESSTHURSDAY, MARCH 4, 1813",POSITIVE,0.590762
7,JAMES MONROE FIRST INAUGURAL ADDRESSTUESDAY,"MARCH 4, 1817",POSITIVE,0.946114
8,JAMES MONROE,"SECOND INAUGURAL ADDRESSMONDAY, MARCH 5, 1821",POSITIVE,0.62614
9,JOHN QUINCY ADAMS,"INAUGURAL ADDRESSFRIDAY, MARCH 4, 1825",POSITIVE,0.987317
