In [13]:
import numpy as np
import pandas as pd
import spacy
import re
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import classification_report, ConfusionMatrixDisplay
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as snb
from wordcloud import WordCloud, STOPWORDS
import math
from scipy.stats import chi2_contingency, chisquare, chi2
import nltk  
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from nltk.probability import FreqDist
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer 
from sklearn.model_selection import GridSearchCV
import tensorflow as tf
pd.options.mode.chained_assignment = None
from textblob import TextBlob

In [2]:
def clean_tweet(tweet):
    """Normalize a raw tweet into lowercase ASCII letters, digits and whitespace.

    Steps, in order: lowercase, drop @mentions, drop '#' signs (the hashtag
    word is kept), drop a leading retweet marker, drop http(s) URLs, collapse
    runs of three or more identical characters to one, then strip every
    remaining character that is not a letter, digit or whitespace.

    Parameters
    ----------
    tweet : str
        Raw tweet text. Any non-string value (e.g. a NaN from a missing CSV
        cell) is treated as an empty tweet.

    Returns
    -------
    str
        The cleaned text. Whitespace is not re-normalized, so removed tokens
        can leave leading or doubled spaces behind.
    """
    if not isinstance(tweet, str):
        # Missing values come through pandas as float NaN; there is no text to clean.
        return ""

    tweet = tweet.lower()
    # Handles may contain underscores; without "_" here "@joe_biden" would
    # leave "biden" behind in the text.
    tweet = re.sub(r"@[a-z0-9_]+", "", tweet)
    tweet = re.sub(r"#", "", tweet)
    # The text is already lowercased, so the retweet marker must be matched as
    # "rt" (an uppercase "RT" pattern can never match at this point).
    tweet = re.sub(r"^rt\s+", "", tweet)
    tweet = re.sub(r"https?:\/\/\S+", "", tweet)
    # Collapses elongations such as "sooo" -> "so". This applies to any
    # character, digits included ("1000" -> "10").
    tweet = re.sub(r"(.)\1{2,}", r"\1", tweet)
    tweet = re.sub(r"[^A-Za-z0-9\s]+", "", tweet)

    return tweet

In [3]:
# Load the tweet dataset from the working directory. `lineterminator='\n'`
# makes only "\n" end a record -- presumably so carriage returns embedded in
# tweet text do not split rows (TODO confirm against the raw file).
joe_df = pd.read_csv('hashtag_joebiden.csv', lineterminator='\n')

In [4]:
# Overwrite the `tweet` column with its cleaned text (element-wise clean_tweet).
joe_df["tweet"] = joe_df["tweet"].map(clean_tweet)

In [5]:
# Independent copy of the Arizona rows: columns are added to this frame later,
# and `.copy()` keeps that from being an assignment on a slice of `joe_df`.
Arizona_joe = joe_df.loc[joe_df['state'] == 'Arizona'].copy()

In [6]:
# Independent copy of the Wisconsin rows: columns are added to this frame later,
# and `.copy()` keeps that from being an assignment on a slice of `joe_df`.
Wisconsin_joe = joe_df.loc[joe_df['state'] == 'Wisconsin'].copy()

In [7]:
# Independent copy of the Pennsylvania rows: columns are added to this frame later,
# and `.copy()` keeps that from being an assignment on a slice of `joe_df`.
Pennsylvania_joe = joe_df.loc[joe_df['state'] == 'Pennsylvania'].copy()

In [8]:
# Independent copy of the Georgia rows: columns are added to this frame later,
# and `.copy()` keeps that from being an assignment on a slice of `joe_df`.
Georgia_joe = joe_df.loc[joe_df['state'] == 'Georgia'].copy()

In [20]:
# Independent copy of the Michigan rows: columns are added to this frame later,
# and `.copy()` keeps that from being an assignment on a slice of `joe_df`.
Michigan_joe = joe_df.loc[joe_df['state'] == 'Michigan'].copy()

In [9]:
def pos_conv(pos):
    """Translate a POS tag into a one-letter lemmatizer POS code.

    Only the first character of `pos` is inspected: "J" -> 'a', "N" -> 'n',
    "V" -> 'v', "R" -> 'r'. Any other leading character falls back to 'n'.
    (Presumably the inputs are Penn Treebank-style tags such as "JJ"/"VBD"
    and the outputs are WordNet POS letters -- confirm against the tagger.)
    """
    code_by_initial = dict(J='a', N='n', V='v', R='r')
    initial = pos[0]
    if initial in code_by_initial:
        return code_by_initial[initial]
    # Unrecognized tag families are treated like nouns.
    return 'n'

In [10]:
def tweet_change(df):
    """Add a `tweet_new` column holding each tweet as a list of lemmatized words.

    Every value of `df['tweet']` is lowercased and wrapped in a TextBlob; each
    (word, tag) pair from the blob's `.tags` is lemmatized using the POS code
    that `pos_conv` derives from the tag.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a `tweet` column of strings. It is modified in place.

    Returns
    -------
    None
        The result is written to `df['tweet_new']` (one list per row).
    """
    lemmatized_tweets = []
    for text in df['tweet']:
        blob = TextBlob(text.lower())
        # One POS-aware pass per tweet; an extra POS-agnostic lemmatization of
        # `blob.words` would only be thrown away.
        lemmatized_tweets.append(
            [word.lemmatize(pos_conv(tag)) for word, tag in blob.tags]
        )
    df['tweet_new'] = lemmatized_tweets


In [14]:
def sentiment_analysis(df):
    """Score each tweet with TextBlob and count tweets per sentiment class.

    Adds three columns to `df` in place:

    * `Polarity`     -- TextBlob polarity of the space-joined `tweet_new` words
    * `Subjectivity` -- TextBlob subjectivity of the same text
    * `Sentiment`    -- sign of the polarity as a float: 1.0 (positive),
                        0.0 (neutral), -1.0 (negative)

    Parameters
    ----------
    df : pandas.DataFrame
        Must have `tweet_new` (iterable of word strings per row) and `tweet`.

    Returns
    -------
    pandas.Series
        Number of non-null `tweet` values for each `Sentiment` value.
    """
    # Score every tweet once and read both fields from that result, instead
    # of building a second TextBlob per tweet for the subjectivity.
    scores = [TextBlob(' '.join(words)).sentiment for words in df['tweet_new']]
    df['Polarity'] = np.array([score.polarity for score in scores], dtype=float)
    df['Subjectivity'] = np.array([score.subjectivity for score in scores], dtype=float)

    # Classify overall sentiment: the sign of the polarity gives -1.0 / 0.0 / 1.0.
    df['Sentiment'] = np.sign(df['Polarity'])

    return df.groupby('Sentiment')['tweet'].count()

In [15]:
# Georgia: add the lemmatized `tweet_new` column in place, then show the
# number of tweets per sentiment class (-1 negative, 0 neutral, 1 positive).
tweet_change(Georgia_joe)
sentiment_analysis(Georgia_joe)

Sentiment
-1.0     517
 0.0    1391
 1.0    1278
Name: tweet, dtype: int64

In [16]:
# Pennsylvania: add the lemmatized `tweet_new` column in place, then show the
# number of tweets per sentiment class (-1 negative, 0 neutral, 1 positive).
tweet_change(Pennsylvania_joe)
sentiment_analysis(Pennsylvania_joe)

Sentiment
-1.0     954
 0.0    2393
 1.0    2056
Name: tweet, dtype: int64

In [17]:
# Arizona: add the lemmatized `tweet_new` column in place, then show the
# number of tweets per sentiment class (-1 negative, 0 neutral, 1 positive).
tweet_change(Arizona_joe)
sentiment_analysis(Arizona_joe)

Sentiment
-1.0     571
 0.0    1395
 1.0    1282
Name: tweet, dtype: int64

In [18]:
# Wisconsin: add the lemmatized `tweet_new` column in place, then show the
# number of tweets per sentiment class (-1 negative, 0 neutral, 1 positive).
tweet_change(Wisconsin_joe)
sentiment_analysis(Wisconsin_joe)

Sentiment
-1.0    248
 0.0    717
 1.0    692
Name: tweet, dtype: int64

In [21]:
# Michigan: add the lemmatized `tweet_new` column in place, then show the
# number of tweets per sentiment class (-1 negative, 0 neutral, 1 positive).
tweet_change(Michigan_joe)
sentiment_analysis(Michigan_joe)

Sentiment
-1.0     587
 0.0    1279
 1.0    1208
Name: tweet, dtype: int64

In [32]:
# New York: take an independent copy of the state's rows so the columns added
# by the two calls below are not assignments on a slice of `joe_df`.
NewYork_joe = joe_df.loc[joe_df['state'] == 'New York'].copy()
tweet_change(NewYork_joe)
sentiment_analysis(NewYork_joe)

Sentiment
-1.0    3157
 0.0    9081
 1.0    8767
Name: tweet, dtype: int64

In [33]:
# California: take an independent copy of the state's rows so the columns added
# by the two calls below are not assignments on a slice of `joe_df`.
California_joe = joe_df.loc[joe_df['state'] == 'California'].copy()
tweet_change(California_joe)
sentiment_analysis(California_joe)

Sentiment
-1.0     4195
 0.0    10999
 1.0    10623
Name: tweet, dtype: int64

In [34]:
# Florida: take an independent copy of the state's rows so the columns added
# by the two calls below are not assignments on a slice of `joe_df`.
Florida_joe = joe_df.loc[joe_df['state'] == 'Florida'].copy()
tweet_change(Florida_joe)
sentiment_analysis(Florida_joe)

Sentiment
-1.0    2320
 0.0    6273
 1.0    4691
Name: tweet, dtype: int64

In [35]:
# District of Columbia: take an independent copy of the rows so the columns added
# by the two calls below are not assignments on a slice of `joe_df`.
Columbia_joe = joe_df.loc[joe_df['state'] == 'District of Columbia'].copy()
tweet_change(Columbia_joe)
sentiment_analysis(Columbia_joe)

Sentiment
-1.0    1016
 0.0    3320
 1.0    2719
Name: tweet, dtype: int64