In [65]:
import numpy as np
import pandas as pd
import spacy
import re
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import classification_report, ConfusionMatrixDisplay
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as snb
from wordcloud import WordCloud, STOPWORDS
import math
from scipy.stats import chi2_contingency, chisquare, chi2
import nltk  
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from nltk.probability import FreqDist
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer 
from sklearn.model_selection import GridSearchCV
import tensorflow as tf
pd.options.mode.chained_assignment = None
from textblob import TextBlob

In [42]:
def clean_tweet(tweet):
    tweet = tweet.lower()
    tweet = re.sub(r"@[A-Za-z0-9]+","", tweet)
    tweet = re.sub(r"#","", tweet)
    tweet = re.sub(r"\ART[\s]+","", tweet) 
    tweet = re.sub(r"https?:\/\/\S+","", tweet)
    tweet = re.sub("(.)\\1{2,}","\\1", tweet)
    tweet = re.sub(r"[^A-Za-z0-9\s]+", "",str(tweet))
    
    return tweet

In [43]:
trump_df = pd.read_csv('hashtag_donaldtrump.csv', lineterminator='\n')

In [44]:
trump_df["tweet"] = trump_df["tweet"].apply(clean_tweet)

In [46]:
Arizona_trump = trump_df.loc[trump_df['state'] == 'Arizona']

In [47]:
Wisconsin_trump = trump_df.loc[trump_df['state'] == 'Wisconsin']

In [48]:
Pennsylvania_trump = trump_df.loc[trump_df['state'] == 'Pennsylvania']

In [49]:
Georgia_trump = trump_df.loc[trump_df['state'] == 'Georgia']

In [78]:
Michigan_trump = trump_df.loc[trump_df['state'] == 'Michigan']

In [50]:
def pos_conv(pos):
    tag_dict = {"J": 'a', 
                "N": 'n', 
                "V": 'v', 
                "R": 'r'}    
    return tag_dict.get(pos[0], 'n')

In [53]:
def tweet_change(df):
    tb = [TextBlob(c.lower()) for c in df['tweet']]
    sentences = [t.words for t in tb]
    data = [s.lemmatize() for s in sentences]
    tags = [t.tags for t in tb]
    wordnet_tags = [[[w, pos_conv(pos)] for w, pos in t] for t in tags]
    data = [[w.lemmatize(t) for w,t in s] for s in wordnet_tags]
    df['tweet_new'] = data


In [73]:
def sentiment_analysis(df):
    # Determine polarity and subjectivity
    df['Polarity'] = df['tweet_new'].apply(lambda x: TextBlob(' '.join(x)).sentiment.polarity)
    df['Subjectivity'] = df['tweet_new'].apply(lambda x: TextBlob(' '.join(x)).sentiment.subjectivity)
    
    # Classify overall sentiment
    df.loc[df.Polarity > 0,'Sentiment'] = 1
    df.loc[df.Polarity == 0,'Sentiment'] = 0
    df.loc[df.Polarity < 0,'Sentiment'] = -1
    
    return df.groupby('Sentiment')['tweet'].count()

In [66]:
tweet_change(Georgia_trump)
sentiment_analysis(Georgia_trump)

Sentiment
-1.0     559
 0.0    1231
 1.0    1086
Name: tweet, dtype: int64

In [75]:
tweet_change(Pennsylvania_trump)
sentiment_analysis(Pennsylvania_trump)

Sentiment
-1.0    1599
 0.0    2525
 1.0    2887
Name: tweet, dtype: int64

In [76]:
tweet_change(Arizona_trump)
sentiment_analysis(Arizona_trump)

Sentiment
-1.0     629
 0.0    1084
 1.0    1152
Name: tweet, dtype: int64

In [77]:
tweet_change(Wisconsin_trump)
sentiment_analysis(Wisconsin_trump)

Sentiment
-1.0    358
 0.0    544
 1.0    578
Name: tweet, dtype: int64

In [79]:
tweet_change(Michigan_trump)
sentiment_analysis(Michigan_trump)

Sentiment
-1.0     767
 0.0    1025
 1.0    1261
Name: tweet, dtype: int64

In [85]:
NewYork_trump = trump_df.loc[trump_df['state'] == 'New York']
tweet_change(NewYork_trump)
sentiment_analysis(NewYork_trump)

Sentiment
-1.0    5038
 0.0    9313
 1.0    8530
Name: tweet, dtype: int64

In [86]:
California_trump = trump_df.loc[trump_df['state'] == 'California']
tweet_change(California_trump)
sentiment_analysis(California_trump)

Sentiment
-1.0     7085
 0.0    12937
 1.0    11127
Name: tweet, dtype: int64

In [87]:
Florida_trump = trump_df.loc[trump_df['state'] == 'Florida']
tweet_change(Florida_trump)
sentiment_analysis(Florida_trump)

Sentiment
-1.0    3241
 0.0    7394
 1.0    5933
Name: tweet, dtype: int64

In [88]:
Columbia_trump = trump_df.loc[trump_df['state'] == 'District of Columbia']
tweet_change(Columbia_trump)
sentiment_analysis(Columbia_trump)

Sentiment
-1.0    1983
 0.0    4289
 1.0    3412
Name: tweet, dtype: int64