In [1]:
import re
import pandas as pd
from nltk.corpus import stopwords
import spacy

In [2]:
# Only the columns used by this notebook are read from the CSV.
col_list = ["date", "tweet", "replies_count", "retweets_count", "likes_count"]
df = pd.read_csv("covidvaxdataset.csv", usecols=col_list)

# spaCy pipeline used further down for lemmatization; the 'parser' and 'ner'
# components are switched off through `disable`.
nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner'])

# Lower-case every tweet and collapse any run of whitespace to one space.
# Missing tweets become empty strings so that the per-row string operations
# here and in the later cleaning steps do not fail on NaN.
df['cleaned_tweets'] = (
    df['tweet']
    .fillna('')
    .str.lower()
    .str.split()
    .str.join(' ')
)

# Drop every character that is neither a word character nor whitespace.
# `regex=True` must be explicit: pandas >= 2.0 defaults to regex=False and
# would otherwise look for the literal text "[^\w\s]" and strip nothing.
# The raw string avoids the invalid "\w" / "\s" escape sequences.
df['cleaned_tweets'] = df['cleaned_tweets'].str.replace(r'[^\w\s]', '', regex=True)

# Compiled once at definition time rather than on every call.
# The ranges are deliberately narrow: one catch-all span from U+24C2 to
# U+1F251 would also match CJK ideographs, kana, Hangul and many other
# non-emoji characters, silently deleting that text.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F900-\U0001F9FF"  # supplemental symbols & pictographs
    "\U0001FA70-\U0001FAFF"  # symbols & pictographs extended-A
    "\U0001F000-\U0001F251"  # tiles/cards, enclosed alphanumerics (incl. flags), enclosed ideographs
    "\U00002600-\U000026FF"  # miscellaneous symbols
    "\U00002702-\U000027B0"  # dingbats
    "\U000025A0-\U000025FF"  # geometric shapes
    "\U00002B00-\U00002BFF"  # miscellaneous symbols and arrows
    "\U000024C2"             # circled latin capital letter M
    "\U00003030\U0000303D\U00003297\U00003299"  # CJK-block symbols used as emoji
    "\U0000FE00-\U0000FE0F"  # variation selectors
    "]+"
)


def remove_emoji(text):
    """Return `text` with emoji and emoji-style symbols removed.

    Parameters
    ----------
    text : str
        The string to clean.

    Returns
    -------
    str
        `text` with every run of characters matched by `_EMOJI_PATTERN`
        deleted. Surrounding whitespace is left untouched, so removing an
        emoji between two spaces leaves both spaces in place.
    """
    return _EMOJI_PATTERN.sub('', text)
# Strip emoji from every cleaned tweet.
df['cleaned_tweets'] = df['cleaned_tweets'].apply(remove_emoji)

# `stop` is kept as the list returned by NLTK; membership is tested against a
# frozenset copy so each token costs a hash lookup instead of a list scan.
stop = stopwords.words('english')
stop_lookup = frozenset(stop)

# NOTE(review): punctuation is stripped earlier in this cell, so any stop word
# that contains an apostrophe can no longer match its de-punctuated token
# (the preview output shows leftovers such as "wo nt"). Consider filtering
# stop words before stripping punctuation.
df['cleaned_tweets'] = df['cleaned_tweets'].apply(
    lambda text: " ".join(word for word in text.split() if word not in stop_lookup)
)

def space(comment):
    """Lemmatize `comment` with the module-level spaCy pipeline `nlp`.

    The text is run through `nlp`, and the lemma of every resulting token is
    joined back into one string separated by single spaces.
    """
    lemmas = (tok.lemma_ for tok in nlp(comment))
    return " ".join(lemmas)
# Replace each cleaned tweet with its lemmatized form, then preview the frame.
df['cleaned_tweets'] = df['cleaned_tweets'].map(space)
df.head(20)

  df['cleaned_tweets'] = df['cleaned_tweets'].str.replace('[^\w\s]','')


Unnamed: 0,date,tweet,replies_count,retweets_count,likes_count,cleaned_tweets
0,2021-01-01,@unhealthytruth Toronto Business Journal seeks...,0,1,1,unhealthytruth toronto business journal seek i...
1,2021-01-01,@TrumptrainA1 @hasonomans @latimes If any pers...,1,0,1,trumptraina1 hasonoman latime person go near a...
2,2021-01-01,Happy New Year to all! Let’s quickly forget 20...,2,1,10,happy new year let quickly forget 2020 make di...
3,2021-01-01,@ChildrensHD Toronto Business Journal seeks to...,0,0,0,childrenshd toronto business journal seek inte...
4,2021-01-01,"Quebec changes COVID-19 vaccine strategy, won'...",0,0,0,quebec change covid19 vaccine strategy wo nt h...
5,2021-01-01,Florida officials call COVID-19 vaccine issues...,4,0,1,florida official call covid19 vaccine issue gr...
6,2021-01-01,Health officials provide COVID-19 vaccine upda...,0,1,0,health official provide covid19 vaccine update...
7,2021-01-01,@BorisJohnson @BorisJohnson does it not worry ...,0,0,0,borisjohnson borisjohnson worry north korea am...
8,2021-01-01,"Even with a COVID-19 vaccine, we still need pa...",0,0,0,even covid19 vaccine still need pay sick day h...
9,2021-01-01,@BusyDrT Toronto Business Journal seeks to int...,0,0,0,busydrt toronto business journal seek intervie...


In [3]:
from textblob import TextBlob

def _tweet_polarity(text):
    """Return the TextBlob polarity score of `text`."""
    return TextBlob(text).polarity


# Score every cleaned tweet and keep the result in a new 'polarity' column.
df['polarity'] = df['cleaned_tweets'].apply(_tweet_polarity)
df.head(20)

Unnamed: 0,date,tweet,replies_count,retweets_count,likes_count,cleaned_tweets,polarity
0,2021-01-01,@unhealthytruth Toronto Business Journal seeks...,0,1,1,unhealthytruth toronto business journal seek i...,0.4
1,2021-01-01,@TrumptrainA1 @hasonomans @latimes If any pers...,1,0,1,trumptraina1 hasonoman latime person go near a...,0.033333
2,2021-01-01,Happy New Year to all! Let’s quickly forget 20...,2,1,10,happy new year let quickly forget 2020 make di...,0.313939
3,2021-01-01,@ChildrensHD Toronto Business Journal seeks to...,0,0,0,childrenshd toronto business journal seek inte...,0.4
4,2021-01-01,"Quebec changes COVID-19 vaccine strategy, won'...",0,0,0,quebec change covid19 vaccine strategy wo nt h...,0.0
5,2021-01-01,Florida officials call COVID-19 vaccine issues...,4,0,1,florida official call covid19 vaccine issue gr...,0.0
6,2021-01-01,Health officials provide COVID-19 vaccine upda...,0,1,0,health official provide covid19 vaccine update...,0.0
7,2021-01-01,@BorisJohnson @BorisJohnson does it not worry ...,0,0,0,borisjohnson borisjohnson worry north korea am...,0.125
8,2021-01-01,"Even with a COVID-19 vaccine, we still need pa...",0,0,0,even covid19 vaccine still need pay sick day h...,-0.714286
9,2021-01-01,@BusyDrT Toronto Business Journal seeks to int...,0,0,0,busydrt toronto business journal seek intervie...,0.4


In [4]:
def sentiment_analysis(polarity, strong_threshold=0.5):
    """Map a numeric polarity score to a five-level sentiment label.

    Parameters
    ----------
    polarity : float
        Signed sentiment score; negative means negative sentiment.
    strong_threshold : float, optional
        Magnitude a score must strictly exceed to be labelled "Very ...".
        Defaults to 0.5.

    Returns
    -------
    str or None
        'Very Negative'  if polarity < -strong_threshold
        'Negative'       if -strong_threshold <= polarity < 0
        'Neutral'        if polarity == 0
        'Positive'       if 0 < polarity <= strong_threshold
        'Very Positive'  if polarity > strong_threshold
        None             if polarity is NaN (no comparison can succeed)
    """
    # NaN is the only value that is not equal to itself; return None
    # explicitly instead of falling off the end of the function.
    if polarity != polarity:
        return None
    if polarity == 0:
        return 'Neutral'
    if polarity < 0:
        return 'Very Negative' if polarity < -strong_threshold else 'Negative'
    return 'Very Positive' if polarity > strong_threshold else 'Positive'

# Label every polarity score and preview the first 30 rows.
df['sentiment'] = df['polarity'].map(sentiment_analysis)
df.head(30)

Unnamed: 0,date,tweet,replies_count,retweets_count,likes_count,cleaned_tweets,polarity,sentiment
0,2021-01-01,@unhealthytruth Toronto Business Journal seeks...,0,1,1,unhealthytruth toronto business journal seek i...,0.4,Positive
1,2021-01-01,@TrumptrainA1 @hasonomans @latimes If any pers...,1,0,1,trumptraina1 hasonoman latime person go near a...,0.033333,Positive
2,2021-01-01,Happy New Year to all! Let’s quickly forget 20...,2,1,10,happy new year let quickly forget 2020 make di...,0.313939,Positive
3,2021-01-01,@ChildrensHD Toronto Business Journal seeks to...,0,0,0,childrenshd toronto business journal seek inte...,0.4,Positive
4,2021-01-01,"Quebec changes COVID-19 vaccine strategy, won'...",0,0,0,quebec change covid19 vaccine strategy wo nt h...,0.0,Neutral
5,2021-01-01,Florida officials call COVID-19 vaccine issues...,4,0,1,florida official call covid19 vaccine issue gr...,0.0,Neutral
6,2021-01-01,Health officials provide COVID-19 vaccine upda...,0,1,0,health official provide covid19 vaccine update...,0.0,Neutral
7,2021-01-01,@BorisJohnson @BorisJohnson does it not worry ...,0,0,0,borisjohnson borisjohnson worry north korea am...,0.125,Positive
8,2021-01-01,"Even with a COVID-19 vaccine, we still need pa...",0,0,0,even covid19 vaccine still need pay sick day h...,-0.714286,Very Negative
9,2021-01-01,@BusyDrT Toronto Business Journal seeks to int...,0,0,0,busydrt toronto business journal seek intervie...,0.4,Positive


In [5]:
# Display the last 30 rows of df.
df.tail(30)

Unnamed: 0,date,tweet,replies_count,retweets_count,likes_count,cleaned_tweets,polarity,sentiment
289925,2021-07-31,I made it to bed. My second covid vaccine is b...,2,0,6,make bed second covid vaccine book 10 am let h...,0.416667,Positive
289926,2021-07-31,@ericcbarnes @BrendenHill @Hokiepharm There is...,0,0,1,ericcbarne brendenhill hokiepharm herd immunit...,0.0,Neutral
289927,2021-07-31,‘I should have gotten the damn vaccine’: Fathe...,0,0,0,get damn vaccine father 5 die covid age 39 kai...,0.0,Neutral
289928,2021-07-31,Is Israel receiving 2 doses and a booster shot...,0,1,1,israel receive 2 dose booster shoot covid doct...,0.233333,Positive
289929,2021-07-31,@LaloDagach The vaccines clearly help reduce t...,2,0,0,lalodagach vaccine clearly help reduce severit...,-0.033333,Negative
289930,2021-07-31,#IDontLikePeopleWho are stupid! The more we do...,0,2,13,idontlikepeoplewho stupid do nt wear mask get ...,-0.4,Negative
289931,2021-07-31,Can't believe these fly over hicks spreading C...,0,1,21,ca nt believe fly hick spread covid refuse get...,0.8,Very Positive
289932,2021-07-31,"It's NEVER been Isolated, It doesn't exist. #...",0,0,0,never isolate do nt exist covid19 covidvaccine...,0.0,Neutral
289933,2021-07-31,thinking about how i will probably never get t...,0,0,3,think probably never get see mcr rate bc I ve ...,-0.125,Negative
289934,2021-07-31,@orcheerios I WILL STEAL THE COVID VACCINES FR...,0,0,1,orcheerio steal covid vaccine orphanage,0.0,Neutral


In [6]:
# Write df to 'covidsentimentdata.csv'; index=False leaves the row index out of the file.
df.to_csv('covidsentimentdata.csv', index=False)