In [14]:
import tweepy

# TextBlob - Python library for processing textual data
from textblob import TextBlob

# WordCloud - Python linrary for creating image wordclouds
from wordcloud import WordCloud

# Pandas - Data manipulation and analysis library
import pandas as pd

# NumPy - mathematical functions on multi-dimensional arrays and matrices
import numpy as np

# Regular Expression Python module
import re

# Matplotlib - plotting library to create graphs and charts
import matplotlib.pyplot as plt

import nltk

# Settings for Matplotlib graphs and charts
from pylab import rcParams
rcParams['figure.figsize'] = 12, 8

In [15]:
config = pd.read_csv("./config.csv")

In [16]:
# Twitter API config
twitterApiKey = config['twitterApiKey'][0]
twitterApiSecret = config['twitterApiSecret'][0]
twitterApiAccessToken = config['twitterApiAccessToken'][0]
twitterApiAccessTokenSecret = config['twitterApiAccessTokenSecret'][0]

In [17]:
# Authenticate
auth = tweepy.OAuthHandler(twitterApiKey, twitterApiSecret)
auth.set_access_token(twitterApiAccessToken, twitterApiAccessTokenSecret)
twitterApi = tweepy.API(auth, wait_on_rate_limit = True)

In [29]:
# キーワードからツイートを取得
tweets = tweepy.Cursor(twitterApi.search,  
                        count=1000,
                        q='COVID',
                        exclude_replies=True,
                        lang = 'en',
                        
                        ).items(100);


In [30]:
df = pd.DataFrame(data=[tweet.text for tweet in tweets], columns=['Tweet'])
df.head(100)

Unnamed: 0,Tweet
0,✔️ We are keeping track of all the times democ...
1,RT @SizweLo: If he goes to jail he will die of...
2,RT @Cornish_Damo: Sajid Javid has announced we...
3,RT @hhendersonphd: Evictions Aren’t Just a Sym...
4,@MaryFernando_ @vitality_md @BoczulaAnna @cdav...
...,...
95,RT @GhandourGhana: Wear a mask and use KLEANZ ...
96,RT @goodfoodgal: Dear those who are “vaccinate...
97,RT @VenkatRaja_Offl: Vaccinated today ❤️\n\n#C...
98,RT @RailMinIndia: In order to minimize Covid-1...


In [20]:
# Cleaning the tweets

def cleanUpTweet(txt):
    # Remove mentions
    txt = re.sub(r'@[A-Za-z0-9_]+', '', txt)
    # Remove hashtags
    txt = re.sub(r'#', '', txt)
    # Remove retweets:
    txt = re.sub(r'RT : ', '', txt)
    # Remove urls
    txt = re.sub(r'https?:\/\/[A-Za-z0-9\.\/]+', '', txt)
    return txt

In [21]:
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
vader_analyzer = SentimentIntensityAnalyzer()


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\zxxxs\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [22]:
def getTextPolarity(txt):
    return vader_analyzer.polarity_scores(txt)
def getTextPos(txt):
    return txt.get("pos")
def getTextNeu(txt):
    return txt.get("neu")
def getTextNeg(txt):
    return txt.get("neg")
def getTextCom(txt):
    return txt.get("compound")

def getTextAnalysis(l):
    s = l.get("compound")
    if s < 0:
        return "Negative"
    elif s == 0:
        return "Neutral"
    else:
        return "Positive"

In [23]:
df['Score'] = df['Tweet'].apply(getTextPolarity)
df['neg'] = df['Score'].apply(getTextNeg)
df['neu'] = df['Score'].apply(getTextNeu)
df['pos'] = df['Score'].apply(getTextPos)
df['compound'] = df['Score'].apply(getTextCom)
df['Sentiment'] = df['Score'].apply(getTextAnalysis)

In [24]:
df.drop(columns='Score')

Unnamed: 0,Tweet,neg,neu,pos,compound,Sentiment


In [25]:
df.to_csv("COVID_sentiment_vader.csv")

In [26]:
positive = df[df['Sentiment'] == 'Positive']
neutral = df[df['Sentiment'] == 'Neutral']
negative = df[df['Sentiment'] == 'Negative']

print(str(positive.shape[0]/(df.shape[0])*100) + " % of positive tweets")
print(str(neutral.shape[0]/(df.shape[0])*100) + " % of neutral tweets")
print(str(negative.shape[0]/(df.shape[0])*100) + " % of negative tweets")

ZeroDivisionError: division by zero

In [None]:
labels = df.groupby('Sentiment').count().index.values

values = df.groupby('Sentiment').size().values

plt.bar(labels, values)