In [1]:
from twitter_authentication import bearer_token, consumer_key, consumer_secret, access_token, access_token_secret
from mongo_authentication import password
from pymongo import MongoClient
import tweepy
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk import ngrams
from joblib import dump, load
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Configure the Tweepy client with the credentials imported from twitter_authentication
tweepy_client = tweepy.Client(
    bearer_token=bearer_token,
    consumer_key=consumer_key,
    consumer_secret=consumer_secret,
    access_token=access_token,
    access_token_secret=access_token_secret,
)

In [3]:
# Configure PyMongo and list the collection names of the `tweets` database.
# The credentials are passed as keyword arguments instead of being concatenated
# into the URI: a username/password embedded in a MongoDB URI must be
# percent-escaped, so a raw password containing characters such as '@', ':',
# '/' or '%' would corrupt the connection string. Keyword credentials are used
# as-is (this expects the raw, unescaped password from mongo_authentication).
mongo_client = MongoClient(
    'mongodb+srv://cluster0.30s3m.mongodb.net/?retryWrites=true&w=majority',
    username='ythomaz',
    password=password,
)
db = mongo_client.tweets
db.list_collection_names()

['lula_tweets', 'bolsonaro_tweets']

In [4]:
# Fetch tweets and store them in MongoDB
query_lula = 'lula -RT'
query_bolsonaro = 'bolsonaro -RT'
def get_tweets(query, collection_name):
    """Search recent tweets matching ``query`` and store them in MongoDB.

    Paginates ``tweepy_client.search_recent_tweets`` (up to 400 pages of at
    most 100 tweets each), keeps the ``text``, ``author_id`` and
    ``created_at`` fields of every tweet and inserts the resulting records
    into ``db[collection_name]``.

    Parameters
    ----------
    query : str
        Search query passed to the recent-search endpoint.
    collection_name : str
        Name of the MongoDB collection that receives the records.

    Returns
    -------
    The result of ``insert_many``, or ``None`` when the search returned no
    tweets (nothing is inserted in that case).
    """
    campos = ['text', 'author_id', 'created_at']
    tweet_pages = tweepy.Paginator(tweepy_client.search_recent_tweets, query=query,
                                   tweet_fields=campos, max_results=100, limit=400)
    tweets = []
    for page in tweet_pages:
        # A page without results carries no data (None); skip it instead of
        # failing on `list += None`.
        if page.data:
            tweets.extend(page.data)
    if not tweets:
        # insert_many rejects an empty document list, so stop here.
        return None
    # Build the frame once, after all pages were collected (it used to be
    # rebuilt on every page and was undefined when there were no pages).
    df = pd.DataFrame(tweets, columns=campos)
    df_dict = df.to_dict(orient='records')
    return db[collection_name].insert_many(df_dict)

In [5]:
#get_tweets(query_lula,'lula_tweets')

In [6]:
#get_tweets(query_bolsonaro,'bolsonaro_tweets')

In [7]:
# Lula DataFrame: read every stored tweet (without Mongo's `_id` field)
# and keep only the first tweet of each author, renumbering the rows from 0.
tweets_lula = db.lula_tweets.find(filter={}, projection={'_id': 0})
df_tweets_lula = pd.DataFrame([documento for documento in tweets_lula])
df_lula = df_tweets_lula.drop_duplicates(subset=['author_id'], ignore_index=True)

In [None]:
# Bolsonaro DataFrame: read every stored tweet (without Mongo's `_id` field)
# and keep only the first tweet of each author, renumbering the rows from 0.
tweets_bolsonaro = db.bolsonaro_tweets.find(filter={}, projection={'_id': 0})
df_tweets_bolsonaro = pd.DataFrame([documento for documento in tweets_bolsonaro])
df_bolsonaro = df_tweets_bolsonaro.drop_duplicates(subset=['author_id'], ignore_index=True)

In [None]:
def vetorizar(dataframe, coluna, vetorizador=None):
    """Turn a text column into a TF-IDF feature matrix.

    Parameters
    ----------
    dataframe : mapping/DataFrame
        Object for which ``dataframe[coluna]`` yields the texts.
    coluna : str
        Label of the column holding the texts.
    vetorizador : optional
        An already-fitted vectorizer (any object exposing ``transform``).
        When given, the texts are only transformed, so the output columns are
        exactly the vocabulary that vectorizer was fitted on. This is what a
        previously trained classifier needs in order to receive the same
        features it was trained with. When omitted (default, original
        behavior) a new ``TfidfVectorizer`` is fitted on the texts themselves.

    Returns
    -------
    The feature matrix produced by the vectorizer.
    """
    textos = dataframe[coluna]
    if vetorizador is not None:
        return vetorizador.transform(textos)
    # NOTE(review): fitting here learns a new vocabulary from the data being
    # scored; its columns are not guaranteed to correspond to the features a
    # separately trained model expects. Prefer passing the vectorizer that was
    # fitted at training time via `vetorizador`.
    tfidf_vetorizar = TfidfVectorizer(lowercase=False, ngram_range=(1, 2), max_features=1000)
    matriz = tfidf_vetorizar.fit_transform(textos)
    return matriz

In [None]:
# Load the persisted classifier used for the predictions below.
# NOTE(review): joblib files are pickle-based, so loading one can execute
# arbitrary code; only load a file that comes from a trusted source.
classificador = load(filename='classificador.joblib')

In [None]:
# Vectorize the text of the Lula tweets and predict one label per tweet
matriz_tfidf_lula = vetorizar(df_lula, 'text')
predicao_lula = pd.Series(classificador.predict(matriz_tfidf_lula))

In [None]:
# Store the predictions in a 'sentiment' column at position 3.
# The guard keeps the cell idempotent: inserting with allow_duplicates=True
# would silently add a second 'sentiment' column every time the cell is re-run,
# after which `df_lula.sentiment` is a DataFrame instead of a Series.
if 'sentiment' in df_lula.columns:
    df_lula['sentiment'] = predicao_lula
else:
    df_lula.insert(3, 'sentiment', predicao_lula)

In [None]:
# Translate the numeric labels into readable names (0 -> 'Negativo', 1 -> 'Positivo')
# and keep them in a separate 'classificacao' column.
classificacao_lula = df_lula['sentiment'].replace({0: 'Negativo', 1: 'Positivo'})
df_lula['classificacao'] = classificacao_lula

In [None]:
# Vectorize the text of the Bolsonaro tweets and predict one label per tweet
matriz_tfidf_bolsonaro = vetorizar(df_bolsonaro, 'text')
predicao_bolsonaro = pd.Series(classificador.predict(matriz_tfidf_bolsonaro))

In [None]:
# Store the predictions in a 'sentiment' column at position 3.
# The guard keeps the cell idempotent: inserting with allow_duplicates=True
# would silently add a second 'sentiment' column every time the cell is re-run,
# after which `df_bolsonaro.sentiment` is a DataFrame instead of a Series.
if 'sentiment' in df_bolsonaro.columns:
    df_bolsonaro['sentiment'] = predicao_bolsonaro
else:
    df_bolsonaro.insert(3, 'sentiment', predicao_bolsonaro)

In [None]:
# Translate the numeric labels into readable names (0 -> 'Negativo', 1 -> 'Positivo')
# and keep them in a separate 'classificacao' column.
classificacao_bolsonaro = df_bolsonaro['sentiment'].replace({0: 'Negativo', 1: 'Positivo'})
df_bolsonaro['classificacao'] = classificacao_bolsonaro

In [None]:
# Split each candidate's tweets into the three collection rounds.
def _filtrar_por_dias(df, dias):
    """Return the rows of ``df`` whose ``created_at`` day-of-month is in ``dias``."""
    # NOTE(review): only the day of the month is compared; month and year are
    # ignored, so tweets from different months sharing a day number would end
    # up in the same round. Confirm the stored data covers each day only once.
    return df[df['created_at'].dt.day.isin(dias)]


# Day-of-month pairs of collection rounds 1, 2 and 3
dias_coleta01 = [21, 22]
dias_coleta02 = [27, 28]
dias_coleta03 = [5, 6]

lula_coleta01 = _filtrar_por_dias(df_lula, dias_coleta01)
lula_coleta02 = _filtrar_por_dias(df_lula, dias_coleta02)
lula_coleta03 = _filtrar_por_dias(df_lula, dias_coleta03)
bolsonaro_coleta01 = _filtrar_por_dias(df_bolsonaro, dias_coleta01)
bolsonaro_coleta02 = _filtrar_por_dias(df_bolsonaro, dias_coleta02)
bolsonaro_coleta03 = _filtrar_por_dias(df_bolsonaro, dias_coleta03)

In [None]:
# Percentage of positive and negative tweets in each collection round.
def _percentuais_sentimento(coleta):
    """Return ``[% positive, % negative]`` for one collection round.

    The result is a NumPy array of two percentages rounded to 2 decimals.
    """
    proporcoes = coleta['sentiment'].value_counts(normalize=True)
    # Fix the order by label (1 = 'Positivo', 0 = 'Negativo') instead of by
    # frequency: sorting by count only yields [positive, negative] when the
    # positive class happens to be the minority, and the chart labels the first
    # bar of each pair 'Positivo'. A class absent from the round gets 0, so the
    # array always has two entries.
    proporcoes = proporcoes.reindex([1, 0], fill_value=0)
    return np.around(list(proporcoes * 100), decimals=2)


bolsonaro_1 = _percentuais_sentimento(bolsonaro_coleta01)
bolsonaro_2 = _percentuais_sentimento(bolsonaro_coleta02)
bolsonaro_3 = _percentuais_sentimento(bolsonaro_coleta03)
lula_1 = _percentuais_sentimento(lula_coleta01)
lula_2 = _percentuais_sentimento(lula_coleta02)
lula_3 = _percentuais_sentimento(lula_coleta03)

In [None]:
# Work in progress (to be turned into a function).
# Grouped bar chart: for every collection round (W1-W3) one pair of bars per
# candidate, the first bar of each pair under the 'Positivo' tick and the
# second under the 'Negativo' tick.
classificacao = ['Positivo', 'Negativo'] * 3
posicoes_ticks = [1.5, 4.5, 7.5, 10.5, 13.5, 16.5]

# (x positions, bar heights, legend label), listed in drawing order so that
# every series takes the same colour of the default cycle as before.
series = [
    ([1, 4], bolsonaro_1, 'Bolsonaro-W1'),
    ([7, 10], bolsonaro_2, 'Bolsonaro-W2'),
    ([13, 16], bolsonaro_3, 'Bolsonaro-W3'),
    ([2, 5], lula_1, 'Lula-W1'),
    ([8, 11], lula_2, 'Lula-W2'),
    ([14, 17], lula_3, 'Lula-W3'),
]

fig, ax = plt.subplots(figsize=(20, 5))

barras = [ax.bar(posicoes, alturas, width=1) for posicoes, alturas, _ in series]
bolsonaro1, bolsonaro2, bolsonaro3, lula1, lula2, lula3 = barras

# Print each bar's value on top of it
for grupo in barras:
    ax.bar_label(grupo)

ax.set_title('Análise de sentimento dos Tweets')
ax.set_ylabel('Percentual de Tweets positivos e negativos')

ax.set_xticks(posicoes_ticks)
ax.set_xticklabels(classificacao)
ax.legend([rotulo for _, _, rotulo in series], bbox_to_anchor=(1, 1), loc='upper left')

plt.show()