In [1]:
import os
import requests
import tweepy as tw
import pandas as pd
from datetime import datetime
import configparser
import time

In [2]:
# Twitter API set-up: load the credentials from config.ini and build the tweepy client
config = configparser.ConfigParser()
config.read('config.ini')

# All four credentials are read from the [twitter] section of the config file
twitter_section = config['twitter']
api_key = twitter_section['api_key']
api_key_secret = twitter_section['api_key_secret']
access_token = twitter_section['access_token']
access_token_secret = twitter_section['access_token_secret']

# Combine the key pair and the access-token pair into one auth handler,
# then create the `api` client object used by the search cells
auth = tw.OAuthHandler(api_key, api_key_secret)
auth.set_access_token(access_token, access_token_secret)
api = tw.API(auth)

In [3]:
### Search var. 1.2
# Container to collect data: maps each search query to a list of
# [screen_name, created_at, full_text, lang] rows
search_results_dict = {}


def search_tweets(search_query, no_of_tweets=5):
    """Search recent tweets for one query and store them in ``search_results_dict``.

    Retweets are excluded by appending ``' -filter:retweets'`` to the query.
    Only a single ``api.search_tweets`` call is made, so only tweets from the
    first result page are returned.

    Parameters
    ----------
    search_query : str
        Search text; also used as the key in ``search_results_dict``.
    no_of_tweets : int, default 5
        Forwarded to the API as ``count``.

    Returns
    -------
    dict
        The module-level ``search_results_dict`` (the same object on every call).
        If the search fails, the error is printed and no entry is written for
        ``search_query``.
    """
    try:
        # Search without retweets
        tweets = api.search_tweets(
            q=search_query + ' -filter:retweets',
            result_type='recent',
            count=no_of_tweets,
            tweet_mode='extended',
        )
        # Pull the attributes of interest from every tweet and store them under the query
        search_results_dict[search_query] = [
            [tweet.user.screen_name, tweet.created_at, tweet.full_text, tweet.lang]
            for tweet in tweets
        ]
    except Exception as e:
        # Exception (not BaseException) so that KeyboardInterrupt / SystemExit still
        # propagate and a long search loop can be interrupted from the notebook.
        print('Status Failed On,', str(e))
    return search_results_dict

In [4]:
# NOTE: check that sheet_name below matches the workbook before running this cell
# Load the search terms from the Excel file: column B of the sheet, header in the first row
path = 'D:\\sorare.xlsx'
search_df = pd.read_excel(
    path,
    sheet_name='search_short',
    header=0,
    usecols="B",
)
search_list = search_df['Search Name'].tolist()

# One search per term, asking for 3 tweets each
for query in search_list:
    search_tweets(query, 3)


In [5]:
## Search result to dataframe
# Column names for the four attributes collected per tweet
columns = ["User", "Date Created", "Tweet", "Language"]

# One frame per query, with the column names set at construction time.
# Queries that returned no tweets are skipped: they contribute no rows, and a
# zero-column frame would break the column naming if every query came back empty.
frames_by_query = {
    query_text: pd.DataFrame(rows, columns=columns)
    for query_text, rows in search_results_dict.items()
    if rows
}

if frames_by_query:
    # The dict keys become the outer index level; the per-query row number is the inner level
    tweets_df = pd.concat(frames_by_query, axis=0)
else:
    # pd.concat raises on an empty mapping (e.g. when every search failed), so fall back
    # to an empty frame with the same columns and a two-level index
    print('No tweets were collected - tweets_df is empty')
    tweets_df = pd.DataFrame(columns=columns, index=pd.MultiIndex.from_arrays([[], []]))

tweets_df.head()

Unnamed: 0,Unnamed: 1,User,Date Created,Tweet,Language
Jordan Beyer injury,0,AlexJamesSport,2022-10-20 09:01:00+00:00,Jordan Beyer missed last night as a precaution...,en
Talles Magno injury,0,MicheleG3,2022-10-18 01:00:24+00:00,As told to @empiregass (who’s hiding in Mets s...,en
Talles Magno injury,1,Soccerwriter,2022-10-16 17:06:13+00:00,"Who's available: Morales out with calf injury,...",en
Talles Magno injury,2,FrontRowSoccer,2022-10-16 17:05:02+00:00,WHO'S AVAILABLE: NYCFC's Morales out with calf...,en
Vina Ceara,0,CearaBOT,2022-10-23 01:41:03+00:00,"Polêmica 😱: \n \n Magno Alves, jogador do Cear...",pt


In [7]:
# Creating subDataFrame with non-English tweets.
# .copy() gives an independent frame, so adding a column to it later does not
# write into a slice of tweets_df.
none_eng_tw = tweets_df[tweets_df['Language'] != 'en'].copy()

# One entry per row of none_eng_tw, in row order; None marks a tweet that was not translated
translated_tweets = []

# Translate tweets from spanish, french etc. API https://mymemory.translated.net/ used.
MYMEMORY_URL = 'https://api.mymemory.translated.net/get'

for i, (tweet_text, tweet_lang) in enumerate(zip(none_eng_tw['Tweet'], none_eng_tw['Language'])):
    if len(tweet_text.encode("utf8")) > 500:  # mymemory accepts strings <= 500 bytes
        print('tweet ' + str(i) + ' is too long for translate')
        # Keep a placeholder so the list stays aligned with the rows of none_eng_tw
        translated_tweets.append(None)
        continue

    # Pass the text through `params` so requests URL-encodes it; characters such as
    # '&', '?', '+' or newlines inside a tweet would otherwise corrupt the query string.
    # '#' is still stripped, as in the original request (mymemory didn't like #).
    r = requests.get(
        MYMEMORY_URL,
        params={'q': tweet_text.replace('#', ""), 'langpair': tweet_lang + '|en'},
        timeout=30,  # do not hang the notebook forever on a stalled connection
    )
    tj = r.json()
    translated_tweets.append(tj['responseData']['translatedText'])
    time.sleep(5)  # pause between requests


In [8]:
# Add translation to main df
# .assign builds a new frame instead of writing a column into a filtered slice of
# tweets_df (which triggers SettingWithCopyWarning). translated_tweets must hold exactly
# one entry per row of none_eng_tw, otherwise pandas raises a length-mismatch error.
none_eng_tw = none_eng_tw.assign(translated=translated_tweets)

# Left-merge on the index: English tweets keep their row and get NaN in 'translated'
full_df = tweets_df.merge(none_eng_tw[['translated']], how='left', left_index=True, right_index=True)

In [27]:
# Data upload to excel
# Turn both index levels into columns and discard 'level_1' (the inner index level).
# NOTE(review): full_df is rebound to the flattened frame, so re-running this cell
# without re-running the merge cell first will presumably fail at reset_index - confirm.
full_df = full_df.reset_index(level=[0, 1]).drop(columns='level_1')

# Strip the timezone and round the timestamps down to whole minutes
naive_timestamps = full_df['Date Created'].dt.tz_localize(None)
full_df['Date Created'] = naive_timestamps.dt.floor('Min')

# Output file is named after the current local date and time
today = datetime.now().strftime("%d.%m.%Y_%H-%M")
file_name = 'tweets ' + today + '.xlsx'
full_df.to_excel(file_name)

In [8]:
# # var. 2 - multi-page search via tw.Cursor (commented out, kept for reference)
# def get_tweets(list_of_tweets, keyword, num_of_tweets, data_until):
#     for tweet in tw.Cursor(api.search_tweets, q=keyword+' -filter:retweets', until=data_until, lang='en').items(num_of_tweets):
#         dict_ = {
#                  'Keywords': keyword,
#                  'User Name': tweet.user.name,
#                  'Screen Name': tweet.user.screen_name,
#                  'Tweet Created at': tweet.created_at,
#                  'Tweet Text': tweet.text,
#                  'Location': tweet.user.location,
#                  'Likes': tweet.favorite_count,
#                  'Retweets': tweet.retweet_count
#                  }
#         list_of_tweets.append(dict_)
#     return list_of_tweets