<center>

# Collecting Twitter Data Related to <br>
# Tubbs Fire (October 8-31, 2017)

</center>

In [None]:
import pandas as pd
import tweepy
import time
import datetime
import io
import os

In [None]:
#Mount your Google Drive to Colab

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## 1. Authentication

In [None]:

def getAuthentication(bearer_token):
    """Create a tweepy client authenticated with an app bearer token.

    The bearer token is listed under the Authentication Tokens section of the
    Keys and Tokens tab of the app in the Twitter Developer Portal.

    Args:
        bearer_token: Bearer token string of the Twitter app.

    Returns:
        A ``tweepy.Client`` built with ``wait_on_rate_limit=True``.
    """
    print('\nObteniendo autenticación...')

    # App-only authentication needs nothing but the bearer token.
    return tweepy.Client(bearer_token=bearer_token, wait_on_rate_limit=True)


## 2. Search Historical Tweets

In [None]:
def searchHistoricalTweets(client, query, start_time, end_time, max_results, limit):
    """Run a full-archive search and return the raw response pages.

    The full-archive search endpoint returns the complete history of public
    Tweets matching a search query, since the first Tweet was created on
    March 26, 2006. By default the most recent Tweets are returned first.

    The requested fields cover what ``processTweets`` reads from each tweet,
    user and place. The tweet ``source`` field is not requested here.

    Args:
        client: Client object exposing ``search_all_tweets``.
        query: Search query string.
        start_time: Start of the search window, passed through to the endpoint.
        end_time: End of the search window, passed through to the endpoint.
        max_results: Number of results requested per page.
        limit: Maximum number of pages fetched by the paginator.

    Returns:
        List with one raw response object per page, in the order received.
    """
    print(f'\nBuscando tweets: {query}')

    response_lst = []

    pages = tweepy.Paginator(
        client.search_all_tweets,
        query,
        start_time=start_time,
        end_time=end_time,
        expansions='geo.place_id,author_id',
        place_fields=['full_name', 'id', 'country', 'country_code', 'geo', 'name', 'place_type'],
        # 'geo' is requested explicitly because processTweets reads tweet.geo.
        tweet_fields=['author_id', 'created_at', 'geo', 'lang', 'possibly_sensitive', 'public_metrics'],
        # 'protected', 'profile_image_url' and 'location' are read by
        # processTweets; without them those CSV columns are always empty.
        user_fields=['id', 'username', 'description', 'verified', 'protected',
                     'profile_image_url', 'location', 'public_metrics'],
        max_results=max_results,
        limit=limit,
    )

    for response in pages:
        # Response metadata
        print(f'{response.meta["result_count"]} tweets encontrados...')

        # Pause 2 seconds between page requests
        time.sleep(2)

        # Keep the raw response so errors can be inspected later
        response_lst.append(response)

    return response_lst

## 3. Process Tweets

In [None]:
def processTweets(response_lst, ruta, filename_save):
    """Flatten search responses into one row per tweet and save them as CSV.

    Users and places found in each response's ``includes`` are indexed by id
    and accumulated across responses, then joined onto every tweet through
    ``tweet.author_id`` and ``tweet.geo['place_id']``. Values that are not
    available are stored as ``None``.

    Args:
        response_lst: Iterable of response objects exposing ``includes`` (a
            mapping that may hold ``'users'`` and ``'places'``) and ``data``
            (a list of tweets, or ``None``).
        ruta: Existing directory where the CSV file is written.
        filename_save: File name prefix; a timestamp and ``.csv`` are appended.

    Returns:
        None. The result is the CSV file written into ``ruta``.
    """
    print(f'\nProcesando respuesta...')

    result = []
    user_dict = {}
    place_dict = {}

    for response in response_lst:
        includes = response.includes or {}

        # Index the users by id, keeping only the attributes exported below
        for user in includes.get('users', []):
            user_metrics = user.public_metrics or {}
            user_dict[user.id] = {
                'user_id': user.id,
                'user_username': user.username,
                'user_verified': user.verified,
                'user_protected': user.protected,
                'user_description': user.description,
                'user_profile_image_url': user.profile_image_url,
                'user_location': user.location,
                'user_followers_count': user_metrics.get('followers_count'),
                'user_friends_count': user_metrics.get('following_count'),
                'user_tweet_count': user_metrics.get('tweet_count'),
            }

        # Index the places by id, keeping only the attributes exported below
        for place in includes.get('places', []):
            place_dict[place.id] = {
                'place_id': place.id,
                'place_name': place.name,
                'place_full_name': place.full_name,
                'place_country': place.country,
                'place_country_code': place.country_code,
                'place_type': place.place_type,
            }

        # A page without matches carries no data
        if response.data is None:
            continue

        for tweet in response.data:
            author_info = user_dict.get(tweet.author_id, {})

            # geo may be missing, or may hold coordinates without a place id
            place_id = (tweet.geo or {}).get('place_id')
            place_info = place_dict.get(place_id, {})

            tweet_metrics = tweet.public_metrics or {}

            # One flat dictionary per tweet
            result.append({
                'tweet_id': tweet.id,
                'tweet_text': tweet.text,
                'tweet_created_at': tweet.created_at,
                'tweet_source': tweet.source,
                'tweet_lang': tweet.lang,
                'tweet_possibly_sensitive': tweet.possibly_sensitive,
                'tweet_retweet_count': tweet_metrics.get('retweet_count'),
                'tweet_reply_count': tweet_metrics.get('reply_count'),
                'tweet_like_count': tweet_metrics.get('like_count'),
                'tweet_quote_count': tweet_metrics.get('quote_count'),
                'tweet_impression_count': tweet_metrics.get('impression_count'),
                'user_id': tweet.author_id,
                'user_username': author_info.get('user_username'),
                'user_verified': author_info.get('user_verified'),
                'user_protected': author_info.get('user_protected'),
                'user_description': author_info.get('user_description'),
                'user_profile_image_url': author_info.get('user_profile_image_url'),
                'user_location': author_info.get('user_location'),
                'user_followers_count': author_info.get('user_followers_count'),
                'user_friends_count': author_info.get('user_friends_count'),
                'user_tweet_count': author_info.get('user_tweet_count'),
                # Keep the id from the tweet even when the place details are
                # not present in the includes
                'place_id': place_info.get('place_id', place_id),
                'place_name': place_info.get('place_name'),
                'place_full_name': place_info.get('place_full_name'),
                'place_country': place_info.get('place_country'),
                'place_country_code': place_info.get('place_country_code'),
                'place_type': place_info.get('place_type'),
            })

    # Change the list of dictionaries into a dataframe
    df = pd.DataFrame(result)

    # Size of the dataframe
    print(f'Total tweets {df.shape}')

    # Export to CSV; the timestamp suffix keeps successive runs from
    # overwriting each other
    print(f'Guardando :)')
    date_time = datetime.datetime.now().strftime('%d%m%y_%H%M%S%f')
    final_name = f'{filename_save}_{date_time}.csv'
    df.to_csv(os.path.join(ruta, final_name), index=False)




### Tubbs Fire (October 8-31, 2017)
Period to download tweets: September 8 - November 30
Coordinates: 38.60895°N 122.62879°W
Most affected communities:
Santa Rosa, Napa, Sonoma, Calistoga, Petaluma
Impacted by smoke: Sonoma, Lake, Mendocino, Solano

The Tubbs Fire was a devastating wildfire that occurred in California in October 2017. Some of the most popular hashtags used during the Tubbs Fire on Twitter were:

- #TubbsFire                - #NapaFire
- #SantaRosaFire            - #CaliforniaWildfires
- #SonomaStrong             - #PrayForCalifornia
- #WineCountryFires         - #FireStorm
- #NorthBayFires            - #SFBayFires
- #SonomaFire               - #CaliforniaWildfires
- #NorCalFires              - #airquality
- #Smokeintheair            - #smokeyair


query = """-RT (#TubbsFire OR wildfire OR smoke OR #NapaFire OR #SantaRosaFire OR #WineCountryFires OR #PrayForCalifornia OR #CaliforniaWildfires OR #NorCalFires OR #airquality OR #smokeintheair OR #smokeyair OR fire OR #californiafires)(Nightmares OR Anxiety OR Depression OR flashbacks OR struggle OR fatigue OR sad OR PTSD OR stress OR (Panic attack) OR Fear OR (Survivor guilt)) point_radius:[-122.62879 38.60895 40km] lang:en -is:retweet"""


query2  = """-RT (#TubbsFire OR wildfire OR smoke OR #NapaFire OR #SantaRosaFire OR #WineCountryFires OR #PrayForCalifornia OR #CaliforniaWildfires OR #NorCalFires OR #airquality OR #smokeintheair OR #smokeyair OR fire OR #californiafires)(Nightmares OR Anxiety OR Depression OR flashbacks OR struggle OR fatigue OR sad OR PTSD OR stress OR (Panic attack) OR Fear OR (Survivor guilt))(Napa OR California OR (Santa Rosa) OR Sonoma OR Calistoga OR Petaluma OR Lake OR Mendocino OR Solano) lang:en -is:retweet"""

query3 = """-RT (#TubbsFire OR wildfire OR smoke OR #NapaFire OR #SantaRosaFire OR #WineCountryFires OR #PrayForCalifornia OR #CaliforniaWildfires OR #NorCalFires OR #airquality OR #smokeintheair OR #smokeyair OR fire OR #californiafires)(anger, angry, disappointment, give up, worry, fear, nervous, worried, fussy, restless, unrestrained, caring, intolerance, impatience, stress, anxious, anxiety, restlessness, uneasiness, frightened, (I am afraid), apprehension, terrible, nightmarish, (Im afraid), worry, restlessness, worthless, cant sleep, insomnia, sleepy, want to sleep, (I cant concentrate), (I cant get distracted), tension, tense, cant rest, (I cant forget an unpleasant memory), (unpleasant memories dont leave me), (I cant forget unpleasant sights), (unpleasant sights dont leave me), (cant forget unpleasant things), (I remember unpleasant memories), (I remember bad memories), (I remember unpleasant views), (I remember an unpleasant sight), melancholy, (feel bad), (feeling depressed), joyless, (not funny), uninterested, (not interested in anything), boring, tired, fatigue, no energy, no appetite, (do not want to eat), overeating, unable to move, motionless, feel heaviness, (want to die), (want to disappear), (terrible dream), horror, flashback, trembling, palpitation, suffocating, excitement, inquisitiveness, (I dont want to think), (I dont want to remember), (I want to forget), (I dont want to talk), (I dont want to go), (I dont want to see), (I cant remember), (I dont remember), alienation, seclusion, loneliness, solitude, isolation, without emotion, emotional paralysis, (somehow it will happen), (dont lose heart), (dont give up)) point_radius:[-122.62879 38.60895 40km] lang:en -is:retweet"""


# Run the code by days

In [None]:
# Placeholder value: replace 'TOKEN' with the app's bearer token before running.
# Avoid saving a real token together with the notebook.
bearer_token = 'TOKEN'

# First one-day search window, written as ISO 8601 UTC timestamps.
# NOTE(review): the original note gave the period as 9/8/2017 - 01-01-2018, but
# the window below starts on 2017-09-01 — confirm the intended start date.
start_time = '2017-09-01T00:00:00Z' # inclusive
end_time   = '2017-09-02T00:00:00Z' # exclusive

# Client built from the bearer token above.
client = getAuthentication(bearer_token)

In [None]:
#query   = """-RT ("I was diagnosed" OR "I've been diagnosed" OR "I have been diagnosed" OR "I'm diagnosed") (depression OR "depressive disorder") lang:en -is:retweet"""
# Active search query: excludes retweets, matches any of the wildfire, smoke,
# mental-health and respiratory/skin symptom terms listed in the parentheses,
# and is restricted to English tweets within 40 km of the Tubbs Fire
# coordinates. "Depression" is spelled out in full because a truncated
# keyword would not match the complete word.
query   = """-RT (#TubbsFire OR wildfire OR smoke OR #NapaFire OR #SantaRosaFire OR #WineCountryFires OR #PrayForCalifornia OR #CaliforniaWildfires OR #NorCalFires OR #airquality OR #smokeintheair OR #smokeyair OR fire OR Nightmares OR Anxiety OR Depression OR Panic attacks OR Survivor guilt OR #PTSD OR PTSD OR #stress OR stress OR #struggle OR struggle OR sad OR #sad OR #flashbacks OR #californiafires OR #evacuation OR #disaster OR #fatigue OR #smokeinhalation OR #respiratoryproblems OR  #asthma OR  #COPD OR  #lunghealth OR #burningeyes OR #sorethroat OR #headaches OR #fatigue OR #stress OR  coughing OR wheezing OR breath OR asthma OR  bronchitis OR  (Eye irritation) OR (nose irritation) OR congestion OR irritation OR Headaches OR fatigue OR itching OR irritation OR redness OR inflammation OR dryness OR flakiness OR discomfort OR rashes OR hives OR "I was diagnosed" OR "I've been diagnosed" OR "I have been diagnosed" OR "I'm diagnosed") point_radius:[-122.62879 38.60895 40km] lang:en -is:retweet"""

#query = """-RT (#TubbsFire OR wildfire OR smoke OR #NapaFire OR #SantaRosaFire OR #WineCountryFires OR #PrayForCalifornia OR #CaliforniaWildfires OR #NorCalFires OR #airquality OR #smokeintheair OR #smokeyair OR fire) (Nightmares OR flashbacks OR struggle OR fatigue OR sad OR PTSD OR stress OR Anxiety OR Depression OR (Panic attack) OR Fear OR Fear OR (Survivor guilt)) point_radius:[-123.103367 39.243283 40km] lang:en -is:retweet"""

#without geolocation
#query = """-RT (#MentalHealth OR Nightmares OR flashbacks OR struggle OR fatigue OR sad OR PTSD OR stress OR Anxiety OR Depression OR Panic attack OR Fear OR depressed OR anxious OR angry OR disappointment OR "give up" OR worry OR worried OR nervous OR fussy OR restless OR intolerance OR impatience OR restlessness OR #stress OR #struggle OR frightened OR #sad OR apprehension OR terrible OR "can't sleep" OR insomnia OR "want to sleep" OR "I can't concentrate" OR tension OR tense OR "can't rest" OR "can't forget" OR "bad memories" OR melancholy OR "feel sad" OR joyless OR "not funny" OR uninterested OR "not interested" OR boring OR tired OR fatigue OR "no energy" OR "no appetite" OR "do not want" OR overeating OR "unable to move" OR motionless OR "feel heaviness" OR "want to die" OR "want to disappear" OR "terrible dream" OR horror OR suffocating OR "I can't remember" OR "I don't remember" OR alienation OR loneliness OR solitude OR isolation) point_radius:[-122.62879 38.60895 40km] lang:en -is:retweet"""

# Results requested per page and maximum number of pages fetched per window.
# NOTE(review): a window with more than max_results * limit matching tweets
# would be cut off at that number — confirm this is acceptable.
max_results = 500
limit = 20

# Folder where the per-window CSV files are written.
ruta  = '/content/drive/MyDrive/Mental_Health_Wildfire/Twitter_Data/1.Collecting_data/ResultsTubbsFire'

# Pause between consecutive search windows, in seconds.
interval = 3*60

# Exclusive upper bound of the collection period.
limit_perio = '2018-01-01T00:00:00Z'

# All timestamps share the same fixed-width ISO 8601 format, so comparing them
# as strings orders them chronologically.
while start_time < limit_perio:

    # Search for tweets in the current window
    response_lst = searchHistoricalTweets(client, query, start_time, end_time, max_results, limit)

    # Save the window when any page returned tweets; an empty list of pages is
    # simply skipped
    if any(response.meta.get('result_count', 0) > 0 for response in response_lst):
        processTweets(response_lst, ruta, 'resultados')

    # Advance the window by one day (the trailing 'Z' is stripped for parsing
    # and appended again afterwards)
    start_time = end_time
    end_time   = (datetime.datetime.fromisoformat(end_time[:-1]) + datetime.timedelta(days=1)).isoformat() + 'Z'

    # Wait before the next search; no wait is needed after the last window
    if start_time < limit_perio:
        time.sleep(interval)
