# Download Twitter Data

#### Import Dependencies

In [1]:
# Tweepy library to connect to the Twitter API and process the response
import tweepy
# JSON to serialise the downloaded friends data
import json
# Pandas to work with dataframes
import pandas as pd
# wget to retrieve profile images
import wget
# OS to manipulate file paths and names
import os
# Shutil to handle file duplicates
import shutil
# Time to handle API rate limiting
import time

# Read environment variables
from dotenv import load_dotenv

#### Load configuration file for environment variables

In [2]:
# Load the environment variables defined in the local 'configuration.env' file
load_dotenv('configuration.env')

True

#### Load Twitter authentication from environment variables

In [3]:
# Twitter App credentials, read from the environment in one pass.
# Each name is None when the corresponding variable is not set.
(consumer_key,
 consumer_secret,
 access_token,
 access_token_secret) = map(os.environ.get, (
    'TWITTER_CONSUMER_KEY',
    'TWITTER_CONSUMER_SECRET',
    'TWITTER_ACCESS_TOKEN',
    'TWITTER_ACCESS_TOKEN_SECRET',
))

#### Twitter Authentication

In [4]:
# Build an OAuth handler from the app's consumer key and secret
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
# Attach the access token and its secret to the handler
auth.set_access_token(access_token, access_token_secret)
# API client built on the handler; download_friends calls api.friends on it
api = tweepy.API(auth)

#### Set API Rate Limit

In [None]:
# Seconds download_friends sleeps after processing each friend
rate_limit = 5

#### Seed User

In [None]:
# Screen name of the seed user whose friends are downloaded first
screen_name = 'BIM360'

### Download Twitter Friends

Function for downloading a user's friends (the accounts they follow)

In [None]:
def download_friends(screen_name, max_friends=2):
    """Download profile data and profile images for a user's friends.

    Iterates over the accounts that ``screen_name`` follows, collects a
    dictionary of profile fields for each one, saves the profile image to
    ``logos/<screen_name>.png`` (unless that file already exists) and
    sleeps ``rate_limit`` seconds after each account.

    Args:
        screen_name: Twitter screen name whose friends are downloaded.
        max_friends: Maximum number of friends to process. Defaults to 2,
            the limit that used to be hard-coded; pass ``None`` to iterate
            over everything the cursor yields.

    Returns:
        A list with one dictionary per friend, holding the keys
        ``id_str``, ``screen_name``, ``location``, ``profile_image_url``,
        ``description``, ``expanded_url`` (empty string when the profile
        has no website), ``followers_count``, ``friends_count``,
        ``created_at`` (formatted string) and ``verified``.
    """
    friends = []

    print('Processing friends of ' + screen_name)

    # Make sure the image directory exists so downloads have a place to land
    os.makedirs('logos', exist_ok=True)

    cursor = tweepy.Cursor(api.friends, screen_name)
    users = cursor.items() if max_friends is None else cursor.items(max_friends)

    # Iterate through friends
    for user in users:

        print(user.screen_name)

        # Store the user's data
        data = {
            'id_str': user.id_str,
            'screen_name': user.screen_name,
            'location': user.location,
            'profile_image_url': user.profile_image_url,
            'description': user.description,
            # Placeholder for website url
            'expanded_url': '',
            'followers_count': user.followers_count,
            'friends_count': user.friends_count,
            # Format datetime object
            # NOTE(review): %y is a two-digit year; confirm %Y was not intended
            'created_at': user.created_at.strftime("%y-%m-%d, %H:%M:%S"),
            'verified': user.verified,
        }

        # Check for website URL and add to data. The value is stored as a
        # plain string (a stray trailing comma used to wrap it in a tuple),
        # and an empty 'urls' list is treated like a missing website.
        urls = user.entities.get('url', {}).get('urls', [])
        if urls:
            data['expanded_url'] = urls[0]['expanded_url']
        else:
            print(user.screen_name + ' has no associated url')

        # Save the Twitter profile image
        file_destination = os.path.join('logos', user.screen_name + '.png')
        # Check if file already exists
        if not os.path.exists(file_destination):
            try:
                # Remove the '_normal' tag from the URL to get a full sized image
                link = user.profile_image_url.replace('_normal', '')
                # Use wget to download and save the image file
                wget.download(link, file_destination)
            except Exception:
                # Best effort: a failed image download must not abort the run,
                # but Ctrl-C / SystemExit are no longer swallowed
                print('Profile image not available')
        else:
            print('Profile image already downloaded')

        # Append user to list of friends
        friends.append(data)

        # Wait for rate limiter
        time.sleep(rate_limit)

    print(screen_name + ' processing complete!')
    return friends

#### Download friends of seed user

In [None]:
# Fetch the seed user's friends; the cells below write and iterate this list
friends = download_friends(screen_name)

#### Write friends file

In [None]:
# Persist the seed user's friends as JSON in a file named after the screen name
with open('./friends_lists/{screen_name}.json'.format(screen_name=screen_name), 'w') as file:
    # Stream the serialised list straight into the open file handle
    json.dump(friends, file)

#### Download friends-of-friends

In [None]:
# Download the friends of every friend of the seed user and write each
# list to its own JSON file, named after that friend's screen name
for friend in friends:
    # Use a separate name so the seed user's `screen_name` is not overwritten
    friend_name = friend['screen_name']

    fof = download_friends(friend_name)

    # Write this friend's own list (`fof`); writing `friends` here would
    # store the seed user's list in every file
    with open('./friends_lists/{screen_name}.json'.format(screen_name=friend_name), 'w') as file:
        file.write(json.dumps(fof))