# Twitter Network Analysis

#### Import Dependencies

In [None]:
# NetworkX for network analysis
import networkx as nx
# OS to manipulate file paths and names
import os
# JSON to parse the JSON data files
import json
# Pandas to work with dataframes
import pandas as pd

### Construct Graph with NetworkX

#### Set directory to read friends lists from

In [None]:
# Directory holding the friends-list files, one file per user, each named
# "<node ID>.json".
friends_directory = "./01_friends_lists/"

#### Initialise empty directed graph

In [None]:
# Directed graph: edges are later added from each user to every account in
# that user's friends list.
G = nx.DiGraph()

#### Load friends data and construct graph

In [None]:
def construct_graph(network_graph, file_directory):
    """Populate a graph from a directory of friends-list files.

    Each file in ``file_directory`` describes one user: the file name, minus a
    trailing ``.json``, is the user's node ID, and every non-blank line of the
    file is a JSON array of that user's friends. A node is added for the user
    and for each friend, plus a directed edge from the user to the friend.

    Args:
        network_graph: Graph to populate in place. It must support ``in``,
            ``add_node``, ``add_edge`` and ``has_edge`` (e.g. ``nx.DiGraph``).
        file_directory: Directory containing the friends-list files. Every
            entry in the directory is read as a friends-list file.

    Returns:
        None. A summary of files read and nodes/edges added is printed.

    Raises:
        OSError: If the directory or one of its entries cannot be read.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    graph = network_graph
    suffix = ".json"
    filecount = 0
    nodes_added = 0
    edges_added = 0

    # Loop through files of friends lists
    for filename in os.listdir(file_directory):
        filecount += 1

        # Read from the directory that was passed in, not a notebook global
        path = os.path.join(file_directory, filename)
        with open(path, encoding="utf-8") as f:
            # Node ID is the file name without its extension; only a trailing
            # ".json" is removed so IDs containing that text stay intact
            if filename.endswith(suffix):
                user_id = filename[:-len(suffix)]
            else:
                user_id = filename

            if user_id not in graph:
                graph.add_node(user_id)
                nodes_added += 1

            for line in f:
                # A trailing or stray blank line is not a friends list
                if not line.strip():
                    continue

                # NOTE(review): friend IDs are used exactly as decoded from
                # JSON, while user_id is always a str; if the files store
                # numeric IDs the two will never match - confirm data format.
                for friend in json.loads(line):
                    if friend not in graph:
                        graph.add_node(friend)
                        nodes_added += 1

                    # Count an edge only the first time it is seen, so the
                    # summary matches what was actually added to the graph
                    if not graph.has_edge(user_id, friend):
                        graph.add_edge(user_id, friend)
                        edges_added += 1

    print("---------SUMMARY---------")
    print("Friends Files = " + str(filecount))
    print("Nodes = " + str(nodes_added))
    print("Edges = " + str(edges_added))

#### Construct Graph

In [None]:
# Populate G in place from the friends-list files; a summary of the files,
# nodes and edges processed is printed.
construct_graph(G, friends_directory)

### Add Twitter User Details As Network Node Attributes

#### Set directory to read user details from

In [None]:
# Directory holding the user-detail files, each named "<node ID>.json".
users_directory = "./02_user_details/"

#### Load Twitter user details and add attributes to nodes

In [None]:
def load_node_attributes(network_graph, file_directory):
    """Copy Twitter user details from JSON files onto existing graph nodes.

    Each file in ``file_directory`` is named ``<node ID>.json`` and every
    non-blank line of it is a JSON object of user details. The fields listed
    below are written as attributes of the node with that ID.

    Args:
        network_graph: Graph whose ``nodes[<node ID>]`` mapping is updated in
            place (e.g. an ``nx.DiGraph``).
        file_directory: Directory containing the user-detail files. Every
            entry in the directory is read as a user-detail file.

    Returns:
        None. A summary of files read and nodes updated is printed.

    Raises:
        KeyError: If a file names a node that is not in the graph, or a user
            record lacks one of the expected fields.
        OSError: If the directory or one of its entries cannot be read.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    suffix = ".json"
    # Text fields are passed through str(), so a JSON null is stored as the
    # string "None" rather than as a None value
    text_fields = (
        "screen_name",
        "name",
        "location",
        "profile_image_url",
        "description",
        "expanded_url",
    )
    # Counts, timestamps and flags keep the type they have in the JSON
    raw_fields = (
        "followers_count",
        "friends_count",
        "statuses_count",
        "created_at",
        "protected",
        "verified",
    )
    filecount = 0
    nodecount = 0

    # Loop through files of user details
    for filename in os.listdir(file_directory):
        # Node ID is the file name without its extension; only a trailing
        # ".json" is removed so IDs containing that text stay intact
        if filename.endswith(suffix):
            node_ID = filename[:-len(suffix)]
        else:
            node_ID = filename

        # Count files processed
        filecount += 1

        # Read from the directory that was passed in, not a notebook global
        path = os.path.join(file_directory, filename)
        with open(path, encoding="utf-8") as f:
            for line in f:
                # A trailing or stray blank line is not a user record
                if not line.strip():
                    continue

                attributes = json.loads(line)

                # Raises KeyError when the node is absent from the graph
                current_node = network_graph.nodes[node_ID]
                for field in text_fields:
                    current_node[field] = str(attributes[field])
                for field in raw_fields:
                    current_node[field] = attributes[field]

                # Count nodes updated
                nodecount += 1

    print("---------SUMMARY---------")
    print("User Files Processed = " + str(filecount))
    print("Nodes Updated = " + str(nodecount))

#### Add Node Attributes

In [None]:
# Copy the user details onto the matching nodes of G; a summary of the files
# processed and nodes updated is printed.
load_node_attributes(G, users_directory)

### Save Node Attributes to JSON

#### Add nodes to dataframe

In [None]:
# One row per node (indexed by node ID), one column per node attribute
df = pd.DataFrame.from_dict(dict(G.nodes(data=True)), orient='index')
# Number of non-null values in each attribute column
df.count()

#### Check dataframe contents

In [None]:
# Preview the first rows of the node-attribute dataframe
df.head()

#### Save dataframe to JSON

In [None]:
# Serialise the node-attribute dataframe to a JSON file
df.to_json('./05_network_files/AEC_Innovation_Network.json')

### Save Network and Attribute Data

#### Export network in GraphML format for Gephi (includes attributes)

In [None]:
# The file is opened in binary mode and the handle is passed to NetworkX,
# which writes the GraphML document to it
with open('./05_network_files/AEC_Innovation_Network.graphml', 'wb') as ofile:
    nx.write_graphml(G, ofile)

#### Export network in Pajek (.net) format for Orange3 (excludes non-string attributes)

In [None]:
# The file is opened in binary mode and the handle is passed to NetworkX,
# which writes the graph in Pajek format to it
with open('./05_network_files/AEC_Innovation_Network.net', 'wb') as ofile:
    nx.write_pajek(G, ofile)