## Follower Graph Generation
#### > This notebook fetches followers and creates dump files for persistence.
#### > Creates Nodes out of the dump file data and populates a NetworkX graph.
#### > Generates Cypher Query for Neo4J graph generation.
#### > Generates a GEXF file for importing into the Gephi graph visualization tool.


### Package import

In [1]:
import os
import time
from collections import defaultdict

import matplotlib.pyplot as plt
import tweepy
from tweepy import Cursor
from tweepy import api

from twitterClient import twitterClient

In [None]:
def dataFetch(screenName, volume, pauseSeconds=60):
    """
    Method to load follower ids of an account from Twitter.

    Pages are pulled through a tweepy ``Cursor`` over the client's
    ``followers_ids`` method and accumulated until at least ``volume`` entries
    have been collected or the cursor runs out of pages. Whole pages are kept,
    so the returned list may hold more than ``volume`` entries.

    @param screenName: User's Screen Name
    @param volume: Number of follower ids to collect before stopping
    @param pauseSeconds: Seconds to wait before requesting the next page
                         (defaults to 60, the pause used historically)
    @return: List with the contents of every fetched page, in fetch order

    """
    client = twitterClient()

    idsG = []
    for page in Cursor(client.followers_ids, screen_name=screenName).pages():
        idsG.extend(page)
        # Check the target first so we never sleep right before returning.
        if len(idsG) >= volume:
            break
        # Throttle between page requests (presumably to respect the API rate
        # limit -- confirm against the twitterClient configuration).
        time.sleep(pauseSeconds)
    return idsG

#### Create dump files and populate dictionary of followers

In [5]:
followers = defaultdict(list)
interestList = [
    "algore",
    "EPA",
    "digg",
    "ClimateGroup",
    "climatecamp",
    "ExtinctionR",
    "KHayhoe",
    "earthinstitute",
    "Strike4Youth",
    "GretaThunberg",
    "UNFCCC",
]

FETCH_VOLUME = 100000      # follower ids requested per account when building a dump
GRAPH_SAMPLE_SIZE = 1000   # follower ids per account that are loaded for the graph
REFRESH_DUMPS = False      # set to True to re-download even if a dump file exists

for account in interestList:
    dumpFile = account + ".txt"

    # Fetching is slow (dataFetch pauses between pages), so an existing dump
    # file is reused unless a refresh is explicitly requested.
    if REFRESH_DUMPS or not os.path.exists(dumpFile):
        # Fetch before opening the file so a failed download does not leave
        # behind an empty dump that would later be mistaken for a valid cache.
        fetchedIds = dataFetch(account, FETCH_VOLUME)
        with open(dumpFile, "w") as fileHandle:
            for followerId in fetchedIds:
                fileHandle.write(str(followerId) + "\n")

    # The dump file is the single source for the in-memory dictionary. Lines
    # are stripped so ids do not carry a trailing newline into node names, and
    # the list is rebuilt from scratch so re-running the cell cannot duplicate
    # entries.
    sampledIds = []
    with open(dumpFile, "r") as fileHandle:
        for line in fileHandle:
            followerId = line.strip()
            if not followerId:
                continue
            sampledIds.append(followerId)
            if len(sampledIds) == GRAPH_SAMPLE_SIZE:
                break
    followers[account] = sampledIds

### Generate Group/Center Nodes

In [6]:
# One "G_<screen name>" label per account of interest; these become the
# group (center) nodes of the graph.
cypherQueryGroups = ["G_" + str(account) for account in interestList]

### Generate User/Follower Nodes

In [8]:
# Merge every account's follower list into one set so a user who follows
# several accounts is represented once.
allUsers = set().union(*followers.values())

# One "U_<id>" label per distinct follower; these become the user nodes.
cypherQueryUsers = ["U_" + str(user) for user in allUsers]

10697
10697


### Generate Edges for the graph

In [9]:
# One (user label, group label) pair per follower relationship, in the order
# the accounts and their followers appear in the dictionary.
cypherQueryEdges = [
    ("U_" + str(user), "G_" + str(group))
    for group, listOfUser in followers.items()
    for user in listOfUser
]

In [1]:
# NOTE: This cell is intentionally disabled. The project moved from Neo4j to Gephi, so the Cypher dump below is no longer used.

# with open("cypherQuery.txt","w") as queryDump:
#     queryDump.writelines(cypherQueryGroups)
#     queryDump.writelines(cypherQueryUsers)
#     queryDump.write("CREATE ")
#     queryDump.write(",\n ".join(cypherQueryEdges))

### Populate NetworkX Graph

In [10]:
import networkx as nx
import time

st = time.time()
graph = nx.DiGraph()

# Group (center) nodes: every label is tagged with type="group".
graph.add_nodes_from(cypherQueryGroups, type="group")
# User (follower) nodes: every label is tagged with type="user".
graph.add_nodes_from(cypherQueryUsers, type="user")
# Directed edges; each pair is (user label, group label), so edges point
# from the follower to the account it follows.
graph.add_edges_from(cypherQueryEdges)

print("time - ", time.time() - st)

time -  0.07697701454162598


In [None]:
# nx.info() was deprecated in NetworkX 2.7 and removed in 3.0, so the same
# summary is printed from the graph's own counters, which works on any version.
nodeCount = graph.number_of_nodes()
edgeCount = graph.number_of_edges()

print("Name:", graph.name)
print("Type:", type(graph).__name__)
print("Number of nodes:", nodeCount)
print("Number of edges:", edgeCount)
if nodeCount:
    # In a directed graph every edge adds exactly one in-degree and one
    # out-degree, so both averages equal edges / nodes.
    averageDegree = edgeCount / nodeCount
    print("Average in degree: %8.4f" % averageDegree)
    print("Average out degree: %8.4f" % averageDegree)

### Export NetworkX graph for importing into Gephi

In [13]:
# Serialize the graph to GEXF so it can be opened in Gephi. The file name is
# kept exactly as it was (including its spelling) so the output path does not
# change.
gexfPath = "gehphi-graph.gexf"
nx.write_gexf(graph, gexfPath)

Name: 
Type: DiGraph
Number of nodes: 10708
Number of edges: 11000
Average in degree:   1.0273
Average out degree:   1.0273
