# Convert Tweets in MongoDB to Gephi

## Install Python libraries

You may need to restart your Jupyter Notebook kernel after installing these libraries.

In [None]:
!pip install pymongo

In [None]:
!pip install pymongo[srv]

In [None]:
!pip install dnspython

In [None]:
!pip install networkx

## Import Python libraries

In [None]:
import pymongo
from pymongo import MongoClient
import configparser
import networkx as nx

##  Load the Authorization Info

In [None]:
# Read the MongoDB connection string from the [mymongo] section of config.ini.
config = configparser.ConfigParser()

# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed, so an empty result means config.ini was not loaded.
# Fail here with a clear message instead of a later KeyError on 'mymongo'.
if not config.read('config.ini'):
    raise FileNotFoundError(
        "config.ini was not found (or is not readable) in the current working directory"
    )

# NOTE(review): the default ConfigParser applies '%' interpolation to values,
# so a literal '%' in the connection string (e.g. a URL-encoded password)
# must be written as '%%' in config.ini -- confirm against your file.
mongod_connect = config['mymongo']['connection']

## Connect to the MongoDB Cluster

In [None]:
# Open a client with the connection string loaded from config.ini.
client = MongoClient(mongod_connect)

# Select the "lab2" database and its "tweet_collection" collection by name.
db = client["lab2"]
tweet_collection = db["tweet_collection"]

# Optional: uncomment to enforce one stored document per tweet id.
# tweet_collection.create_index([("id", pymongo.ASCENDING)], unique=True)

## The hashtag network

Undirected network: hashtags that appear together in a single tweet are connected to each other.
The weight of an edge is the number of times that connection is formed across the entire data set.

In [None]:
# Empty undirected graph; the loop further down adds one weighted edge per
# pair of hashtags that appear in the same tweet.
hashtag_graph = nx.Graph() 

## The user to mentioned user network

Directed network: each connection goes from a tweet's author to one of the users mentioned in that tweet.
The weight of an edge is the number of times that connection is formed across the entire data set.

In [None]:
# Empty directed graph; the loop further down adds weighted edges pointing
# from a tweet's author to each user mentioned in that tweet.
user_to_mention_graph = nx.DiGraph()

## Create networks

In [None]:
# find() is called without a filter, so the cursor is not restricted to any
# subset of tweet_collection.
tweet_cursor = tweet_collection.find()

Define the number of tweets you want to process.

In [None]:
number_of_tweets = 1000  # default is 1000; change this value to process more or fewer tweets

In [None]:
# Local import keeps this cell self-contained.
from itertools import combinations

# Walk the first `number_of_tweets` documents and update both networks.
# The two networks are handled independently: a tweet whose hashtag data is
# malformed can still contribute mention edges, and vice versa.
# For each network all fields are extracted *before* the graph is touched, so
# a malformed tweet never leaves a partially-applied update behind.
for document in tweet_cursor[0:number_of_tweets]:

    # --- hashtag network (undirected) ---
    try:
        # dict.fromkeys() removes repeated hashtags within one tweet while
        # keeping first-seen order, so a repeated hashtag cannot inflate the
        # weight of its pairs.
        hashtag_texts = list(dict.fromkeys(
            hashtag["text"] for hashtag in document["entities"]["hashtags"]
        ))
    except (KeyError, TypeError) as error:
        # Report without re-indexing the fields that just failed.
        print('wrong in adding hashtags')
        print(f"tweet id {document.get('id')}: {error!r}")
    else:
        # Each unordered pair is visited exactly once, so add a full 1.0 per
        # co-occurrence (equivalent to the two 0.5 half-steps of visiting
        # both orderings of the pair).
        for hashtag1_text, hashtag2_text in combinations(hashtag_texts, 2):
            if hashtag_graph.has_edge(hashtag1_text, hashtag2_text):
                hashtag_graph[hashtag1_text][hashtag2_text]['weight'] += 1.0
            else:
                hashtag_graph.add_edge(hashtag1_text, hashtag2_text, weight=1.0)

    # --- user to mentioned user network (directed) ---
    try:
        mentioned_users = [
            mentioned_user["screen_name"]
            for mentioned_user in document["entities"]["user_mentions"]
        ]
        # The author is only needed (and only looked up) when the tweet
        # actually mentions someone.
        ego_user = document["user"]["screen_name"] if mentioned_users else None
    except (KeyError, TypeError) as error:
        print('wrong in adding users')
        print(f"tweet id {document.get('id')}: {error!r}")
    else:
        # One increment per mention; edge direction is author -> mentioned user.
        for actor_user in mentioned_users:
            if user_to_mention_graph.has_edge(ego_user, actor_user):
                user_to_mention_graph[ego_user][actor_user]['weight'] += 1.0
            else:
                user_to_mention_graph.add_edge(ego_user, actor_user, weight=1.0)

In [None]:
# Export both networks as GEXF files (the format Gephi opens), hashtag network first.
gexf_exports = (
    (hashtag_graph, "hashtag_graph.gexf"),
    (user_to_mention_graph, "user_to_mention_graph.gexf"),
)
for graph, output_path in gexf_exports:
    nx.write_gexf(graph, output_path)

In [None]:
# Report the size of each network: node count first, then edge count.
hashtag_node_count = hashtag_graph.number_of_nodes()
hashtag_edge_count = hashtag_graph.number_of_edges()
print('number of nodes in hashtag network:', hashtag_node_count)
print('number of edges in hashtag network:', hashtag_edge_count)

mention_node_count = user_to_mention_graph.number_of_nodes()
mention_edge_count = user_to_mention_graph.number_of_edges()
print('number of nodes in user to mention network:', mention_node_count)
print('number of edges in user to mention network:', mention_edge_count)